Repository: liangjiandeng/DLPan-Toolbox
Branch: main
Commit: a34f884af889
Files: 1396
Total size: 5.7 MB

Directory structure:
gitextract_54ils51p/

├── .gitignore
├── 01-DL-toolbox(Pytorch)/
│   ├── LICENSE
│   ├── UDL/
│   │   ├── AutoDL/
│   │   │   ├── __init__.py
│   │   │   └── trainer.py
│   │   ├── Basis/
│   │   │   ├── auxiliary/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── base.py
│   │   │   │   ├── fp16_utils.py
│   │   │   │   ├── torchstat/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── __main__.py
│   │   │   │   │   ├── compute_flops.py
│   │   │   │   │   ├── compute_madd.py
│   │   │   │   │   ├── compute_memory.py
│   │   │   │   │   ├── model_hook.py
│   │   │   │   │   ├── reporter.py
│   │   │   │   │   ├── stat_tree.py
│   │   │   │   │   └── statistics.py
│   │   │   │   └── utils.py
│   │   │   ├── cal_ssim.py
│   │   │   ├── config.py
│   │   │   ├── criterion_metrics.py
│   │   │   ├── dist_utils.py
│   │   │   ├── kill_dist.sh
│   │   │   ├── launch.py
│   │   │   ├── logger.py
│   │   │   ├── metrics.py
│   │   │   ├── optim.py
│   │   │   ├── option.py
│   │   │   ├── postprocess.py
│   │   │   ├── python_sub_class.py
│   │   │   ├── slurm_train.sh
│   │   │   ├── snmn_d.sh
│   │   │   └── variance_sacling_initializer.py
│   │   ├── Data/
│   │   │   └── pansharpening/
│   │   │       ├── test_data/
│   │   │       │   └── readme-test.txt
│   │   │       ├── training_data/
│   │   │       │   └── readme-test.txt
│   │   │       └── validation_data/
│   │   │           └── readme-test.txt
│   │   ├── mmcv/
│   │   │   ├── CITATION.cff
│   │   │   ├── CONTRIBUTING.md
│   │   │   ├── Dockerfile
│   │   │   ├── Jenkinsfile
│   │   │   ├── LICENSE
│   │   │   ├── LICENSES.md
│   │   │   ├── MANIFEST.in
│   │   │   ├── README_zh-CN.md
│   │   │   ├── TERMINOLOGY.md
│   │   │   ├── docs/
│   │   │   │   ├── en/
│   │   │   │   │   ├── Makefile
│   │   │   │   │   ├── _static/
│   │   │   │   │   │   └── css/
│   │   │   │   │   │       └── readthedocs.css
│   │   │   │   │   ├── api.rst
│   │   │   │   │   ├── community/
│   │   │   │   │   │   └── pr.md
│   │   │   │   │   ├── compatibility.md
│   │   │   │   │   ├── conf.py
│   │   │   │   │   ├── deployment/
│   │   │   │   │   │   ├── mmcv_ops_definition.md
│   │   │   │   │   │   ├── onnx.md
│   │   │   │   │   │   ├── onnxruntime_custom_ops.md
│   │   │   │   │   │   ├── onnxruntime_op.md
│   │   │   │   │   │   ├── tensorrt_custom_ops.md
│   │   │   │   │   │   └── tensorrt_plugin.md
│   │   │   │   │   ├── faq.md
│   │   │   │   │   ├── get_started/
│   │   │   │   │   │   ├── build.md
│   │   │   │   │   │   ├── installation.md
│   │   │   │   │   │   ├── introduction.md
│   │   │   │   │   │   └── previous_versions.md
│   │   │   │   │   ├── index.rst
│   │   │   │   │   ├── make.bat
│   │   │   │   │   └── understand_mmcv/
│   │   │   │   │       ├── cnn.md
│   │   │   │   │       ├── config.md
│   │   │   │   │       ├── data_process.md
│   │   │   │   │       ├── io.md
│   │   │   │   │       ├── ops.md
│   │   │   │   │       ├── registry.md
│   │   │   │   │       ├── runner.md
│   │   │   │   │       ├── utils.md
│   │   │   │   │       └── visualization.md
│   │   │   │   └── zh_cn/
│   │   │   │       ├── Makefile
│   │   │   │       ├── _static/
│   │   │   │       │   └── css/
│   │   │   │       │       └── readthedocs.css
│   │   │   │       ├── api.rst
│   │   │   │       ├── community/
│   │   │   │       │   ├── contributing.md
│   │   │   │       │   └── pr.md
│   │   │   │       ├── compatibility.md
│   │   │   │       ├── conf.py
│   │   │   │       ├── deployment/
│   │   │   │       │   ├── onnx.md
│   │   │   │       │   ├── onnxruntime_custom_ops.md
│   │   │   │       │   ├── onnxruntime_op.md
│   │   │   │       │   ├── tensorrt_custom_ops.md
│   │   │   │       │   └── tensorrt_plugin.md
│   │   │   │       ├── faq.md
│   │   │   │       ├── get_started/
│   │   │   │       │   ├── build.md
│   │   │   │       │   ├── installation.md
│   │   │   │       │   ├── introduction.md
│   │   │   │       │   └── previous_versions.md
│   │   │   │       ├── index.rst
│   │   │   │       ├── make.bat
│   │   │   │       └── understand_mmcv/
│   │   │   │           ├── cnn.md
│   │   │   │           ├── config.md
│   │   │   │           ├── data_process.md
│   │   │   │           ├── io.md
│   │   │   │           ├── ops.md
│   │   │   │           ├── registry.md
│   │   │   │           ├── runner.md
│   │   │   │           ├── utils.md
│   │   │   │           └── visualization.md
│   │   │   ├── examples/
│   │   │   │   └── train.py
│   │   │   ├── mmcv/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── arraymisc/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── quantization.py
│   │   │   │   ├── cnn/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── alexnet.py
│   │   │   │   │   ├── bricks/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   ├── activation.py
│   │   │   │   │   │   ├── context_block.py
│   │   │   │   │   │   ├── conv.py
│   │   │   │   │   │   ├── conv2d_adaptive_padding.py
│   │   │   │   │   │   ├── conv_module.py
│   │   │   │   │   │   ├── conv_ws.py
│   │   │   │   │   │   ├── depthwise_separable_conv_module.py
│   │   │   │   │   │   ├── drop.py
│   │   │   │   │   │   ├── generalized_attention.py
│   │   │   │   │   │   ├── hsigmoid.py
│   │   │   │   │   │   ├── hswish.py
│   │   │   │   │   │   ├── non_local.py
│   │   │   │   │   │   ├── norm.py
│   │   │   │   │   │   ├── padding.py
│   │   │   │   │   │   ├── plugin.py
│   │   │   │   │   │   ├── registry.py
│   │   │   │   │   │   ├── scale.py
│   │   │   │   │   │   ├── swish.py
│   │   │   │   │   │   ├── transformer.py
│   │   │   │   │   │   ├── upsample.py
│   │   │   │   │   │   └── wrappers.py
│   │   │   │   │   ├── builder.py
│   │   │   │   │   ├── resnet.py
│   │   │   │   │   ├── utils/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   ├── flops_counter.py
│   │   │   │   │   │   ├── fuse_conv_bn.py
│   │   │   │   │   │   ├── sync_bn.py
│   │   │   │   │   │   └── weight_init.py
│   │   │   │   │   └── vgg.py
│   │   │   │   ├── engine/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── test.py
│   │   │   │   ├── fileio/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── file_client.py
│   │   │   │   │   ├── handlers/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   ├── base.py
│   │   │   │   │   │   ├── json_handler.py
│   │   │   │   │   │   ├── pickle_handler.py
│   │   │   │   │   │   └── yaml_handler.py
│   │   │   │   │   ├── io.py
│   │   │   │   │   └── parse.py
│   │   │   │   ├── image/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── colorspace.py
│   │   │   │   │   ├── geometric.py
│   │   │   │   │   ├── io.py
│   │   │   │   │   ├── misc.py
│   │   │   │   │   └── photometric.py
│   │   │   │   ├── onnx/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── info.py
│   │   │   │   │   ├── onnx_utils/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── symbolic_helper.py
│   │   │   │   │   └── symbolic.py
│   │   │   │   ├── ops/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── active_rotated_filter.py
│   │   │   │   │   ├── assign_score_withk.py
│   │   │   │   │   ├── ball_query.py
│   │   │   │   │   ├── bbox.py
│   │   │   │   │   ├── border_align.py
│   │   │   │   │   ├── box_iou_rotated.py
│   │   │   │   │   ├── carafe.py
│   │   │   │   │   ├── cc_attention.py
│   │   │   │   │   ├── contour_expand.py
│   │   │   │   │   ├── convex_iou.py
│   │   │   │   │   ├── corner_pool.py
│   │   │   │   │   ├── correlation.py
│   │   │   │   │   ├── csrc/
│   │   │   │   │   │   ├── README.md
│   │   │   │   │   │   ├── common/
│   │   │   │   │   │   │   ├── box_iou_rotated_utils.hpp
│   │   │   │   │   │   │   ├── cuda/
│   │   │   │   │   │   │   │   ├── active_rotated_filter_cuda_kernel.cuh
│   │   │   │   │   │   │   │   ├── assign_score_withk_cuda_kernel.cuh
│   │   │   │   │   │   │   │   ├── ball_query_cuda_kernel.cuh
│   │   │   │   │   │   │   │   ├── bbox_overlaps_cuda_kernel.cuh
│   │   │   │   │   │   │   │   ├── border_align_cuda_kernel.cuh
│   │   │   │   │   │   │   │   ├── box_iou_rotated_cuda.cuh
│   │   │   │   │   │   │   │   ├── carafe_cuda_kernel.cuh
│   │   │   │   │   │   │   │   ├── carafe_naive_cuda_kernel.cuh
│   │   │   │   │   │   │   │   ├── common_cuda_helper.hpp
│   │   │   │   │   │   │   │   ├── convex_iou_cuda_kernel.cuh
│   │   │   │   │   │   │   │   ├── correlation_cuda.cuh
│   │   │   │   │   │   │   │   ├── deform_conv_cuda_kernel.cuh
│   │   │   │   │   │   │   │   ├── deform_roi_pool_cuda_kernel.cuh
│   │   │   │   │   │   │   │   ├── furthest_point_sample_cuda_kernel.cuh
│   │   │   │   │   │   │   │   ├── gather_points_cuda_kernel.cuh
│   │   │   │   │   │   │   │   ├── group_points_cuda_kernel.cuh
│   │   │   │   │   │   │   │   ├── iou3d_cuda_kernel.cuh
│   │   │   │   │   │   │   │   ├── knn_cuda_kernel.cuh
│   │   │   │   │   │   │   │   ├── masked_conv2d_cuda_kernel.cuh
│   │   │   │   │   │   │   │   ├── min_area_polygons_cuda.cuh
│   │   │   │   │   │   │   │   ├── modulated_deform_conv_cuda_kernel.cuh
│   │   │   │   │   │   │   │   ├── ms_deform_attn_cuda_kernel.cuh
│   │   │   │   │   │   │   │   ├── nms_cuda_kernel.cuh
│   │   │   │   │   │   │   │   ├── nms_rotated_cuda.cuh
│   │   │   │   │   │   │   │   ├── parrots_cudawarpfunction.cuh
│   │   │   │   │   │   │   │   ├── points_in_boxes_cuda_kernel.cuh
│   │   │   │   │   │   │   │   ├── points_in_polygons_cuda_kernel.cuh
│   │   │   │   │   │   │   │   ├── psamask_cuda_kernel.cuh
│   │   │   │   │   │   │   │   ├── riroi_align_rotated_cuda_kernel.cuh
│   │   │   │   │   │   │   │   ├── roi_align_cuda_kernel.cuh
│   │   │   │   │   │   │   │   ├── roi_align_rotated_cuda_kernel.cuh
│   │   │   │   │   │   │   │   ├── roi_pool_cuda_kernel.cuh
│   │   │   │   │   │   │   │   ├── roiaware_pool3d_cuda_kernel.cuh
│   │   │   │   │   │   │   │   ├── roipoint_pool3d_cuda_kernel.cuh
│   │   │   │   │   │   │   │   ├── rotated_feature_align_cuda_kernel.cuh
│   │   │   │   │   │   │   │   ├── scatter_points_cuda_kernel.cuh
│   │   │   │   │   │   │   │   ├── sigmoid_focal_loss_cuda_kernel.cuh
│   │   │   │   │   │   │   │   ├── softmax_focal_loss_cuda_kernel.cuh
│   │   │   │   │   │   │   │   ├── sync_bn_cuda_kernel.cuh
│   │   │   │   │   │   │   │   ├── three_interpolate_cuda_kernel.cuh
│   │   │   │   │   │   │   │   ├── three_nn_cuda_kernel.cuh
│   │   │   │   │   │   │   │   ├── tin_shift_cuda_kernel.cuh
│   │   │   │   │   │   │   │   └── voxelization_cuda_kernel.cuh
│   │   │   │   │   │   │   ├── parrots_cpp_helper.hpp
│   │   │   │   │   │   │   ├── parrots_cuda_helper.hpp
│   │   │   │   │   │   │   ├── pytorch_cpp_helper.hpp
│   │   │   │   │   │   │   ├── pytorch_cuda_helper.hpp
│   │   │   │   │   │   │   └── pytorch_device_registry.hpp
│   │   │   │   │   │   ├── onnxruntime/
│   │   │   │   │   │   │   ├── corner_pool.h
│   │   │   │   │   │   │   ├── cpu/
│   │   │   │   │   │   │   │   ├── corner_pool.cpp
│   │   │   │   │   │   │   │   ├── deform_conv.cpp
│   │   │   │   │   │   │   │   ├── gridSample.cpp
│   │   │   │   │   │   │   │   ├── modulated_deform_conv.cpp
│   │   │   │   │   │   │   │   ├── nms.cpp
│   │   │   │   │   │   │   │   ├── onnxruntime_register.cpp
│   │   │   │   │   │   │   │   ├── reduce_ops.cpp
│   │   │   │   │   │   │   │   ├── roi_align.cpp
│   │   │   │   │   │   │   │   ├── roi_align_rotated.cpp
│   │   │   │   │   │   │   │   └── soft_nms.cpp
│   │   │   │   │   │   │   ├── deform_conv.h
│   │   │   │   │   │   │   ├── grid_sample.h
│   │   │   │   │   │   │   ├── modulated_deform_conv.h
│   │   │   │   │   │   │   ├── nms.h
│   │   │   │   │   │   │   ├── onnxruntime_register.h
│   │   │   │   │   │   │   ├── onnxruntime_session_options_config_keys.h
│   │   │   │   │   │   │   ├── ort_mmcv_utils.h
│   │   │   │   │   │   │   ├── reduce_ops.h
│   │   │   │   │   │   │   ├── roi_align.h
│   │   │   │   │   │   │   ├── roi_align_rotated.h
│   │   │   │   │   │   │   └── soft_nms.h
│   │   │   │   │   │   ├── parrots/
│   │   │   │   │   │   │   ├── active_rotated_filter.cpp
│   │   │   │   │   │   │   ├── active_rotated_filter_parrots.cpp
│   │   │   │   │   │   │   ├── active_rotated_filter_pytorch.h
│   │   │   │   │   │   │   ├── assign_score_withk.cpp
│   │   │   │   │   │   │   ├── assign_score_withk_parrots.cpp
│   │   │   │   │   │   │   ├── assign_score_withk_pytorch.h
│   │   │   │   │   │   │   ├── ball_query._parrots.cpp
│   │   │   │   │   │   │   ├── ball_query.cpp
│   │   │   │   │   │   │   ├── ball_query_pytorch.h
│   │   │   │   │   │   │   ├── bbox_overlaps.cpp
│   │   │   │   │   │   │   ├── bbox_overlaps_parrots.cpp
│   │   │   │   │   │   │   ├── bbox_overlaps_pytorch.h
│   │   │   │   │   │   │   ├── border_align.cpp
│   │   │   │   │   │   │   ├── border_align_parrots.cpp
│   │   │   │   │   │   │   ├── border_align_pytorch.h
│   │   │   │   │   │   │   ├── box_iou_rotated.cpp
│   │   │   │   │   │   │   ├── box_iou_rotated_parrots.cpp
│   │   │   │   │   │   │   ├── box_iou_rotated_pytorch.h
│   │   │   │   │   │   │   ├── carafe.cpp
│   │   │   │   │   │   │   ├── carafe_naive.cpp
│   │   │   │   │   │   │   ├── carafe_naive_parrots.cpp
│   │   │   │   │   │   │   ├── carafe_naive_pytorch.h
│   │   │   │   │   │   │   ├── carafe_parrots.cpp
│   │   │   │   │   │   │   ├── carafe_pytorch.h
│   │   │   │   │   │   │   ├── contour_expand.cpp
│   │   │   │   │   │   │   ├── contour_expand_parrots.cpp
│   │   │   │   │   │   │   ├── contour_expand_pytorch.h
│   │   │   │   │   │   │   ├── convex_iou.cpp
│   │   │   │   │   │   │   ├── convex_iou_parrots.cpp
│   │   │   │   │   │   │   ├── convex_iou_pytorch.h
│   │   │   │   │   │   │   ├── corner_pool.cpp
│   │   │   │   │   │   │   ├── corner_pool_parrots.cpp
│   │   │   │   │   │   │   ├── corner_pool_pytorch.h
│   │   │   │   │   │   │   ├── correlation.cpp
│   │   │   │   │   │   │   ├── correlation_parrots.cpp
│   │   │   │   │   │   │   ├── correlation_pytorch.h
│   │   │   │   │   │   │   ├── cudabind.cpp
│   │   │   │   │   │   │   ├── deform_conv.cpp
│   │   │   │   │   │   │   ├── deform_conv_parrots.cpp
│   │   │   │   │   │   │   ├── deform_conv_pytorch.h
│   │   │   │   │   │   │   ├── deform_roi_pool.cpp
│   │   │   │   │   │   │   ├── deform_roi_pool_parrots.cpp
│   │   │   │   │   │   │   ├── deform_roi_pool_pytorch.h
│   │   │   │   │   │   │   ├── focal_loss.cpp
│   │   │   │   │   │   │   ├── focal_loss_parrots.cpp
│   │   │   │   │   │   │   ├── focal_loss_pytorch.h
│   │   │   │   │   │   │   ├── furthest_point_sample.cpp
│   │   │   │   │   │   │   ├── furthest_point_sample_parrots.cpp
│   │   │   │   │   │   │   ├── furthest_point_sample_pytorch.h
│   │   │   │   │   │   │   ├── fused_bias_leakyrelu.cpp
│   │   │   │   │   │   │   ├── fused_bias_parrots.cpp
│   │   │   │   │   │   │   ├── gather_points.cpp
│   │   │   │   │   │   │   ├── gather_points_parrots.cpp
│   │   │   │   │   │   │   ├── gather_points_pytorch.h
│   │   │   │   │   │   │   ├── group_points.cpp
│   │   │   │   │   │   │   ├── group_points_parrots.cpp
│   │   │   │   │   │   │   ├── group_points_pytorch.h
│   │   │   │   │   │   │   ├── info.cpp
│   │   │   │   │   │   │   ├── iou3d.cpp
│   │   │   │   │   │   │   ├── iou3d_parrots.cpp
│   │   │   │   │   │   │   ├── iou3d_pytorch.h
│   │   │   │   │   │   │   ├── knn.cpp
│   │   │   │   │   │   │   ├── knn_parrots.cpp
│   │   │   │   │   │   │   ├── knn_pytorch.h
│   │   │   │   │   │   │   ├── masked_conv2d.cpp
│   │   │   │   │   │   │   ├── masked_conv2d_parrots.cpp
│   │   │   │   │   │   │   ├── masked_conv2d_pytorch.h
│   │   │   │   │   │   │   ├── min_area_polygons.cpp
│   │   │   │   │   │   │   ├── min_area_polygons_parrots.cpp
│   │   │   │   │   │   │   ├── min_area_polygons_pytorch.h
│   │   │   │   │   │   │   ├── modulated_deform_conv.cpp
│   │   │   │   │   │   │   ├── modulated_deform_conv_parrots.cpp
│   │   │   │   │   │   │   ├── modulated_deform_conv_pytorch.h
│   │   │   │   │   │   │   ├── ms_deform_attn.cpp
│   │   │   │   │   │   │   ├── ms_deform_attn_parrots.cpp
│   │   │   │   │   │   │   ├── nms.cpp
│   │   │   │   │   │   │   ├── nms_parrots.cpp
│   │   │   │   │   │   │   ├── nms_pytorch.h
│   │   │   │   │   │   │   ├── nms_rotated.cpp
│   │   │   │   │   │   │   ├── pixel_group.cpp
│   │   │   │   │   │   │   ├── pixel_group_parrots.cpp
│   │   │   │   │   │   │   ├── pixel_group_pytorch.h
│   │   │   │   │   │   │   ├── points_in_boxes.cpp
│   │   │   │   │   │   │   ├── points_in_boxes_parrots.cpp
│   │   │   │   │   │   │   ├── points_in_boxes_pytorch.h
│   │   │   │   │   │   │   ├── points_in_polygons.cpp
│   │   │   │   │   │   │   ├── points_in_polygons_parrots.cpp
│   │   │   │   │   │   │   ├── points_in_polygons_pytorch.h
│   │   │   │   │   │   │   ├── psamask.cpp
│   │   │   │   │   │   │   ├── psamask_parrots.cpp
│   │   │   │   │   │   │   ├── psamask_pytorch.h
│   │   │   │   │   │   │   ├── riroi_align_rotated.cpp
│   │   │   │   │   │   │   ├── riroi_align_rotated_parrots.cpp
│   │   │   │   │   │   │   ├── riroi_align_rotated_pytorch.h
│   │   │   │   │   │   │   ├── roi_align.cpp
│   │   │   │   │   │   │   ├── roi_align_parrots.cpp
│   │   │   │   │   │   │   ├── roi_align_pytorch.h
│   │   │   │   │   │   │   ├── roi_align_rotated.cpp
│   │   │   │   │   │   │   ├── roi_align_rotated_parrots.cpp
│   │   │   │   │   │   │   ├── roi_align_rotated_pytorch.h
│   │   │   │   │   │   │   ├── roi_pool.cpp
│   │   │   │   │   │   │   ├── roi_pool_parrots.cpp
│   │   │   │   │   │   │   ├── roi_pool_pytorch.h
│   │   │   │   │   │   │   ├── roiaware_pool3d.cpp
│   │   │   │   │   │   │   ├── roiaware_pool3d_parrots.cpp
│   │   │   │   │   │   │   ├── roiaware_pool3d_pytorch.h
│   │   │   │   │   │   │   ├── roipoint_pool3d.cpp
│   │   │   │   │   │   │   ├── roipoint_pool3d_parrots.cpp
│   │   │   │   │   │   │   ├── roipoint_pool3d_pytorch.h
│   │   │   │   │   │   │   ├── rotated_feature_align.cpp
│   │   │   │   │   │   │   ├── rotated_feature_align_parrots.cpp
│   │   │   │   │   │   │   ├── rotated_feature_align_pytorch.h
│   │   │   │   │   │   │   ├── sync_bn.cpp
│   │   │   │   │   │   │   ├── sync_bn_parrots.cpp
│   │   │   │   │   │   │   ├── sync_bn_pytorch.h
│   │   │   │   │   │   │   ├── three_interpolate.cpp
│   │   │   │   │   │   │   ├── three_interpolate_parrots.cpp
│   │   │   │   │   │   │   ├── three_interpolate_pytorch.h
│   │   │   │   │   │   │   ├── three_nn.cpp
│   │   │   │   │   │   │   ├── three_nn_parrots.cpp
│   │   │   │   │   │   │   ├── three_nn_pytorch.h
│   │   │   │   │   │   │   ├── tin_shift.cpp
│   │   │   │   │   │   │   ├── tin_shift_parrots.cpp
│   │   │   │   │   │   │   ├── tin_shift_pytorch.h
│   │   │   │   │   │   │   ├── upfirdn2d.cpp
│   │   │   │   │   │   │   ├── upfirdn2d_parrots.cpp
│   │   │   │   │   │   │   ├── voxelization.cpp
│   │   │   │   │   │   │   ├── voxelization_parrots.cpp
│   │   │   │   │   │   │   └── voxelization_pytorch.h
│   │   │   │   │   │   ├── pytorch/
│   │   │   │   │   │   │   ├── active_rotated_filter.cpp
│   │   │   │   │   │   │   ├── assign_score_withk.cpp
│   │   │   │   │   │   │   ├── ball_query.cpp
│   │   │   │   │   │   │   ├── bbox_overlaps.cpp
│   │   │   │   │   │   │   ├── border_align.cpp
│   │   │   │   │   │   │   ├── box_iou_rotated.cpp
│   │   │   │   │   │   │   ├── carafe.cpp
│   │   │   │   │   │   │   ├── carafe_naive.cpp
│   │   │   │   │   │   │   ├── contour_expand.cpp
│   │   │   │   │   │   │   ├── convex_iou.cpp
│   │   │   │   │   │   │   ├── corner_pool.cpp
│   │   │   │   │   │   │   ├── correlation.cpp
│   │   │   │   │   │   │   ├── cpu/
│   │   │   │   │   │   │   │   ├── active_rotated_filter.cpp
│   │   │   │   │   │   │   │   ├── box_iou_rotated.cpp
│   │   │   │   │   │   │   │   ├── deform_conv.cpp
│   │   │   │   │   │   │   │   ├── modulated_deform_conv.cpp
│   │   │   │   │   │   │   │   ├── nms.cpp
│   │   │   │   │   │   │   │   ├── nms_rotated.cpp
│   │   │   │   │   │   │   │   ├── pixel_group.cpp
│   │   │   │   │   │   │   │   ├── points_in_boxes.cpp
│   │   │   │   │   │   │   │   ├── psamask.cpp
│   │   │   │   │   │   │   │   ├── roi_align.cpp
│   │   │   │   │   │   │   │   ├── roi_align_rotated.cpp
│   │   │   │   │   │   │   │   └── voxelization.cpp
│   │   │   │   │   │   │   ├── cuda/
│   │   │   │   │   │   │   │   ├── active_rotated_filter_cuda.cu
│   │   │   │   │   │   │   │   ├── assign_score_withk_cuda.cu
│   │   │   │   │   │   │   │   ├── ball_query_cuda.cu
│   │   │   │   │   │   │   │   ├── bbox_overlaps_cuda.cu
│   │   │   │   │   │   │   │   ├── border_align_cuda.cu
│   │   │   │   │   │   │   │   ├── box_iou_rotated_cuda.cu
│   │   │   │   │   │   │   │   ├── carafe_cuda.cu
│   │   │   │   │   │   │   │   ├── carafe_naive_cuda.cu
│   │   │   │   │   │   │   │   ├── convex_iou.cu
│   │   │   │   │   │   │   │   ├── correlation_cuda.cu
│   │   │   │   │   │   │   │   ├── cudabind.cpp
│   │   │   │   │   │   │   │   ├── deform_conv_cuda.cu
│   │   │   │   │   │   │   │   ├── deform_roi_pool_cuda.cu
│   │   │   │   │   │   │   │   ├── focal_loss_cuda.cu
│   │   │   │   │   │   │   │   ├── furthest_point_sample_cuda.cu
│   │   │   │   │   │   │   │   ├── fused_bias_leakyrelu_cuda.cu
│   │   │   │   │   │   │   │   ├── gather_points_cuda.cu
│   │   │   │   │   │   │   │   ├── group_points_cuda.cu
│   │   │   │   │   │   │   │   ├── iou3d_cuda.cu
│   │   │   │   │   │   │   │   ├── knn_cuda.cu
│   │   │   │   │   │   │   │   ├── masked_conv2d_cuda.cu
│   │   │   │   │   │   │   │   ├── min_area_polygons.cu
│   │   │   │   │   │   │   │   ├── modulated_deform_conv_cuda.cu
│   │   │   │   │   │   │   │   ├── ms_deform_attn_cuda.cu
│   │   │   │   │   │   │   │   ├── nms_cuda.cu
│   │   │   │   │   │   │   │   ├── nms_rotated_cuda.cu
│   │   │   │   │   │   │   │   ├── points_in_boxes_cuda.cu
│   │   │   │   │   │   │   │   ├── points_in_polygons_cuda.cu
│   │   │   │   │   │   │   │   ├── psamask_cuda.cu
│   │   │   │   │   │   │   │   ├── riroi_align_rotated_cuda.cu
│   │   │   │   │   │   │   │   ├── roi_align_cuda.cu
│   │   │   │   │   │   │   │   ├── roi_align_rotated_cuda.cu
│   │   │   │   │   │   │   │   ├── roi_pool_cuda.cu
│   │   │   │   │   │   │   │   ├── roiaware_pool3d_cuda.cu
│   │   │   │   │   │   │   │   ├── roipoint_pool3d_cuda.cu
│   │   │   │   │   │   │   │   ├── rotated_feature_align_cuda.cu
│   │   │   │   │   │   │   │   ├── scatter_points_cuda.cu
│   │   │   │   │   │   │   │   ├── sync_bn_cuda.cu
│   │   │   │   │   │   │   │   ├── three_interpolate_cuda.cu
│   │   │   │   │   │   │   │   ├── three_nn_cuda.cu
│   │   │   │   │   │   │   │   ├── tin_shift_cuda.cu
│   │   │   │   │   │   │   │   ├── upfirdn2d_kernel.cu
│   │   │   │   │   │   │   │   └── voxelization_cuda.cu
│   │   │   │   │   │   │   ├── deform_conv.cpp
│   │   │   │   │   │   │   ├── deform_roi_pool.cpp
│   │   │   │   │   │   │   ├── focal_loss.cpp
│   │   │   │   │   │   │   ├── furthest_point_sample.cpp
│   │   │   │   │   │   │   ├── fused_bias_leakyrelu.cpp
│   │   │   │   │   │   │   ├── gather_points.cpp
│   │   │   │   │   │   │   ├── group_points.cpp
│   │   │   │   │   │   │   ├── info.cpp
│   │   │   │   │   │   │   ├── iou3d.cpp
│   │   │   │   │   │   │   ├── knn.cpp
│   │   │   │   │   │   │   ├── masked_conv2d.cpp
│   │   │   │   │   │   │   ├── min_area_polygons.cpp
│   │   │   │   │   │   │   ├── modulated_deform_conv.cpp
│   │   │   │   │   │   │   ├── ms_deform_attn.cpp
│   │   │   │   │   │   │   ├── nms.cpp
│   │   │   │   │   │   │   ├── nms_rotated.cpp
│   │   │   │   │   │   │   ├── pixel_group.cpp
│   │   │   │   │   │   │   ├── points_in_boxes.cpp
│   │   │   │   │   │   │   ├── points_in_polygons.cpp
│   │   │   │   │   │   │   ├── psamask.cpp
│   │   │   │   │   │   │   ├── pybind.cpp
│   │   │   │   │   │   │   ├── riroi_align_rotated.cpp
│   │   │   │   │   │   │   ├── roi_align.cpp
│   │   │   │   │   │   │   ├── roi_align_rotated.cpp
│   │   │   │   │   │   │   ├── roi_pool.cpp
│   │   │   │   │   │   │   ├── roiaware_pool3d.cpp
│   │   │   │   │   │   │   ├── roipoint_pool3d.cpp
│   │   │   │   │   │   │   ├── rotated_feature_align.cpp
│   │   │   │   │   │   │   ├── scatter_points.cpp
│   │   │   │   │   │   │   ├── sync_bn.cpp
│   │   │   │   │   │   │   ├── three_interpolate.cpp
│   │   │   │   │   │   │   ├── three_nn.cpp
│   │   │   │   │   │   │   ├── tin_shift.cpp
│   │   │   │   │   │   │   ├── upfirdn2d.cpp
│   │   │   │   │   │   │   └── voxelization.cpp
│   │   │   │   │   │   └── tensorrt/
│   │   │   │   │   │       ├── plugins/
│   │   │   │   │   │       │   ├── trt_corner_pool.cpp
│   │   │   │   │   │       │   ├── trt_corner_pool_kernel.cu
│   │   │   │   │   │       │   ├── trt_cuda_helper.cu
│   │   │   │   │   │       │   ├── trt_cummaxmin.cpp
│   │   │   │   │   │       │   ├── trt_cummaxmin_kernel.cu
│   │   │   │   │   │       │   ├── trt_deform_conv.cpp
│   │   │   │   │   │       │   ├── trt_deform_conv_kernel.cu
│   │   │   │   │   │       │   ├── trt_grid_sampler.cpp
│   │   │   │   │   │       │   ├── trt_grid_sampler_kernel.cu
│   │   │   │   │   │       │   ├── trt_instance_norm.cpp
│   │   │   │   │   │       │   ├── trt_modulated_deform_conv.cpp
│   │   │   │   │   │       │   ├── trt_modulated_deform_conv_kernel.cu
│   │   │   │   │   │       │   ├── trt_nms.cpp
│   │   │   │   │   │       │   ├── trt_nms_kernel.cu
│   │   │   │   │   │       │   ├── trt_plugin.cpp
│   │   │   │   │   │       │   ├── trt_roi_align.cpp
│   │   │   │   │   │       │   ├── trt_roi_align_kernel.cu
│   │   │   │   │   │       │   ├── trt_scatternd.cpp
│   │   │   │   │   │       │   └── trt_scatternd_kernel.cu
│   │   │   │   │   │       ├── trt_corner_pool.hpp
│   │   │   │   │   │       ├── trt_cuda_helper.cuh
│   │   │   │   │   │       ├── trt_cummaxmin.hpp
│   │   │   │   │   │       ├── trt_deform_conv.hpp
│   │   │   │   │   │       ├── trt_grid_sampler.hpp
│   │   │   │   │   │       ├── trt_instance_norm.hpp
│   │   │   │   │   │       ├── trt_modulated_deform_conv.hpp
│   │   │   │   │   │       ├── trt_nms.hpp
│   │   │   │   │   │       ├── trt_plugin.hpp
│   │   │   │   │   │       ├── trt_plugin_helper.hpp
│   │   │   │   │   │       ├── trt_roi_align.hpp
│   │   │   │   │   │       ├── trt_scatternd.hpp
│   │   │   │   │   │       └── trt_serialize.hpp
│   │   │   │   │   ├── deform_conv.py
│   │   │   │   │   ├── deform_roi_pool.py
│   │   │   │   │   ├── deprecated_wrappers.py
│   │   │   │   │   ├── focal_loss.py
│   │   │   │   │   ├── furthest_point_sample.py
│   │   │   │   │   ├── fused_bias_leakyrelu.py
│   │   │   │   │   ├── gather_points.py
│   │   │   │   │   ├── group_points.py
│   │   │   │   │   ├── info.py
│   │   │   │   │   ├── iou3d.py
│   │   │   │   │   ├── knn.py
│   │   │   │   │   ├── masked_conv.py
│   │   │   │   │   ├── merge_cells.py
│   │   │   │   │   ├── min_area_polygons.py
│   │   │   │   │   ├── modulated_deform_conv.py
│   │   │   │   │   ├── multi_scale_deform_attn.py
│   │   │   │   │   ├── nms.py
│   │   │   │   │   ├── pixel_group.py
│   │   │   │   │   ├── point_sample.py
│   │   │   │   │   ├── points_in_boxes.py
│   │   │   │   │   ├── points_in_polygons.py
│   │   │   │   │   ├── points_sampler.py
│   │   │   │   │   ├── psa_mask.py
│   │   │   │   │   ├── readme.md
│   │   │   │   │   ├── riroi_align_rotated.py
│   │   │   │   │   ├── roi_align.py
│   │   │   │   │   ├── roi_align_rotated.py
│   │   │   │   │   ├── roi_pool.py
│   │   │   │   │   ├── roiaware_pool3d.py
│   │   │   │   │   ├── roipoint_pool3d.py
│   │   │   │   │   ├── rotated_feature_align.py
│   │   │   │   │   ├── saconv.py
│   │   │   │   │   ├── scatter_points.py
│   │   │   │   │   ├── sync_bn.py
│   │   │   │   │   ├── three_interpolate.py
│   │   │   │   │   ├── three_nn.py
│   │   │   │   │   ├── tin_shift.py
│   │   │   │   │   ├── upfirdn2d.py
│   │   │   │   │   └── voxelize.py
│   │   │   │   ├── parallel/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── _functions.py
│   │   │   │   │   ├── collate.py
│   │   │   │   │   ├── data_container.py
│   │   │   │   │   ├── data_parallel.py
│   │   │   │   │   ├── distributed.py
│   │   │   │   │   ├── distributed_deprecated.py
│   │   │   │   │   ├── registry.py
│   │   │   │   │   ├── scatter_gather.py
│   │   │   │   │   └── utils.py
│   │   │   │   ├── readme.md
│   │   │   │   ├── runner/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base_module.py
│   │   │   │   │   ├── base_runner.py
│   │   │   │   │   ├── builder.py
│   │   │   │   │   ├── checkpoint.py
│   │   │   │   │   ├── default_constructor.py
│   │   │   │   │   ├── dist_utils.py
│   │   │   │   │   ├── epoch_based_runner.py
│   │   │   │   │   ├── fp16_utils.py
│   │   │   │   │   ├── hooks/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   ├── checkpoint.py
│   │   │   │   │   │   ├── closure.py
│   │   │   │   │   │   ├── ema.py
│   │   │   │   │   │   ├── evaluation.py
│   │   │   │   │   │   ├── hook.py
│   │   │   │   │   │   ├── iter_timer.py
│   │   │   │   │   │   ├── logger/
│   │   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   │   ├── base.py
│   │   │   │   │   │   │   ├── dvclive.py
│   │   │   │   │   │   │   ├── mlflow.py
│   │   │   │   │   │   │   ├── neptune.py
│   │   │   │   │   │   │   ├── pavi.py
│   │   │   │   │   │   │   ├── tensorboard.py
│   │   │   │   │   │   │   ├── text.py
│   │   │   │   │   │   │   └── wandb.py
│   │   │   │   │   │   ├── lr_updater.py
│   │   │   │   │   │   ├── memory.py
│   │   │   │   │   │   ├── momentum_updater.py
│   │   │   │   │   │   ├── nni_hook.py
│   │   │   │   │   │   ├── optimizer.py
│   │   │   │   │   │   ├── profiler.py
│   │   │   │   │   │   ├── sampler_seed.py
│   │   │   │   │   │   └── sync_buffer.py
│   │   │   │   │   ├── iter_based_runner.py
│   │   │   │   │   ├── log_buffer.py
│   │   │   │   │   ├── misc.py
│   │   │   │   │   ├── optimizer/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   ├── builder.py
│   │   │   │   │   │   └── default_constructor.py
│   │   │   │   │   ├── priority.py
│   │   │   │   │   ├── record.py
│   │   │   │   │   └── utils.py
│   │   │   │   ├── tensorrt/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── init_plugins.py
│   │   │   │   │   ├── preprocess.py
│   │   │   │   │   └── tensorrt_utils.py
│   │   │   │   ├── utils/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── config.py
│   │   │   │   │   ├── env.py
│   │   │   │   │   ├── ext_loader.py
│   │   │   │   │   ├── hub.py
│   │   │   │   │   ├── logging.py
│   │   │   │   │   ├── misc.py
│   │   │   │   │   ├── parrots_jit.py
│   │   │   │   │   ├── parrots_wrapper.py
│   │   │   │   │   ├── path.py
│   │   │   │   │   ├── progressbar.py
│   │   │   │   │   ├── registry.py
│   │   │   │   │   ├── testing.py
│   │   │   │   │   ├── timer.py
│   │   │   │   │   ├── trace.py
│   │   │   │   │   └── version_utils.py
│   │   │   │   ├── version.py
│   │   │   │   ├── video/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── io.py
│   │   │   │   │   ├── optflow.py
│   │   │   │   │   └── processing.py
│   │   │   │   └── visualization/
│   │   │   │       ├── __init__.py
│   │   │   │       ├── color.py
│   │   │   │       ├── image.py
│   │   │   │       └── optflow.py
│   │   │   ├── readme.md
│   │   │   ├── setup.cfg
│   │   │   ├── setup.py
│   │   │   └── tests/
│   │   │       ├── test_arraymisc.py
│   │   │       ├── test_cnn/
│   │   │       │   ├── test_build_layers.py
│   │   │       │   ├── test_context_block.py
│   │   │       │   ├── test_conv2d_adaptive_padding.py
│   │   │       │   ├── test_conv_module.py
│   │   │       │   ├── test_depthwise_seperable_conv_module.py
│   │   │       │   ├── test_flops_counter.py
│   │   │       │   ├── test_fuse_conv_bn.py
│   │   │       │   ├── test_generalized_attention.py
│   │   │       │   ├── test_hsigmoid.py
│   │   │       │   ├── test_hswish.py
│   │   │       │   ├── test_model_registry.py
│   │   │       │   ├── test_non_local.py
│   │   │       │   ├── test_revert_syncbn.py
│   │   │       │   ├── test_scale.py
│   │   │       │   ├── test_swish.py
│   │   │       │   ├── test_transformer.py
│   │   │       │   ├── test_weight_init.py
│   │   │       │   └── test_wrappers.py
│   │   │       ├── test_fileclient.py
│   │   │       ├── test_fileio.py
│   │   │       ├── test_image/
│   │   │       │   ├── test_colorspace.py
│   │   │       │   ├── test_geometric.py
│   │   │       │   ├── test_image_misc.py
│   │   │       │   ├── test_io.py
│   │   │       │   └── test_photometric.py
│   │   │       ├── test_load_model_zoo.py
│   │   │       ├── test_ops/
│   │   │       │   ├── test_active_rotated_filter.py
│   │   │       │   ├── test_assign_score_withk.py
│   │   │       │   ├── test_ball_query.py
│   │   │       │   ├── test_bbox.py
│   │   │       │   ├── test_bilinear_grid_sample.py
│   │   │       │   ├── test_border_align.py
│   │   │       │   ├── test_box_iou_rotated.py
│   │   │       │   ├── test_carafe.py
│   │   │       │   ├── test_cc_attention.py
│   │   │       │   ├── test_contour_expand.py
│   │   │       │   ├── test_convex_iou.py
│   │   │       │   ├── test_corner_pool.py
│   │   │       │   ├── test_correlation.py
│   │   │       │   ├── test_deform_conv.py
│   │   │       │   ├── test_deform_roi_pool.py
│   │   │       │   ├── test_focal_loss.py
│   │   │       │   ├── test_furthest_point_sample.py
│   │   │       │   ├── test_fused_bias_leakyrelu.py
│   │   │       │   ├── test_gather_points.py
│   │   │       │   ├── test_group_points.py
│   │   │       │   ├── test_info.py
│   │   │       │   ├── test_iou3d.py
│   │   │       │   ├── test_knn.py
│   │   │       │   ├── test_masked_conv2d.py
│   │   │       │   ├── test_merge_cells.py
│   │   │       │   ├── test_min_area_polygons.py
│   │   │       │   ├── test_modulated_deform_conv.py
│   │   │       │   ├── test_ms_deformable_attn.py
│   │   │       │   ├── test_nms.py
│   │   │       │   ├── test_nms_rotated.py
│   │   │       │   ├── test_onnx.py
│   │   │       │   ├── test_pixel_group.py
│   │   │       │   ├── test_points_in_polygons.py
│   │   │       │   ├── test_psa_mask.py
│   │   │       │   ├── test_riroi_align_rotated.py
│   │   │       │   ├── test_roi_align.py
│   │   │       │   ├── test_roi_align_rotated.py
│   │   │       │   ├── test_roi_pool.py
│   │   │       │   ├── test_roiaware_pool3d.py
│   │   │       │   ├── test_roipoint_pool3d.py
│   │   │       │   ├── test_rotated_feature_align.py
│   │   │       │   ├── test_saconv.py
│   │   │       │   ├── test_scatter_points.py
│   │   │       │   ├── test_syncbn.py
│   │   │       │   ├── test_tensorrt.py
│   │   │       │   ├── test_tensorrt_preprocess.py
│   │   │       │   ├── test_three_interpolate.py
│   │   │       │   ├── test_three_nn.py
│   │   │       │   ├── test_tin_shift.py
│   │   │       │   ├── test_upfirdn2d.py
│   │   │       │   └── test_voxelization.py
│   │   │       ├── test_parallel.py
│   │   │       ├── test_runner/
│   │   │       │   ├── test_basemodule.py
│   │   │       │   ├── test_checkpoint.py
│   │   │       │   ├── test_dist_utils.py
│   │   │       │   ├── test_eval_hook.py
│   │   │       │   ├── test_fp16.py
│   │   │       │   ├── test_hooks.py
│   │   │       │   ├── test_optimizer.py
│   │   │       │   ├── test_runner.py
│   │   │       │   └── test_utils.py
│   │   │       ├── test_utils/
│   │   │       │   ├── test_config.py
│   │   │       │   ├── test_env.py
│   │   │       │   ├── test_hub.py
│   │   │       │   ├── test_logging.py
│   │   │       │   ├── test_misc.py
│   │   │       │   ├── test_parrots_jit.py
│   │   │       │   ├── test_path.py
│   │   │       │   ├── test_progressbar.py
│   │   │       │   ├── test_registry.py
│   │   │       │   ├── test_testing.py
│   │   │       │   ├── test_timer.py
│   │   │       │   ├── test_trace.py
│   │   │       │   └── test_version_utils.py
│   │   │       ├── test_video/
│   │   │       │   ├── test_optflow.py
│   │   │       │   ├── test_processing.py
│   │   │       │   └── test_reader.py
│   │   │       └── test_visualization.py
│   │   ├── pansharpening/
│   │   │   ├── common/
│   │   │   │   ├── dataset.py
│   │   │   │   ├── dataset_hp.py
│   │   │   │   ├── evaluate.py
│   │   │   │   └── psdata.py
│   │   │   ├── configs/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── hook_configs.py
│   │   │   │   ├── option_bdpn.py
│   │   │   │   ├── option_dicnn.py
│   │   │   │   ├── option_drpnn.py
│   │   │   │   ├── option_fusionnet.py
│   │   │   │   ├── option_msdcnn.py
│   │   │   │   ├── option_pannet.py
│   │   │   │   └── option_pnn.py
│   │   │   ├── evaluation/
│   │   │   │   └── ps_evaluate.py
│   │   │   ├── models/
│   │   │   │   ├── APNN/
│   │   │   │   │   ├── data_qb.py
│   │   │   │   │   ├── data_single_read.py
│   │   │   │   │   ├── data_wv2.py
│   │   │   │   │   ├── data_wv3.py
│   │   │   │   │   ├── data_wv4.py
│   │   │   │   │   ├── evaluate.py
│   │   │   │   │   ├── main_pre_train_trainData_qb.py
│   │   │   │   │   ├── main_pre_train_trainData_wv2.py
│   │   │   │   │   ├── main_pre_train_trainData_wv3.py
│   │   │   │   │   ├── main_pre_train_trainData_wv4.py
│   │   │   │   │   ├── main_test_qb.py
│   │   │   │   │   ├── main_test_wv2.py
│   │   │   │   │   ├── main_test_wv3.py
│   │   │   │   │   ├── main_test_wv4.py
│   │   │   │   │   ├── model_qb.py
│   │   │   │   │   ├── model_wv2.py
│   │   │   │   │   ├── model_wv3.py
│   │   │   │   │   ├── model_wv4.py
│   │   │   │   │   ├── variance_sacling_initializer.py
│   │   │   │   │   └── wald_utilities.py
│   │   │   │   ├── BDPN/
│   │   │   │   │   ├── bdpn_main.py
│   │   │   │   │   ├── loss_utils.py
│   │   │   │   │   ├── main_train_wv3.py
│   │   │   │   │   └── model_bdpn.py
│   │   │   │   ├── DRPNN/
│   │   │   │   │   ├── drpnn_main.py
│   │   │   │   │   └── model_drpnn.py
│   │   │   │   ├── DiCNN/
│   │   │   │   │   ├── dicnn_main.py
│   │   │   │   │   └── model_dicnn.py
│   │   │   │   ├── FusionNet/
│   │   │   │   │   ├── fusionnet_main.py
│   │   │   │   │   ├── model_fusionnet.py
│   │   │   │   │   └── run_fusionnet.py
│   │   │   │   ├── MSDCNN/
│   │   │   │   │   ├── model_msdcnn.py
│   │   │   │   │   └── msdcnn_main.py
│   │   │   │   ├── PNN/
│   │   │   │   │   ├── model_pnn.py
│   │   │   │   │   └── pnn_main.py
│   │   │   │   ├── PanNet/
│   │   │   │   │   ├── model_pannet.py
│   │   │   │   │   └── pannet_main.py
│   │   │   │   └── __init__.py
│   │   │   ├── run_pansharpening.py
│   │   │   └── run_test_pansharpening.py
│   │   ├── pretrained-model/
│   │   │   ├── QB/
│   │   │   │   ├── bdpn.pth
│   │   │   │   ├── dicnn1.pth
│   │   │   │   ├── drpnn.pth
│   │   │   │   ├── fusionnet.pth
│   │   │   │   ├── msdcnn.pth
│   │   │   │   ├── panet.pth
│   │   │   │   ├── pnn.pth
│   │   │   │   └── readme.txt
│   │   │   ├── WV2/
│   │   │   │   ├── bdpn.pth
│   │   │   │   ├── dicnn1.pth
│   │   │   │   ├── drpnn.pth
│   │   │   │   ├── fusionnet.pth
│   │   │   │   ├── msdcnn.pth
│   │   │   │   ├── pannet.pth
│   │   │   │   ├── pnn.pth
│   │   │   │   └── readme.txt
│   │   │   ├── WV3/
│   │   │   │   ├── bdpn.pth
│   │   │   │   ├── dicnn1.pth
│   │   │   │   ├── drpnn.pth
│   │   │   │   ├── fusionnet.pth
│   │   │   │   ├── msdcnn.pth
│   │   │   │   ├── pannet.pth
│   │   │   │   └── pnn.pth
│   │   │   └── WV4/
│   │   │       ├── bdpn.pth
│   │   │       ├── dicnn1.pth
│   │   │       ├── drpnn.pth
│   │   │       ├── fusionnet.pth
│   │   │       ├── msdcnn.pth
│   │   │       ├── pannet.pth
│   │   │       ├── pnn.pth
│   │   │       └── readme.txt
│   │   ├── readme.md
│   │   └── results/
│   │       └── readme.txt
│   ├── readme.md
│   └── setup.py
├── 02-Test-toolbox-for-traditional-and-DL(Matlab)/
│   ├── 1_TestData/
│   │   ├── Datasets Testing/
│   │   │   └── Download link for WV3-NewYork test data.txt
│   │   ├── QB/
│   │   │   └── readme.txt
│   │   ├── WV2/
│   │   │   └── readme.txt
│   │   ├── WV3/
│   │   │   └── readme.txt
│   │   ├── WV4/
│   │   │   └── readme.txt
│   │   └── readme.txt
│   ├── 2_DL_Result/
│   │   ├── QB/
│   │   │   └── readme.txt
│   │   ├── WV2/
│   │   │   └── readme.txt
│   │   ├── WV3/
│   │   │   ├── APNN/
│   │   │   │   └── readme.txt
│   │   │   ├── BDPN/
│   │   │   │   └── readme.txt
│   │   │   ├── DRPNN/
│   │   │   │   └── readme.txt
│   │   │   ├── DiCNN1/
│   │   │   │   └── readme.txt
│   │   │   ├── Download link for the 8 DL methods on WV3 dataset.txt
│   │   │   ├── FusionNet/
│   │   │   │   └── readme.txt
│   │   │   ├── MSDCNN/
│   │   │   │   └── readme.txt
│   │   │   ├── PNN/
│   │   │   │   └── readme.txt
│   │   │   └── PanNet/
│   │   │       └── readme.txt
│   │   ├── WV4/
│   │   │   └── readme.txt
│   │   └── readme.txt
│   ├── 3_EPS/
│   │   ├── QB/
│   │   │   └── readme.txt
│   │   ├── WV2/
│   │   │   └── readme.txt
│   │   ├── WV3/
│   │   │   └── readme.txt
│   │   ├── WV4/
│   │   │   └── readme.txt
│   │   └── readme.txt
│   ├── AWLP/
│   │   └── AWLP.m
│   ├── Avg_RR_Assessment.tex
│   ├── BDSD/
│   │   ├── BDSD.m
│   │   ├── BDSD_PC.m
│   │   └── C_BDSD.m
│   ├── BT-H/
│   │   └── BroveyRegHazeMin.m
│   ├── Demo_Full_Resolution.m
│   ├── Demo_Reduced_Resolution.m
│   ├── FE-HPM/
│   │   ├── FE.m
│   │   └── FE_HPM.m
│   ├── FR_Assessment.tex
│   ├── GLP/
│   │   ├── GS2_GLP.m
│   │   ├── MTF_GLP.m
│   │   ├── MTF_GLP_FS.m
│   │   ├── MTF_GLP_HPM.m
│   │   ├── MTF_GLP_HPM_Haze_min.m
│   │   └── MTF_GLP_HPM_R.m
│   ├── GS/
│   │   ├── GS.m
│   │   ├── GSA.m
│   │   └── GS_Segm.m
│   ├── MF/
│   │   ├── MF_HG_Pansharpen.m
│   │   └── Pyr_Dec.m
│   ├── PRACS/
│   │   └── PRACS.m
│   ├── PWMBF/
│   │   ├── PWMBF.m
│   │   ├── compute_PhiTX.m
│   │   ├── compute_PhiX.m
│   │   ├── readme
│   │   └── rwt/
│   │       ├── AUTHORS
│   │       ├── CMakeLists.txt
│   │       ├── HACKING
│   │       ├── INSTALL
│   │       ├── LICENSE
│   │       ├── bin/
│   │       │   ├── HardTh.m
│   │       │   ├── SoftTh.m
│   │       │   ├── compile.m
│   │       │   ├── daubcqf.m
│   │       │   ├── denoise.m
│   │       │   ├── makesig.m
│   │       │   ├── mdwt.m
│   │       │   ├── mdwt.mexw64
│   │       │   ├── midwt.m
│   │       │   ├── midwt.mexw64
│   │       │   ├── mirdwt.m
│   │       │   ├── mirdwt.mexw64
│   │       │   ├── mrdwt.m
│   │       │   ├── mrdwt.mexw64
│   │       │   └── setopt.m
│   │       ├── dist/
│   │       │   ├── 2.01/
│   │       │   │   ├── INSTALL
│   │       │   │   ├── README
│   │       │   │   ├── RWT-2.01.tar.Z
│   │       │   │   └── doc/
│   │       │   │       └── index.html
│   │       │   └── 2.3/
│   │       │       ├── INSTALL
│   │       │       ├── INSTALL_PRECOMPILED
│   │       │       ├── LICENSE
│   │       │       └── README
│   │       ├── doc/
│   │       │   ├── CMakeLists.txt
│   │       │   └── Doxyfile.in
│   │       ├── lib/
│   │       │   ├── inc/
│   │       │   │   ├── rwt_init.h
│   │       │   │   ├── rwt_platform.h
│   │       │   │   └── rwt_transforms.h
│   │       │   └── src/
│   │       │       ├── CMakeLists.txt
│   │       │       ├── dwt.c
│   │       │       ├── idwt.c
│   │       │       ├── init.c
│   │       │       ├── irdwt.c
│   │       │       ├── platform.c
│   │       │       └── rdwt.c
│   │       ├── mex/
│   │       │   ├── mdwt.c
│   │       │   ├── midwt.c
│   │       │   ├── mirdwt.c
│   │       │   └── mrdwt.c
│   │       ├── python/
│   │       │   ├── CMakeLists.txt
│   │       │   ├── LICENSE.numpy
│   │       │   ├── numpy.i
│   │       │   ├── rwt.i
│   │       │   └── test_rwt.py
│   │       ├── readme
│   │       └── tests/
│   │           ├── matlab_xunit/
│   │           │   ├── Readme.html
│   │           │   ├── architecture/
│   │           │   │   ├── class_diagram_a.vsd
│   │           │   │   ├── class_diagram_b.vsd
│   │           │   │   ├── class_diagram_c.vsd
│   │           │   │   ├── html/
│   │           │   │   │   └── matlab_xunit_architecture.html
│   │           │   │   ├── matlab_xunit_architecture.m
│   │           │   │   └── testSample.m
│   │           │   ├── doc/
│   │           │   │   ├── +abc/
│   │           │   │   │   └── +tests/
│   │           │   │   │       ├── test_that.m
│   │           │   │   │       └── test_this.m
│   │           │   │   ├── +abc_tests/
│   │           │   │   │   ├── test_that.m
│   │           │   │   │   └── test_this.m
│   │           │   │   ├── exException.m
│   │           │   │   ├── exQuickStart.m
│   │           │   │   ├── exRunSpecificTest.m
│   │           │   │   ├── exRunTestsInADirectory.m
│   │           │   │   ├── exRunTestsInPackage.m
│   │           │   │   ├── exSilentRunning.m
│   │           │   │   ├── exSubfunctionTests.m
│   │           │   │   ├── exTestCase.m
│   │           │   │   ├── exTestCaseSearching.m
│   │           │   │   ├── exTestFixtures.m
│   │           │   │   ├── exTolerance.m
│   │           │   │   ├── example_quick_start/
│   │           │   │   │   ├── testFliplrMatrix.m
│   │           │   │   │   └── testFliplrVector.m
│   │           │   │   ├── example_subfunction_tests/
│   │           │   │   │   └── testFliplr.m
│   │           │   │   ├── examples_general/
│   │           │   │   │   ├── TestUsingTestCase.m
│   │           │   │   │   ├── testBadSinTest.m
│   │           │   │   │   ├── testCos.m
│   │           │   │   │   ├── testSetupExample.m
│   │           │   │   │   ├── testSin.m
│   │           │   │   │   └── testWithSetupError.m
│   │           │   │   ├── file_exchange_description.txt
│   │           │   │   ├── html/
│   │           │   │   │   ├── exException.html
│   │           │   │   │   ├── exQuickStart.html
│   │           │   │   │   ├── exRunSpecificTest.html
│   │           │   │   │   ├── exRunTestsInADirectory.html
│   │           │   │   │   ├── exRunTestsInPackage.html
│   │           │   │   │   ├── exSilentRunning.html
│   │           │   │   │   ├── exSubfunctionTests.html
│   │           │   │   │   ├── exTestCase.html
│   │           │   │   │   ├── exTestCaseSearching.html
│   │           │   │   │   ├── exTestFixtures.html
│   │           │   │   │   └── exTolerance.html
│   │           │   │   ├── index.html
│   │           │   │   ├── release-history.html
│   │           │   │   └── xunit_product_page.html
│   │           │   ├── license.txt
│   │           │   ├── obsolete/
│   │           │   │   ├── +mtest/
│   │           │   │   │   └── +utils/
│   │           │   │   │       ├── Contents.m
│   │           │   │   │       ├── compareFloats.m
│   │           │   │   │       ├── containsRegexp.m
│   │           │   │   │       ├── generateDoc.m
│   │           │   │   │       ├── isAlmostEqual.m
│   │           │   │   │       ├── isSetUpString.m
│   │           │   │   │       ├── isTearDownString.m
│   │           │   │   │       ├── isTestCaseSubclass.m
│   │           │   │   │       ├── isTestString.m
│   │           │   │   │       └── parseFloatAssertInputs.m
│   │           │   │   ├── assertAlmostEqual.m
│   │           │   │   ├── mtest.m
│   │           │   │   └── tests/
│   │           │   │       ├── MtestTest.m
│   │           │   │       ├── cwd_test/
│   │           │   │       │   ├── TestCaseSubclass.m
│   │           │   │       │   ├── testFoobar.m
│   │           │   │       │   └── testSubfunctions.m
│   │           │   │       ├── testAssertAlmostEqual.m
│   │           │   │       └── testIsAlmostEqual.m
│   │           │   ├── readme
│   │           │   ├── tests/
│   │           │   │   ├── +xunit/
│   │           │   │   │   └── +mocktests/
│   │           │   │   │       ├── +subpkg/
│   │           │   │   │       │   └── test_a_bit.m
│   │           │   │   │       ├── A.m
│   │           │   │   │       ├── B.m
│   │           │   │   │       ├── FooTest.m
│   │           │   │   │       ├── helper_that.m
│   │           │   │   │       ├── test_that.m
│   │           │   │   │       └── test_this.m
│   │           │   │   ├── Readme.m
│   │           │   │   ├── RuntestsTest.m
│   │           │   │   ├── TestCaseTest.m
│   │           │   │   ├── TestCaseWithAddPathTest.m
│   │           │   │   ├── TestFuncHandleTests.m
│   │           │   │   ├── TestRunLoggerTest.m
│   │           │   │   ├── TestSuiteTest.m
│   │           │   │   ├── ThrowsExceptionTest.m
│   │           │   │   ├── almost_black.tif
│   │           │   │   ├── black.tif
│   │           │   │   ├── cwd_test/
│   │           │   │   │   ├── TestCaseSubclass.m
│   │           │   │   │   ├── testFoobar.m
│   │           │   │   │   └── testSubfunctions.m
│   │           │   │   ├── dir1/
│   │           │   │   │   └── test_thatPasses.m
│   │           │   │   ├── dir2/
│   │           │   │   │   └── test_thatFails.m
│   │           │   │   ├── empty_file
│   │           │   │   ├── helper_classes/
│   │           │   │   │   ├── BadFixture.m
│   │           │   │   │   ├── Contents.m
│   │           │   │   │   ├── ExceptionNotThrownTest.m
│   │           │   │   │   ├── FailingTestCase.m
│   │           │   │   │   ├── LoggingTestCase.m
│   │           │   │   │   ├── NoTestMethods.m
│   │           │   │   │   ├── PassingExceptionTest.m
│   │           │   │   │   ├── TestsToBeDiscovered.m
│   │           │   │   │   ├── TwoPassingTests.m
│   │           │   │   │   ├── WrongExceptionThrownTest.m
│   │           │   │   │   ├── notTestString.m
│   │           │   │   │   ├── testFunctionHandlesA.m
│   │           │   │   │   ├── testFunctionHandlesB.m
│   │           │   │   │   ├── testFunctionHandlesC.m
│   │           │   │   │   ├── testFunctionHandlesD.m
│   │           │   │   │   ├── testFunctionHandlesE.m
│   │           │   │   │   ├── testFunctionHandlesTeardownNoSetup.m
│   │           │   │   │   └── testSimple.m
│   │           │   │   ├── testAssertEqual.m
│   │           │   │   ├── testAssertExceptionThrown.m
│   │           │   │   ├── testAssertFalse.m
│   │           │   │   ├── testAssertTrue.m
│   │           │   │   ├── testContainsRegexp.m
│   │           │   │   ├── testIsSetUpString.m
│   │           │   │   ├── testIsTearDownString.m
│   │           │   │   ├── testIsTestCaseSubclass.m
│   │           │   │   ├── testIsTestString.m
│   │           │   │   ├── testRuntestsWithDirectoryName.m
│   │           │   │   ├── test_TestSuiteInDir.m
│   │           │   │   ├── test_arrayToString.m
│   │           │   │   ├── test_assertElementsAlmostEqual.m
│   │           │   │   ├── test_assertFilesEqual.m
│   │           │   │   ├── test_assertVectorsAlmostEqual.m
│   │           │   │   ├── test_compareFloats.m
│   │           │   │   ├── test_comparisonMessage.m
│   │           │   │   ├── test_packageName.m
│   │           │   │   ├── test_parseFloatAssertInputs.m
│   │           │   │   └── test_stringToCellArray.m
│   │           │   └── xunit/
│   │           │       ├── +xunit/
│   │           │       │   └── +utils/
│   │           │       │       ├── Contents.m
│   │           │       │       ├── arrayToString.m
│   │           │       │       ├── compareFloats.m
│   │           │       │       ├── comparisonMessage.m
│   │           │       │       ├── containsRegexp.m
│   │           │       │       ├── generateDoc.m
│   │           │       │       ├── isAlmostEqual.m
│   │           │       │       ├── isSetUpString.m
│   │           │       │       ├── isTearDownString.m
│   │           │       │       ├── isTestCaseSubclass.m
│   │           │       │       ├── isTestString.m
│   │           │       │       ├── parseFloatAssertInputs.m
│   │           │       │       └── stringToCellArray.m
│   │           │       ├── CommandWindowTestRunDisplay.m
│   │           │       ├── Contents.m
│   │           │       ├── FunctionHandleTestCase.m
│   │           │       ├── TestCase.m
│   │           │       ├── TestCaseInDir.m
│   │           │       ├── TestCaseWithAddPath.m
│   │           │       ├── TestComponent.m
│   │           │       ├── TestComponentInDir.m
│   │           │       ├── TestRunDisplay.m
│   │           │       ├── TestRunLogger.m
│   │           │       ├── TestRunMonitor.m
│   │           │       ├── TestSuite.m
│   │           │       ├── TestSuiteInDir.m
│   │           │       ├── VerboseTestRunDisplay.m
│   │           │       ├── assertElementsAlmostEqual.m
│   │           │       ├── assertEqual.m
│   │           │       ├── assertExceptionThrown.m
│   │           │       ├── assertFalse.m
│   │           │       ├── assertFilesEqual.m
│   │           │       ├── assertTrue.m
│   │           │       ├── assertVectorsAlmostEqual.m
│   │           │       ├── initTestSuite.m
│   │           │       └── runtests.m
│   │           ├── octave/
│   │           │   ├── assertEqual.m
│   │           │   ├── assertVectorsAlmostEqual.m
│   │           │   ├── runtests.m
│   │           │   ├── test_denoise.m
│   │           │   ├── test_makesig.m
│   │           │   ├── test_mdwt.m
│   │           │   ├── test_midwt.m
│   │           │   ├── test_mirdwt.m
│   │           │   ├── test_mrdwt.m
│   │           │   └── test_setopt.m
│   │           ├── readme
│   │           ├── runtests.m
│   │           ├── test_daubcqf.m
│   │           ├── test_denoise.m
│   │           ├── test_makesig.m
│   │           ├── test_mdwt.m
│   │           ├── test_midwt.m
│   │           ├── test_mirdwt.m
│   │           ├── test_mrdwt.m
│   │           └── test_setopt.m
│   ├── Quality_Indices/
│   │   ├── D_lambda.m
│   │   ├── D_lambda_K.m
│   │   ├── D_s.m
│   │   ├── ERGAS.m
│   │   ├── HQNR.m
│   │   ├── Q.m
│   │   ├── QNR.m
│   │   ├── SAM.m
│   │   ├── SCC.m
│   │   ├── img_qi.m
│   │   ├── norm_blocco.m
│   │   ├── onion_mult.m
│   │   ├── onion_mult2D.m
│   │   ├── onions_quality.m
│   │   ├── q2n.m
│   │   └── ssim.m
│   ├── RR/
│   │   ├── RRpansharp.m
│   │   ├── manopt/
│   │   │   ├── CLA.txt
│   │   │   ├── COPYING.txt
│   │   │   ├── CREDITS.txt
│   │   │   ├── LICENSE.txt
│   │   │   ├── README.txt
│   │   │   ├── checkinstall/
│   │   │   │   └── basicexample.m
│   │   │   ├── examples/
│   │   │   │   ├── PCA_stochastic.m
│   │   │   │   ├── dominant_invariant_subspace.m
│   │   │   │   ├── dominant_invariant_subspace_complex.m
│   │   │   │   ├── elliptope_SDP.m
│   │   │   │   ├── elliptope_SDP_complex.m
│   │   │   │   ├── essential_svd.m
│   │   │   │   ├── generalized_eigenvalue_computation.m
│   │   │   │   ├── generalized_procrustes.m
│   │   │   │   ├── low_rank_dist_completion.m
│   │   │   │   ├── low_rank_matrix_completion.m
│   │   │   │   ├── low_rank_tensor_completion.m
│   │   │   │   ├── maxcut.m
│   │   │   │   ├── nonlinear_eigenspace.m
│   │   │   │   ├── packing_on_the_sphere.m
│   │   │   │   ├── positive_definite_karcher_mean.m
│   │   │   │   ├── radio_interferometric_calibration.m
│   │   │   │   ├── robust_pca.m
│   │   │   │   ├── shapefit_smoothed.m
│   │   │   │   ├── sparse_pca.m
│   │   │   │   ├── thomson_problem.m
│   │   │   │   └── truncated_svd.m
│   │   │   ├── importmanopt.m
│   │   │   ├── manopt/
│   │   │   │   ├── core/
│   │   │   │   │   ├── StoreDB.m
│   │   │   │   │   ├── applyStatsfun.m
│   │   │   │   │   ├── canGetApproxGradient.m
│   │   │   │   │   ├── canGetApproxHessian.m
│   │   │   │   │   ├── canGetCost.m
│   │   │   │   │   ├── canGetDirectionalDerivative.m
│   │   │   │   │   ├── canGetEuclideanGradient.m
│   │   │   │   │   ├── canGetGradient.m
│   │   │   │   │   ├── canGetHessian.m
│   │   │   │   │   ├── canGetLinesearch.m
│   │   │   │   │   ├── canGetPartialEuclideanGradient.m
│   │   │   │   │   ├── canGetPartialGradient.m
│   │   │   │   │   ├── canGetPrecon.m
│   │   │   │   │   ├── canGetSqrtPrecon.m
│   │   │   │   │   ├── canGetSubgradient.m
│   │   │   │   │   ├── getApproxGradient.m
│   │   │   │   │   ├── getApproxHessian.m
│   │   │   │   │   ├── getCost.m
│   │   │   │   │   ├── getCostGrad.m
│   │   │   │   │   ├── getDirectionalDerivative.m
│   │   │   │   │   ├── getEuclideanGradient.m
│   │   │   │   │   ├── getGlobalDefaults.m
│   │   │   │   │   ├── getGradient.m
│   │   │   │   │   ├── getGradientFD.m
│   │   │   │   │   ├── getHessian.m
│   │   │   │   │   ├── getHessianFD.m
│   │   │   │   │   ├── getLinesearch.m
│   │   │   │   │   ├── getPartialEuclideanGradient.m
│   │   │   │   │   ├── getPartialGradient.m
│   │   │   │   │   ├── getPrecon.m
│   │   │   │   │   ├── getSqrtPrecon.m
│   │   │   │   │   ├── getStore.m
│   │   │   │   │   ├── getSubgradient.m
│   │   │   │   │   ├── handle_light.m
│   │   │   │   │   ├── mergeOptions.m
│   │   │   │   │   ├── purgeStoredb.m
│   │   │   │   │   ├── setStore.m
│   │   │   │   │   └── stoppingcriterion.m
│   │   │   │   ├── manifolds/
│   │   │   │   │   ├── complexcircle/
│   │   │   │   │   │   ├── complexcirclefactory.m
│   │   │   │   │   │   └── realphasefactory.m
│   │   │   │   │   ├── essential/
│   │   │   │   │   │   ├── README_Essential.txt
│   │   │   │   │   │   ├── essential_costE2cost.m
│   │   │   │   │   │   ├── essential_egradE2egrad.m
│   │   │   │   │   │   ├── essential_ehessE2ehess.m
│   │   │   │   │   │   ├── essential_flat.m
│   │   │   │   │   │   ├── essential_hat3.m
│   │   │   │   │   │   ├── essential_sharp.m
│   │   │   │   │   │   ├── essentialfactory.m
│   │   │   │   │   │   └── privateessential/
│   │   │   │   │   │       ├── essential_closestRepresentative.m
│   │   │   │   │   │       ├── essential_distMinAngle.m
│   │   │   │   │   │       ├── essential_distMinAnglePair.m
│   │   │   │   │   │       ├── essential_distMinAnglePair_base.m
│   │   │   │   │   │       ├── essential_distMinAnglePair_computeDfBreak.m
│   │   │   │   │   │       ├── essential_distMinAnglePair_dfNewton.m
│   │   │   │   │   │       ├── essential_distMinAnglePair_discontinuityDistance.m
│   │   │   │   │   │       ├── essential_distMinAnglePair_ft.m
│   │   │   │   │   │       ├── essential_distMinAnglePair_ftFromQ.m
│   │   │   │   │   │       ├── essential_distMinAnglePair_test.m
│   │   │   │   │   │       └── modAngle.m
│   │   │   │   │   ├── euclidean/
│   │   │   │   │   │   ├── centeredmatrixfactory.m
│   │   │   │   │   │   ├── euclideancomplexfactory.m
│   │   │   │   │   │   ├── euclideanfactory.m
│   │   │   │   │   │   ├── shapefitfactory.m
│   │   │   │   │   │   ├── skewsymmetricfactory.m
│   │   │   │   │   │   └── symmetricfactory.m
│   │   │   │   │   ├── fixedrank/
│   │   │   │   │   │   ├── fixedrankMNquotientfactory.m
│   │   │   │   │   │   ├── fixedrankembeddedfactory.m
│   │   │   │   │   │   ├── fixedrankfactory_2factors.m
│   │   │   │   │   │   ├── fixedrankfactory_2factors_preconditioned.m
│   │   │   │   │   │   ├── fixedrankfactory_2factors_subspace_projection.m
│   │   │   │   │   │   ├── fixedrankfactory_3factors.m
│   │   │   │   │   │   └── fixedrankfactory_3factors_preconditioned.m
│   │   │   │   │   ├── fixedranktensors/
│   │   │   │   │   │   ├── fixedrankfactory_tucker_preconditioned.m
│   │   │   │   │   │   └── tucker2multiarray.m
│   │   │   │   │   ├── grassmann/
│   │   │   │   │   │   ├── grassmanncomplexfactory.m
│   │   │   │   │   │   ├── grassmannfactory.m
│   │   │   │   │   │   └── grassmanngeneralizedfactory.m
│   │   │   │   │   ├── multinomial/
│   │   │   │   │   │   └── multinomialfactory.m
│   │   │   │   │   ├── oblique/
│   │   │   │   │   │   ├── obliquecomplexfactory.m
│   │   │   │   │   │   └── obliquefactory.m
│   │   │   │   │   ├── rotations/
│   │   │   │   │   │   ├── randrot.m
│   │   │   │   │   │   ├── randskew.m
│   │   │   │   │   │   └── rotationsfactory.m
│   │   │   │   │   ├── specialeuclidean/
│   │   │   │   │   │   └── specialeuclideanfactory.m
│   │   │   │   │   ├── sphere/
│   │   │   │   │   │   ├── spherecomplexfactory.m
│   │   │   │   │   │   ├── spherefactory.m
│   │   │   │   │   │   └── spheresymmetricfactory.m
│   │   │   │   │   ├── stiefel/
│   │   │   │   │   │   ├── stiefelcomplexfactory.m
│   │   │   │   │   │   ├── stiefelfactory.m
│   │   │   │   │   │   ├── stiefelgeneralizedfactory.m
│   │   │   │   │   │   └── stiefelstackedfactory.m
│   │   │   │   │   └── symfixedrank/
│   │   │   │   │       ├── elliptopefactory.m
│   │   │   │   │       ├── spectrahedronfactory.m
│   │   │   │   │       ├── symfixedrankYYcomplexfactory.m
│   │   │   │   │       ├── symfixedrankYYfactory.m
│   │   │   │   │       └── sympositivedefinitefactory.m
│   │   │   │   ├── readme
│   │   │   │   ├── solvers/
│   │   │   │   │   ├── barzilaiborwein/
│   │   │   │   │   │   └── barzilaiborwein.m
│   │   │   │   │   ├── bfgs/
│   │   │   │   │   │   └── rlbfgs.m
│   │   │   │   │   ├── conjugategradient/
│   │   │   │   │   │   ├── conjugategradient.m
│   │   │   │   │   │   └── linear_conjugategradient.m
│   │   │   │   │   ├── gradientapproximations/
│   │   │   │   │   │   └── approxgradientFD.m
│   │   │   │   │   ├── hessianapproximations/
│   │   │   │   │   │   └── approxhessianFD.m
│   │   │   │   │   ├── linesearch/
│   │   │   │   │   │   ├── linesearch.m
│   │   │   │   │   │   ├── linesearch_adaptive.m
│   │   │   │   │   │   ├── linesearch_decrease.m
│   │   │   │   │   │   └── linesearch_hint.m
│   │   │   │   │   ├── neldermead/
│   │   │   │   │   │   ├── centroid.m
│   │   │   │   │   │   └── neldermead.m
│   │   │   │   │   ├── preconditioners/
│   │   │   │   │   │   └── preconhessiansolve.m
│   │   │   │   │   ├── pso/
│   │   │   │   │   │   └── pso.m
│   │   │   │   │   ├── steepestdescent/
│   │   │   │   │   │   └── steepestdescent.m
│   │   │   │   │   ├── stochasticgradient/
│   │   │   │   │   │   ├── stepsize_sg.m
│   │   │   │   │   │   └── stochasticgradient.m
│   │   │   │   │   └── trustregions/
│   │   │   │   │       ├── license for original GenRTR code.txt
│   │   │   │   │       ├── tCG.m
│   │   │   │   │       └── trustregions.m
│   │   │   │   └── tools/
│   │   │   │       ├── checkdiff.m
│   │   │   │       ├── checkgradient.m
│   │   │   │       ├── checkhessian.m
│   │   │   │       ├── checkretraction.m
│   │   │   │       ├── criticalpointfinder.m
│   │   │   │       ├── dexpm.m
│   │   │   │       ├── dfunm.m
│   │   │   │       ├── diagsum.m
│   │   │   │       ├── dlogm.m
│   │   │   │       ├── dsqrtm.m
│   │   │   │       ├── grammatrix.m
│   │   │   │       ├── hashmd5.m
│   │   │   │       ├── hessianextreme.m
│   │   │   │       ├── hessianmatrix.m
│   │   │   │       ├── hessianspectrum.m
│   │   │   │       ├── identify_linear_piece.m
│   │   │   │       ├── lincomb.m
│   │   │   │       ├── manoptsolve.m
│   │   │   │       ├── matrixlincomb.m
│   │   │   │       ├── multihconj.m
│   │   │   │       ├── multiherm.m
│   │   │   │       ├── multiprod.m
│   │   │   │       ├── multiprodmultitransp_license.txt
│   │   │   │       ├── multiscale.m
│   │   │   │       ├── multiskew.m
│   │   │   │       ├── multisqnorm.m
│   │   │   │       ├── multisym.m
│   │   │   │       ├── multitrace.m
│   │   │   │       ├── multitransp.m
│   │   │   │       ├── orthogonalize.m
│   │   │   │       ├── plotprofile.m
│   │   │   │       ├── powermanifold.m
│   │   │   │       ├── productmanifold.m
│   │   │   │       ├── smallestinconvexhull.m
│   │   │   │       ├── statsfunhelper.m
│   │   │   │       ├── surfprofile.m
│   │   │   │       ├── tangent2vec.m
│   │   │   │       ├── tangentorthobasis.m
│   │   │   │       ├── tangentspacefactory.m
│   │   │   │       └── tangentspherefactory.m
│   │   │   ├── manopt_version.m
│   │   │   └── readme
│   │   └── readme
│   ├── RR_Assessment.tex
│   ├── SR-D/
│   │   ├── CS.m
│   │   ├── Dict_Learn.m
│   │   ├── OMP.m
│   │   └── OMP_Rec_Detile.m
│   ├── TV/
│   │   └── TV_pansharpen.m
│   ├── Tools/
│   │   ├── LPfilter.m
│   │   ├── LPfilterGauss.m
│   │   ├── LPfilterPlusDec.m
│   │   ├── MTF.m
│   │   ├── MTF_PAN.m
│   │   ├── estimation_alpha.m
│   │   ├── genMTF.m
│   │   ├── gen_LP_image.m
│   │   ├── indexes_evaluation.m
│   │   ├── indexes_evaluation_FS.m
│   │   ├── indwt2_working.m
│   │   ├── interp23tap.m
│   │   ├── k_means_clustering.m
│   │   ├── matrix2latex.m
│   │   ├── ndwt2_working.m
│   │   ├── printAllImagesImWriteFR.m
│   │   ├── printAllImagesImWriteRR.m
│   │   ├── printImage.m
│   │   ├── rectangleonimage.m
│   │   ├── resize_images.m
│   │   ├── showImage4.m
│   │   ├── showImage4LR.m
│   │   ├── showImage4LR_zoomin.m
│   │   ├── showImage4_zoomin.m
│   │   ├── showImage8.m
│   │   ├── showImage8LR.m
│   │   ├── showImage8LR_zoomin.m
│   │   ├── showImage8_zoomin.m
│   │   ├── showImagesAll.m
│   │   ├── showImagesAllOld.m
│   │   ├── showPan.m
│   │   ├── showPan_zoomin.m
│   │   ├── tight_subplot.m
│   │   ├── viewimage.m
│   │   └── viewimage2.m
│   └── readme.md
├── 03-Data-Simulation(Matlab)/
│   ├── 01-DataSimu/
│   │   └── QB/
│   │       └── readme.md.txt
│   ├── Demo_DataSimu_qb.m
│   ├── imgs/
│   │   └── readme
│   └── segImg_new.m
├── LICENSE
├── README.md
└── docs/
    ├── en/
    │   ├── DLPanToolbox/
    │   │   ├── Evaluation.md
    │   │   ├── Example.md
    │   │   ├── PreProcess.md
    │   │   └── Simulation.md
    │   ├── Makefile
    │   ├── _static/
    │   │   └── css/
    │   │       └── readthedocs.css
    │   ├── _templates/
    │   │   └── classtemplate.rst
    │   ├── citation.md
    │   ├── conf.py
    │   ├── docutils.conf
    │   ├── faq.md
    │   ├── get_started/
    │   │   ├── Installation.md
    │   │   └── Introduction.md
    │   ├── index.rst
    │   ├── make.bat
    │   └── switch_language.md
    ├── requirements.txt
    ├── run.sh
    └── zh-cn/
        ├── DLPanToolbox/
        │   ├── Evaluation.md
        │   ├── Example.md
        │   ├── PreProcess.md
        │   └── Simulation.md
        ├── Makefile
        ├── _static/
        │   └── css/
        │       └── readthedocs.css
        ├── _templates/
        │   └── classtemplate.rst
        ├── citation.md
        ├── conf.py
        ├── docutils.conf
        ├── faq.md
        ├── get_started/
        │   ├── Installation.md
        │   └── Introduction.md
        ├── index.rst
        ├── make.bat
        ├── related.md
        └── switch_language.md

================================================
FILE CONTENTS
================================================

================================================
FILE: .gitignore
================================================
*.log
*.pyc
*.xml
*.json
*.mat
*.eps
*.cpython-37.pyc
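# NOTE(review): the leading "/" anchors this pattern to the directory that
# contains this .gitignore. The repository tree has no top-level
# DLPan-Toolbox/ directory, and results/ lives under
# 01-DL-toolbox(Pytorch)/UDL/results/ -- confirm this pattern still matches
# the intended output folder.
/DLPan-Toolbox/01-DL-toolbox(Pytorch)/results/*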
/bak/*


================================================
FILE: 01-DL-toolbox(Pytorch)/LICENSE
================================================
GNU GENERAL PUBLIC LICENSE
                       Version 2, June 1991

 Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 Everyone is permitted to copy and distribute verbatim copies
 of this license document, but changing it is not allowed.

                            Preamble

  The licenses for most software are designed to take away your
freedom to share and change it.  By contrast, the GNU General Public
License is intended to guarantee your freedom to share and change free
software--to make sure the software is free for all its users.  This
General Public License applies to most of the Free Software
Foundation's software and to any other program whose authors commit to
using it.  (Some other Free Software Foundation software is covered by
the GNU Lesser General Public License instead.)  You can apply it to
your programs, too.

  When we speak of free software, we are referring to freedom, not
price.  Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
this service if you wish), that you receive source code or can get it
if you want it, that you can change the software or use pieces of it
in new free programs; and that you know you can do these things.

  To protect your rights, we need to make restrictions that forbid
anyone to deny you these rights or to ask you to surrender the rights.
These restrictions translate to certain responsibilities for you if you
distribute copies of the software, or if you modify it.

  For example, if you distribute copies of such a program, whether
gratis or for a fee, you must give the recipients all the rights that
you have.  You must make sure that they, too, receive or can get the
source code.  And you must show them these terms so they know their
rights.

  We protect your rights with two steps: (1) copyright the software, and
(2) offer you this license which gives you legal permission to copy,
distribute and/or modify the software.

  Also, for each author's protection and ours, we want to make certain
that everyone understands that there is no warranty for this free
software.  If the software is modified by someone else and passed on, we
want its recipients to know that what they have is not the original, so
that any problems introduced by others will not reflect on the original
authors' reputations.

  Finally, any free program is threatened constantly by software
patents.  We wish to avoid the danger that redistributors of a free
program will individually obtain patent licenses, in effect making the
program proprietary.  To prevent this, we have made it clear that any
patent must be licensed for everyone's free use or not licensed at all.

  The precise terms and conditions for copying, distribution and
modification follow.

                    GNU GENERAL PUBLIC LICENSE
   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION

  0. This License applies to any program or other work which contains
a notice placed by the copyright holder saying it may be distributed
under the terms of this General Public License.  The "Program", below,
refers to any such program or work, and a "work based on the Program"
means either the Program or any derivative work under copyright law:
that is to say, a work containing the Program or a portion of it,
either verbatim or with modifications and/or translated into another
language.  (Hereinafter, translation is included without limitation in
the term "modification".)  Each licensee is addressed as "you".

Activities other than copying, distribution and modification are not
covered by this License; they are outside its scope.  The act of
running the Program is not restricted, and the output from the Program
is covered only if its contents constitute a work based on the
Program (independent of having been made by running the Program).
Whether that is true depends on what the Program does.

  1. You may copy and distribute verbatim copies of the Program's
source code as you receive it, in any medium, provided that you
conspicuously and appropriately publish on each copy an appropriate
copyright notice and disclaimer of warranty; keep intact all the
notices that refer to this License and to the absence of any warranty;
and give any other recipients of the Program a copy of this License
along with the Program.

You may charge a fee for the physical act of transferring a copy, and
you may at your option offer warranty protection in exchange for a fee.

  2. You may modify your copy or copies of the Program or any portion
of it, thus forming a work based on the Program, and copy and
distribute such modifications or work under the terms of Section 1
above, provided that you also meet all of these conditions:

    a) You must cause the modified files to carry prominent notices
    stating that you changed the files and the date of any change.

    b) You must cause any work that you distribute or publish, that in
    whole or in part contains or is derived from the Program or any
    part thereof, to be licensed as a whole at no charge to all third
    parties under the terms of this License.

    c) If the modified program normally reads commands interactively
    when run, you must cause it, when started running for such
    interactive use in the most ordinary way, to print or display an
    announcement including an appropriate copyright notice and a
    notice that there is no warranty (or else, saying that you provide
    a warranty) and that users may redistribute the program under
    these conditions, and telling the user how to view a copy of this
    License.  (Exception: if the Program itself is interactive but
    does not normally print such an announcement, your work based on
    the Program is not required to print an announcement.)

These requirements apply to the modified work as a whole.  If
identifiable sections of that work are not derived from the Program,
and can be reasonably considered independent and separate works in
themselves, then this License, and its terms, do not apply to those
sections when you distribute them as separate works.  But when you
distribute the same sections as part of a whole which is a work based
on the Program, the distribution of the whole must be on the terms of
this License, whose permissions for other licensees extend to the
entire whole, and thus to each and every part regardless of who wrote it.

Thus, it is not the intent of this section to claim rights or contest
your rights to work written entirely by you; rather, the intent is to
exercise the right to control the distribution of derivative or
collective works based on the Program.

In addition, mere aggregation of another work not based on the Program
with the Program (or with a work based on the Program) on a volume of
a storage or distribution medium does not bring the other work under
the scope of this License.

  3. You may copy and distribute the Program (or a work based on it,
under Section 2) in object code or executable form under the terms of
Sections 1 and 2 above provided that you also do one of the following:

    a) Accompany it with the complete corresponding machine-readable
    source code, which must be distributed under the terms of Sections
    1 and 2 above on a medium customarily used for software interchange; or,

    b) Accompany it with a written offer, valid for at least three
    years, to give any third party, for a charge no more than your
    cost of physically performing source distribution, a complete
    machine-readable copy of the corresponding source code, to be
    distributed under the terms of Sections 1 and 2 above on a medium
    customarily used for software interchange; or,

    c) Accompany it with the information you received as to the offer
    to distribute corresponding source code.  (This alternative is
    allowed only for noncommercial distribution and only if you
    received the program in object code or executable form with such
    an offer, in accord with Subsection b above.)

The source code for a work means the preferred form of the work for
making modifications to it.  For an executable work, complete source
code means all the source code for all modules it contains, plus any
associated interface definition files, plus the scripts used to
control compilation and installation of the executable.  However, as a
special exception, the source code distributed need not include
anything that is normally distributed (in either source or binary
form) with the major components (compiler, kernel, and so on) of the
operating system on which the executable runs, unless that component
itself accompanies the executable.

If distribution of executable or object code is made by offering
access to copy from a designated place, then offering equivalent
access to copy the source code from the same place counts as
distribution of the source code, even though third parties are not
compelled to copy the source along with the object code.

  4. You may not copy, modify, sublicense, or distribute the Program
except as expressly provided under this License.  Any attempt
otherwise to copy, modify, sublicense or distribute the Program is
void, and will automatically terminate your rights under this License.
However, parties who have received copies, or rights, from you under
this License will not have their licenses terminated so long as such
parties remain in full compliance.

  5. You are not required to accept this License, since you have not
signed it.  However, nothing else grants you permission to modify or
distribute the Program or its derivative works.  These actions are
prohibited by law if you do not accept this License.  Therefore, by
modifying or distributing the Program (or any work based on the
Program), you indicate your acceptance of this License to do so, and
all its terms and conditions for copying, distributing or modifying
the Program or works based on it.

  6. Each time you redistribute the Program (or any work based on the
Program), the recipient automatically receives a license from the
original licensor to copy, distribute or modify the Program subject to
these terms and conditions.  You may not impose any further
restrictions on the recipients' exercise of the rights granted herein.
You are not responsible for enforcing compliance by third parties to
this License.

  7. If, as a consequence of a court judgment or allegation of patent
infringement or for any other reason (not limited to patent issues),
conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License.  If you cannot
distribute so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you
may not distribute the Program at all.  For example, if a patent
license would not permit royalty-free redistribution of the Program by
all those who receive copies directly or indirectly through you, then
the only way you could satisfy both it and this License would be to
refrain entirely from distribution of the Program.

If any portion of this section is held invalid or unenforceable under
any particular circumstance, the balance of the section is intended to
apply and the section as a whole is intended to apply in other
circumstances.

It is not the purpose of this section to induce you to infringe any
patents or other property right claims or to contest validity of any
such claims; this section has the sole purpose of protecting the
integrity of the free software distribution system, which is
implemented by public license practices.  Many people have made
generous contributions to the wide range of software distributed
through that system in reliance on consistent application of that
system; it is up to the author/donor to decide if he or she is willing
to distribute software through any other system and a licensee cannot
impose that choice.

This section is intended to make thoroughly clear what is believed to
be a consequence of the rest of this License.

  8. If the distribution and/or use of the Program is restricted in
certain countries either by patents or by copyrighted interfaces, the
original copyright holder who places the Program under this License
may add an explicit geographical distribution limitation excluding
those countries, so that distribution is permitted only in or among
countries not thus excluded.  In such case, this License incorporates
the limitation as if written in the body of this License.

  9. The Free Software Foundation may publish revised and/or new versions
of the General Public License from time to time.  Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.

Each version is given a distinguishing version number.  If the Program
specifies a version number of this License which applies to it and "any
later version", you have the option of following the terms and conditions
either of that version or of any later version published by the Free
Software Foundation.  If the Program does not specify a version number of
this License, you may choose any version ever published by the Free Software
Foundation.

  10. If you wish to incorporate parts of the Program into other free
programs whose distribution conditions are different, write to the author
to ask for permission.  For software which is copyrighted by the Free
Software Foundation, write to the Free Software Foundation; we sometimes
make exceptions for this.  Our decision will be guided by the two goals
of preserving the free status of all derivatives of our free software and
of promoting the sharing and reuse of software generally.

                            NO WARRANTY

  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
REPAIR OR CORRECTION.

  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
POSSIBILITY OF SUCH DAMAGES.

                     END OF TERMS AND CONDITIONS

            How to Apply These Terms to Your New Programs

  If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.

  To do so, attach the following notices to the program.  It is safest
to attach them to the start of each source file to most effectively
convey the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.

    <one line to give the program's name and a brief idea of what it does.>
    Copyright (C) <year>  <name of author>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

Also add information on how to contact you by electronic and paper mail.

If the program is interactive, make it output a short notice like this
when it starts in an interactive mode:

    Gnomovision version 69, Copyright (C) year name of author
    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
    This is free software, and you are welcome to redistribute it
    under certain conditions; type `show c' for details.

The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License.  Of course, the commands you use may
be called something other than `show w' and `show c'; they could even be
mouse-clicks or menu items--whatever suits your program.

You should also get your employer (if you work as a programmer) or your
school, if any, to sign a "copyright disclaimer" for the program, if
necessary.  Here is a sample; alter the names:

  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
  `Gnomovision' (which makes passes at compilers) written by James Hacker.

  <signature of Ty Coon>, 1 April 1989
  Ty Coon, President of Vice

This General Public License does not permit incorporating your program into
proprietary programs.  If your program is a subroutine library, you may
consider it more useful to permit linking proprietary applications with the
library.  If this is what you want to do, use the GNU Lesser General
Public License instead of this License.


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/AutoDL/__init__.py
================================================
# GPL License
# Copyright (C) 2021 , UESTC
# All Rights Reserved
# @Author  : Xiao Wu, LiangJian Deng
# @reference:
from UDL.Basis.python_sub_class import PanSharpeningModel, TaskDispatcher, ModelDispatcher
import UDL.Basis.option

def build_model(arch, task, cfg=None):
    """Build the model bundle for ``task``.

    Only the ``"pansharpening"`` task is handled; the result of
    ``PanSharpeningModel.build_model(cfg)`` is returned as-is.

    Args:
        arch: Accepted for interface compatibility; not read by this function.
        task (str): Name of the task to build a model for.
        cfg: Configuration object forwarded to the task's model builder.

    Raises:
        NotImplementedError: If ``task`` is anything other than
            ``"pansharpening"``.
    """
    # Guard clause: reject unsupported tasks before importing anything.
    if task != "pansharpening":
        raise NotImplementedError(f"It's not supported in {task}")

    # Imported lazily so the task package is only loaded when it is needed.
    from UDL.pansharpening.models import PanSharpeningModel as MODELS

    return MODELS.build_model(cfg)


def getDataSession(cfg):
    """Create the data session that matches ``cfg.task``.

    Args:
        cfg: Configuration object; its ``task`` attribute selects the session
            class and the whole object is passed to the session constructor.

    Returns:
        A ``PansharpeningSession`` built from ``cfg``.

    Raises:
        NotImplementedError: If ``cfg.task`` is not ``"pansharpening"``.
    """
    supported_tasks = ("pansharpening",)
    if cfg.task not in supported_tasks:
        raise NotImplementedError

    # Imported lazily so the task package is only loaded when it is needed.
    from UDL.pansharpening.common.psdata import PansharpeningSession as DataSession

    return DataSession(cfg)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/AutoDL/trainer.py
================================================
# GPL License
# Copyright (C) UESTC
# All Rights Reserved
# @Author  : Xiao Wu, LiangJian Deng
# @reference:
import argparse
import copy
import os
import os.path as osp
import warnings
import random
import numpy as np
import torch
import torch.distributed as dist
import time

# Wall-clock timestamp captured when this module is imported.
# NOTE(review): `tic` is not read anywhere else in this module — confirm it is still needed.
tic = time.time()
# 1.14s  (presumably a measured import cost left over from profiling — TODO confirm)

import sys
# Both entries are relative paths, so Python resolves them against the current
# working directory at import time; the `UDL` and `mmcv` imports below therefore
# only work through these entries when the script is launched from a directory
# where '../..' and '../mmcv' point at the intended packages.
sys.path.append('../..')

sys.path.append('../mmcv')

from UDL.AutoDL import build_model, getDataSession, ModelDispatcher
from UDL.Basis.auxiliary import init_random_seed, set_random_seed
from mmcv.utils.logging import print_log, create_logger
# 1.5s
from mmcv.runner import init_dist, find_latest_checkpoint
from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
from mmcv.runner import (DistSamplerSeedHook, EpochBasedRunner,
                         Fp16OptimizerHook, OptimizerHook, build_optimizer,
                         build_runner, get_dist_info)


# 10s
# from mmdet.datasets import (build_dataloader, build_dataset,
#                             replace_ImageToTensor)

def trainer(cfg, logger,
            distributed=False,
            meta=None):
    """Build model, runner, hooks and data loaders from ``cfg`` and run the workflow.

    Steps, in order:
      1. Build ``model, criterion, optimizer, scheduler`` with ``build_model``
         and call ``model.init_weights()`` when the model provides it.
      2. Create the data session with ``getDataSession``.
      3. Normalise ``cfg.eval`` / ``cfg.workflow``.
      4. Wrap the model in ``MMDistributedDataParallel`` or ``MMDataParallel``.
      5. Build the runner and register its hooks.
      6. Create one data loader per workflow mode.
      7. Resume / load a checkpoint and call ``runner.run``.

    Args:
        cfg: Configuration object. It is used with attribute access, ``.get``
            and ``in`` (presumably an mmcv ``Config`` — TODO confirm). Several
            fields are written back (``eval``, ``workflow``, ``runner``,
            ``resume_from``).
        logger: Logger handed to the runner.
        distributed (bool): Selects distributed wrapping and hooks.
        meta: Passed through to ``build_runner`` unchanged.

    Returns:
        None. The function ends by calling ``runner.run``.
    """

    model, criterion, optimizer, scheduler = build_model(cfg.arch, cfg.task, cfg)


    if hasattr(model, 'init_weights'):
        model.init_weights()


    sess = getDataSession(cfg)

    # Evaluation mode forces a validation-only workflow; conversely, a workflow
    # without any 'train' stage switches cfg.eval on.
    if cfg.eval:
        cfg.workflow = [('val', 1)]
    if not any('train' in mode for mode, _ in cfg.workflow):
        cfg.eval = True

    # put model on gpus
    if distributed:
        find_unused_parameters = cfg.get('find_unused_parameters', False)
        # Sets the `find_unused_parameters` parameter in
        # torch.nn.parallel.DistributedDataParallel
        model = MMDistributedDataParallel(
            model.cuda(),
            device_ids=[torch.cuda.current_device()],
            broadcast_buffers=False,
            find_unused_parameters=find_unused_parameters)
    else:
        # An object without a `train` attribute is treated as a container whose
        # `.model` holds the network(s) to wrap (presumably not an nn.Module
        # itself — TODO confirm against build_model's return type).
        if not hasattr(model, 'train'):
            if isinstance(model.model, dict):
                # Each named sub-network is wrapped individually, in place.
                for name, m in model.model.items():
                    model.model[name] = MMDataParallel(m, device_ids=cfg.gpu_ids)
            else:
                model.model = MMDataParallel(model.model, device_ids=cfg.gpu_ids)
        else:
            model = MMDataParallel(model, device_ids=cfg.gpu_ids)

    # An `optimizer` entry in cfg replaces the optimizer returned by build_model.
    if cfg.get('optimizer', None) is not None:
        optimizer = build_optimizer(model, cfg.optimizer)

    if 'runner' not in cfg:
        # Fall back to an epoch-based runner driven by cfg.epochs.
        cfg.runner = {
            'type': 'EpochBasedRunner',
            'max_epochs': cfg.epochs  # argparser
        }
        warnings.warn(
            'config is now expected to have a `runner` section, '
            'please set `runner` in your config.', UserWarning)
    else:
        # cfg.epochs overrides the runner's max_epochs unless the runner is
        # configured by iterations.
        if 'epochs' in cfg and 'max_iters' not in cfg.runner:
            cfg.runner['max_epochs'] = cfg.epochs
            # assert cfg.epochs == cfg.runner['max_epochs'], print(cfg.epochs, cfg.runner['max_epochs'])

    runner = build_runner(
        cfg.runner,
        default_args=dict(
            model=model,
            optimizer=optimizer,
            work_dir=cfg.work_dir,
            logger=logger,
            meta=meta,
            # Extra options bundled for the runner; results go to <work_dir>/results.
            opt_cfg={'print_freq': cfg.print_freq,
                     'accumulated_step': cfg.accumulated_step,
                     'clip_max_norm': cfg.clip_max_norm,
                     'dataset': cfg.dataset,
                     'img_range': cfg.img_range,
                     'metrics': cfg.metrics,
                     'save_fmt': cfg.save_fmt,
                     'mode': cfg.mode,
                     'eval': cfg.eval,
                     'save_dir': cfg.work_dir + "/results"}))

    # an ugly workaround to make .log and .log.json filenames the same
    # runner.timestamp = timestamp

    # fp16 setting
    # NOTE(review): the first two branches read cfg.optimizer_config as an
    # attribute, while the last one tolerates its absence via cfg.get — confirm
    # that the config always defines `optimizer_config` when fp16/distributed is used.
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        optimizer_config = Fp16OptimizerHook(
            **cfg.optimizer_config, **fp16_cfg, distributed=distributed)
    elif distributed and 'type' not in cfg.optimizer_config:
        optimizer_config = OptimizerHook(**cfg.optimizer_config)
    else:
        optimizer_config = cfg.get('optimizer_config', None)

    ############################################################
    # register training hooks
    ############################################################
    # Three registration paths:
    #   * cfg has a `config` entry  -> hooks come from the cfg sections;
    #   * no `log_config` and the first workflow stage is not 'simple_train'
    #     -> a default hook set is registered here;
    #   * otherwise -> only a checkpoint hook is registered.
    if cfg.get('config', None) is not None:
        # The bare string below is an inert example of the expected cfg fields.
        '''
        optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001)
        optimizer_config = dict(grad_clip=None)
        lr_config = dict(policy='step', step=[100, 150])
        checkpoint_config = dict(interval=1)
        log_config = dict(
            interval=100,
            hooks=[
                dict(type='TextLoggerHook'),
                # dict(type='TensorboardLoggerHook')
            ])
        '''
        runner.register_training_hooks(
            cfg.lr_config,
            optimizer_config,
            cfg.checkpoint_config,
            cfg.log_config,
            cfg.get('momentum_config', None),
            custom_hooks_config=cfg.get('custom_hooks', None))

    elif cfg.get('log_config', None) is None and len(cfg.workflow) and cfg.workflow[0][0] != 'simple_train':
        if cfg.mode == 'nni':
            runner.register_custom_hooks({'type': 'NNIHook', 'priority': 'very_low'})
        if scheduler is not None:
            # The policy name is the scheduler's class name minus its last two
            # characters (e.g. 'StepLR' -> 'Step').
            # NOTE(review): reads scheduler.step_size, so this assumes a
            # scheduler that exposes that attribute — confirm.
            runner.register_lr_hook(dict(policy=scheduler.__class__.__name__[:-2], step=scheduler.step_size))
        runner.register_checkpoint_hook(
            dict(type='ModelCheckpoint', indicator='loss', save_top_k=cfg.save_top_k, print_freq=cfg.save_print_freq))
        runner.register_optimizer_hook(dict(grad_clip=10))  # ExternOptimizer
        runner.register_timer_hook(dict(type='IterTimerHook'))
        log_config = [dict(type='TextLoggerHook')]
        if cfg.use_tfb:
            log_config.append(dict(type='TensorboardLoggerHook'))
        runner.register_logger_hooks(dict(
            interval=cfg.print_freq,
            hooks=log_config))

    else:
        runner.register_checkpoint_hook(dict(type='ModelCheckpoint', indicator='loss'))

    if distributed:
        if isinstance(runner, EpochBasedRunner):
            runner.register_hook(DistSamplerSeedHook())

    # Maps workflow mode name -> data loader; handed to runner.run below.
    data_loaders = {}

    ############################################################
    # load data
    ############################################################

    for flow in cfg.workflow:
        mode, _ = flow
        if 'val' in mode:
            # cfg.dataset = cfg.dataset + '_OrigScale_multiExm1.h5'
            # cfg.dataset = cfg.dataset + '_multiExm1.h5'

            # cfg.dataset is indexed by the workflow mode name here.
            eval_loader, eval_sampler = sess.get_eval_dataloader(cfg.dataset[mode], distributed)

            eval_cfg = cfg.get('evaluation', {})
            eval_cfg['by_epoch'] = cfg.runner['type'] != 'IterBasedRunner'
            from mmcv.runner import EvalHook, DistEvalHook
            eval_hook = DistEvalHook if distributed else EvalHook
            # In this PR (https://github.com/open-mmlab/mmcv/pull/1193), the
            # priority of IterTimerHook has been modified from 'NORMAL' to 'LOW'.
            # 'simple_val' gets a loader but no evaluation hook.
            if mode != 'simple_val':
                runner.register_hook(
                    eval_hook(eval_loader, **eval_cfg), priority='LOW')

            data_loaders[mode] = eval_loader
            # if len(cfg.workflow) == 0:
            #     cfg.workflow.append(('val', 1))

        if 'train' in mode:
            train_loader, train_sampler = sess.get_dataloader(cfg.dataset[mode], distributed)
            if cfg.once_epoch:
                # Materialise every batch once and hand out a one-shot iterator.
                train_loader = iter(list(train_loader))
            data_loaders[mode] = train_loader

            # NOTE(review): this appears unreachable — the loop body only runs
            # while cfg.workflow has at least one entry. Confirm intent.
            if len(cfg.workflow) == 0:
                cfg.workflow.append(('simple_train', 1))
    ############################################################
    # load model
    ############################################################

    # With no explicit resume_from and auto_resume enabled, pick up the latest
    # checkpoint found in the work directory.
    resume_from = None
    if cfg.get('resume_from', None) is None and cfg.get('auto_resume'):
        resume_from = find_latest_checkpoint(cfg.work_dir)
    if resume_from is not None:
        cfg.resume_from = resume_from

    # if cfg.get('resume_from', None):
    # NOTE(review): resume is called unconditionally and reads cfg.resume_from
    # as an attribute — confirm runner.resume tolerates an empty value.
    runner.resume(cfg.resume_from, cfg.resume_mode, cfg.reset_lr, cfg.lr)
    # NOTE(review): load_from is only honoured when resume_from is also set —
    # confirm this is the intended precedence.
    if cfg.get('load_from', None) and cfg.get('resume_from', None) is not None:
        runner.load_checkpoint(cfg.load_from, cfg.resume_mode)

    ############################################################
    # run train/val/test
    ############################################################
    runner.run(data_loaders, cfg.workflow)


def main(cfg):
    """Prepare the runtime environment described by ``cfg`` and start ``trainer``.

    The distributed backend is initialised first (when a launcher is set),
    then the logger and output directory are created, the random seed is
    fixed, and finally ``trainer`` is invoked.

    Args:
        cfg: Configuration object. ``gpu_ids`` (distributed only), ``out_dir``
            and ``work_dir`` are overwritten here.
    """
    # The distributed environment must exist before the logger is created,
    # because the logger depends on the dist info.
    distributed = not (cfg.launcher == 'none')
    if distributed:
        init_dist(cfg.launcher, **cfg.dist_params)
        # In distributed mode gpu_ids is re-derived from the world size.
        _rank, world_size = get_dist_info()
        cfg.gpu_ids = range(world_size)

    logger, _out_dir, save_dir, _tfb_dir = create_logger(cfg, cfg.experimental_desc, 0)
    # Both directory fields point at the model save directory.
    cfg.out_dir = save_dir
    cfg.work_dir = save_dir

    seed = init_random_seed(cfg.seed)
    print_log(f'Set random seed to {seed}', logger=logger)
    set_random_seed(seed)

    # if cfg.checkpoint_config is not None:
    #     # save mmdet version, config file content and class names in
    #     # checkpoints as meta data
    #     cfg.checkpoint_config.meta = dict(
    #         mmdet_version=__version__ + get_git_hash()[:7],
    #         CLASSES=datasets[0].CLASSES)
    # add an attribute for visualization convenience

    trainer(cfg, logger, distributed=distributed, meta={})


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/Basis/auxiliary/__init__.py
================================================
from ..auxiliary.utils import AverageMeter, accuracy, MetricLogger, SmoothedValue, set_random_seed, init_random_seed, show_memory_info

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/Basis/auxiliary/base.py
================================================
from nvidia.dali.plugin.pytorch import DALIGenericIterator


class DALIDataloader(DALIGenericIterator):
    """``DALIGenericIterator`` wrapper that yields ``[data, label]`` pairs.

    Args:
        pipeline: Forwarded to the parent as ``pipelines``.
        size: Forwarded to the parent as ``size``.
        batch_size (int): Batch size used by ``__len__``.
        output_map (list): Names of the two outputs; index 0 is treated as
            the data key and index 1 as the label key.
        auto_reset (bool): Forwarded to the parent.
        onehot_label (bool): When true, labels are squeezed and cast to long.
    """

    def __init__(self, pipeline, size, batch_size, output_map=["data", "label"], auto_reset=True, onehot_label=False):
        # `size` is only forwarded to the parent; `__len__` reads `self.size`,
        # which is presumably exposed by DALIGenericIterator — TODO confirm
        # for the installed DALI version.
        self.output_map = output_map
        self.onehot_label = onehot_label
        self.batch_size = batch_size
        super().__init__(pipelines=pipeline, size=size, auto_reset=auto_reset, output_map=output_map)

    def __next__(self):
        """Return the next ``[data, label]`` pair."""
        # A pending first batch, if any, is handed out once and then cleared.
        pending = self._first_batch
        if pending is not None:
            self._first_batch = None
            return pending

        # Only the first element of the parent's output is used.
        sample = super().__next__()[0]
        inputs = sample[self.output_map[0]]
        targets = sample[self.output_map[1]]
        if self.onehot_label:
            targets = targets.squeeze().long()
        return [inputs, targets]

    def __len__(self):
        """Number of batches: ``size / batch_size`` rounded up."""
        full_batches, leftover = divmod(self.size, self.batch_size)
        return full_batches if leftover == 0 else full_batches + 1

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/Basis/auxiliary/fp16_utils.py
================================================
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch._utils import _flatten_dense_tensors, _unflatten_dense_tensors


class tofp16(nn.Module):
    """
    Parameter-free module whose forward pass casts its input to half
    precision, i.e. ``forward(input)`` returns ``input.half()``.
    """

    def __init__(self):
        super().__init__()

    def forward(self, input):
        half_input = input.half()
        return half_input


def BN_convert_float(module):
    """
    Helper for network_to_half(), kept for legacy purposes.

    Every affine batch-norm layer found in ``module`` (the module itself
    or any descendant) is cast back to float; all other layers are left
    untouched. The same ``module`` object is returned.
    """
    batchnorm_base = torch.nn.modules.batchnorm._BatchNorm
    # modules() yields `module` itself followed by all of its descendants.
    for layer in module.modules():
        if isinstance(layer, batchnorm_base) and layer.affine is True:
            layer.float()
    return module


def network_to_half(network):
    """
    Legacy helper: returns ``nn.Sequential(tofp16(), network)`` where the
    network has been cast to half precision and then passed through
    BN_convert_float(). FP16Model is the recommended replacement.
    """
    input_cast = tofp16()
    half_network = BN_convert_float(network.half())
    return nn.Sequential(input_cast, half_network)


def convert_module(module, dtype):
    """
    Cast the floating-point parameters, their gradients, and the
    floating-point buffers owned directly by ``module`` to ``dtype``.
    Sub-modules are not visited and non-floating-point tensors are left
    unchanged.
    """
    for p in module.parameters(recurse=False):
        if p is None:
            continue
        if p.data.dtype.is_floating_point:
            p.data = p.data.to(dtype=dtype)
        # Gradients are converted independently of the parameter's own dtype.
        has_float_grad = p._grad is not None and p._grad.data.dtype.is_floating_point
        if has_float_grad:
            p._grad.data = p._grad.data.to(dtype=dtype)

    for b in module.buffers(recurse=False):
        if b is None:
            continue
        if b.data.dtype.is_floating_point:
            b.data = b.data.to(dtype=dtype)


def convert_network(network, dtype):
    """
    Cast the parameters and buffers of every module in ``network`` to
    ``dtype``, skipping affine batch-norm layers. RNN modules have
    ``flatten_parameters()`` called after conversion. Returns ``network``.
    """
    batchnorm_base = torch.nn.modules.batchnorm._BatchNorm
    for layer in network.modules():
        is_affine_bn = isinstance(layer, batchnorm_base) and layer.affine is True
        if not is_affine_bn:
            convert_module(layer, dtype)
            if isinstance(layer, (torch.nn.RNNBase, torch.nn.modules.rnn.RNNBase)):
                layer.flatten_parameters()
    return network


class FP16Model(nn.Module):
    """
    Wraps a network converted to half precision with convert_network()
    (which skips affine batch-norm layers) and casts every positional
    input to half before calling it.
    """

    def __init__(self, network):
        super().__init__()
        half_network = convert_network(network, dtype=torch.half)
        self.network = half_network

    def forward(self, *inputs):
        half_inputs = tuple(map(lambda tensor: tensor.half(), inputs))
        return self.network(*half_inputs)


def backwards_debug_hook(grad):
    """Debug hook that always raises RuntimeError; ``grad`` is ignored."""
    message = "master_params recieved a gradient in the backward pass!"
    raise RuntimeError(message)

def prep_param_lists(model, flat_master=False):
    """
    Creates a list of FP32 master parameters for a given model, as in
    `Training Neural Networks with Mixed Precision:  Real Examples`_.
    Args:
        model (torch.nn.Module): Existing Pytorch model
        flat_master (bool, optional, default=False):  Flatten the master parameters into a single tensor, as a performance optimization.
    Returns:
        A tuple (``model_params``, ``master_params``). ``model_params`` is a list of the model's parameters (only those with ``requires_grad=True``) for later use with :func:`model_grads_to_master_grads` and :func:`master_params_to_model_params`.  ``master_params`` is a list of FP32 master parameters.  If ``flat_master=True``, ``master_params`` will be a list with one element whose ``.grad`` is pre-allocated and zero-filled.
    Example::
        model_params, master_params = prep_param_lists(model)
    .. warning::
        Currently, if ``flat_master=True``, all the model's parameters must be the same type.  If the model has parameters of different types, use ``flat_master=False``, or use :class:`FP16_Optimizer`.
    .. _`Training Neural Networks with Mixed Precision:  Real Examples`:
        http://on-demand.gputechconf.com/gtc/2018/video/S81012/
    """
    model_params = [param for param in model.parameters() if param.requires_grad]

    if flat_master:
        # Give the user some more useful error messages
        try:
            # flatten_dense_tensors returns a contiguous flat array.
            # http://pytorch.org/docs/master/_modules/torch/_utils.html
            master_params = _flatten_dense_tensors([param.data for param in model_params]).float()
        except Exception:
            # The original error is re-raised; the print only adds a hint.
            print("Error in prep_param_lists:  model may contain a mixture of parameters "
                  "of different types.  Use flat_master=False, or use F16_Optimizer.")
            raise
        master_params = torch.nn.Parameter(master_params)
        master_params.requires_grad = True
        # master_params.register_hook(backwards_debug_hook)
        if master_params.grad is None:
            # Zero-fill instead of leaving uninitialised memory, so a step taken
            # before the first gradient copy cannot apply garbage values.
            master_params.grad = torch.zeros_like(master_params.data)
        return model_params, [master_params]

    # One detached FP32 copy per model parameter.
    master_params = [param.clone().float().detach() for param in model_params]
    for param in master_params:
        param.requires_grad = True
    return model_params, master_params


def model_grads_to_master_grads(model_params, master_params, flat_master=False):
    """
    Copy model gradients to master gradients.
    Args:
        model_params:  List of model parameters created by :func:`prep_param_lists`.
        master_params:  List of FP32 master parameters created by :func:`prep_param_lists`.  If ``master_params`` was created with ``flat_master=True``, ``flat_master=True`` should also be supplied to :func:`model_grads_to_master_grads`.

    In the non-flat case a model parameter without a gradient clears the
    corresponding master gradient (sets it to ``None``).
    """
    if flat_master:
        # The flattening may incur one more deep copy than is necessary.
        master_params[0].grad.data.copy_(
            _flatten_dense_tensors([p.grad.data for p in model_params]))
    else:
        for model, master in zip(model_params, master_params):
            if model.grad is not None:
                if master.grad is None:
                    # empty_like keeps the exact shape, including 0-dim
                    # parameters, for which ``new(*size)`` would produce a
                    # tensor of shape [0] that cannot be assigned as grad.
                    master.grad = torch.empty_like(master.data)
                master.grad.data.copy_(model.grad.data)
            else:
                master.grad = None


def master_params_to_model_params(model_params, master_params, flat_master=False):
    """
    Write the master parameter values back into the model parameters.
    Args:
        model_params:  List of model parameters created by :func:`prep_param_lists`.
        master_params:  List of FP32 master parameters created by :func:`prep_param_lists`.  Pass ``flat_master=True`` here if it was used when the master list was created.
    """
    if flat_master:
        # Split the single flat master tensor into views shaped like the model params.
        sources = _unflatten_dense_tensors(master_params[0].data, model_params)
    else:
        sources = (master.data for master in master_params)

    for target, source in zip(model_params, sources):
        target.data.copy_(source)

# Backward compatibility fixes

def to_python_float(t):
    """Return ``t.item()`` when ``t`` has an ``item`` attribute, otherwise ``t[0]``."""
    return t.item() if hasattr(t, 'item') else t[0]

# Major/minor version numbers taken from the first two dot-separated fields of torch.__version__.
TORCH_MAJOR, TORCH_MINOR = (int(field) for field in torch.__version__.split('.')[:2])
# torch <= 0.4 exposes ``clip_grad_norm``; newer releases use the in-place name
# ``clip_grad_norm_``. Only the selected attribute is looked up.
clip_grad_norm = getattr(
    torch.nn.utils,
    'clip_grad_norm' if (TORCH_MAJOR == 0 and TORCH_MINOR <= 4) else 'clip_grad_norm_',
)

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/Basis/auxiliary/torchstat/__init__.py
================================================
# Package metadata.
__copyright__ = 'Copyright (C) 2018 Swall0w'
__version__ = '0.0.7'
__author__ = 'Swall0w'
__url__ = 'https://github.com/Swall0w/torchstat'

# Re-export the public helpers from the sub-modules.
# NOTE(review): these are absolute imports, so the directory containing the
# ``torchstat`` package has to be importable (on sys.path) — confirm against
# how the toolbox sets up its path.
from torchstat.compute_memory import compute_memory
from torchstat.compute_madd import compute_madd
from torchstat.compute_flops import compute_flops
from torchstat.stat_tree import StatTree, StatNode
from torchstat.model_hook import ModelHook
from torchstat.reporter import report_format
from torchstat.statistics import stat, ModelStat

# Names exported by ``from torchstat import *``.
# NOTE(review): '__main__' is listed here although no such name is bound above.
__all__ = ['report_format', 'StatTree', 'StatNode', 'compute_madd',
           'compute_flops', 'ModelHook', 'stat', 'ModelStat', '__main__',
           'compute_memory']


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/Basis/auxiliary/torchstat/__main__.py
================================================
from torchstat import stat
import argparse
import importlib.util
import torch


def arg():
    """
    Parse the command line options of the statistics tool.

    Returns:
        argparse.Namespace with ``file``, ``model`` and ``size`` attributes
        (``size`` defaults to ``'3x224x224'``).
    """
    cli = argparse.ArgumentParser(description='Torch model statistics')
    option_table = (
        (('--file', '-f'), dict(type=str, help='Module file.')),
        (('--model', '-m'), dict(type=str, help='Model name')),
        (('--size', '-s'), dict(type=str, default='3x224x224',
                                help='Input size. channels x height x width (default: 3x224x224)')),
    )
    for flags, settings in option_table:
        cli.add_argument(*flags, **settings)
    return cli.parse_args()


def main():
    """
    Command line entry point: load a model class from a file and print its statistics.

    The file given by ``--file`` is executed as a module named ``models`` and the
    attribute named by ``--model`` is instantiated without arguments. If that
    fails, the traceback is printed and the process exits with status 1.
    Otherwise ``--size`` (``'CxHxW'``) is parsed into a tuple of ints and passed
    to ``stat``.
    """
    args = arg()
    try:
        spec = importlib.util.spec_from_file_location('models', args.file)
        module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(module)
        model = getattr(module, args.model)()
    except Exception:
        import sys
        import traceback
        print(f'Tried to import {args.model} from {args.file}. but failed.')
        traceback.print_exc()

        # A bare sys.exit() reports success (status 0); signal the failure instead.
        sys.exit(1)

    input_size = tuple(int(x) for x in args.size.split('x'))
    stat(model, input_size, query_granularity=1)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/Basis/auxiliary/torchstat/compute_flops.py
================================================
import torch.nn as nn
import torch
import numpy as np
import inspect


def compute_flops(module, inp, out):
    """
    Dispatch to the FLOPs counter that matches ``module``.

    Built-in layers are matched with ``isinstance``; the custom attention/GCN
    blocks are matched on their class name. The first matching rule wins.
    Unsupported modules are reported on stdout and counted as 0.
    """
    cls_name = module.__class__.__name__

    if isinstance(module, nn.Conv2d):
        return compute_Conv2d_flops(module, inp, out)
    if isinstance(module, nn.BatchNorm2d):
        return compute_BatchNorm2d_flops(module, inp, out)
    if isinstance(module, nn.LayerNorm) or 'LayerNorm' in cls_name:
        return compute_LayerNorm_flops(module, inp, out)
    if isinstance(module, (nn.AvgPool2d, nn.MaxPool2d)):
        return compute_Pool2d_flops(module, inp, out)
    if isinstance(module, (nn.ReLU, nn.ReLU6, nn.PReLU, nn.ELU, nn.LeakyReLU)):
        return compute_ReLU_flops(module, inp, out)
    if isinstance(module, nn.Linear):
        return compute_Linear_flops(module, inp, out)

    # Project-specific blocks, recognised by class name only.
    if 'SwinTEB' in cls_name:
        return compute_WindowAttention_flops(module, inp, out)
    if 'XCTEB' in cls_name:
        return compute_XCA_flops(module, inp, out)
    if 'MSA' in cls_name:
        return compute_MSA_flops(module, inp, out)
    if 'cGCN' == cls_name:
        return compute_cGCN_flops(module, inp, out)
    if 'sGCN' == cls_name:
        return compute_sGCN_flops(module, inp, out)

    print(f"[Flops]: {cls_name} is not supported!")
    return 0


def compute_cGCN_flops(module, inp, out):
    """
    FLOPs estimate for a ``cGCN`` block.

    ``inp`` must be 4-D; its size is unpacked as (batch, dim, H, W). With
    ``d = dim // 2`` and ``L = H * W`` two products of cost ``d * (d // 2) * L``
    are counted per sample. ``module`` and ``out`` are not used.
    """
    batch_size, channels, height, width = inp.size()
    half_dim = channels // 2
    positions = height * width

    first_product = half_dim * (half_dim // 2) * positions
    second_product = positions * half_dim * (half_dim // 2)

    return batch_size * (first_product + second_product)


def compute_sGCN_flops(module, inp, out):
    """
    FLOPs estimate for an ``sGCN`` block.

    ``inp`` must be 4-D; its size is unpacked as (batch, dim, H, W). With
    ``d = dim // 2`` and ``L = H * W`` two products of cost ``d * d * L`` are
    counted per sample. ``module`` and ``out`` are not used.
    """
    batch_size, channels, height, width = inp.size()
    half_dim = channels // 2
    positions = height * width

    first_product = half_dim * half_dim * positions
    second_product = positions * half_dim * half_dim

    return batch_size * (first_product + second_product)

def compute_Conv2d_flops(module, inp, out):
    """
    FLOPs of a 2-D convolution for the whole batch.

    Counts ``k_h * k_w * in_c * (out_c // groups)`` operations per output
    position, plus ``out_c`` bias operations per position when the layer has a
    bias. ``inp`` and ``out`` must both be 4-D.
    """
    assert isinstance(module, nn.Conv2d)
    assert len(inp.size()) == 4 and len(inp.size()) == len(out.size())

    n_batch, c_in = inp.size()[0], inp.size()[1]
    kernel_h, kernel_w = module.kernel_size
    c_out, h_out, w_out = out.size()[1:]

    # Number of output positions over the whole batch.
    positions = n_batch * h_out * w_out
    per_position = kernel_h * kernel_w * c_in * (c_out // module.groups)

    # Total = (kernel multiply-adds + bias) * number of output positions.
    conv_total = per_position * positions
    bias_total = c_out * positions if module.bias is not None else 0
    return conv_total + bias_total


def compute_BatchNorm2d_flops(module, inp, out):
    """
    FLOPs of a BatchNorm2d layer: one operation per input element, doubled
    when the layer is affine. ``inp`` and ``out`` must both be 4-D.
    """
    assert isinstance(module, nn.BatchNorm2d)
    assert len(inp.size()) == 4 and len(inp.size()) == len(out.size())
    element_count = np.prod(inp.shape)
    return element_count * 2 if module.affine else element_count

def compute_LayerNorm_flops(module, inp, out):
    """
    FLOPs of a layer-norm style module: one operation per input element.

    3-D tensors get a leading dimension added so the 4-D check applies to both
    layouts. ``module`` is not inspected.
    """
    if len(inp.size()) == 3:
        inp = inp.unsqueeze(0)
    if len(out.size()) == 3:
        out = out.unsqueeze(0)
    in_rank, out_rank = len(inp.size()), len(out.size())
    assert in_rank == 4 and in_rank == out_rank
    return np.prod(inp.shape)

def compute_ReLU_flops(module, inp, out):
    """FLOPs of an element-wise activation: one per input element (batch included)."""
    assert isinstance(module, (nn.ReLU, nn.ReLU6, nn.PReLU, nn.ELU, nn.LeakyReLU))
    dims = inp.size()
    element_total = dims[0]
    for extent in dims[1:]:
        element_total *= extent
    return element_total


def compute_Pool2d_flops(module, inp, out):
    """FLOPs of a 2-D max/average pooling layer: one per input element."""
    assert isinstance(module, (nn.MaxPool2d, nn.AvgPool2d))
    in_rank = len(inp.size())
    assert in_rank == 4 and in_rank == len(out.size())
    return np.prod(inp.shape)


def compute_Linear_flops(module, inp, out):
    """
    FLOPs of a fully connected layer for the whole batch.

    The layer is applied to the last dimension of ``inp``, so every leading
    dimension (batch, tokens, ...) multiplies the cost:
    ``rows * in_features * out_features`` with ``rows`` the product of all
    dimensions except the last. For 2-D inputs this equals
    ``batch * in_features * out_features``.

    Args:
        module: the ``nn.Linear`` instance.
        inp: input tensor of shape ``(*, in_features)``.
        out: output tensor of shape ``(*, out_features)``.
    Returns:
        int: number of multiply operations.
    """
    assert isinstance(module, nn.Linear)
    assert len(inp.size()) >= 1 and len(out.size()) >= 1

    in_features = inp.size()[-1]
    out_features = out.size()[-1]

    # Number of independent vectors the layer is applied to.
    rows = 1
    for extent in inp.size()[:-1]:
        rows *= extent

    return rows * in_features * out_features

def compute_Upsample_flops(module, inp, out):
    """
    FLOPs of an ``nn.Upsample`` layer: one per output element.

    The batch size is taken from ``inp`` and multiplied by every non-batch
    dimension of ``out``.

    Returns:
        int: number of output elements over the whole batch.
    """
    assert isinstance(module, nn.Upsample)
    output_elements_count = inp.size()[0]
    for s in out.size()[1:]:
        output_elements_count *= s
    return output_elements_count

def compute_MSA_flops(module, inp, out):
    """
    FLOPs of the two attention matrix products of a multi-head self-attention block.

    Only ``q @ k^T`` and ``attn @ v`` are counted; the counts for the qkv and
    output projections are left out here.

    Args:
        module: attention module whose class is named ``"MSA"`` (input unpacked
            as ``(N, batch, dim)``) or ``"MSA_BNC"`` (input unpacked as
            ``(batch, N, dim)``). The head count is read from ``num_heads`` or,
            failing that, ``n_heads``.
        inp: input tensor, or a tuple whose first element is the input tensor.
        out: unused.
    Returns:
        int: FLOPs for the whole batch.
    Raises:
        ValueError: if the class name is neither ``"MSA"`` nor ``"MSA_BNC"``.
        AttributeError: if the module has neither ``num_heads`` nor ``n_heads``.
    """
    if isinstance(inp, tuple):
        inp = inp[0]

    cls_name = module.__class__.__name__
    if cls_name == "MSA":
        N, batch_size, dim = inp.size()
    elif cls_name == "MSA_BNC":
        batch_size, N, dim = inp.size()
    else:
        raise ValueError(
            f"compute_MSA_flops: unknown input layout for module class {cls_name!r}")

    if hasattr(module, 'num_heads'):
        num_heads = module.num_heads
    else:
        # Modules that name the attribute ``n_heads`` instead.
        num_heads = module.n_heads

    head_dim = dim // num_heads

    flops = 0
    # attn = (q @ k.transpose(-2, -1))
    flops += num_heads * N * head_dim * N
    # x = (attn @ v)
    flops += num_heads * N * N * head_dim

    return batch_size * flops

def compute_WindowAttention_flops(module, inp, out):
    """
    FLOPs of the attention matrix products of a windowed attention block.

    ``inp`` (or its first element when it is a tuple) is unpacked as
    ``(batch, H*W, dim)`` when 3-D, with ``H = W = int(sqrt(H*W))``, or as
    ``(batch, dim, H, W)`` when 4-D. ``module.window_size`` and
    ``module.num_heads`` are read from the module. Only ``q @ k^T`` and
    ``attn @ v`` are counted, per window of ``window_size ** 2`` tokens, then
    multiplied by the number of windows and the batch size. ``out`` is unused.
    """
    if isinstance(inp, tuple):
        inp = inp[0]

    rank = len(inp.size())
    if rank == 3:
        batch_size, token_count, dim = inp.size()
        H = W = int(np.sqrt(token_count))
    elif rank == 4:
        batch_size, dim, H, W = inp.size()

    window = module.window_size
    heads = module.num_heads
    tokens_per_window = window ** 2
    window_count = H * W // tokens_per_window
    head_dim = dim // heads

    # attn = q @ k^T, for one window
    scores = heads * tokens_per_window * head_dim * tokens_per_window
    # x = attn @ v, for one window
    mixing = heads * tokens_per_window * tokens_per_window * head_dim

    return batch_size * window_count * (scores + mixing)


def compute_XCA_flops(module, inp, out):
    """
    FLOPs of the two matrix products of a cross-covariance attention block.

    ``dim`` is read from ``out.size(1)`` and ``inp`` is unpacked as
    ``(batch, _, H, W)``. If the module has a ``window_size`` attribute, tokens
    are grouped into windows of ``window_size ** 2``; otherwise the whole
    ``H * W`` map is one group. Each product costs
    ``num_heads * (dim // num_heads) ** 2 * N`` per group.
    """
    dim = out.size(1)
    batch_size, _, H, W = inp.size()

    if hasattr(module, "window_size"):
        tokens = module.window_size ** 2
        group_count = H * W // tokens
    else:
        tokens = H * W
        group_count = 1

    heads = module.num_heads
    head_dim = dim // heads

    # attn = q @ k^T
    covariance = heads * head_dim * head_dim * tokens
    # x = attn @ v
    mixing = heads * tokens * head_dim * head_dim

    return batch_size * group_count * (covariance + mixing)

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/Basis/auxiliary/torchstat/compute_madd.py
================================================
"""
compute Multiply-Adds(MAdd) of each leaf module
"""

import torch.nn as nn


def compute_Conv2d_madd(module, inp, out):
    """
    Multiply-adds of a 2-D convolution (the batch dimension is not included).

    Each output element needs ``k_h * k_w * (in_c // groups)`` multiplications
    and one fewer addition, plus one addition when the layer has a bias.
    """
    assert isinstance(module, nn.Conv2d)
    assert len(inp.size()) == 4 and len(inp.size()) == len(out.size())

    c_in = inp.size()[1]
    kernel_h, kernel_w = module.kernel_size
    c_out, h_out, w_out = out.size()[1:]
    n_groups = module.groups

    # Operations per output element.
    muls = kernel_h * kernel_w * (c_in // n_groups)
    adds = muls - 1 + (0 if module.bias is None else 1)

    # Output elements produced by one group.
    outputs_per_group = h_out * w_out * (c_out // n_groups)

    return muls * outputs_per_group * n_groups + adds * outputs_per_group * n_groups


def compute_ConvTranspose2d_madd(module, inp, out):
    """
    Multiply-adds of a 2-D transposed convolution (batch not included).

    Uses the same per-element cost as the regular convolution, but scaled by
    the *input* spatial size ``in_h * in_w`` and ``out_c // groups``.
    """
    assert isinstance(module, nn.ConvTranspose2d)
    assert len(inp.size()) == 4 and len(inp.size()) == len(out.size())

    c_in, h_in, w_in = inp.size()[1:]
    kernel_h, kernel_w = module.kernel_size
    c_out, _h_out, _w_out = out.size()[1:]
    n_groups = module.groups

    muls = kernel_h * kernel_w * (c_in // n_groups)
    adds = muls - 1 + (0 if module.bias is None else 1)

    elements_per_group = h_in * w_in * (c_out // n_groups)

    return muls * elements_per_group * n_groups + adds * elements_per_group * n_groups

def compute_LayerNorm_madd(module, inp, out):
    """
    Multiply-adds of a layer-norm style module.

    3-D tensors get a leading dimension added. Four operations (subtract mean,
    divide by standard deviation, scale, shift) are counted for each element
    of the last two dimensions of the resulting 4-D input.
    """
    if len(inp.size()) == 3:
        inp = inp.unsqueeze(0)
    if len(out.size()) == 3:
        out = out.unsqueeze(0)
    in_rank = len(inp.size())
    assert in_rank == 4 and in_rank == len(out.size())

    _dim1, dim2, dim3 = inp.size()[1:]

    # sub mean, div std, mul alpha, add beta
    ops_per_element = 4
    return ops_per_element * dim2 * dim3

def compute_BatchNorm2d_madd(module, inp, out):
    """
    Multiply-adds of BatchNorm2d (batch not included): four operations
    (subtract mean, divide by standard deviation, scale, shift) per element
    of one sample.
    """
    assert isinstance(module, nn.BatchNorm2d)
    in_rank = len(inp.size())
    assert in_rank == 4 and in_rank == len(out.size())

    channels, height, width = inp.size()[1:]

    ops_per_element = 4
    return ops_per_element * channels * height * width


def compute_MaxPool2d_madd(module, inp, out):
    """
    Operation count of MaxPool2d (batch not included): ``k_h * k_w - 1``
    comparisons per output element. A scalar ``kernel_size`` is used for both
    kernel dimensions.
    """
    assert isinstance(module, nn.MaxPool2d)
    assert len(inp.size()) == 4 and len(inp.size()) == len(out.size())

    kernel = module.kernel_size
    if not isinstance(kernel, (tuple, list)):
        kernel = (kernel, kernel)
    kernel_h, kernel_w = kernel
    c_out, h_out, w_out = out.size()[1:]

    comparisons = kernel_h * kernel_w - 1
    return comparisons * h_out * w_out * c_out


def compute_AvgPool2d_madd(module, inp, out):
    """
    Operation count of AvgPool2d (batch not included): ``k_h * k_w - 1``
    additions plus one division per output element. A scalar ``kernel_size``
    is used for both kernel dimensions.
    """
    assert isinstance(module, nn.AvgPool2d)
    assert len(inp.size()) == 4 and len(inp.size()) == len(out.size())

    kernel = module.kernel_size
    if not isinstance(kernel, (tuple, list)):
        kernel = (kernel, kernel)
    kernel_h, kernel_w = kernel
    c_out, h_out, w_out = out.size()[1:]

    additions = kernel_h * kernel_w - 1
    divisions = 1
    ops_per_output = additions + divisions
    return ops_per_output * (h_out * w_out) * c_out


def compute_ReLU_madd(module, inp, out):
    """Operation count of ReLU/ReLU6: one per element of a single sample (batch not included)."""
    assert isinstance(module, (nn.ReLU, nn.ReLU6))

    per_sample = 1
    for extent in inp.size()[1:]:
        per_sample = per_sample * extent
    return per_sample


def compute_Softmax_madd(module, inp, out):
    """
    Operation count of Softmax (batch not included).

    With ``n`` elements per sample: ``n`` exponentials, ``n - 1`` additions
    and ``n`` divisions.
    """
    assert isinstance(module, nn.Softmax)
    assert len(inp.size()) > 1

    per_sample = 1
    for extent in inp.size()[1:]:
        per_sample *= extent

    exponentials = per_sample
    additions = per_sample - 1
    divisions = per_sample
    return exponentials + additions + divisions


def compute_Linear_madd(module, inp, out):
    """
    Multiply-adds of a fully connected layer for one sample (batch not included).

    The layer acts on the last dimension, so every dimension between the batch
    dimension and the feature dimension (e.g. the token axis of a
    ``(batch, tokens, features)`` input) multiplies the cost. Each output
    feature needs ``in_features`` multiplications and ``in_features - 1``
    additions. For 2-D inputs the result is
    ``out_features * (2 * in_features - 1)``.

    Args:
        module: the ``nn.Linear`` instance.
        inp: input tensor of shape ``(batch, *, in_features)``.
        out: output tensor of shape ``(batch, *, out_features)``.
    Returns:
        int: multiply-add count for one sample.
    """
    assert isinstance(module, nn.Linear)
    assert len(inp.size()) >= 1 and len(out.size()) >= 1, \
        f"unexpected Linear shapes: {tuple(inp.size())} -> {tuple(out.size())}"

    num_in_features = inp.size()[-1]
    num_out_features = out.size()[-1]

    # Vectors processed per sample: product of the dims between batch and features.
    rows_per_sample = 1
    for extent in inp.size()[1:-1]:
        rows_per_sample *= extent

    mul = num_in_features
    add = num_in_features - 1
    return rows_per_sample * num_out_features * (mul + add)


def compute_Bilinear_madd(module, inp1, inp2, out):
    """
    Multiply-adds of an ``nn.Bilinear`` layer (batch not included).

    With ``a`` and ``b`` the feature sizes of the two 2-D inputs, each output
    feature is charged ``a * b + b`` multiplications and one fewer addition.
    """
    assert isinstance(module, nn.Bilinear)
    assert len(inp1.size()) == 2 and len(inp2.size()) == 2 and len(out.size()) == 2

    features_a = inp1.size()[1]
    features_b = inp2.size()[1]
    features_out = out.size()[1]

    muls = features_a * features_b + features_b
    adds = muls - 1
    return features_out * (muls + adds)


def compute_madd(module, inp, out):
    """
    Dispatch to the multiply-add counter that matches ``module``.

    The first matching rule wins. For ``nn.Bilinear`` the two inputs are taken
    from ``inp[0]`` and ``inp[1]``. Unsupported modules are reported on stdout
    and counted as 0.
    """
    if isinstance(module, nn.Conv2d):
        return compute_Conv2d_madd(module, inp, out)
    if isinstance(module, nn.ConvTranspose2d):
        return compute_ConvTranspose2d_madd(module, inp, out)
    if isinstance(module, nn.BatchNorm2d):
        return compute_BatchNorm2d_madd(module, inp, out)
    if isinstance(module, nn.LayerNorm) or 'LayerNorm' in type(module).__name__:
        return compute_LayerNorm_madd(module, inp, out)
    if isinstance(module, nn.MaxPool2d):
        return compute_MaxPool2d_madd(module, inp, out)
    if isinstance(module, nn.AvgPool2d):
        return compute_AvgPool2d_madd(module, inp, out)
    if isinstance(module, (nn.ReLU, nn.ReLU6)):
        return compute_ReLU_madd(module, inp, out)
    if isinstance(module, nn.Softmax):
        return compute_Softmax_madd(module, inp, out)
    if isinstance(module, nn.Linear):
        return compute_Linear_madd(module, inp, out)
    if isinstance(module, nn.Bilinear):
        return compute_Bilinear_madd(module, inp[0], inp[1], out)

    print(f"[MAdd]: {type(module).__name__} is not supported!")
    return 0


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/Basis/auxiliary/torchstat/compute_memory.py
================================================
import torch.nn as nn
import torch
import numpy as np


def compute_memory(module, inp, out):
    """
    Dispatch to the memory read/write counter that matches ``module``.

    The first matching rule wins. Returns a ``(mread, mwrite)`` pair;
    unsupported modules are reported on stdout and yield ``(0, 0)``.
    """
    if isinstance(module, (nn.ReLU, nn.ReLU6, nn.ELU, nn.LeakyReLU)):
        return compute_ReLU_memory(module, inp, out)
    if isinstance(module, nn.PReLU):
        return compute_PReLU_memory(module, inp, out)
    if isinstance(module, nn.Conv2d):
        return compute_Conv2d_memory(module, inp, out)
    if isinstance(module, nn.BatchNorm2d):
        return compute_BatchNorm2d_memory(module, inp, out)
    if isinstance(module, nn.LayerNorm) or 'LayerNorm' in type(module).__name__:
        return compute_LayerNorm_memory(module, inp, out)
    if isinstance(module, nn.Linear):
        return compute_Linear_memory(module, inp, out)
    if isinstance(module, (nn.AvgPool2d, nn.MaxPool2d)):
        return compute_Pool2d_memory(module, inp, out)

    print(f"[Memory]: {type(module).__name__} is not supported!")
    return (0, 0)


def num_params(module):
    """Total number of elements in the trainable (``requires_grad``) parameters of ``module``."""
    total = 0
    for weight in module.parameters():
        if weight.requires_grad:
            total += weight.numel()
    return total


def compute_ReLU_memory(module, inp, out):
    """
    Element counts read and written by an element-wise activation.

    Both values equal the number of input elements over the whole batch.
    Returns ``(mread, mwrite)``.
    """
    assert isinstance(module, (nn.ReLU, nn.ReLU6, nn.ELU, nn.LeakyReLU))
    shape = inp.size()
    element_total = shape[0] * shape[1:].numel()
    return (element_total, element_total)


def compute_PReLU_memory(module, inp, out):
    """
    Element counts read and written by a PReLU layer.

    Reads are ``batch * (elements per sample + trainable parameters)``;
    writes are the number of input elements. Returns ``(mread, mwrite)``.
    """
    assert isinstance(module, (nn.PReLU))
    shape = inp.size()
    n_batch = shape[0]
    per_sample = shape[1:].numel()
    mread = n_batch * (per_sample + num_params(module))
    mwrite = n_batch * per_sample
    return (mread, mwrite)


def compute_Conv2d_memory(module, inp, out):
    """
    Element counts read and written by a 2-D convolution.

    Reads are ``batch * (input elements per sample + trainable parameters)``
    (the parameter count includes the bias when present); writes are the
    number of output elements. Returns ``(mread, mwrite)``.
    """
    assert isinstance(module, nn.Conv2d)
    in_rank = len(inp.size())
    assert in_rank == 4 and in_rank == len(out.size())

    n_batch = inp.size()[0]
    c_out, h_out, w_out = out.size()[1:]

    per_sample_read = inp.size()[1:].numel() + num_params(module)
    return (n_batch * per_sample_read, n_batch * c_out * h_out * w_out)


def compute_BatchNorm2d_memory(module, inp, out):
    """
    Element counts read and written by BatchNorm2d.

    Reads are ``batch * (elements per sample + 2 * channels)``; writes are the
    number of input elements. Returns ``(mread, mwrite)``.
    """
    assert isinstance(module, nn.BatchNorm2d)
    in_rank = len(inp.size())
    assert in_rank == 4 and in_rank == len(out.size())
    n_batch, channels, _height, _width = inp.size()

    per_sample_read = inp.size()[1:].numel() + 2 * channels
    return (n_batch * per_sample_read, inp.size().numel())

def compute_LayerNorm_memory(module, inp, out):
    """
    Element counts read and written by a layer-norm style module.

    3-D tensors get a leading dimension added. With the resulting 4-D size
    ``(d0, d1, d2, d3)``, reads are ``d0 * (d2 * d3 + 2 * d1)`` and writes are
    the total number of input elements. Returns ``(mread, mwrite)``.
    """
    if len(inp.size()) == 3:
        inp = inp.unsqueeze(0)
    if len(out.size()) == 3:
        out = out.unsqueeze(0)
    in_rank = len(inp.size())
    assert in_rank == 4 and in_rank == len(out.size())

    shape = inp.size()
    dim0, dim1 = shape[:2]
    per_item_read = shape[2:].numel() + 2 * dim1
    return (dim0 * per_item_read, shape.numel())

def compute_Linear_memory(module, inp, out):
    """
    Element counts read and written by a fully connected layer.

    Reads are every input element plus the trainable parameters counted once
    per batch entry; writes are every output element. Inputs with extra
    leading dimensions (e.g. ``(batch, tokens, features)``) are counted in
    full. For 2-D inputs this equals
    ``(batch * (in_features + params), batch * out_features)``.

    Args:
        module: the ``nn.Linear`` instance.
        inp: input tensor of shape ``(batch, *, in_features)``.
        out: output tensor of shape ``(batch, *, out_features)``.
    Returns:
        tuple: ``(mread, mwrite)`` in elements.
    """
    assert isinstance(module, nn.Linear)
    assert len(inp.size()) >= 1 and len(out.size()) >= 1

    batch_size = inp.size()[0]

    mread = inp.size().numel() + batch_size * num_params(module)
    mwrite = out.size().numel()

    return (mread, mwrite)


def compute_Pool2d_memory(module, inp, out):
    """
    Element counts read and written by a 2-D max/average pooling layer:
    all input elements are read and all output elements are written.
    Returns ``(mread, mwrite)``.
    """
    assert isinstance(module, (nn.MaxPool2d, nn.AvgPool2d))
    in_rank = len(inp.size())
    assert in_rank == 4 and in_rank == len(out.size())
    n_batch = inp.size()[0]
    return (n_batch * inp.size()[1:].numel(), n_batch * out.size()[1:].numel())


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/Basis/auxiliary/torchstat/model_hook.py
================================================
import time
from collections import OrderedDict
import numpy as np
import torch
import torch.nn as nn
from functools import partial
from torchstat import compute_madd
from torchstat import compute_flops
from torchstat import compute_memory


class ModelHook(object):
    def __init__(self, model, input_size, device="cuda", debug_layers=None):
        """
        Hook ``model`` and run one forward pass on random inputs.

        Args:
            model (nn.Module): network to analyse; it is switched to eval mode.
            input_size (list | tuple): one shape per model input; each entry is
                expanded into ``torch.rand(*shape)``.
            device (str): ``'cuda'`` or ``'cpu'`` (case-insensitive). CUDA is
                only used when it is available.
            debug_layers (list, optional): class-name fragments consulted while
                hooking; defaults to an empty list.
        """
        assert isinstance(model, nn.Module)
        assert isinstance(input_size, (list, tuple))

        # Validate the device before hooking so that an invalid argument
        # cannot leave the model half-instrumented.
        device = device.lower()
        assert device in [
            "cuda",
            "cpu",
        ], "Input device is not valid, please specify 'cuda' or 'cpu'"

        self.leaf_modules = []
        # A fresh list per instance; a mutable default would be shared.
        self.debug_layers = [] if debug_layers is None else debug_layers
        self._model = model
        self._input_size = input_size
        self._origin_call = dict()  # sub module call hook
        self.hooks = []
        self._hook_model()
        # x = [torch.rand(1, *self._input_size)]  # add module duration time

        if device == "cuda" and torch.cuda.is_available():
            dtype = torch.cuda.FloatTensor
        else:
            dtype = torch.FloatTensor
        x = [torch.rand(*in_size).type(dtype) for in_size in input_size]
        self._model.eval()
        self._model(*x)

        # if len(debug_layers) > 0:
        #     self.debug_partial_layer(debug_layers)


    @staticmethod
    def _register_buffer(module):
        """
        Attach zero-initialised statistic buffers to a leaf module.

        Modules that have children are left untouched.
        """
        assert isinstance(module, nn.Module)

        # Only leaf modules carry statistics.
        if next(module.children(), None) is not None:
            return

        stat_buffers = (
            ('input_shape', torch.zeros(3).int()),
            ('output_shape', torch.zeros(3).int()),
            ('parameter_quantity', torch.zeros(1).int()),
            ('inference_memory', torch.zeros(1).long()),
            ('MAdd', torch.zeros(1).long()),
            ('duration', torch.zeros(1).float()),
            ('Flops', torch.zeros(1).long()),
            ('Memory', torch.zeros(2).long()),
        )
        for buffer_name, initial in stat_buffers:
            module.register_buffer(buffer_name, initial)

    def _sub_module_call_hook(self):
        def wrap_call(module, *input, **kwargs):
            assert module.__class__ in self._origin_call
            # Itemsize for memory
            try:
                itemsize = input[0].detach().numpy().itemsize
            except:
                itemsize = input[0].detach().cpu().numpy().itemsize

            start = time.time()
            output = self._origin_call[module.__class__](module, *input, **kwargs)  # 都是nn.Conv2D则有相同的_call__不需要重复存储
            end = time.time()
            module.duration = torch.from_numpy(
                np.array([end - start], dtype=np.float32))
            # c, h, w
            module.input_shape = torch.from_numpy(
                np.array(input[0].size()[1:], dtype=np.int32))
            module.output_shape = torch.from_numpy(
                np.array(output.size()[1:], dtype=np.int32))
            # print(module.name)
            parameter_quantity = 0
            inference_memory = 1
            # iterate through parameters and count num params
            if 'XCTEB' in module.__class__.__name__:
                c, h, w = module.input_shape
                num_heads = module.num_heads
                parameter_quantity += c * c * num_heads
            elif 'SwinTEB' in module.__class__.__name__:
                if len(module.input_shape) == 3:
                    # c, h, w = module.input_shape # c, h, w
                    _, N, c = module.input_shape
                    # N = h * w
                elif len(module.input_shape) == 2:
                    N = module.input_shape[0]
                num_heads = module.num_heads
                # hh = nH * h WindowAttention只减少了flops并没有减少显存占用，因此参数量按照图像大小算
                parameter_quantity += N * N * num_heads
                print(parameter_quantity, N, module.input_shape)
            elif 'MSA' == module.__class__.__name__:
                # L, B, D
                # if hasattr(module, '__name__'):
                #     print('model.body.decoder.layers.0.self_attn')
                # print(module.__name__, module.input_shape)
                module.input_shape = torch.from_numpy(
                    np.array(input[0].permute(1, 2, 0).size()[1:], dtype=np.int32))
                c, L = module.input_shape
                num_heads = module.num_heads
                parameter_quantity += L * L * num_heads
                # print(L, c)
            elif 'MSA_BNC' == module.__class__.__name__:
                # B, L, C
                module.input_shape = torch.from_numpy(
                    np.array(input[0].permute(0, 2, 1).size()[1:], dtype=np.int32))
                c, L = module.input_shape
                num_heads = module.num_heads
                parameter_quantity += L * L * num_heads
                # print(L, c)
            elif 'sGCN' == module.__class__.__name__:
                module.input_shape = torch.from_numpy(
                    np.array(input[0][0].permute(0, 2, 1).size(), dtype=np.int32))
                c, H, W = module.input_shape
                c = c // 2
                parameter_quantity += c * c
            elif 'cGCN' == module.__class__.__name__:
                module.input_shape = torch.from_numpy(
                    np.array(input[0][0].permute(0, 2, 1).size(), dtype=np.int32))
                c, H, W = module.input_shape
                c = c // 2
                parameter_quantity += c * c // 2
            else:
                for s in output.size()[1:]:
                    inference_memory *= s
                # memory += parameters_number  # exclude parameter memory
            for name, p in module._parameters.items():
                parameter_quantity += (0 if p is None else torch.numel(p.data))
            module.parameter_quantity = torch.from_numpy(
                np.array([parameter_quantity], dtype=np.long))

            inference_memory = inference_memory * 4 / (1024 ** 2)  # shown as MB unit
            module.inference_memory = torch.from_numpy(
                np.array([inference_memory], dtype=np.float32))

            if len(input) == 1:
                madd = compute_madd(module, input[0], output)
                flops = compute_flops(module, input[0], output)
                Memory = compute_memory(module, input[0], output)
            elif len(input) > 1:
                madd = compute_madd(module, input, output)
                flops = compute_flops(module, input, output)
                Memory = compute_memory(module, input, output)
            else:  # error
                madd = 0
                flops = 0
                Memory = (0, 0)
            module.MAdd = torch.from_numpy(
                np.array([madd], dtype=np.int64))
            module.Flops = torch.from_numpy(
                np.array([flops], dtype=np.int64))
            Memory = np.array(Memory, dtype=np.int64) * itemsize
            module.Memory = torch.from_numpy(Memory)

            return output

        leaf_modules = self.leaf_modules
        # for m in self._model.modules():
        #     print(m.__class__)

        for name, module in self._model.named_modules():
            if len(list(module.children())) == 0:
                module.name = name
                leaf_modules.append((name, module))
                if module.__class__ not in self._origin_call:
                    # 只记录一类与具体实例无关的__call__
                    self._origin_call[module.__class__] = module.__class__.__call__
                    module.__class__.__call__ = wrap_call
            elif name != '' and len(list(module.children())) > 0 and any([L in module.__class__.__name__ for L in self.debug_layers]):
                #name in self.debug_layers:# module.__class__.__name__  in self.debug_layers
                # if module.__class__.__name__ in self.debug_layers:
                #     print("111")
                leaf_modules.append((name, module))
                if module.__class__ not in self._origin_call:
                    self._origin_call[module.__class__] = module.__class__.__call__
                    module.__class__.__call__ = wrap_call
                    print(name, module.__class__.__name__)

        # for module in self._model.modules():
        #     if len(list(module.children())) == 0 and module.__class__ not in self._origin_call:
        #         self.hooks.append(module.register_forward_hook(wrap_call))

    def _hook_model(self):
        self._model.apply(self._register_buffer)
        self._sub_module_call_hook()

    def clear_hooks(self) -> None:
        """Restore the ``__call__`` of every class recorded in ``_origin_call``.

        ``self._origin_call`` maps a module class to a saved ``__call__``.
        For each module of ``self._model`` whose class is a key of that
        mapping, the saved callable is assigned back onto the class.
        """
        saved_calls = self._origin_call
        for module in self._model.modules():
            module_cls = module.__class__
            if module_cls in saved_calls:
                # The attribute is set on the class, not on the instance.
                module_cls.__call__ = saved_calls[module_cls]

    # @staticmethod
    # def _retrieve_leaf_modules(model):
    #     leaf_modules = []
    #     for name, m in model.named_modules():
    #         if len(list(m.children())) == 0:
    #             leaf_modules.append((name, m))
    #     return leaf_modules

    def retrieve_leaf_modules(self):
        """Return the recorded ``(name, module)`` pairs as an ``OrderedDict``."""
        recorded_pairs = self.leaf_modules
        return OrderedDict(recorded_pairs)

    def debug_partial_layer(self, target_keys):
        """List sub-module names that contain any of ``target_keys``.

        The first entry of ``named_modules()`` is skipped. Names are grouped
        by key in the order of ``target_keys``; a name matching several keys
        is listed once per matching key.
        """
        named = list(self._model.named_modules())
        candidate_names = dict(named[1:]).keys()
        return [
            candidate
            for key in target_keys
            for candidate in candidate_names
            if key in candidate
        ]


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/Basis/auxiliary/torchstat/reporter.py
================================================
import pandas as pd


pd.set_option('display.width', 1000)
pd.set_option('display.max_rows', 10000)
pd.set_option('display.max_columns', 10000)


def round_value(value, binary=False):
    """Format ``value`` with a K/M/G/T suffix, rounded to two decimals.

    Args:
        value: number to format.
        binary (bool): use 1024 as the unit step instead of 1000.

    Returns:
        str: the scaled value plus suffix for the largest unit whose floor
        quotient is positive, otherwise ``str(value)`` unchanged.
    """
    base = 1024. if binary else 1000.

    # Largest unit first so the first positive floor quotient wins.
    for exponent, suffix in ((4, 'T'), (3, 'G'), (2, 'M'), (1, 'K')):
        unit = base ** exponent
        if value // unit > 0:
            return str(round(value / unit, 2)) + suffix
    return str(value)


def report_format(collected_nodes):
    """Render the statistics of ``collected_nodes`` as a printable report.

    Args:
        collected_nodes: iterable of node objects exposing ``name``,
            ``mtype``, ``input_shape``, ``output_shape``,
            ``parameter_quantity``, ``inference_memory``, ``MAdd``,
            ``Flops``, ``Memory`` (a ``(read, write)`` pair) and
            ``duration``.

    Returns:
        str: the per-node table with a trailing ``total`` row, followed by a
        short summary of the totals.
    """
    columns = ['module name', 'input shape', 'output shape',
               'params', 'memory(MB)',
               'MAdd', 'duration', 'Flops', 'MemRead(B)', 'MemWrite(B)']
    data = list()
    properties = list()
    for node in collected_nodes:
        input_shape = ' '.join(['{:>3d}'] * len(node.input_shape)).format(
            *node.input_shape)
        output_shape = ' '.join(['{:>3d}'] * len(node.output_shape)).format(
            *node.output_shape)
        mread, mwrite = node.Memory
        data.append([node.name, input_shape, output_shape,
                     node.parameter_quantity, node.inference_memory,
                     node.MAdd, node.duration, node.Flops, mread, mwrite])
        properties.append(node.mtype)

    pd.set_option('display.max_columns', None)
    df = pd.DataFrame(data, columns=columns)
    # The small epsilon keeps the share finite when every duration is zero.
    df['duration[%]'] = df['duration'] / (df['duration'].sum() + 1e-7)
    df['MemR+W(B)'] = df['MemRead(B)'] + df['MemWrite(B)']
    df['type'] = properties

    total_parameters_quantity = df['params'].sum()
    total_memory = df['memory(MB)'].sum()
    total_operation_quantity = df['MAdd'].sum()
    total_flops = df['Flops'].sum()
    total_duration = df['duration[%]'].sum()
    total_mread = df['MemRead(B)'].sum()
    total_mwrite = df['MemWrite(B)'].sum()
    total_memrw = df['MemR+W(B)'].sum()
    del df['duration']

    # Add Total row. The read/write cells must be the column sums, not the
    # values left over from the last node of the loop above.
    total_df = pd.Series([total_parameters_quantity, total_memory,
                          total_operation_quantity, total_flops,
                          total_duration, total_mread, total_mwrite,
                          total_memrw],
                         index=['params', 'memory(MB)', 'MAdd', 'Flops',
                                'duration[%]', 'MemRead(B)', 'MemWrite(B)',
                                'MemR+W(B)'],
                         name='total')
    # DataFrame.append no longer exists in pandas 2.x; concat works on both.
    df = pd.concat([df, total_df.to_frame().T])

    df = df.fillna(' ')
    df['memory(MB)'] = df['memory(MB)'].apply(
        lambda x: '{:.2f}'.format(x))
    df['duration[%]'] = df['duration[%]'].apply(lambda x: '{:.2%}'.format(x))
    df['MAdd'] = df['MAdd'].apply(lambda x: '{:,}'.format(x))
    df['Flops'] = df['Flops'].apply(lambda x: '{:,}'.format(x))

    table = str(df)
    rule_width = len(table.split('\n')[0])

    summary = table + '\n'
    summary += "=" * rule_width
    summary += '\n'
    summary += "Total params: {:,}\n".format(total_parameters_quantity)

    summary += "-" * rule_width
    summary += '\n'
    summary += "Total memory: {:.2f}MB\n".format(total_memory)
    summary += "Total MAdd: {}MAdd\n".format(round_value(total_operation_quantity))
    summary += "Total Flops: {}Flops\n".format(round_value(total_flops))
    summary += "Total MemR+W: {}B\n".format(round_value(total_memrw, True))
    return summary


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/Basis/auxiliary/torchstat/stat_tree.py
================================================
import queue


class StatTree(object):
    """Tree of ``StatNode`` objects plus the logic that picks report rows."""

    def __init__(self, root_node):
        assert isinstance(root_node, StatNode)

        self.root_node = root_node

    def get_same_level_max_node_depth(self, query_node):
        """Return the largest ``depth`` among ``query_node`` and its siblings.

        The root has no siblings and yields 0. The root is recognised by
        identity rather than by name, so a child that happens to be named
        like the root is still compared against its siblings.
        """
        if query_node is self.root_node:
            return 0
        return max(child.depth for child in query_node.parent.children)

    def update_stat_nodes_granularity(self):
        """Breadth-first walk assigning ``granularity`` to every node."""
        pending = queue.Queue()
        pending.put(self.root_node)
        while not pending.empty():
            node = pending.get()
            node.granularity = self.get_same_level_max_node_depth(node)
            for child in node.children:
                pending.put(child)

    def get_collected_stat_nodes(self, debug_layers, query_granularity):
        """Collect, in pre-order, the nodes that should appear in the report.

        A node is selected when any entry of ``debug_layers`` is contained
        in its ``mtype``, when its ``depth`` equals ``query_granularity``,
        or when ``depth < query_granularity <= granularity``. Each node is
        returned at most once even if several criteria hold, so it is not
        counted twice in the report totals.
        """
        self.update_stat_nodes_granularity()

        collected_nodes = []
        stack = [self.root_node]
        while stack:
            node = stack.pop()
            depth = node.depth
            selected = (
                any(layer in node.mtype for layer in debug_layers)
                or depth == query_granularity
                or depth < query_granularity <= node.granularity
            )
            if selected:
                collected_nodes.append(node)
            # Reversed so the first child is popped (visited) first.
            stack.extend(reversed(node.children))
        return collected_nodes


class StatNode(object):
    """A node of the statistics tree holding counters for one module name.

    ``input_shape``, ``output_shape``, ``depth`` and ``inference_memory``
    take the children into account; the remaining counters return only the
    value stored on the node itself.
    """

    def __init__(self, name=str(), mtype=str(), parent=None):
        # Identity of the node.
        self._name = name
        self._mtype = mtype
        # Shapes stay None until assigned through the setters.
        self._input_shape = None
        self._output_shape = None
        # Counters.
        self._parameter_quantity = 0
        self._inference_memory = 0
        self._MAdd = 0
        self._Memory = (0, 0)
        self._Flops = 0
        self._duration = 0
        self._duration_percent = 0
        # Tree bookkeeping.
        self._granularity = 1
        self._depth = 1
        self.parent = parent
        self.children = list()

    @property
    def name(self):
        return self._name

    @name.setter
    def name(self, name):
        self._name = name

    @property
    def mtype(self):
        return self._mtype

    @mtype.setter
    def mtype(self, mtype):
        self._mtype = mtype

    @property
    def granularity(self):
        return self._granularity

    @granularity.setter
    def granularity(self, g):
        self._granularity = g

    @property
    def depth(self):
        """Own depth plus the largest depth among the children, if any."""
        if not self.children:
            return self._depth
        return self._depth + max(child.depth for child in self.children)

    @property
    def input_shape(self):
        """Stored shape for a leaf, else the first child's input shape."""
        if self.children:
            return self.children[0].input_shape
        return self._input_shape

    @input_shape.setter
    def input_shape(self, input_shape):
        assert isinstance(input_shape, (list, tuple))
        self._input_shape = input_shape

    @property
    def output_shape(self):
        """Stored shape for a leaf, else the last child's output shape."""
        if self.children:
            return self.children[-1].output_shape
        return self._output_shape

    @output_shape.setter
    def output_shape(self, output_shape):
        assert isinstance(output_shape, (list, tuple))
        self._output_shape = output_shape

    @property
    def parameter_quantity(self):
        # Children are not added in; only the node's own value is reported.
        return self._parameter_quantity

    @parameter_quantity.setter
    def parameter_quantity(self, parameter_quantity):
        assert parameter_quantity >= 0
        self._parameter_quantity = parameter_quantity

    @property
    def inference_memory(self):
        """Own inference memory plus that of every child (recursively)."""
        return sum((child.inference_memory for child in self.children),
                   self._inference_memory)

    @inference_memory.setter
    def inference_memory(self, inference_memory):
        self._inference_memory = inference_memory

    @property
    def MAdd(self):
        # Children are not added in; only the node's own value is reported.
        return self._MAdd

    @MAdd.setter
    def MAdd(self, MAdd):
        self._MAdd = MAdd

    @property
    def Flops(self):
        # Children are not added in; only the node's own value is reported.
        return self._Flops

    @Flops.setter
    def Flops(self, Flops):
        self._Flops = Flops

    @property
    def Memory(self):
        # Children are not added in; only the node's own value is reported.
        return self._Memory

    @Memory.setter
    def Memory(self, Memory):
        assert isinstance(Memory, (list, tuple))
        self._Memory = Memory

    @property
    def duration(self):
        # Children are not added in; only the node's own value is reported.
        return self._duration

    @duration.setter
    def duration(self, duration):
        self._duration = duration

    def find_child_index(self, child_name):
        """Return the index of the last child named ``child_name``, or -1."""
        assert isinstance(child_name, str)

        found = -1
        for position, child in enumerate(self.children):
            if child_name == child.name:
                found = position
        return found

    def add_child(self, node):
        """Append ``node`` unless a child with the same name already exists."""
        assert isinstance(node, StatNode)

        if self.find_child_index(node.name) != -1:
            return
        self.children.append(node)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/Basis/auxiliary/torchstat/statistics.py
================================================
import torch
import torch.nn as nn
from torchstat import ModelHook
from collections import OrderedDict
from torchstat import StatTree, StatNode, report_format


def get_parent_node(root_node, stat_node_name):
    """Walk from ``root_node`` to the parent of the node ``stat_node_name``.

    The dotted name is split into parts; for every proper prefix
    (``a``, ``a.b``, ...) the child carrying that full prefix as its name
    is looked up and descended into. A missing prefix trips an assertion.
    """
    assert isinstance(root_node, StatNode)

    current = root_node
    parts = stat_node_name.split('.')
    for end in range(1, len(parts)):
        prefix = '.'.join(parts[:end])
        position = current.find_child_index(prefix)
        assert position != -1
        current = current.children[position]
    return current


def convert_leaf_modules_to_stat_tree(leaf_modules):
    """Build a ``StatTree`` from the hooked modules.

    Args:
        leaf_modules (OrderedDict): dotted module name -> module. Each module
            must carry the tensors ``input_shape``, ``output_shape``,
            ``parameter_quantity``, ``inference_memory``, ``MAdd``,
            ``Flops``, ``duration`` and ``Memory``.

    Returns:
        StatTree: tree rooted at a node named ``'root'`` with one node per
        dotted-name prefix.
    """
    assert isinstance(leaf_modules, OrderedDict)

    root_node = StatNode(name='root', parent=None)
    for leaf_module_name, leaf_module in leaf_modules.items():
        # Every prefix node created below is labelled with this module's type.
        mtype = (leaf_module.__base__ if hasattr(leaf_module, '__base__')
                 else leaf_module.__class__.__name__)
        names = leaf_module_name.split('.')
        last = len(names) - 1
        for i in range(len(names)):
            stat_node_name = '.'.join(names[0:i+1])
            parent_node = get_parent_node(root_node, stat_node_name)
            node = StatNode(name=stat_node_name, mtype=mtype, parent=parent_node)
            # add_child ignores the node if that name is already a child.
            parent_node.add_child(node)
            if i == last:  # leaf module itself
                node.input_shape = leaf_module.input_shape.numpy().tolist()
                node.output_shape = leaf_module.output_shape.numpy().tolist()
                node.parameter_quantity = leaf_module.parameter_quantity.numpy()[0]
                node.inference_memory = leaf_module.inference_memory.numpy()[0]
                node.MAdd = leaf_module.MAdd.numpy()[0]
                node.Flops = leaf_module.Flops.numpy()[0]
                node.duration = leaf_module.duration.numpy()[0]
                node.Memory = leaf_module.Memory.numpy().tolist()
    return StatTree(root_node)


class ModelStat(object):
    """Collect per-module statistics of a model and print them as a report.

    Args:
        model (nn.Module): model to analyse.
        input_size: forwarded unchanged to ``ModelHook``.
        query_granularity (int): forwarded to
            ``StatTree.get_collected_stat_nodes``.
        debug_layers (list | None): forwarded to ``ModelHook`` and to
            ``StatTree.get_collected_stat_nodes``. ``None`` means a fresh
            empty list, so instances never share a default list.
    """

    def __init__(self, model, input_size, query_granularity=1, debug_layers=None):
        assert isinstance(model, nn.Module)
        # assert isinstance(input_size, (tuple, list)) and len(input_size) == 3
        self._model = model
        self._input_size = input_size
        self._query_granularity = query_granularity
        self.debug_layers = [] if debug_layers is None else debug_layers

    def _analyze_model(self):
        """Hook the model, build the stat tree and return the nodes to report."""
        model_hook = ModelHook(self._model, self._input_size, debug_layers=self.debug_layers)
        try:
            leaf_modules = model_hook.retrieve_leaf_modules()
            stat_tree = convert_leaf_modules_to_stat_tree(leaf_modules)
            return stat_tree.get_collected_stat_nodes(self.debug_layers, self._query_granularity)
        finally:
            # Always undo the hooks, even when building the tree fails.
            model_hook.clear_hooks()

    def show_report(self):
        """Print the formatted statistics report."""
        collected_nodes = self._analyze_model()
        report = report_format(collected_nodes)
        print(report)

def stat(model, input_size, query_granularity=1, debug_layers=None):
    """Print a statistics report for ``model``.

    Args:
        model: model to analyse (``ModelStat`` requires an ``nn.Module``).
        input_size: forwarded to ``ModelStat``.
        query_granularity (int): forwarded to ``ModelStat``.
        debug_layers (list | None): layer-type names forwarded to
            ``ModelStat``. ``None`` selects the default set; a fresh list is
            built on every call so calls never share one list object.
    """
    if debug_layers is None:
        debug_layers = ["MSA", "SwinTEB", "XCTEB", "MSA_BNC", 'cGCN', 'sGCN']
    ms = ModelStat(model, input_size, query_granularity, debug_layers)
    ms.show_report()


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/Basis/auxiliary/utils.py
================================================
import os
import datetime
import torch
import psutil
from collections import defaultdict, deque
import time
import sys
sys.path.append('../..')
sys.path.append('../mmcv')
from mmcv.utils.logging import print_log
import numpy as np
import random
import torch.backends.cudnn as cudnn
import torch.distributed as dist
from functools import partial

def get_dist_info():
    """Return ``(rank, world_size)`` of the current process.

    Falls back to ``(0, 1)`` when ``torch.distributed`` is unavailable or
    has not been initialised.
    """
    if not (dist.is_available() and dist.is_initialized()):
        return 0, 1
    rank = dist.get_rank()
    world_size = dist.get_world_size()
    return rank, world_size

def init_random_seed(seed=None, device='cuda'):
    """Pick the random seed to use.

    A seed that is given is returned unchanged. Otherwise one is drawn with
    NumPy; when more than one process is running, the value drawn on rank 0
    is broadcast so that every rank returns the same seed. See
    https://github.com/open-mmlab/mmdetection/issues/6339

    Args:
        seed (int, Optional): The seed. Default to None.
        device (str): The device holding the tensor used for the broadcast.
            Default to 'cuda'.

    Returns:
        int: Seed to be used.
    """
    if seed is not None:
        return seed

    rank, world_size = get_dist_info()
    seed = np.random.randint(2**31)
    if world_size == 1:
        return seed

    # Only rank 0 contributes its draw; the other ranks send a placeholder.
    payload = seed if rank == 0 else 0
    random_num = torch.tensor(payload, dtype=torch.int32, device=device)
    dist.broadcast(random_num, src=0)
    return random_num.item()

def set_random_seed(seed, deterministic=True):
    """Seed the Python, NumPy and PyTorch random number generators.

    Args:
        seed (int): Seed to be used.
        deterministic (bool): Whether to set the deterministic option for
            CUDNN backend, i.e., set `torch.backends.cudnn.deterministic`
            to True and `torch.backends.cudnn.benchmark` to False.
            Default: True.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    if not deterministic:
        return
    cudnn_backend = torch.backends.cudnn
    cudnn_backend.deterministic = True
    cudnn_backend.benchmark = False

# def set_random_seed(seed):
#     np.random.seed(seed)
#     random.seed(seed)
#     torch.manual_seed(seed)
#     torch.cuda.manual_seed(seed)
#     torch.cuda.manual_seed_all(seed)
#     cudnn.deterministic = True


def show_memory_info(hint):
    """Print the ``uss`` memory of the current process in MB, prefixed by ``hint``."""
    process = psutil.Process(os.getpid())
    memory = process.memory_full_info().uss / 1024. / 1024
    print('{} memory used: {} MB'.format(hint, memory))


# class OrderedAverageMeter(object):
#     def __init__(self):


class AverageMeter(object):
    """Keeps the latest value together with a running weighted average.

    ``name`` and ``fmt`` are accepted by the constructor but not stored.
    """

    def __init__(self, name=None, fmt=":f"):
        # self.name = name
        # self.fmt = fmt
        self.reset()

    def reset(self):
        """Zero the latest value, the average, the sum and the count."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` with weight ``n`` and refresh the average."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count

    # def __str__(self):
    #     fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
    #     return fmtstr.format(**self.__dict__)


class ProgressMeter(object):
    """Prints a ``[batch/total]`` prefix followed by a list of meters."""

    def __init__(self, num_batches, meters, prefix=""):
        self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
        self.meters = meters
        self.prefix = prefix

    def display(self, batch):
        """Print the prefix, the batch counter and every meter, tab-separated."""
        head = self.prefix + self.batch_fmtstr.format(batch)
        print('\t'.join([head] + [str(meter) for meter in self.meters]))

    def _get_batch_fmtstr(self, num_batches):
        """Build ``'[{:Nd}/total]'`` where N is the digit count of the total."""
        width = len(str(num_batches // 1))
        field = '{:%dd}' % width
        return '[{}/{}]'.format(field, field.format(num_batches))


def accuracy(output, target, topk=(1,)):
    """Computes the precision@k for the specified values of k.

    Returns a list with one single-element tensor per entry of ``topk``,
    each holding the percentage of samples whose target is among the k
    highest-scoring entries along dimension 1 of ``output``.
    """
    with torch.no_grad():
        largest_k = max(topk)
        num_samples = target.size(0)

        ranked = output.topk(largest_k, 1, True, True)[1].t()
        # Row r marks the samples whose r-th ranked prediction is the target.
        hits = ranked.eq(target.view(1, -1).expand_as(ranked))

        scale = 100.0 / num_samples
        return [
            hits[:k].reshape(-1).float().sum(0, keepdim=True).mul_(scale)
            for k in topk
        ]


# class logger():
#     def __init__(self, obj, LOG_DIR, parser):
#         logname = 'log_train' + datetime.datetime.now().strftime('%Y_%m_%d-%H_%M_%S')+'.txt'
#         self.LOG_FOUT = open(os.path.join(LOG_DIR, logname), 'w')
#         self.LOG_FOUT.write(str(parser)+'\n')
#     def __call__(self, out_str):
#          self.LOG_FOUT.write(out_str+'\n')
#          self.LOG_FOUT.flush()
#          print(out_str)

def is_dist_avail_and_initialized():
    """Return True only when ``torch.distributed`` is available and initialised."""
    return bool(dist.is_available() and dist.is_initialized())


class SmoothedValue(object):
    """Tracks a series of values.

    The most recent ``window_size`` values are kept in a deque (used for
    ``median``, ``std`` and ``max``), while ``avg`` is the weighted average
    over everything passed to ``update`` since the last ``reset``.
    """

    def __init__(self, window_size=20, fmt=None, eval=False):
        if fmt is None:
            # The evaluation format additionally reports the window std.
            fmt = ("{value:.7f} (avg:{avg:.7f}, std:{std:.7f})" if eval
                   else "{value:.7f} (avg:{avg:.7f})")
        self.reset(window_size)
        self.fmt = fmt

    def reset(self, window_size):
        """Drop all history and start a new window of ``window_size`` values."""
        self.deque = deque(maxlen=window_size)
        self.val = self.avg = self.total = self.count = 0

    def update(self, value, n=1):
        """Record ``value`` with weight ``n``."""
        self.deque.append(value)
        self.val = value
        self.total += value * n
        self.count += n
        self.avg = self.total / self.count

    def synchronize_between_processes(self):
        """
        Warning: does not synchronize the deque!
        """
        if not is_dist_avail_and_initialized():
            return
        packed = torch.tensor([self.val, self.count, self.total],
                              dtype=torch.float64, device='cuda')
        dist.barrier()
        dist.all_reduce(packed)
        val, count, total = packed.tolist()
        self.val = val
        self.count = int(count)
        self.total = total
        self.avg = self.total / self.count

    @property
    def median(self):
        window = torch.tensor(list(self.deque))
        return window.median().item()

    @property
    def std(self):
        window = torch.tensor(list(self.deque))
        return window.std().item()

    @property
    def max(self):
        return max(self.deque)

    def __str__(self):
        # All statistics are computed; ``fmt`` picks the ones it names.
        stats = dict(
            median=self.median,
            avg=self.avg,
            max=self.max,
            value=self.val,
            std=self.std,
        )
        return self.fmt.format(**stats)


class MetricLogger(object):
    """Named collection of ``SmoothedValue`` meters with periodic logging.

    Args:
        logger: accepted but not used.
        delimiter (str): separator between the fields of a log line.
        dist_print (int): log lines guarded by this flag are emitted only
            when it equals 0.
        window_size (int): window size of every automatically created meter.
        eval (bool): forwarded to every automatically created meter.
    """

    def __init__(self, logger=None, delimiter="\t", dist_print=0, window_size=20, eval=False):
        self.meters = defaultdict(partial(SmoothedValue, window_size=window_size, eval=eval))
        self.delimiter = delimiter
        self.dist_print = dist_print
        # self.log = get_root_logger("UDL")
        # self.logger = logger

    def update(self, **kwargs):
        """Update the meters named by the keyword arguments (see ``update_dict``)."""
        self.update_dict(kwargs)

    def update_dict(self, kwargs: dict):
        """Update meter ``k`` with value ``v`` for every item of ``kwargs``.

        Tensors are reduced with ``torch.mean`` and converted to a Python
        number first. Any other value must be a float, int or str.
        """
        # dist.barrier()
        for k, v in kwargs.items():
            if isinstance(v, torch.Tensor):
                v = torch.mean(v)
                if hasattr(v, 'item'):
                    v = v.item()
            assert isinstance(v, (float, int, str)), "type: {}".format(type(v))
            self.meters[k].update(v)

    def __getattr__(self, attr):
        # Only called after normal lookup failed. Going through __dict__
        # avoids re-entering __getattr__ (and recursing without end) when
        # 'meters' itself has not been set on the instance yet.
        state = self.__dict__
        meters = state.get('meters', ())
        if attr in meters:
            return meters[attr]
        if attr in state:
            return state[attr]
        raise AttributeError("'{}' object has no attribute '{}'".format(
            type(self).__name__, attr))

    def __str__(self):
        loss_str = []
        for name, meter in self.meters.items():
            loss_str.append(
                "{}: {}".format(name, str(meter))
            )
        return self.delimiter.join(loss_str)

    def synchronize_between_processes(self):
        for meter in self.meters.values():
            meter.synchronize_between_processes()

    def add_meter(self, name, meter):
        self.meters[name] = meter

    def clear(self):
        self.meters.clear()

    def log_every(self, iterable, print_freq, header=None):
        """Yield ``(obj, i)`` for every item, logging progress on the way.

        ``i`` is a 1-based counter. A progress line is logged every
        ``print_freq`` items and on the last item; a final line reports the
        total time. ``iterable`` must support ``len()``.
        """
        i = 1
        if not header:
            header = ''
        num_items = len(iterable)
        start_time = time.time()
        end = time.time()
        iter_time = SmoothedValue(fmt='{avg:.4f}')
        data_time = SmoothedValue(fmt='{avg:.4f}')
        space_fmt = ':' + str(len(str(num_items))) + 'd'
        use_cuda = torch.cuda.is_available()
        fields = [
            header,
            '[{0' + space_fmt + '}/{1}]',
            'eta: {eta}',
            '{meters}',
            'time: {time}',
            'data: {data}',
        ]
        if use_cuda:
            fields.append('max mem: {memory:.0f}MB')
        log_msg = self.delimiter.join(fields)
        MB = 1024.0 * 1024.0
        # log_string = self.logger.info
        for obj in iterable:
            data_time.update(time.time() - end)
            yield obj, i
            iter_time.update(time.time() - end)
            if i % print_freq == 0 or i == num_items:
                eta_seconds = iter_time.avg * (num_items - i)
                eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
                if use_cuda:
                    if self.dist_print == 0:
                        print_log(log_msg.format(
                            i, num_items, eta=eta_string,
                            meters=str(self),
                            time=str(iter_time), data=str(data_time),
                            memory=torch.cuda.max_memory_allocated() / MB))

                else:
                    print_log(log_msg.format(
                        i, num_items, eta=eta_string,
                        meters=str(self),
                        time=str(iter_time), data=str(data_time)))
            i += 1
            end = time.time()
        total_time = time.time() - start_time
        total_time_str = str(datetime.timedelta(seconds=int(total_time)))
        if self.dist_print == 0:
            # max(..., 1) keeps an empty iterable from dividing by zero.
            print_log('{} Total time: {} ({:.4f} s / it)'.format(
                header, total_time_str, total_time / max(num_items, 1)))


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/Basis/cal_ssim.py
================================================
# GPL License
# Copyright (C) UESTC
# All Rights Reserved
# @Author  : Xiao Wu, LiangJian Deng
# @reference:
import torch
import torch.nn.functional as F
from torch.autograd import Variable
import numpy as np
from math import exp

def gaussian(window_size, sigma):
    gauss = torch.Tensor([exp(-(x - window_size//2)**2/float(2*sigma**2)) for x in range(window_size)])
    return gauss/gauss.sum()

def create_window(window_size, channel, sigma=1.5):
    _1D_window = gaussian(window_size, sigma).unsqueeze(1)
    _2D_window = _1D_window.mm(_1D_window.t()).float().unsqueeze(0).unsqueeze(0)
    window = Variable(_2D_window.expand(channel, 1, window_size, window_size).contiguous())
    return window

def _ssim(img1, img2, window, window_size, channel, size_average = True):
    mu1 = F.conv2d(img1, window, padding = window_size//2, groups = channel)
    mu2 = F.conv2d(img2, window, padding = window_size//2, groups = channel)

    mu1_sq = mu1.pow(2)
    mu2_sq = mu2.pow(2)
    mu1_mu2 = mu1*mu2

    sigma1_sq = F.conv2d(img1*img1, window, padding = window_size//2, groups = channel) - mu1_sq
    sigma2_sq = F.conv2d(img2*img2, window, padding = window_size//2, groups = channel) - mu2_sq
    sigma12 = F.conv2d(img1*img2, window, padding = window_size//2, groups = channel) - mu1_mu2

    C1 = 0.01**2
    C2 = 0.03**2

    ssim_map = ((2*mu1_mu2 + C1)*(2*sigma12 + C2))/((mu1_sq + mu2_sq + C1)*(sigma1_sq + sigma2_sq + C2))

    if size_average:
        return ssim_map.mean()
    else:
        return ssim_map.mean(1).mean(1).mean(1)

class SSIM(torch.nn.Module):
    def __init__(self, win_size=11, win_sigma=1.5, data_range=1, size_average=True, channel=3):
        super(SSIM, self).__init__()
        self.window_size = win_size
        self.size_average = size_average
        self.channel = channel
        self.window = create_window(win_size, self.channel, win_sigma)
        self.win_sigma = win_sigma

    def forward(self, img1, img2):
        #print(img1.size())
        (_, channel, _, _) = img1.size()

        if channel == self.channel and self.window.data.type() == img1.data.type():
            window = self.window
        else:
            window = create_window(self.window_size, channel, self.win_sigma)
            
            if img1.is_cuda:
                window = window.cuda(img1.get_device())
            window = window.type_as(img1)
            
            self.window = window
            self.channel = channel


        return _ssim(img1, img2, window, self.window_size, channel, self.size_average)

def ssim(img1, img2, win_size = 11, data_range=1, size_average = True):
    """Functional SSIM: build a window for ``img1`` and delegate to ``_ssim``.

    ``img1`` must be 4-D; its second dimension is used as the channel count.
    ``data_range`` is accepted but not used by this function.
    """
    _batch, channel, _height, _width = img1.size()
    kernel = create_window(win_size, channel)

    # Put the window on the same CUDA device as the input before casting.
    if img1.is_cuda:
        kernel = kernel.cuda(img1.get_device())
    kernel = kernel.type_as(img1)

    return _ssim(img1, img2, kernel, win_size, channel, size_average)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/Basis/config.py
================================================
# Copyright (c) Open-MMLab. All rights reserved.
# GPL License
# Copyright (C) UESTC
# All Rights Reserved
# @Author  : Xiao Wu, LiangJian Deng
# @reference:
import ast
import copy
import os
import os.path as osp
import platform
import shutil
import sys
import tempfile
import uuid
import warnings
from argparse import Action, ArgumentParser, Namespace
from collections import abc
from importlib import import_module

from addict import Dict
from yapf.yapflib.yapf_api import FormatCode

# from .misc import import_modules_from_strings
# from .path import check_file_exist


def import_modules_from_strings(imports, allow_failed_imports=False):
    """Import modules from the given list of strings.

    Args:
        imports (list | str | None): The given module names to be imported.
        allow_failed_imports (bool): If True, a failed import yields ``None``
            (and a ``UserWarning``). Otherwise the original ``ImportError``
            is re-raised. Default: False.

    Returns:
        list[module] | module | None: The imported modules. A single module is
        returned when ``imports`` is a string; ``None`` when it is empty.

    Raises:
        TypeError: If ``imports`` is not a list/str or contains a non-string.
        ImportError: If a module cannot be imported and failures are not
            allowed.

    Examples:
        >>> osp, sys = import_modules_from_strings(
        ...     ['os.path', 'sys'])
        >>> import os.path as osp_
        >>> import sys as sys_
        >>> assert osp == osp_
        >>> assert sys == sys_
    """
    if not imports:
        return
    single_import = False
    if isinstance(imports, str):
        single_import = True
        imports = [imports]
    if not isinstance(imports, list):
        raise TypeError(
            f'custom_imports must be a list but got type {type(imports)}')
    imported = []
    for imp in imports:
        if not isinstance(imp, str):
            raise TypeError(
                f'{imp} is of type {type(imp)} and cannot be imported.')
        try:
            imported_tmp = import_module(imp)
        except ImportError:
            if not allow_failed_imports:
                # Re-raise the caught exception so the failing module name
                # and traceback are kept (a bare ``ImportError`` loses them).
                raise
            warnings.warn(f'{imp} failed to import and is ignored.',
                          UserWarning)
            imported_tmp = None
        imported.append(imported_tmp)
    if single_import:
        imported = imported[0]
    return imported


def check_file_exist(filename, msg_tmpl='file "{}" does not exist'):
    """Raise ``FileNotFoundError`` unless ``filename`` is an existing file.

    ``msg_tmpl`` is formatted with ``filename`` to build the error message.
    """
    if osp.isfile(filename):
        return
    raise FileNotFoundError(msg_tmpl.format(filename))

# Select the regular-expression implementation by platform: the third-party
# ``regex`` package on Windows, the standard-library ``re`` everywhere else.
# NOTE(review): the reason ``regex`` is preferred on Windows is not visible
# in this file — confirm before changing.
if platform.system() == 'Windows':
    import regex as re
else:
    import re

# Key under which a config file lists the base config file(s) it builds on.
BASE_KEY = '_base_'
# Key popped from a child dict during merging; when truthy the base value is
# replaced instead of being merged into.
DELETE_KEY = '_delete_'
# Top-level keys that ``Config.__init__`` rejects in a config dict.
RESERVED_KEYS = ['filename', 'text', 'pretty_text']


class ConfigDict(Dict):
    """``addict.Dict`` subclass used as the storage type of ``Config``.

    Missing keys raise ``KeyError`` and missing attributes raise
    ``AttributeError``.
    """

    def __missing__(self, name):
        # NOTE(review): presumably overrides addict's default handling of
        # missing keys so that lookups fail loudly — confirm against addict.
        raise KeyError(name)

    def __getattr__(self, name):
        """Attribute access that maps a ``KeyError`` to ``AttributeError``.

        Any other exception raised by the parent lookup is re-raised as is.
        """
        try:
            value = super(ConfigDict, self).__getattr__(name)
        except KeyError:
            ex = AttributeError(f"'{self.__class__.__name__}' object has no "
                                f"attribute '{name}'")
        except Exception as e:
            ex = e
        else:
            return value
        # Raised outside the ``except`` blocks, after the handlers have exited.
        raise ex


def add_args(parser, cfg, prefix=''):
    """Add one ``--<prefix><key>`` option to ``parser`` per entry of ``cfg``.

    The option type is derived from the current value: strings are plain
    options, booleans become ``store_true`` flags, ints/floats are typed,
    nested dicts recurse with a dotted prefix, and other iterables become
    ``nargs='+'`` options typed after their first element.

    Args:
        parser (ArgumentParser): Parser to extend.
        cfg (dict): Mapping of option names to their current values.
        prefix (str): Prefix prepended to every generated option name.

    Returns:
        ArgumentParser: The same ``parser`` object.
    """
    for k, v in cfg.items():
        option = '--' + prefix + k
        if isinstance(v, str):
            parser.add_argument(option)
        elif isinstance(v, bool):
            # Must be tested before ``int``: ``bool`` is a subclass of ``int``.
            parser.add_argument(option, action='store_true')
        elif isinstance(v, int):
            parser.add_argument(option, type=int)
        elif isinstance(v, float):
            parser.add_argument(option, type=float)
        elif isinstance(v, dict):
            add_args(parser, v, prefix + k + '.')
        elif isinstance(v, abc.Iterable):
            # An empty (or None-led) iterable gives no element type to copy,
            # so the values are left as strings instead of crashing.
            first = next(iter(v), None)
            if first is None:
                parser.add_argument(option, nargs='+')
            else:
                parser.add_argument(option, type=type(first), nargs='+')
        else:
            print(f'cannot parse key {prefix + k} of type {type(v)}')
    return parser


class Config:
    """A facility for config and config files.

    It supports common file formats as configs: python/json/yaml. The interface
    is the same as a dict object and also allows access config values as
    attributes.

    Example:
        >>> cfg = Config(dict(a=1, b=dict(b1=[0, 1])))
        >>> cfg.a
        1
        >>> cfg.b
        {'b1': [0, 1]}
        >>> cfg.b.b1
        [0, 1]
        >>> cfg = Config.fromfile('tests/data/config/a.py')
        >>> cfg.filename
        "/home/kchen/projects/mmcv/tests/data/config/a.py"
        >>> cfg.item4
        'test'
        >>> cfg
        "Config [path: /home/kchen/projects/mmcv/tests/data/config/a.py]: "
        "{'item1': [1, 2], 'item2': {'a': 0}, 'item3': True, 'item4': 'test'}"
    """

    @staticmethod
    def _validate_py_syntax(filename):
        with open(filename, 'r', encoding='utf-8') as f:
            # Setting encoding explicitly to resolve coding issue on windows
            content = f.read()
        try:
            ast.parse(content)
        except SyntaxError as e:
            raise SyntaxError('There are syntax errors in config '
                              f'file {filename}: {e}')

    @staticmethod
    def _substitute_predefined_vars(filename, temp_config_name):
        file_dirname = osp.dirname(filename)
        file_basename = osp.basename(filename)
        file_basename_no_extension = osp.splitext(file_basename)[0]
        file_extname = osp.splitext(filename)[1]
        support_templates = dict(
            fileDirname=file_dirname,
            fileBasename=file_basename,
            fileBasenameNoExtension=file_basename_no_extension,
            fileExtname=file_extname)
        with open(filename, 'r', encoding='utf-8') as f:
            # Setting encoding explicitly to resolve coding issue on windows
            config_file = f.read()
        for key, value in support_templates.items():
            regexp = r'\{\{\s*' + str(key) + r'\s*\}\}'
            value = value.replace('\\', '/')
            config_file = re.sub(regexp, value, config_file)
        with open(temp_config_name, 'w') as tmp_config_file:
            tmp_config_file.write(config_file)

    @staticmethod
    def _pre_substitute_base_vars(filename, temp_config_name):
        """Substitute base variable placehoders to string, so that parsing
        would work."""
        with open(filename, 'r', encoding='utf-8') as f:
            # Setting encoding explicitly to resolve coding issue on windows
            config_file = f.read()
        base_var_dict = {}
        regexp = r'\{\{\s*' + BASE_KEY + r'\.([\w\. ]+)\s*\}\}'
        base_vars = set(re.findall(regexp, config_file))
        for base_var in base_vars:
            randstr = f'_{base_var}_{uuid.uuid4().hex.lower()[:6]}'
            base_var_dict[randstr] = base_var
            regexp = r'\{\{\s*' + BASE_KEY + r'\.' + base_var + r'\s*\}\}'
            config_file = re.sub(regexp, f'"{randstr}"', config_file)
        with open(temp_config_name, 'w') as tmp_config_file:
            tmp_config_file.write(config_file)
        return base_var_dict

    @staticmethod
    def _substitute_base_vars(cfg, base_var_dict, base_cfg):
        """Substitute variable strings to their actual values."""
        cfg = copy.deepcopy(cfg)

        if isinstance(cfg, dict):
            for k, v in cfg.items():
                if isinstance(v, str) and v in base_var_dict:
                    new_v = base_cfg
                    for new_k in base_var_dict[v].split('.'):
                        new_v = new_v[new_k]
                    cfg[k] = new_v
                elif isinstance(v, (list, tuple, dict)):
                    cfg[k] = Config._substitute_base_vars(
                        v, base_var_dict, base_cfg)
        elif isinstance(cfg, tuple):
            cfg = tuple(
                Config._substitute_base_vars(c, base_var_dict, base_cfg)
                for c in cfg)
        elif isinstance(cfg, list):
            cfg = [
                Config._substitute_base_vars(c, base_var_dict, base_cfg)
                for c in cfg
            ]
        elif isinstance(cfg, str) and cfg in base_var_dict:
            new_v = base_cfg
            for new_k in base_var_dict[cfg].split('.'):
                new_v = new_v[new_k]
            cfg = new_v

        return cfg

    @staticmethod
    def _file2dict(filename, use_predefined_variables=True):
        """Load one config file into a ``(cfg_dict, cfg_text)`` pair.

        The file is copied into a temporary directory (optionally with the
        predefined ``{{ ... }}`` templates expanded) and parsed from there.
        Files listed under ``_base_`` are loaded recursively and the current
        file is merged on top of them.

        Args:
            filename (str): Path of a ``.py``/``.json``/``.yaml``/``.yml``
                config file.
            use_predefined_variables (bool): Whether to expand the predefined
                templates before parsing.

        Returns:
            tuple: ``(cfg_dict, cfg_text)``; ``cfg_text`` is the file path
            followed by the raw file content, with the texts of base files
            placed first.

        Raises:
            FileNotFoundError: If ``filename`` is not an existing file.
            IOError: If the file extension is not supported.
            KeyError: If two base files define the same top-level key.
        """
        filename = osp.abspath(osp.expanduser(filename))
        check_file_exist(filename)
        fileExtname = osp.splitext(filename)[1]
        if fileExtname not in ['.py', '.json', '.yaml', '.yml']:
            raise IOError('Only py/yml/yaml/json type are supported now!')

        with tempfile.TemporaryDirectory() as temp_config_dir:
            temp_config_file = tempfile.NamedTemporaryFile(
                dir=temp_config_dir, suffix=fileExtname)
            # On Windows the handle is closed right away; the path is still
            # used below by name.
            if platform.system() == 'Windows':
                temp_config_file.close()
            temp_config_name = osp.basename(temp_config_file.name)
            # Substitute predefined variables
            if use_predefined_variables:
                Config._substitute_predefined_vars(filename,
                                                   temp_config_file.name)
            else:
                shutil.copyfile(filename, temp_config_file.name)
            # Substitute base variables from placeholders to strings
            base_var_dict = Config._pre_substitute_base_vars(
                temp_config_file.name, temp_config_file.name)

            if filename.endswith('.py'):
                # Import the temporary copy as a module by putting its
                # directory at the front of ``sys.path`` for the import.
                temp_module_name = osp.splitext(temp_config_name)[0]
                sys.path.insert(0, temp_config_dir)
                Config._validate_py_syntax(filename)
                mod = import_module(temp_module_name)
                sys.path.pop(0)
                cfg_dict = {}
                for name, value in mod.__dict__.items():
                    if not name.startswith('__'):
                        # Every callable attribute is stored under the key
                        # 'data', so a later callable overwrites an earlier one.
                        if callable(value):
                            name = 'data'
                        cfg_dict.update({
                            name: value
                        })
                # cfg_dict = {name: value for name, value in mod.__dict__.items() if not name.startswith('__')}
                # delete imported module
                del sys.modules[temp_module_name]
            elif filename.endswith(('.yml', '.yaml', '.json')):
                import mmcv
                cfg_dict = mmcv.load(temp_config_file.name)
            # close temp file
            temp_config_file.close()

        # The text starts with the file path, followed by the raw content.
        cfg_text = filename + '\n'
        with open(filename, 'r', encoding='utf-8') as f:
            # Setting encoding explicitly to resolve coding issue on windows
            cfg_text += f.read()

        if BASE_KEY in cfg_dict:
            cfg_dir = osp.dirname(filename)
            base_filename = cfg_dict.pop(BASE_KEY)
            base_filename = base_filename if isinstance(
                base_filename, list) else [base_filename]

            cfg_dict_list = list()
            cfg_text_list = list()
            # Base paths are resolved relative to the current file's directory.
            for f in base_filename:
                _cfg_dict, _cfg_text = Config._file2dict(osp.join(cfg_dir, f))
                cfg_dict_list.append(_cfg_dict)
                cfg_text_list.append(_cfg_text)

            base_cfg_dict = dict()
            for c in cfg_dict_list:
                if len(base_cfg_dict.keys() & c.keys()) > 0:
                    raise KeyError('Duplicate key is not allowed among bases')
                base_cfg_dict.update(c)

            # Substitute base variables from strings to their actual values
            cfg_dict = Config._substitute_base_vars(cfg_dict, base_var_dict,
                                                    base_cfg_dict)

            # Values of the current file take precedence over the bases.
            base_cfg_dict = Config._merge_a_into_b(cfg_dict, base_cfg_dict)
            cfg_dict = base_cfg_dict

            # merge cfg_text
            cfg_text_list.append(cfg_text)
            cfg_text = '\n'.join(cfg_text_list)

        return cfg_dict, cfg_text

    @staticmethod
    def _merge_a_into_b(a, b, allow_list_keys=False):
        """merge dict ``a`` into dict ``b`` (non-inplace).

        Values in ``a`` will overwrite ``b``. ``b`` is copied first to avoid
        in-place modifications.

        Args:
            a (dict): The source dict to be merged into ``b``.
            b (dict): The origin dict to be fetch keys from ``a``.
            allow_list_keys (bool): If True, int string keys (e.g. '0', '1')
              are allowed in source ``a`` and will replace the element of the
              corresponding index in b if b is a list. Default: False.

        Returns:
            dict: The modified dict of ``b`` using ``a``.

        Examples:
            # Normally merge a into b.
            >>> Config._merge_a_into_b(
            ...     dict(obj=dict(a=2)), dict(obj=dict(a=1)))
            {'obj': {'a': 2}}

            # Delete b first and merge a into b.
            >>> Config._merge_a_into_b(
            ...     dict(obj=dict(_delete_=True, a=2)), dict(obj=dict(a=1)))
            {'obj': {'a': 2}}

            # b is a list
            >>> Config._merge_a_into_b(
            ...     {'0': dict(a=2)}, [dict(a=1), dict(b=2)], True)
            [{'a': 2}, {'b': 2}]
        """
        b = b.copy()
        for k, v in a.items():
            if allow_list_keys and k.isdigit() and isinstance(b, list):
                k = int(k)
                if len(b) <= k:
                    raise KeyError(f'Index {k} exceeds the length of list {b}')
                b[k] = Config._merge_a_into_b(v, b[k], allow_list_keys)
            elif isinstance(v,
                            dict) and k in b and not v.pop(DELETE_KEY, False):
                allowed_types = (dict, list) if allow_list_keys else dict
                if not isinstance(b[k], allowed_types):
                    raise TypeError(
                        f'{k}={v} in child config cannot inherit from base '
                        f'because {k} is a dict in the child config but is of '
                        f'type {type(b[k])} in base config. You may set '
                        f'`{DELETE_KEY}=True` to ignore the base config')
                b[k] = Config._merge_a_into_b(v, b[k], allow_list_keys)
            else:
                b[k] = v
        return b

    @staticmethod
    def fromfile(filename,
                 use_predefined_variables=True,
                 import_custom_modules=True):
        """Build a ``Config`` from a config file.

        Args:
            filename (str): Path of the config file.
            use_predefined_variables (bool): Forwarded to ``_file2dict``.
            import_custom_modules (bool): If True and the parsed config has a
                non-empty ``custom_imports`` entry, that entry is passed as
                keyword arguments to ``import_modules_from_strings``.

        Returns:
            Config: The loaded configuration.
        """
        parsed, raw_text = Config._file2dict(filename,
                                             use_predefined_variables)
        if import_custom_modules:
            if parsed.get('custom_imports', None):
                import_modules_from_strings(**parsed['custom_imports'])
        return Config(parsed, cfg_text=raw_text, filename=filename)

    @staticmethod
    def fromstring(cfg_str, file_format):
        """Generate config from config str.

        Args:
            cfg_str (str): Config str.
            file_format (str): Config file format corresponding to the
               config str. Only py/yml/yaml/json type are supported now!

        Returns:
            obj:`Config`: Config obj.
        """
        if file_format not in ['.py', '.json', '.yaml', '.yml']:
            raise IOError('Only py/yml/yaml/json type are supported now!')
        if file_format != '.py' and 'dict(' in cfg_str:
            # check if users specify a wrong suffix for python
            warnings.warn(
                'Please check "file_format", the file format may be .py')
        with tempfile.NamedTemporaryFile(
                'w', suffix=file_format, delete=False) as temp_file:
            temp_file.write(cfg_str)
            # on windows, previous implementation cause error
            # see PR 1077 for details
        cfg = Config.fromfile(temp_file.name)
        os.remove(temp_file.name)
        return cfg

    @staticmethod
    def auto_argparser(description=None):
        """Generate argparser from config file automatically (experimental).

        A first parser only extracts ``--config`` from the command line; the
        config it names is loaded and a second parser is populated from it
        with ``add_args``.

        Returns:
            tuple: ``(parser, cfg)``.
        """
        # Pass 1: find out which config file to load.
        bootstrap = ArgumentParser(description=description)
        bootstrap.add_argument('--config', help='config file path', default="../../dev/config_detr.yml")
        known_args, _unknown = bootstrap.parse_known_args()
        cfg = Config.fromfile(known_args.config)

        # Pass 2: build the full parser from the loaded config.
        full_parser = ArgumentParser(description=description)
        full_parser.add_argument('config', help='config file path')
        add_args(full_parser, cfg)
        return full_parser, cfg

    @staticmethod
    def fromargparse(args):
        cfg_dict = {}
        for k, v in args._get_kwargs():
            cfg_dict.update({k: v})
        return cfg_dict

    def merge_args2cfg(self, args, allow_list_keys=True):

        cfg_dict = super(Config, self).__getattribute__('_cfg_dict')
        option_cfg_dict = self.fromargparse(args) #cfg_dict
        super(Config, self).__setattr__(
            '_cfg_dict',
            Config._merge_a_into_b(
                option_cfg_dict, cfg_dict, allow_list_keys=allow_list_keys))


    def __init__(self, cfg_dict=None, cfg_text=None, filename=None):
        if cfg_dict is None:
            cfg_dict = dict()
        elif isinstance(cfg_dict, Namespace):
            cfg_dict = self.fromargparse(cfg_dict)
        elif not isinstance(cfg_dict, (dict, Namespace)):
            raise TypeError('cfg_dict must be a dict or Namespace, but '
                            f'got {type(cfg_dict)}')
        for key in cfg_dict:
            if key in RESERVED_KEYS:
                raise KeyError(f'{key} is reserved for config file')

        super(Config, self).__setattr__('_cfg_dict', ConfigDict(cfg_dict))
        super(Config, self).__setattr__('_filename', filename)
        if cfg_text:
            text = cfg_text
        elif filename:
            with open(filename, 'r') as f:
                text = f.read()
        else:
            text = ''
        super(Config, self).__setattr__('_text', text)

    @property
    def filename(self):
        """The ``filename`` value stored on this config by ``__init__``."""
        return self._filename

    @property
    def text(self):
        """The config source text stored on this config by ``__init__``."""
        return self._text

    @property
    def pretty_text(self):

        indent = 4

        def _indent(s_, num_spaces):
            s = s_.split('\n')
            if len(s) == 1:
                return s_
            first = s.pop(0)
            s = [(num_spaces * ' ') + line for line in s]
            s = '\n'.join(s)
            s = first + '\n' + s
            return s

        def _format_basic_types(k, v, use_mapping=False):
            if isinstance(v, str):
                v_str = f"'{v}'"
            else:
                v_str = str(v)

            if use_mapping:
                k_str = f"'{k}'" if isinstance(k, str) else str(k)
                attr_str = f'{k_str}: {v_str}'
            else:
                attr_str = f'{str(k)}={v_str}'
            attr_str = _indent(attr_str, indent)

            return attr_str

        def _format_list(k, v, use_mapping=False):
            # check if all items in the list are dict
            if all(isinstance(_, dict) for _ in v):
                v_str = '[\n'
                v_str += '\n'.join(
                    f'dict({_indent(_format_dict(v_), indent)}),'
                    for v_ in v).rstrip(',')
                if use_mapping:
                    k_str = f"'{k}'" if isinstance(k, str) else str(k)
                    attr_str = f'{k_str}: {v_str}'
                else:
                    attr_str = f'{str(k)}={v_str}'
                attr_str = _indent(attr_str, indent) + ']'
            else:
                attr_str = _format_basic_types(k, v, use_mapping)
            return attr_str

        def _contain_invalid_identifier(dict_str):
            contain_invalid_identifier = False
            for key_name in dict_str:
                contain_invalid_identifier |= \
                    (not str(key_name).isidentifier())
            return contain_invalid_identifier

        def _format_dict(input_dict, outest_level=False):
            r = ''
            s = []

            use_mapping = _contain_invalid_identifier(input_dict)
            if use_mapping:
                r += '{'
            for idx, (k, v) in enumerate(input_dict.items()):
                is_last = idx >= len(input_dict) - 1
                end = '' if outest_level or is_last else ','
                if isinstance(v, dict):
                    v_str = '\n' + _format_dict(v)
                    if use_mapping:
                        k_str = f"'{k}'" if isinstance(k, str) else str(k)
                        attr_str = f'{k_str}: dict({v_str}'
                    else:
                        attr_str = f'{str(k)}=dict({v_str}'
                    attr_str = _indent(attr_str, indent) + ')' + end
                elif isinstance(v, list):
                    attr_str = _format_list(k, v, use_mapping) + end
                else:
                    attr_str = _format_basic_types(k, v, use_mapping) + end

                s.append(attr_str)
            r += '\n'.join(s)
            if use_mapping:
                r += '}'
            return r

        cfg_dict = self._cfg_dict.to_dict()
        text = _format_dict(cfg_dict, outest_level=True)
        # copied from setup.cfg
        yapf_style = dict(
            based_on_style='pep8',
            blank_line_before_nested_class_or_def=True,
            split_before_expression_after_opening_paren=True)
        text, _ = FormatCode(text.replace('\\', '/'), style_config=yapf_style, verify=True)

        return text

    def __repr__(self):
        """Return ``Config (path: <filename>): <repr of the config dict>``."""
        return f'Config (path: {self.filename}): {self._cfg_dict.__repr__()}'

    def __len__(self):
        """Return the number of top-level entries in the config dict."""
        return len(self._cfg_dict)

    def __getattr__(self, name):
        """Forward attribute lookups that fail on the instance to ``_cfg_dict``."""
        return getattr(self._cfg_dict, name)

    def __delattr__(self, name):
        """Delete attribute ``name`` from the underlying ``_cfg_dict``."""
        return delattr(self._cfg_dict, name)

    def __getitem__(self, name):
        """Return ``self._cfg_dict[name]``."""
        return self._cfg_dict.__getitem__(name)

    def __setattr__(self, name, value):
        if isinstance(value, dict):
            value = ConfigDict(value)
        self._cfg_dict.__setattr__(name, value)

    def __setitem__(self, name, value):
        if isinstance(value, dict):
            value = ConfigDict(value)
        self._cfg_dict.__setitem__(name, value)

    def __iter__(self):
        """Iterate over the underlying config dict."""
        return iter(self._cfg_dict)

    def __getstate__(self):
        """Return ``(_cfg_dict, _filename, _text)``; ``__setstate__`` restores it."""
        return (self._cfg_dict, self._filename, self._text)

    def __setstate__(self, state):
        _cfg_dict, _filename, _text = state
        super(Config, self).__setattr__('_cfg_dict', _cfg_dict)
        super(Config, self).__setattr__('_filename', _filename)
        super(Config, self).__setattr__('_text', _text)

    def dump(self, file=None):
        cfg_dict = super(Config, self).__getattribute__('_cfg_dict').to_dict()
        if self.filename.endswith('.py'):
            if file is None:
                return self.pretty_text
            else:
                with open(file, 'w') as f:
                    f.write(self.pretty_text)
        else:
            import mmcv
            if file is None:
                file_format = self.filename.split('.')[-1]
                return mmcv.dump(cfg_dict, file_format=file_format)
            else:
                mmcv.dump(cfg_dict, file)

    def merge_from_dict(self, options, allow_list_keys=True):
        """Merge list into cfg_dict.

        Merge the dict parsed by MultipleKVAction into this cfg.

        Examples:
            >>> options = {'model.backbone.depth': 50,
            ...            'model.backbone.with_cp':True}
            >>> cfg = Config(dict(model=dict(backbone=dict(type='ResNet'))))
            >>> cfg.merge_from_dict(options)
            >>> cfg_dict = super(Config, self).__getattribute__('_cfg_dict')
            >>> assert cfg_dict == dict(
            ...     model=dict(backbone=dict(depth=50, with_cp=True)))

            # Merge list element
            >>> cfg = Config(dict(pipeline=[
            ...     dict(type='LoadImage'), dict(type='LoadAnnotations')]))
            >>> options = dict(pipeline={'0': dict(type='SelfLoadImage')})
            >>> cfg.merge_from_dict(options, allow_list_keys=True)
            >>> cfg_dict = super(Config, self).__getattribute__('_cfg_dict')
            >>> assert cfg_dict == dict(pipeline=[
            ...     dict(type='SelfLoadImage'), dict(type='LoadAnnotations')])

        Args:
            options (dict): dict of configs to merge from.
            allow_list_keys (bool): If True, int string keys (e.g. '0', '1')
              are allowed in ``options`` and will replace the element of the
              corresponding index in the config if the config is a list.
              Default: True.
        """
        option_cfg_dict = {}
        for full_key, v in options.items():
            d = option_cfg_dict
            key_list = full_key.split('.')
            for subkey in key_list[:-1]:
                d.setdefault(subkey, ConfigDict())
                d = d[subkey]
            subkey = key_list[-1]
            d[subkey] = v

        cfg_dict = super(Config, self).__getattribute__('_cfg_dict')
        super(Config, self).__setattr__(
            '_cfg_dict',
            Config._merge_a_into_b(
                option_cfg_dict, cfg_dict, allow_list_keys=allow_list_keys))


class DictAction(Action):
    """
    argparse action to split an argument into KEY=VALUE form
    on the first = and append to a dictionary. List options can
    be passed as comma separated values, i.e 'KEY=V1,V2,V3', or with explicit
    brackets, i.e. 'KEY=[V1,V2,V3]'. It also support nested brackets to build
    list/tuple values. e.g. 'KEY=[(V1,V2),(V3,V4)]'
    """

    @staticmethod
    def _parse_int_float_bool(val):
        """Convert ``val`` to int, float or bool when it looks like one.

        Conversions are tried in that order; a string that matches none of
        them is returned unchanged.
        """
        try:
            return int(val)
        except ValueError:
            pass
        try:
            return float(val)
        except ValueError:
            pass
        if val.lower() in ['true', 'false']:
            return val.lower() == 'true'
        return val

    @staticmethod
    def _parse_iterable(val):
        """Parse iterable values in the string.

        All elements inside '()' or '[]' are treated as iterable values.

        Args:
            val (str): Value string.

        Returns:
            list | tuple: The expanded list or tuple from the string. A string
            without brackets and commas is returned as a single parsed value.

        Examples:
            >>> DictAction._parse_iterable('1,2,3')
            [1, 2, 3]
            >>> DictAction._parse_iterable('[a, b, c]')
            ['a', 'b', 'c']
            >>> DictAction._parse_iterable('[(1, 2, 3), [a, b], c]')
            [(1, 2, 3), ['a', 'b'], 'c']
        """

        def find_next_comma(string):
            """Find the position of next comma in the string.

            If no ',' is found in the string, return the string length. All
            chars inside '()' and '[]' are treated as one element and thus ','
            inside these brackets are ignored.
            """
            assert (string.count('(') == string.count(')')) and (
                    string.count('[') == string.count(']')), \
                f'Imbalanced brackets exist in {string}'
            # Track the bracket balance of the prefix while scanning once,
            # instead of re-counting the prefix for every character.
            paren_balance = 0
            bracket_balance = 0
            for idx, char in enumerate(string):
                if char == ',' and paren_balance == 0 and bracket_balance == 0:
                    return idx
                if char == '(':
                    paren_balance += 1
                elif char == ')':
                    paren_balance -= 1
                elif char == '[':
                    bracket_balance += 1
                elif char == ']':
                    bracket_balance -= 1
            return len(string)

        # Strip ' and " characters and replace whitespace.
        val = val.strip('\'\"').replace(' ', '')
        is_tuple = False
        if val.startswith('(') and val.endswith(')'):
            is_tuple = True
            val = val[1:-1]
        elif val.startswith('[') and val.endswith(']'):
            val = val[1:-1]
        elif ',' not in val:
            # val is a single value
            return DictAction._parse_int_float_bool(val)

        values = []
        while len(val) > 0:
            comma_idx = find_next_comma(val)
            element = DictAction._parse_iterable(val[:comma_idx])
            values.append(element)
            val = val[comma_idx + 1:]
        if is_tuple:
            values = tuple(values)
        return values

    def __call__(self, parser, namespace, values, option_string=None):
        """Parse every ``KEY=VALUE`` item and store the dict on ``namespace``.

        Raises:
            ValueError: If an item does not contain ``=``.
        """
        options = {}
        for kv in values:
            key, sep, val = kv.partition('=')
            if not sep:
                # Name the offending item instead of failing on unpacking.
                raise ValueError(
                    f'Expected an argument of the form KEY=VALUE, got "{kv}"')
            options[key] = self._parse_iterable(val)
        setattr(namespace, self.dest, options)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/Basis/criterion_metrics.py
================================================
# GPL License
# Copyright (C) 2021 , UESTC
# All Rights Reserved
# @Author  : Xiao Wu, LiangJian Deng
# @reference:
from torch import nn
import torch
from torch import distributed as dist
from collections import OrderedDict

class SetCriterion(nn.Module):
    """Evaluate a collection of named loss callables and gather the results.

    Each entry of ``losses`` is called with the model outputs and targets;
    the values are collected in ``self.loss_dicts``, which is returned by
    ``forward`` and persists between calls.
    """

    def __init__(self, losses, weight_dict):
        """Create the criterion.

        Parameters:
            losses: mapping from a loss name to a callable. The entry named
                ``'loss'`` is called as ``loss(outputs, targets)``; every
                other entry additionally receives the extra positional
                arguments given to ``forward``.
            weight_dict: stored on the instance as ``self.weight_dict``; it is
                not used by the methods of this class.
        """
        super().__init__()
        self.weight_dict = weight_dict
        self.losses = losses
        self.loss_dicts = {}

    def forward(self, outputs, targets, *args, **kwargs):
        """Compute all the requested losses.

        Parameters:
             outputs: model outputs, passed to each loss unchanged.
             targets: ground truth, passed to each loss unchanged.
             *args: extra positional arguments forwarded to every loss except
                    the one named ``'loss'``.
             **kwargs: accepted but not used.

        Returns:
            dict: ``self.loss_dicts``. A loss returning a dict contributes its
            entries; any other return value is stored under the loss name.
        """
        for k in self.losses.keys():
            loss = self.losses[k]
            # Evaluate each loss exactly once and reuse the result.
            if k == 'loss':
                result = loss(outputs, targets)
            else:
                result = loss(outputs, targets, *args)

            if isinstance(result, dict):
                self.loss_dicts.update(result)
            else:
                self.loss_dicts.update({k: result})

        return self.loss_dicts#self._parse_losses(self.loss_dicts)

    def _parse_losses(self, losses):
        """Reduce raw loss values to a total loss and a dict of floats.

        Tensors are averaged, lists of tensors are averaged and summed, and
        dict values are flattened into the log as they are. The total sums
        every entry whose name does not contain ``'top'``.

        Returns:
            tuple: ``(loss, log_vars)`` where ``log_vars`` maps names to
            Python floats (averaged over processes when distributed training
            is initialised).

        Raises:
            TypeError: If a value is not a tensor, list or dict.
        """
        log_vars = OrderedDict()
        for loss_name, loss_value in losses.items():
            if isinstance(loss_value, torch.Tensor):
                log_vars[loss_name] = loss_value.mean()
            elif isinstance(loss_value, list):
                log_vars[loss_name] = sum(_loss.mean() for _loss in loss_value)
            # top-1, top-5 both belong to accuracy
            elif isinstance(loss_value, dict):
                # log_vars[loss_name] = {}
                for name, value in loss_value.items():
                    log_vars[name] = value
                    # log_vars[loss_name].update({name: value.item()})
            else:
                raise TypeError(
                    f'{loss_name} is not a tensor or list of tensors')

        # 'loss' is reserved for the total computed below.
        assert 'loss' not in log_vars.keys(), KeyError("key: 'loss' can't be set from cfg_file.")
        loss = sum(_value for _key, _value in log_vars.items()
                   if 'top' not in _key)#if 'loss' in _key
        log_vars['loss'] = loss
        # output = log_vars.pop('acc') #get
        for loss_name, loss_value in log_vars.items():
            # reduce loss when distributed training
            if dist.is_available() and dist.is_initialized():
                loss_value = loss_value.data.clone()
                dist.all_reduce(loss_value.div_(dist.get_world_size()))
            log_vars[loss_name] = loss_value.item()
        # log_vars.update(acc=output)

        return loss, log_vars

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/Basis/dist_utils.py
================================================
import os
import subprocess
import torch
from torch import nn
import torch.multiprocessing as mp
from torch import distributed as dist
from torch.nn.parallel.distributed import DistributedDataParallel
from logging import info as log_string
# apex is an optional dependency: it is only needed for the apex
# DistributedDataParallel wrapper (imported here as ``DDP``).
try:
    from apex.parallel.distributed import DistributedDataParallel as DDP
except Exception as exc:  # do not swallow KeyboardInterrupt/SystemExit
    import warnings
    # The original code built a ``Warning`` instance and discarded it, so the
    # missing dependency was never reported. Emit a real warning instead.
    warnings.warn(f"No module named 'apex' ({exc}); apex DistributedDataParallel is unavailable.")

def scaled_all_reduce(tensors):
    """Performs the scaled all_reduce operation on the provided tensors.

    The input tensors are modified in-place. Currently supports only the sum
    reduction operator. The reduced values are scaled by the inverse size of the
    process group.

    Args:
        tensors: Iterable of tensors to reduce.

    Returns:
        The ``tensors`` argument itself. It is returned untouched when
        ``torch.distributed`` is unavailable, no process group has been
        initialized, or the world size is 1.
    """
    # There is no need for reduction in the single-proc case. Without an
    # initialized process group ``dist.get_world_size()`` would raise, so
    # that situation is treated as single-process as well.
    if not (dist.is_available() and dist.is_initialized()):
        return tensors
    gpus = dist.get_world_size()
    if gpus == 1:
        return tensors
    # Materialize once so a one-shot iterable is not exhausted by the first
    # loop before the scaling loop runs.
    pending = list(tensors)
    # Queue the reductions
    reductions = [dist.all_reduce(tensor, async_op=True) for tensor in pending]
    # Wait for reductions to finish
    for reduction in reductions:
        reduction.wait()
    # Scale the results
    for tensor in pending:
        tensor.mul_(1.0 / gpus)
    return tensors

def init_dist(launcher, args, backend='nccl', **kwargs):
    """Prepare the process environment and dispatch to a launcher-specific initializer.

    Args:
        launcher (str): One of ``'pytorch'``, ``'mpi'`` or ``'slurm'``.
        args: Object whose ``local_rank`` attribute is read when the
            ``LOCAL_RANK`` environment variable is not set yet.
        backend (str): Passed on to the launcher-specific initializer.
        **kwargs: Passed on to the launcher-specific initializer.

    Raises:
        ValueError: If ``launcher`` is not one of the supported names. The
            environment variable and start method have already been set by
            then.
    """
    env_has_local_rank = 'LOCAL_RANK' in os.environ
    if not env_has_local_rank:
        os.environ['LOCAL_RANK'] = str(args.local_rank)

    # Only pick 'spawn' when no start method has been chosen so far.
    current_method = mp.get_start_method(allow_none=True)
    if current_method is None:
        mp.set_start_method('spawn')

    if launcher == 'pytorch':
        _init_dist_pytorch(backend, **kwargs)
        return
    if launcher == 'mpi':
        _init_dist_mpi(backend, **kwargs)
        return
    if launcher == 'slurm':
        _init_dist_slurm(backend, **kwargs)
        return
    raise ValueError(f'Invalid launcher type: {launcher}')

def get_dist_info():
    """Return ``(rank, world_size)`` of the current process.

    Falls back to ``(0, 1)`` when ``torch.distributed`` is unavailable or no
    process group has been initialized.
    """
    if dist.is_available() and dist.is_initialized():
        return dist.get_rank(), dist.get_world_size()
    return 0, 1

def _init_dist_pytorch(backend, **kwargs):
    # TODO: use local_rank instead of rank % num_gpus
    rank = int(os.environ['RANK'])
    num_gpus = torch.cuda.device_count()
    torch.cuda.set_device(rank % num_gpus)
    dist.init_process_group(backend=backend, **kwargs)


def _init_dist_mpi(backend, **kwargs):
    # TODO: use local_rank instead of rank % num_gpus
    rank = int(os.environ['OMPI_COMM_WORLD_RANK'])
    num_gpus = torch.cuda.device_count()
    torch.cuda.set_device(rank % num_gpus)
    dist.init_process_group(backend=backend, **kwargs)


def _init_dist_slurm(backend, port=None, **kwargs):
    """Initialize slurm distributed training environment.

    If argument ``port`` is not specified, then the master port will be system
    environment variable ``MASTER_PORT``. If ``MASTER_PORT`` is not in system
    environment variable, then a default port ``29500`` will be used.

    Args:
        backend (str): Backend of torch.distributed.
        port (int, optional): Master port. Defaults to None.
    """
    proc_id = int(os.environ['SLURM_PROCID'])
    ntasks = int(os.environ['SLURM_NTASKS'])
    node_list = os.environ['SLURM_NODELIST']
    num_gpus = torch.cuda.device_count()
    torch.cuda.set_device(proc_id % num_gpus)
    addr = subprocess.getoutput(
        f'scontrol show hostname {node_list} | head -n1')
    # print(proc_id, ntasks, node_list, addr)
    # specify master port
    if port is not None:
        os.environ['MASTER_PORT'] = str(port)
    elif 'MASTER_PORT' in os.environ:
        pass  # use MASTER_PORT in the environment variable
    else:
        # 29500 is torch.distributed default port
        os.environ['MASTER_PORT'] = '29500'
    # use MASTER_ADDR in the environment variable if it already exists
    if 'MASTER_ADDR' not in os.environ:
        os.environ['MASTER_ADDR'] = addr
    os.environ['WORLD_SIZE'] = str(ntasks)
    os.environ['LOCAL_RANK'] = str(proc_id % num_gpus)
    os.environ['RANK'] = str(proc_id)
    # print(os.environ)
    dist.init_process_group(backend=backend)

def reduce_mean(tensor, nprocs=None):
    """Average ``tensor`` across processes.

    Args:
        tensor: Tensor to average; it is cloned, not modified.
        nprocs (int, optional): Divisor for the summed result. When omitted,
            the world size from ``get_dist_info()`` is used, and a world
            size of 1 returns ``tensor`` itself without any communication.

    Returns:
        The input tensor (single-process shortcut) or a new tensor holding
        the all-reduced sum divided by ``nprocs``.
    """
    if nprocs is None:
        nprocs = get_dist_info()[1]
        if nprocs == 1:
            return tensor
    averaged = tensor.clone()
    dist.all_reduce(averaged, op=dist.ReduceOp.SUM)
    averaged /= nprocs
    return averaged

class MMDistributedDataParallel(DistributedDataParallel):
    """``DistributedDataParallel`` subclass with helpers for averaging losses.

    The wrapper is always built with ``find_unused_parameters=True`` and also
    stores the wrapped model on ``self.ddp``.
    """

    def __init__(self, model, device_ids):
        super().__init__(model, device_ids, find_unused_parameters=True)

        self.ddp = model

    def reduce_mean(self, tensor, nprocs=None):
        """Return a clone of ``tensor`` summed over all processes and divided by ``nprocs``.

        ``nprocs`` defaults to the world size reported by ``get_dist_info()``.
        """
        if nprocs is None:
            nprocs = get_dist_info()[1]
        averaged = tensor.clone()
        dist.all_reduce(averaged, op=dist.ReduceOp.SUM)
        averaged /= nprocs
        return averaged

    def ddp_step(self, loss_dicts):
        """Average every value of ``loss_dicts`` across processes.

        With a world size of 1 the input dict is returned as is. Otherwise
        all processes synchronize on a barrier and a new dict with the
        reduced values is returned.
        """
        world_size = get_dist_info()[1]
        if world_size == 1:
            return loss_dicts
        dist.barrier()
        return {key: self.reduce_mean(value) for key, value in loss_dicts.items()}

def dist_train_v1(args, model):
    """Wrap ``model`` for parallel training according to ``args.mode``.

    * ``"DDP"``: when ``args.distributed`` is truthy, the model is wrapped in
      ``MMDistributedDataParallel`` if ``args.amp`` is None, or in apex
      ``DDP`` if ``args.amp`` is set but falsy. A truthy ``args.amp`` leaves
      the model unwrapped.
    * ``"DP"``: the model is wrapped in ``nn.DataParallel``.
    * Any other mode returns the model unchanged.

    Args:
        args: Namespace providing ``mode`` and, depending on the branch,
            ``global_rank``, ``distributed``, ``amp``, ``local_rank`` and
            ``device_ids``.
        model: The module to wrap.

    Returns:
        The wrapped (or original) model.
    """
    if args.mode == "DDP":
        if args.global_rank == 0:
            log_string(f'Distributed training: {args.distributed}')
        if not args.distributed:
            return model
        if args.amp is None:
            return MMDistributedDataParallel(model, device_ids=[args.local_rank])
        if not args.amp:
            # delay_allreduce delays all communication to the end of the backward pass.
            log_string("IN apex DistributedDataParallel mode.")
            return DDP(model, delay_allreduce=True)
        # NOTE(review): a truthy args.amp returns the model unwrapped here;
        # confirm that wrapping happens elsewhere for that configuration.
        return model

    if args.mode == "DP":
        log_string(f'DataParallel training')
        return nn.DataParallel(model, device_ids=args.device_ids)

    return model


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/Basis/kill_dist.sh
================================================
# Force-kill (SIGKILL) every process whose `ps aux` line matches "main.py".
# Collect the PIDs first so that `kill` is not invoked without arguments
# (which only prints a usage error) when nothing matches.
pids=$(ps aux | grep main.py | grep -v grep | awk '{print $2}')
if [ -n "$pids" ]; then
    # $pids is intentionally unquoted: each PID must be a separate argument.
    kill -9 $pids
fi


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/Basis/launch.py
================================================
r"""
`torch.distributed.launch` is a module that spawns up multiple distributed
training processes on each of the training nodes.
The utility can be used for single-node distributed training, in which one or
more processes per node will be spawned. The utility can be used for either
CPU training or GPU training. If the utility is used for GPU training,
each distributed process will be operating on a single GPU. This can achieve
well-improved single-node training performance. It can also be used in
multi-node distributed training, by spawning up multiple processes on each node
for well-improved multi-node distributed training performance as well.
This will especially be beneficial for systems with multiple Infiniband
interfaces that have direct-GPU support, since all of them can be utilized for
aggregated communication bandwidth.
In both cases of single-node distributed training or multi-node distributed
training, this utility will launch the given number of processes per node
(``--nproc_per_node``). If used for GPU training, this number needs to be less
or equal to the number of GPUs on the current system (``nproc_per_node``),
and each process will be operating on a single GPU from *GPU 0 to
GPU (nproc_per_node - 1)*.
**How to use this module:**
1. Single-Node multi-process distributed training
::
    >>> #python -m torch.distributed.launch --nproc_per_node=NUM_GPUS_YOU_HAVE
               YOUR_TRAINING_SCRIPT.py (--arg1 --arg2 --arg3 and all other
               arguments of your training script)
2. Multi-Node multi-process distributed training: (e.g. two nodes)
Node 1: *(IP: 192.168.1.1, and has a free port: 1234)*
::
    >>> #python -m torch.distributed.launch --nproc_per_node=NUM_GPUS_YOU_HAVE
               --nnodes=2 --node_rank=0 --master_addr="192.168.1.1"
               --master_port=1234 YOUR_TRAINING_SCRIPT.py (--arg1 --arg2 --arg3
               and all other arguments of your training script)
Node 2:
::
    >>> #python -m torch.distributed.launch --nproc_per_node=NUM_GPUS_YOU_HAVE
               --nnodes=2 --node_rank=1 --master_addr="192.168.1.1"
               --master_port=1234 YOUR_TRAINING_SCRIPT.py (--arg1 --arg2 --arg3
               and all other arguments of your training script)
3. To look up what optional arguments this module offers:
::
    >>> #python -m torch.distributed.launch --help
**Important Notices:**
1. This utility and multi-process distributed (single-node or
multi-node) GPU training currently only achieves the best performance using
the NCCL distributed backend. Thus NCCL backend is the recommended backend to
use for GPU training.
2. In your training program, you must parse the command-line argument:
``--local_rank=LOCAL_PROCESS_RANK``, which will be provided by this module.
If your training program uses GPUs, you should ensure that your code only
runs on the GPU device of LOCAL_PROCESS_RANK. This can be done by:
Parsing the local_rank argument
::
    # >>> import argparse
    # >>> parser = argparse.ArgumentParser()
    # >>> parser.add_argument("--local_rank", type=int)
    # >>> args = parser.parse_args()
# Set your device to local rank using either
# ::
#     >>> torch.cuda.set_device(args.local_rank)  # before your code runs
# or
# ::
#     >>> with torch.cuda.device(args.local_rank):
#     >>>    # your code to run
3. In your training program, you are supposed to call the following function
at the beginning to start the distributed backend. You need to make sure that
the init_method uses ``env://``, which is the only supported ``init_method``
by this module.
::
    torch.distributed.init_process_group(backend='YOUR BACKEND',
                                         init_method='env://')
4. In your training program, you can either use regular distributed functions
or use :func:`torch.nn.parallel.DistributedDataParallel` module. If your
training program uses GPUs for training and you would like to use
:func:`torch.nn.parallel.DistributedDataParallel` module,
here is how to configure it.
::
    model = torch.nn.parallel.DistributedDataParallel(model,
                                                      device_ids=[args.local_rank],
                                                      output_device=args.local_rank)
Please ensure that ``device_ids`` argument is set to be the only GPU device id
that your code will be operating on. This is generally the local rank of the
process. In other words, the ``device_ids`` needs to be ``[args.local_rank]``,
and ``output_device`` needs to be ``args.local_rank`` in order to use this
utility
5. Another way to pass ``local_rank`` to the subprocesses is via the environment variable
``LOCAL_RANK``. This behavior is enabled when you launch the script with
``--use_env=True``. You must adjust the subprocess example above to replace
``args.local_rank`` with ``os.environ['LOCAL_RANK']``; the launcher
will not pass ``--local_rank`` when you specify this flag.
.. warning::
    ``local_rank`` is NOT globally unique: it is only unique per process
    on a machine.  Thus, don't use it to decide if you should, e.g.,
    write to a networked filesystem.  See
    https://github.com/pytorch/pytorch/issues/12042 for an example of
    how things can go wrong if you don't do this correctly.
"""


import time
import signal
import sys
import subprocess
import os
from argparse import ArgumentParser, REMAINDER
from typing import Optional, IO, List, Any

# File name templates for per-process log files written under --logdir.
# The two placeholders are filled with (node_rank, local_rank) in main().
node_local_rank_stdout_filename = "node_{}_local_rank_{}_stdout"
node_local_rank_stderr_filename = "node_{}_local_rank_{}_stderr"

def parse_args():
    """
    Helper function parsing the command line options.

    Launcher options come first, followed by the positional
    ``training_script``; everything after it is collected verbatim into
    ``training_script_args``.

    @retval argparse.Namespace with the parsed options
    """
    parser = ArgumentParser(description="PyTorch distributed training launch "
                                        "helper utility that will spawn up "
                                        "multiple distributed processes")

    # Optional arguments for the launch helper
    parser.add_argument("--nnodes", type=int, default=1,
                        help="The number of nodes to use for distributed "
                             "training")
    parser.add_argument("--node_rank", type=int, default=0,
                        help="The rank of the node for multi-node distributed "
                             "training")
    parser.add_argument("--nproc_per_node", type=int, default=1,
                        help="The number of processes to launch on each node, "
                             "for GPU training, this is recommended to be set "
                             "to the number of GPUs in your system so that "
                             "each process can be bound to a single GPU.")
    parser.add_argument("--master_addr", default="127.0.0.1", type=str,
                        help="Master node (rank 0)'s address, should be either "
                             "the IP address or the hostname of node 0, for "
                             "single node multi-proc training, the "
                             "--master_addr can simply be 127.0.0.1")
    parser.add_argument("--master_port", default=29500, type=int,
                        help="Master node (rank 0)'s free port that needs to "
                             "be used for communication during distributed "
                             "training")
    parser.add_argument("--use_env", default=False, action="store_true",
                        help="Use environment variable to pass "
                             "'local rank'. For legacy reasons, the default value is False. "
                             "If set to True, the script will not pass "
                             "--local_rank as argument, and will instead set LOCAL_RANK.")
    # The trailing space after "as" was missing, which rendered the help
    # text as "...same behavior as'python -m'."
    parser.add_argument("-m", "--module", default=False, action="store_true",
                        help="Changes each process to interpret the launch script "
                             "as a python module, executing with the same behavior as "
                             "'python -m'.")
    parser.add_argument("--no_python", default=False, action="store_true",
                        help="Do not prepend the training script with \"python\" - just exec "
                             "it directly. Useful when the script is not a Python script.")
    parser.add_argument(
        "--logdir",
        default=None,
        type=str,
        help=f"""Relative path to write subprocess logs to. Passing in a relative
        path will create a directory if needed, and write the stdout and stderr to files
        {node_local_rank_stdout_filename} and {node_local_rank_stderr_filename}. Note that
        successive runs with the  same path to write logs to will overwrite existing logs,
        so be sure to save logs as needed.""",
    )

    # positional
    parser.add_argument("training_script", type=str,
                        help="The full path to the single GPU training "
                             "program/script to be launched in parallel, "
                             "followed by all the arguments for the "
                             "training script")

    # rest from the training program
    parser.add_argument('training_script_args', nargs=REMAINDER)
    return parser.parse_args()

def main():
    """Spawn ``nproc_per_node`` copies of the training script and supervise them.

    Each child receives MASTER_ADDR, MASTER_PORT, WORLD_SIZE, RANK and
    LOCAL_RANK through its environment. Unless ``--use_env`` is given, the
    local rank is additionally passed as ``--local_rank=<n>``. The parent then
    polls the children once per second; the first non-zero exit code makes it
    kill all children and raise ``subprocess.CalledProcessError``.

    Raises:
        ValueError: If ``--logdir`` exists but is not a directory, or if
            ``--no_python`` is combined with ``--module`` or used without
            ``--use_env``.
        subprocess.CalledProcessError: If a child exits with a non-zero code.
    """
    args = parse_args()

    # world size in terms of number of processes
    dist_world_size = args.nproc_per_node * args.nnodes

    # set PyTorch distributed related environmental variables
    current_env = os.environ.copy()
    current_env["MASTER_ADDR"] = args.master_addr
    current_env["MASTER_PORT"] = str(args.master_port)
    current_env["WORLD_SIZE"] = str(dist_world_size)

    processes: List[Any] = []

    # Default to one OpenMP thread per child when several children share a
    # node and the user has not chosen a value.
    if 'OMP_NUM_THREADS' not in os.environ and args.nproc_per_node > 1:
        current_env["OMP_NUM_THREADS"] = str(1)
        print("*****************************************\n"
              "Setting OMP_NUM_THREADS environment variable for each process "
              "to be {} in default, to avoid your system being overloaded, "
              "please further tune the variable for optimal performance in "
              "your application as needed. \n"
              "*****************************************".format(current_env["OMP_NUM_THREADS"]))

    if args.logdir:
        # Possibly create the directory to write subprocess log output to.
        if os.path.exists(args.logdir):
            if not os.path.isdir(args.logdir):
                raise ValueError("argument --logdir must be a path to a directory.")
        else:
            # create the relative directory
            # (os.mkdir creates a single level only; parents must already exist)
            os.mkdir(os.path.join(os.getcwd(), args.logdir))

    # One (stdout, stderr) pair per child, indexed by local_rank; stays empty
    # when --logdir is not given.
    subprocess_file_handles = []

    for local_rank in range(0, args.nproc_per_node):
        # each process's rank
        dist_rank = args.nproc_per_node * args.node_rank + local_rank
        current_env["RANK"] = str(dist_rank)
        current_env["LOCAL_RANK"] = str(local_rank)

        # spawn the processes
        with_python = not args.no_python
        cmd = []
        if with_python:
            # "-u" is passed to the interpreter before the script/module name.
            cmd = [sys.executable, "-u"]
            if args.module:
                cmd.append("-m")
        else:
            if not args.use_env:
                raise ValueError("When using the '--no_python' flag, you must also set the '--use_env' flag.")
            if args.module:
                raise ValueError("Don't use both the '--no_python' flag and the '--module' flag at the same time.")

        cmd.append(args.training_script)

        if not args.use_env:
            cmd.append("--local_rank={}".format(local_rank))

        cmd.extend(args.training_script_args)

        stdout_handle: Optional[IO]
        stderr_handle: Optional[IO]
        if args.logdir:
            directory_path = os.path.join(os.getcwd(), args.logdir)
            node_rank = args.node_rank
            stdout_file_name = node_local_rank_stdout_filename.format(node_rank, local_rank)
            stderr_file_name = node_local_rank_stderr_filename.format(node_rank, local_rank)
            # Opened with mode "w": logs from an earlier run are overwritten.
            stdout_handle = open(os.path.join(directory_path, stdout_file_name), "w")
            stderr_handle = open(os.path.join(directory_path, stderr_file_name), "w")
            subprocess_file_handles.append((stdout_handle, stderr_handle))
            stdout_name = stdout_handle.name
            stderr_name = stderr_handle.name
            print(f"""Note: Stdout and stderr for node {node_rank} rank {local_rank} will
            be written to {stdout_name}, {stderr_name} respectively.""")

        sig_names = {2: "SIGINT", 15: "SIGTERM"}
        last_return_code = None

        # The handler is a closure over main()'s locals: it reads `processes`,
        # `cmd`, `sig_names` and `last_return_code` at the time it is called.
        # It is redefined and re-registered on every loop iteration.
        def sigkill_handler(signum, frame):
            for process in processes:
                print(f"Killing subprocess {process.pid}")
                try:
                    process.kill()
                except Exception as e:
                    # best effort: failures while killing a child are ignored
                    pass
            # Set by the polling loop below when a child failed; in that case
            # the failure is reported as an exception instead of sys.exit.
            if last_return_code is not None:
                raise subprocess.CalledProcessError(returncode=last_return_code, cmd=cmd)
            if signum in sig_names:
                print(f"Main process received {sig_names[signum]}, exiting")
            sys.exit(1)

        # pass SIGINT/SIGTERM to children if the parent is being terminated
        signal.signal(signal.SIGINT, sigkill_handler)
        signal.signal(signal.SIGTERM, sigkill_handler)

        # None makes the child inherit the parent's stdout/stderr.
        stdout_handle = None if not subprocess_file_handles else subprocess_file_handles[local_rank][0]
        stderr_handle = None if not subprocess_file_handles else subprocess_file_handles[local_rank][1]
        process = subprocess.Popen(cmd, env=current_env, stdout=stdout_handle, stderr=stderr_handle)
        processes.append(process)

    try:
        # Poll once per second until every child has exited cleanly.
        alive_processes = set(processes)
        while len(alive_processes):
            finished_processes = []
            for process in alive_processes:
                if process.poll() is None:
                    # the process is still running
                    continue
                else:
                    if process.returncode != 0:
                        last_return_code = process.returncode  # for sigkill_handler
                        sigkill_handler(signal.SIGTERM, None)  # not coming back
                    else:
                        # exited cleanly
                        finished_processes.append(process)
            alive_processes = set(alive_processes) - set(finished_processes)

            time.sleep(1)
    finally:
        # close open file descriptors
        for (stdout_handle, stderr_handle) in subprocess_file_handles:
            stdout_handle.close()
            stderr_handle.close()

# Run the launcher only when this file is executed as a script.
if __name__ == "__main__":
    main()


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/Basis/logger.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
# GPL License
# Copyright (C) 2021 , UESTC
# All Rights Reserved
# @Author  : Xiao Wu, LiangJian Deng
# @reference:
import json
from collections import defaultdict
import logging
import os
import functools
import torch.distributed as dist
import colorlog
import time
from pathlib import Path

# Names for which setup_logger() has already configured a logger
# (name -> True); consulted by get_logger() to avoid repeated setup.
logger_initialized = {}

# Level name -> color, passed as ``log_colors`` to colorlog.ColoredFormatter.
log_colors_config = {
    'DEBUG': 'cyan',
    'INFO': 'white',
    'WARNING': 'yellow',
    'ERROR': 'red',
    'CRITICAL': 'red',
}


# def get_root_logger(name, log_file=None, log_level=logging.INFO):
#     return get_logger('mmcls', log_file, log_level)
def get_root_logger(name=None, cfg=None, cfg_name=None, log_level=logging.INFO):
    """Thin wrapper around ``get_logger``.

    Args:
        name (str | None): Logger name.
        cfg: Configuration object forwarded to ``get_logger``.
        cfg_name (str | None): Configuration file name forwarded to ``get_logger``.
        log_level (int): Logging level forwarded to ``get_logger``.

    Returns:
        Whatever ``get_logger`` returns for these arguments.
    """
    # log_level must be passed by keyword: the fourth positional parameter of
    # get_logger is ``phase``, so a positional call put the level into the
    # log file name instead.
    return get_logger(name, cfg, cfg_name, log_level=log_level)
# TODO: Depre
# the same as "get_root_logger"
def create_logger(cfg=None, cfg_name=None, dist_print=0, log_level=logging.INFO):
    """Deprecated alias that calls ``get_logger`` with ``name=None``.

    Args:
        cfg: Configuration object forwarded to ``get_logger``.
        cfg_name (str | None): Configuration file name forwarded to ``get_logger``.
        dist_print: Unused; kept so existing callers keep working.
        log_level (int): Logging level forwarded to ``get_logger``.

    Returns:
        Whatever ``get_logger`` returns for these arguments.
    """
    # log_level must be passed by keyword: the fourth positional parameter of
    # get_logger is ``phase``, so a positional call put the level into the
    # log file name instead.
    return get_logger(None, cfg, cfg_name, log_level=log_level)

@functools.lru_cache()  # so that calling setup_logger multiple times won't add many handlers
def setup_logger(name, final_log_file, color=True):
    """Configure file logging and attach a colored console handler.

    ``logging.basicConfig`` is called with ``final_log_file`` (backslashes
    replaced by ``/``), a bare ``%(message)s`` format and level INFO. Root
    handlers that are exactly ``logging.StreamHandler`` are raised to ERROR.
    A ``colorlog.StreamHandler`` at level INFO is then added to the logger
    called ``name``, and ``name`` is recorded in ``logger_initialized``.

    Args:
        name (str | None): Logger name.
        final_log_file: Path of the log file.
        color (bool): Unused.

    Returns:
        logging.Logger: The configured logger.
    """
    log_path = str(final_log_file).replace('\\', '/')
    logging.basicConfig(filename=log_path, format='%(message)s', level=logging.INFO)

    logger = logging.getLogger(name)

    # Subclasses of StreamHandler (e.g. FileHandler) are deliberately left alone.
    for root_handler in logger.root.handlers:
        if type(root_handler) is logging.StreamHandler:
            root_handler.setLevel(logging.ERROR)

    console = colorlog.StreamHandler()

    rank = dist.get_rank() if dist.is_available() and dist.is_initialized() else 0

    # On rank 0 the same console handler is listed twice, as in the original
    # implementation; Logger.addHandler ignores the duplicate.
    handlers = [console, console] if rank == 0 else [console]

    # log output format
    formatter = colorlog.ColoredFormatter(
        '%(log_color)s- %(message)s',
        log_colors=log_colors_config)

    for stream_handler in handlers:
        stream_handler.setFormatter(formatter)
        stream_handler.setLevel(logging.INFO)  # log_level
        logger.addHandler(stream_handler)

    logger_initialized[name] = True

    return logger


def get_logger(name=None, cfg=None, cfg_name=None, phase='train', log_level=logging.INFO, file_mode='w'):  # log_file=None,
    """Create the output directories of a run and, if enabled, its logger.

    If ``name`` is already recorded in ``logger_initialized`` (or, for string
    names, starts with a recorded string name) no directories are created and
    a single value is returned instead of the usual tuple; see Returns.

    Otherwise the layout ``<cfg.out_dir>/<cfg.dataset>/<cfg.arch>/<cfg_name stem>``
    is created together with a model directory below it. When ``cfg.use_log``
    is truthy, ``setup_logger`` is called with a log file named
    ``<cfg_name stem>_<timestamp>_<phase>.log`` and, outside eval mode, a
    tensorboard directory is created as well.

    Args:
        name (str | None): Logger name.
        cfg: Configuration object; the attributes ``out_dir``, ``dataset``,
            ``arch``, ``eval``, ``resume``, ``use_log`` and ``log_dir`` are read.
        cfg_name (str): Path or name of the configuration file; only its
            base name up to the first ``.`` is used.
        phase (str): Suffix of the log file name. Defaults to 'train'.
        log_level (int): Currently unused by this function.
        file_mode (str): Currently unused by this function.

    Returns:
        tuple | logging.Logger | None:
            ``(logger, final_output_dir, model_save_dir, tensorboard_log_dir)``
            with the three directories as strings (``tensorboard_log_dir`` is
            the string ``'None'`` when no tensorboard directory was created)
            and ``logger`` being None when ``cfg.use_log`` is falsy.
            For an already initialized name, a ``logging.Logger`` or None is
            returned instead.
    """
    if name in logger_initialized:
        if cfg is None:  # cfg.use_log
            return logging.getLogger(name)
        else:
            return None
    # handle hierarchical names
    # e.g., logger "a" is initialized, then logger "a.b" will skip the
    # initialization since it is a child of "a".
    for logger_name in logger_initialized:
        # Only string names can be prefix-compared; ``None`` (the default
        # name) on either side used to raise here.
        if isinstance(name, str) and isinstance(logger_name, str) and name.startswith(logger_name):
            if cfg.use_log:
                return logging.getLogger(name)
            else:
                return None

    logger = None
    tensorboard_log_dir = None
    root_output_dir = Path(cfg.out_dir)
    # set up logger in root_path
    if not root_output_dir.exists():
        print('=> creating {}'.format(root_output_dir))
        root_output_dir.mkdir(parents=True, exist_ok=True)

    dataset = cfg.dataset
    model = cfg.arch
    cfg_name = os.path.basename(cfg_name).split('.')[0]
    time_str = time.strftime('%Y-%m-%d-%H-%M-%S')

    # store all output except tb_log file
    final_output_dir = root_output_dir / dataset / model / cfg_name
    if cfg.eval:
        # Name of the directory that contains the checkpoint; os.path also
        # understands the platform's native separator, unlike split('/').
        model_save_tmp = os.path.basename(os.path.dirname(cfg.resume))
    else:
        model_save_tmp = "model_{}".format(time_str)

    model_save_dir = final_output_dir / model_save_tmp
    # ``log_string`` is neither imported nor defined in this module, so the
    # original call raised NameError. print() matches the other '=> creating'
    # messages and does not touch the logging configuration before
    # setup_logger() runs.
    print('=> creating {}'.format(final_output_dir))
    final_output_dir.mkdir(parents=True, exist_ok=True)
    model_save_dir.mkdir(parents=True, exist_ok=True)

    if cfg.use_log:
        cfg_name = '{}_{}'.format(cfg_name, time_str)
        # a logger to save results
        log_file = '{}_{}.log'.format(cfg_name, phase)
        if cfg.eval:
            final_log_file = model_save_dir / log_file
        else:
            final_log_file = final_output_dir / log_file
            # tensorboard_log
            tensorboard_log_dir = root_output_dir / Path(cfg.log_dir) / dataset / model / cfg_name
            print('=> creating tfb logs {}'.format(tensorboard_log_dir))
            tensorboard_log_dir.mkdir(parents=True, exist_ok=True)
        logger = setup_logger(name, final_log_file)

    return logger, str(final_output_dir), str(model_save_dir), str(
        tensorboard_log_dir)  # logger,

def print_log(msg, logger=None, level=logging.INFO):
    """Emit ``msg`` through the requested channel.

    Args:
        msg (str): The message to be logged.
        logger (logging.Logger | str | None): Where to send the message:
            - None: the message is written with ``print()``.
            - a ``logging.Logger``: the message is logged at ``level``.
            - "silent": the message is dropped.
            - any other str: the logger returned by ``get_logger(logger)``
              is used.
        level (int): Logging level; ignored for ``print()`` and "silent".

    Raises:
        TypeError: If ``logger`` is none of the accepted kinds.
    """
    if logger is None:
        print(msg)
        return
    if isinstance(logger, logging.Logger):
        logger.log(level, msg)
        return
    if logger == 'silent':
        return
    if isinstance(logger, str):
        named_logger = get_logger(logger)
        named_logger.log(level, msg)
        return
    raise TypeError(
        'logger should be either a logging.Logger object, str, '
        f'"silent" or None, but got {type(logger)}')


def load_json_log(json_log):
    """Read a JSON-lines log and group its values by epoch.

    Every line is parsed as one JSON object. Lines without an ``epoch``
    field are ignored; for the others, each remaining field is appended to
    a per-epoch list.

    Args:
        json_log (str): The path of the json log file.

    Returns:
        dict[int, dict[str, list]]:
            Key is the epoch, value is a sub dict. The keys in each sub dict
            are the metric names found in the log, and the value is the list
            of that metric's values in file order for this epoch.

            .. code-block:: python

                # An example output
                {
                    1: {'iter': [100, 200, 300], 'loss': [6.94, 6.73, 6.53]},
                    2: {'iter': [100, 200, 300], 'loss': [6.33, 6.20, 6.07]},
                    ...
                }
    """
    per_epoch = {}
    with open(json_log, 'r') as handle:
        for raw_line in handle:
            record = json.loads(raw_line.strip())
            # skip lines without `epoch` field
            if 'epoch' in record:
                epoch = record.pop('epoch')
                if epoch not in per_epoch:
                    per_epoch[epoch] = defaultdict(list)
                bucket = per_epoch[epoch]
                for field, value in record.items():
                    bucket[field].append(value)
    return per_epoch


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/Basis/metrics.py
================================================
# GPL License
# Copyright (C) 2021 , UESTC
# All Rights Reserved
# @Author  : Xiao Wu, LiangJian Deng
# @reference:
import torch
import numpy as np
import math
import torch.nn as nn


def rgb2ycbcr(img, y_only=True):
    """Convert an RGB image to YCbCr, or to its Y (luma) channel only.

    The last axis of ``img`` must hold the three color channels. The weights
    are divided by 255 before the offsets are added, so the input is
    presumably expected in the 0-255 range (TODO confirm against callers).

    Args:
        img (array-like): Image with a trailing axis of length 3, presumably
            in R, G, B order (TODO confirm).
        y_only (bool): Return only the Y channel (trailing axis removed)
            when True, otherwise all three YCbCr channels.

    Returns:
        numpy.ndarray: The Y channel, or the YCbCr image with the same
        shape as ``img``.
    """
    # The original called ``img.astype(np.float32)`` and discarded the result,
    # so no cast ever happened. The values are used as given to keep the
    # numerical results unchanged.
    img = np.asarray(img)
    if y_only:
        return np.dot(img, [65.481, 128.553, 24.966]) / 255.0 + 16.0
    # Previously this path failed with UnboundLocalError. Each matrix column
    # produces one output channel; the first column is the Y weights above.
    weights = [[65.481, -37.797, 112.0],
               [128.553, -74.203, -93.786],
               [24.966, 112.0, -18.214]]
    return np.dot(img, weights) / 255.0 + [16.0, 128.0, 128.0]


def quantize(img, rgb_range):
    """Snap ``img`` to the 256 levels of an 8-bit image.

    The tensor is scaled by ``255 / rgb_range``, clamped to ``[0, 255]``,
    rounded and scaled back.

    Args:
        img (torch.Tensor): Values to quantize.
        rgb_range (float): Value that maps to 255 after scaling.

    Returns:
        torch.Tensor: The quantized tensor (a new tensor).
    """
    scale = 255.0 / rgb_range
    levels = (img * scale).clamp(0, 255).round()
    return levels / scale


def calc_psnr(sr, hr, scale, rgb_range):
    """Compute the PSNR between ``sr`` and ``hr`` on a weighted gray channel.

    The difference is divided by ``rgb_range``, weighted per channel with
    ``[65.738, 129.057, 25.064] / 256`` and summed over axis 1. The weight
    array has shape ``(1, 3, 1, 1)``, so inputs must broadcast against it.
    Unless ``scale`` is 1, a border of ``scale`` pixels is removed from the
    last two axes before the mean squared error is taken.

    Args:
        sr (array-like): Predicted image(s).
        hr (array-like): Reference image(s).
        scale (int): Border width to crop; 1 disables cropping.
        rgb_range (float): Value range used to normalize the difference.

    Returns:
        int | float: 0 when ``hr`` holds a single element, 100 when the
        images are identical (MSE of 0), otherwise ``-10 * log10(mse)``.
    """
    hr = np.float32(hr)
    sr = np.float32(sr)
    diff = (sr - hr) / rgb_range
    gray_coeffs = np.array([65.738, 129.057, 25.064]).reshape((1, 3, 1, 1)) / 256
    diff = np.multiply(diff, gray_coeffs).sum(1)
    if hr.size == 1:
        return 0
    # The original also computed a border of ``scale + 6`` for scale == 1 but
    # never used it; no cropping happens in that case.
    valid = diff if scale == 1 else diff[..., scale:-scale, scale:-scale]
    mse = np.mean(pow(valid, 2))
    if mse == 0:
        return 100
    # The original wrapped this in try/except that printed ``mse`` and then
    # failed with UnboundLocalError on ``return psnr``; let a genuine error
    # surface directly instead.
    return -10 * math.log10(mse)


class PSNR_ycbcr(nn.Module):
    """Torch counterpart of ``calc_psnr``: PSNR on a luma-weighted difference.

    ``forward`` first quantises the restored image to 8-bit levels, then
    weights the normalised difference with ``[65.738, 129.057, 25.064] / 256``
    along dim 1 and converts the mean squared error to decibels.
    """

    def __init__(self):
        super().__init__()
        # Plain attribute (not a registered buffer); it is moved to the input
        # device on every forward call.
        self.gray_coeffs = torch.tensor([65.738, 129.057, 25.064],
                                        requires_grad=False).reshape((1, 3, 1, 1)) / 256

    def quantize(self, img, rgb_range):
        """Snap ``img`` to 8-bit levels while keeping its value range.

        Args:
            img (torch.Tensor): Image tensor.
            rgb_range: Value corresponding to full intensity in ``img``.

        Returns:
            torch.Tensor: Scaled to [0, 255], clipped, rounded, scaled back.
        """
        pixel_range = 255 / rgb_range
        img = torch.multiply(img, pixel_range)
        img = torch.clip(img, 0, 255)
        img = torch.round(img) / pixel_range
        return img

    @torch.no_grad()
    def forward(self, sr, hr, scale, rgb_range):
        """Return the PSNR of ``sr`` against ``hr``.

        Args:
            sr (torch.Tensor): Restored image(s).
                # assumes layout (N, 3, H, W) to match the (1, 3, 1, 1)
                # coefficients -- TODO confirm with callers
            hr (torch.Tensor): Reference image(s).
            scale: Border width shaved from the last two dims; ``1`` disables
                shaving.
            rgb_range: Value corresponding to full intensity.

        Returns:
            ``0`` when ``hr`` has a single element, otherwise a 0-dim tensor
            holding ``-10 * log10(mse)``.
        """
        sr = self.quantize(sr, rgb_range)
        gray_coeffs = self.gray_coeffs.to(sr.device)

        hr = hr.float()
        sr = sr.float()
        # ``Tensor.size`` is a method, so the old ``hr.size == 1`` was always
        # False; ``numel()`` is the element count the check was meant to use.
        if hr.numel() == 1:
            return 0

        diff = (sr - hr) / rgb_range
        diff = torch.multiply(diff, gray_coeffs).sum(1)

        # Shaving is skipped for scale == 1 (the former ``scale + 6`` value
        # was never used).
        if scale == 1:
            valid = diff
        else:
            valid = diff[..., scale:-scale, scale:-scale]
        mse = torch.mean(torch.pow(valid, 2))
        return -10 * torch.log10(mse)


def sub_mean(x):
    """Subtract fixed per-channel means from the first three channels.

    ``x`` is scaled by 255 (which yields a new array/tensor, so the caller's
    data is not modified), the constants 0.4488, 0.4371 and 0.4040 (times 255)
    are subtracted from channels 0, 1 and 2 along axis 1, and the result is
    scaled back by 1/255.

    Args:
        x: 4-D array or tensor indexed as ``x[:, channel, :, :]``.

    Returns:
        The mean-shifted data in the input's original scale.
    """
    scaled = x * 255.0
    channel_means = (0.4488 * 255, 0.4371 * 255, 0.4040 * 255)
    for channel, mean in enumerate(channel_means):
        scaled[:, channel, :, :] -= mean
    return scaled / 255.0


def add_mean(x):
    """Add fixed per-channel means to the first three channels.

    ``x`` is scaled by 255 (producing a new array/tensor) and the constants
    0.4488, 0.4371 and 0.4040 (times 255) are added to channels 0, 1 and 2
    along axis 1.  Unlike ``sub_mean`` the result is NOT divided by 255 again,
    so the output stays on the 255-scaled range.

    Args:
        x: 4-D array or tensor indexed as ``x[:, channel, :, :]``.

    Returns:
        The mean-shifted data, scaled by 255.
    """
    scaled = x * 255.0
    channel_means = (0.4488 * 255, 0.4371 * 255, 0.4040 * 255)
    for channel, mean in enumerate(channel_means):
        scaled[:, channel, :, :] += mean
    return scaled

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/Basis/optim.py
================================================
import torch
from torch import nn
import torch.optim as optim
import matplotlib.pyplot as plt


class lr_scheduler(object):
    """Learning-rate driver around a ``torch.optim`` optimizer.

    After ``set_optimizer`` the instance either wraps one of the supported
    ``torch.optim.lr_scheduler`` classes (built with the hard-coded
    hyper-parameters below) or, when no supported class is given, falls back
    to the hand-written schedule in ``adjust_1_learning_rate``.

    Args:
        lr: Base learning rate used by the hand-written schedule.
        epochs: Number of epochs simulated by ``get_lr_map``.
    """

    def __init__(self, lr, epochs):
        self.epochs = epochs
        self.lr = lr
        self.lr_scheduler = None
        # Assigned by set_optimizer(); defined here so the attribute exists.
        self.optimizer = None

    # Six common learning-rate strategies, lr = lr * gamma.
    #
    # Notes on ReduceLROnPlateau:
    #   mode (str)      - 'min' or 'max'.  'min' reacts when the monitored
    #                     value stops decreasing (e.g. loss), 'max' when it
    #                     stops increasing (e.g. accuracy).
    #   factor (float)  - multiplier applied to the learning rate (the gamma
    #                     of the other schedulers): lr = lr * factor.
    #   patience (int)  - number of steps without improvement tolerated
    #                     before the learning rate is reduced.
    #   verbose (bool)  - whether to print a message on each reduction.
    #   threshold_mode (str) - 'rel' or 'abs':
    #       rel & max: dynamic_threshold = best * (1 + threshold)
    #       rel & min: dynamic_threshold = best * (1 - threshold)
    #       abs & max: dynamic_threshold = best + threshold
    #       abs & min: dynamic_threshold = best - threshold
    #   threshold (float) - used together with threshold_mode.
    #   cooldown (int)  - steps to wait after a reduction before monitoring
    #                     resumes.
    #   min_lr (float or list) - lower bound of the learning rate; a list can
    #                     be used when there are several parameter groups.

    def set_optimizer(self, optimizer, lr_scheduler):
        """Attach ``optimizer`` and build the requested scheduler.

        Args:
            optimizer: The optimizer whose learning rate is controlled.
            lr_scheduler: A scheduler *class* from ``torch.optim.lr_scheduler``
                (compared by identity against the supported ones).  Any other
                value, including ``None``, selects the hand-written schedule.
        """
        self.optimizer = optimizer
        # Always start clean: previously an unsupported/None type left the
        # scheduler from an earlier call in place, so the manual schedule
        # could never be re-selected.
        self.lr_scheduler = None
        if lr_scheduler == optim.lr_scheduler.StepLR:
            # Step decay at equal intervals.
            self.lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.1)
        elif lr_scheduler == optim.lr_scheduler.ReduceLROnPlateau:
            # Reduce learning rate when validation accuracy plateaus.
            self.lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', patience=5, verbose=True)
        elif lr_scheduler == optim.lr_scheduler.MultiStepLR:
            # Step decay at the given milestone epochs.
            self.lr_scheduler = optim.lr_scheduler.MultiStepLR(optimizer, [100, 200, 300],
                                                               gamma=0.5)  # [50, 100, 150, 200, 250, 300, 350, 400], gamma=0.5)
        elif lr_scheduler == optim.lr_scheduler.ExponentialLR:
            # Exponential decay: x, 0.1x, 0.01x, 0.001x, ...
            self.lr_scheduler = optim.lr_scheduler.ExponentialLR(optimizer=optimizer, gamma=0.1)
        elif lr_scheduler == optim.lr_scheduler.CosineAnnealingLR:
            # Cosine annealing learning rate.
            self.lr_scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=30, eta_min=1e-7)
        elif lr_scheduler == optim.lr_scheduler.CyclicLR:
            # NOTE(review): base_lr (1e-3) is larger than max_lr (1e-4) --
            # confirm this inverted cycle is intended.
            self.lr_scheduler = optim.lr_scheduler.CyclicLR(optimizer, base_lr=1e-3, max_lr=1e-4, step_size_down=30,
                                                            step_size_up=150, cycle_momentum=False)
        elif lr_scheduler == optim.lr_scheduler.LambdaLR:
            # learning rate = initial learning rate * lr_lambda(last_epoch)
            # NOTE(review): ``epoch // 30`` is 0 for the first 30 epochs, so
            # the learning rate starts at zero -- confirm this is intended.
            curves = lambda epoch: epoch // 30
            # lambda2 = lambda epoch: 0.95 ** epoch
            # One lambda is needed per optimizer parameter group; an optimizer
            # built from model.parameters() has a single group.
            self.lr_scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=[curves])
        elif lr_scheduler == optim.lr_scheduler.CosineAnnealingWarmRestarts:
            # T_0: length of the first cycle.
            # T_mult: factor by which the cycle length grows after each
            # restart (T_0 = T_0 * T_mult).
            # Learning rate warmup by 10 epochs.
            self.lr_scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=2, eta_min=0)
        else:
            print("self.lr_scheduler not in pytorch")

    def adjust_2_learning_rate(self, epoch):
        """Combination of two decay rules.

        For ``epoch <= 5`` the learning rate of the *first* parameter group is
        multiplied by 0.9 on every call (other groups are left untouched);
        afterwards every group is multiplied by 0.9 on epochs divisible by 5.
        """
        param_groups = self.optimizer.param_groups
        if epoch <= 5:
            # Only one value is produced, so zip() updates the first group only.
            lr = [param_groups[0]['lr'] * 0.9]
            for param_group, val in zip(param_groups, lr):
                param_group['lr'] = val
        else:
            for param_group in param_groups:
                if epoch % 5 == 0:
                    param_group['lr'] *= 0.9

    def adjust_1_learning_rate(self, epoch, mini_lr=1e-6):
        """Hand-written piecewise schedule applied to every parameter group.

        * current lr of group 0 below ``mini_lr``  -> lr is reset to 1e-5
        * ``epoch <= 40``                          -> ``self.lr * 0.1 ** (epoch // 20)``
        * ``40 < epoch < 81``                      -> 1e-5
        * ``epoch >= 81``                          -> ``self.lr * 0.9 ** (epoch // 20)``,
          where ``self.lr`` is re-based to the current lr at ``epoch == 81``

        Args:
            epoch: Current epoch index.
            mini_lr: Floor below which the learning rate is reset.
        """
        if self.optimizer.param_groups[0]["lr"] < mini_lr:
            lr = 1e-5
        elif epoch <= 40:  # 40 20 80
            lr = self.lr * (0.1 ** (epoch // 20))
        elif epoch >= 81:
            if epoch == 81:  # 41
                # Re-base the schedule on whatever lr is active right now.
                self.lr = self.optimizer.param_groups[0]["lr"]
            lr = self.lr * (0.9 ** (epoch // 20))
        else:
            lr = 1e-5
        for param_group in self.optimizer.param_groups:
            param_group['lr'] = lr

    def step(self, epoch):
        """Advance the schedule: manual rule when no scheduler is attached,
        otherwise ``self.lr_scheduler.step(epoch)``."""
        if self.lr_scheduler is None:
            self.adjust_1_learning_rate(epoch)
        else:
            self.lr_scheduler.step(epoch)

    # preprint lr_map
    def get_lr_map(self, title, out_file=None, viz=False):
        """Simulate ``self.epochs`` steps and plot the resulting learning rates.

        Args:
            title: Plot title.
            out_file: When given, the figure is saved to this path.
            viz: When True the figure is shown with ``plt.show()``.

        Only the learning rate of parameter group 0 and ``self.lr`` are
        restored afterwards; the wrapped scheduler's internal state is not.
        """
        plt.figure()
        lr = []
        print("preprint lr_scheduler")
        tmp = self.optimizer.param_groups[0]['lr']
        for epoch in range(self.epochs):
            self.step(epoch)
            # TODO: plot per layer / parameter group
            if self.lr_scheduler is None:
                lr.append(self.optimizer.param_groups[0]['lr'])
                continue
            try:
                lr.append(self.lr_scheduler.get_last_lr())
            except AttributeError:
                # Schedulers without get_last_lr (e.g. ReduceLROnPlateau in
                # older torch releases): read the value from the optimizer.
                lr.append(self.optimizer.param_groups[0]['lr'])
        plt.plot(list(range(self.epochs)), lr)
        plt.xlabel("epoch")
        plt.ylabel("learning rate")
        plt.title(title)
        if out_file is not None:
            plt.savefig(out_file)
        if viz:
            plt.show()
        self.optimizer.param_groups[0]['lr'] = tmp
        self.lr = tmp


def tune_param():
    """Placeholder for hyper-parameter tuning; intentionally does nothing."""
    return None


def partial_train(model, layers: list):
    """Freeze a model except for selected layers and attach a new ``fc`` head.

    Args:
        model: ``nn.Module`` to modify in place.
        layers: Names of sub-modules (e.g. ``"layer4"``) or full parameter
            names (e.g. ``"layer4.0.weight"``) that must stay trainable.
            ``None`` freezes every existing parameter.

    Returns:
        The same ``model`` with ``requires_grad`` cleared on all non-selected
        parameters and ``model.fc`` replaced by a fresh ``nn.Linear(512, 100)``
        (whose parameters are trainable).
    """
    if layers is None:
        keep = ()
    elif isinstance(layers, str):
        keep = (layers,)
    else:
        keep = tuple(layers)

    # Match on parameter *names*.  The previous ``layers in param`` asked
    # whether a list is contained in a tensor, which raises for any
    # non-None ``layers``.
    for name, param in model.named_parameters():
        if any(name == layer or name.startswith(layer + '.') for layer in keep):
            continue
        param.requires_grad = False

    # Replace the last fc layer
    model.fc = nn.Linear(512, 100)
    return model


if __name__ == "__main__":
    # Demo: build a tiny model/optimizer pair and trace the learning-rate
    # curve produced by ``lr_scheduler``.
    # (The former torchvision AlexNet import only built an unused model and
    # made the demo depend on torchvision, so it has been removed.)
    import matplotlib.pyplot as plt


    class LinearRegression(nn.Module):
        """Two stacked linear layers, used only to obtain some parameters."""

        def __init__(self):
            super(LinearRegression, self).__init__()
            self.linear1 = nn.Linear(1, 5)  # input and output is 1 dimension
            self.linear2 = nn.Linear(5, 1)

        def forward(self, x):
            out = self.linear1(x)
            out = self.linear2(out)
            return out


    glm = LinearRegression()

    optimizer = optim.SGD(params=glm.parameters(), lr=0.1)

    epochs = 450
    # Build a schedule that starts with a small warm-up learning rate, ramps
    # up to the nominal value and then decays periodically as usual.
    lrs = lr_scheduler(0.1, epochs)
    # lrs.set_optimizer(optimizer, optim.lr_scheduler.MultiStepLR)
    # lrs.get_lr_map("MultiStepLR")
    # lrs.set_optimizer(optimizer, optim.lr_scheduler.ExponentialLR)
    # lrs.get_lr_map("ExponentialLR")
    # lrs.set_optimizer(optimizer, optim.lr_scheduler.StepLR)
    # lrs.get_lr_map("StepLR")
    # lrs.set_optimizer(optimizer, optim.lr_scheduler.CyclicLR)
    # lrs.get_lr_map("CyclicLR")
    # # lrs.set_optimizer(optimizer, optim.lr_scheduler.ReduceLROnPlateau)
    # # lrs.get_lr_map("ReduceLROnPlateau")
    # Passing None selects the hand-written adjust_1_learning_rate schedule,
    # so the plot is titled accordingly (it used to be labelled "LambdaLR").
    lrs.set_optimizer(optimizer, None)
    lrs.get_lr_map("adjust_1_learning_rate")
    # lrs.set_optimizer(optimizer, optim.lr_scheduler.CosineAnnealingLR)
    # lrs.get_lr_map("CosineAnnealingLR")
    # lrs.set_optimizer(optimizer, optim.lr_scheduler.CosineAnnealingWarmRestarts)
    # lrs.get_lr_map("CosineAnnealingWarmRestarts")


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/Basis/option.py
================================================
# GPL License
# Copyright (C) UESTC
# All Rights Reserved
# @Author  : Xiao Wu, LiangJian Deng
# @reference:
import argparse
import platform
# import warnings
import os
from UDL.AutoDL import TaskDispatcher
from UDL.Basis.config import Config
import warnings

def _str2bool(value):
    """Parse a command-line boolean.

    ``argparse`` with ``type=bool`` treats every non-empty string (including
    ``"False"``) as True; this converter understands the usual spellings.
    """
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ('true', 't', 'yes', 'y', '1'):
        return True
    if text in ('false', 'f', 'no', 'n', '0', ''):
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")


def common_cfg():
    """Parse the command line into the options shared by all tasks.

    Besides the declared arguments, a set of fixed run-time defaults
    (``global_rank``, ``start_epoch``, ``save_top_k`` ...) is attached to the
    namespace before it is wrapped.

    Returns:
        Config: ``Config(args)`` built from the parsed namespace.
    """
    parser = argparse.ArgumentParser(description='PyTorch Training')
    # * Logger
    parser.add_argument('--use-log', default=True
                        , type=_str2bool)
    parser.add_argument('--log-dir', metavar='DIR', default='logs',
                        help='path to save log')
    parser.add_argument('--tfb-dir', metavar='DIR', default=None,
                        help='useless in this script.')
    parser.add_argument('--use-tfb', default=False, type=_str2bool)

    # * DDP
    parser.add_argument(
        '--launcher',
        choices=['none', 'pytorch', 'slurm', 'mpi'],
        default='none',
        help='job launcher')
    parser.add_argument('--local_rank', default=0, type=int,
                        help="host rank must be 0 and python -m torch.distributed.launch main.py need args.local_rank")
    parser.add_argument('--backend', default='nccl', type=str,  # gloo
                        help='distributed backend')
    parser.add_argument('--dist-url', default='env://',
                        type=str,  # 'tcp://224.66.41.62:23456'
                        help='url used to set up distributed training')
    # * AMP
    parser.add_argument('--amp', default=None, type=_str2bool,
                        help="False is apex, besides True is  torch1.6+, which has supports amp ops to reduce gpu memory and speed up training")
    parser.add_argument('--amp-opt-level', type=str, default='O1', choices=['O0', 'O1', 'O2'],
                        help='mixed precision opt level, if O0, no amp is used')

    # * Training
    parser.add_argument('--accumulated-step', default=1, type=int)
    parser.add_argument('--clip_max_norm', default=0, type=float,
                        help='gradient clipping max norm')

    # * extra
    parser.add_argument('--seed', default=10, type=int,
                        help='seed for initializing training. ')
    parser.add_argument('--device', default='cuda',
                        help='device to use for training / testing')
    parser.add_argument('--reg', type=_str2bool, default=True,
                        help='loss with l2 reguliarization for nn.Conv2D, '
                             'which is very important for classical panshrapening!!! ')

    parser.add_argument('--crop_batch_size', type=int, default=128,
                        help='input batch size for-'
                             ' training')
    parser.add_argument('--rgb_range', type=int, default=255,
                        help='maximum value of RGB')
    parser.add_argument('--model_style', type=str, default=None,
                        help='model_style is used to recursive/cascade or GAN training')
    parser.add_argument('--mode', type=str, default=None,
                        help='dataset file extension')
    parser.add_argument('--task', type=str, default=None,
                        help='dataset file extension')
    parser.add_argument('--arch', type=str, default='',
                        help='arch')
    args = parser.parse_args()

    # Fixed defaults that are not exposed on the command line.
    args.global_rank = 0
    args.once_epoch = False
    args.reset_lr = False
    # Without --amp, mixed precision is disabled regardless of the level.
    args.amp_opt_level = 'O0' if args.amp is None else args.amp_opt_level
    args.save_top_k = 5
    args.save_print_freq = 10
    args.start_epoch = 1
    assert args.accumulated_step > 0
    args.load_model_strict = True
    args.resume_mode = 'best'
    args.validate = False
    args.gpu_ids = [0]
    # args.workflow = []

    return Config(args)

class panshaprening_cfg(TaskDispatcher, name='pansharpening'):
    """Task-level configuration for pansharpening, registered under the name
    ``'pansharpening'`` via the ``TaskDispatcher`` subclass hook.

    Args:
        cfg: Existing configuration to extend; ``common_cfg()`` is used when
            omitted.
        arch: When not ``None`` the configuration is re-dispatched through
            ``self.new`` using ``cfg.arch``.
            # NOTE(review): the ``arch`` argument itself is only used as a
            # flag; the value passed on is ``cfg.arch`` -- confirm intended.
    """

    def __init__(self, cfg=None, arch=None):
        super(panshaprening_cfg, self).__init__()

        # Imported for their import-time effects.
        # presumably these register the pansharpening configs and models;
        # verify against the packages
        import UDL.pansharpening.configs
        import UDL.pansharpening.models

        cfg = common_cfg() if cfg is None else cfg

        cfg.scale = [1]
        # Default dataset root per platform; other systems keep whatever
        # ``data_dir`` the incoming cfg already has (if any).
        host_os = platform.system()
        if host_os == 'Linux':
            cfg.data_dir = '/Datasets/pansharpening/DLPan'
        elif host_os == "Windows":
            cfg.data_dir = 'D:/Datasets/pansharpening/DLPan'

        # Task defaults, assigned in a fixed order.
        task_defaults = (
            ('best_prec1', 10000),
            ('best_prec5', 10000),
            ('metrics', 'loss'),
            ('task', "pansharpening"),
            ('save_fmt', "mat"),  # fmt is mat or not mat
            ('taskhead', "pansharpening"),
        )
        for key, value in task_defaults:
            setattr(cfg, key, value)

        # * Importantly
        warnings.warn("Note: FusionNet, DiCNN, PNN don't have high-pass filter")
        if arch is not None:
            cfg = self.new(cfg=cfg, arch=cfg.arch)
        self.merge_from_dict(cfg)

def nni_cfg(args):
    """Merge the next NNI trial parameters into ``args`` when running in NNI mode.

    Args:
        args: Configuration exposing ``mode`` and ``merge_from_dict``.

    Returns:
        The same ``args`` object; it is untouched unless ``args.mode == 'nni'``.
    """
    if args.mode != 'nni':
        return args

    # Imported lazily so that nni is only required in NNI mode.
    import nni
    tuner_params = nni.get_next_parameter()
    print("launcher: nni is running. \n", tuner_params)
    args.merge_from_dict(tuner_params)
    return args

class get_cfg(TaskDispatcher, name='entrypoint'):
    """Entry-point configuration: command-line options merged with the
    task/architecture configuration and the optional config file.

    Args:
        task: Task name handed to ``TaskDispatcher.new``.
        arch: When not ``None`` it overrides ``args.arch`` from the command
            line.
    """

    def __init__(self, task=None, arch=None):
        super(get_cfg, self).__init__()
        args = common_cfg()
        # args.mode = 'nni'
        if arch is not None:
            args.arch = arch
        if args.mode == 'nni':
            args = nni_cfg(args)
        # args.__delattr__('workflow')

        # Both accepted cases build the configuration the same way, so they
        # share one code path; anything else is rejected.
        can_dispatch = hasattr(args, 'task') or task in TaskDispatcher._task.keys()
        if not can_dispatch:
            raise ValueError(f"nni starter don't have task={task} but expected"
                             f"one of {super()._task.keys()} in TaskDispatcher")
        cfg = TaskDispatcher.new(cfg=args, task=task, arch=args.arch)
        cfg.merge_from_dict(args)

        # cfg.setdefault('workflow', [])
        cfg = data_cfg(cfg)
        print(cfg.pretty_text)

        self.merge_from_dict(cfg)


def data_cfg(cfg):
    """Merge the file named by ``cfg.config`` (if any) into ``cfg``.

    When no config file is set the object is returned unchanged.  Otherwise
    the file is loaded and merged, a callable ``data`` entry is popped and
    called with ``cfg.data_dir`` and its result merged as well, ``workflow``
    defaults to ``[]`` and ``norm_cfg`` is forced to ``'BN'`` when it is set
    and the launcher is ``'none'``.

    Args:
        cfg: Configuration object (dict-like, with attribute access).

    Returns:
        The same ``cfg`` object.

    Raises:
        IOError: If ``cfg.config`` is set but is not an existing file.
    """
    if cfg.get('config', None) is None:
        return cfg

    config_path = cfg.config
    if not os.path.isfile(config_path):
        raise IOError(f"reading {config_path} failed")

    cfg.merge_from_dict(cfg.fromfile(config_path))

    has_data_factory = cfg.get('data', None) is not None and callable(cfg.data)
    if has_data_factory:
        build_data = cfg.pop('data')
        cfg.merge_from_dict(Config(build_data(cfg.data_dir)))

    cfg.workflow = cfg.get('workflow', [])
    if cfg.get('norm_cfg', None) is not None and cfg.launcher == 'none':
        cfg.norm_cfg = 'BN'

    # modify loading COCO from extern
    # if hasattr(cfg, 'data'):
    #     cfg.data.train['ann_file'] = cfg.data.train['ann_file'].replace('data', cfg.data_dir)
    #     cfg.data.train['img_prefix'] = cfg.data.train['img_prefix'].replace('data', cfg.data_dir)
    #     cfg.data.val['ann_file'] = cfg.data.val['ann_file'].replace('data', cfg.data_dir)
    #     cfg.data.val['img_prefix'] = cfg.data.val['img_prefix'].replace('data', cfg.data_dir)
    #     cfg.samples_per_gpu = cfg.data.samples_per_gpu
    #     cfg.workers_per_gpu = cfg.data.workers_per_gpu

    return cfg


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/Basis/postprocess.py
================================================
# GPL License
# Copyright (C) UESTC
# All Rights Reserved
# @Author  : Xiao Wu, LiangJian Deng
# @reference:
from typing import Union, Optional, List, Tuple, Text, BinaryIO
from PIL import Image
import cv2
import os
import numpy as np
import io
import pathlib
import torch
import math

irange = range


def format_np_output(np_arr):
    """Bring an array into a layout PIL can save, one fix-up at a time.

    The steps are applied in sequence, each on the result of the previous one:

    1. a 2-D array gets a leading axis of length 1;
    2. a leading axis of length 1 is repeated three times;
    3. a leading axis of length 3 is moved to the end (axes ``(1, 2, 0)``);
    4. if the maximum is at most 1 the data is scaled by 255 and cast to
       ``uint8``.

    Args:
        np_arr (numpy.ndarray): Array of shape (H, W), (1, H, W) or (3, H, W).

    Returns:
        numpy.ndarray: The converted array.
    """
    out = np_arr
    if out.ndim == 2:
        out = out[np.newaxis, ...]
    if out.shape[0] == 1:
        out = np.repeat(out, 3, axis=0)
    if out.shape[0] == 3:
        out = np.transpose(out, (1, 2, 0))
    if out.max() <= 1:
        out = (out * 255).astype(np.uint8)
    return out


def save_image(im, path):
    """Write an image to ``path``.

    NumPy input is first normalised with ``format_np_output`` and wrapped in
    a PIL image; anything else is expected to provide ``save(path)`` itself.

    Args:
        im: NumPy array/scalar or an object with a ``save`` method (e.g. a
            PIL image).
        path (str): Destination file.
    """
    picture = im
    if isinstance(picture, (np.ndarray, np.generic)):
        picture = Image.fromarray(format_np_output(picture))
    picture.save(path)


def norm_image(image, factor=255.):
    """Min-max normalise an image.

    The minimum is subtracted and the result divided by its maximum, giving
    values in [0, 1].  Only when ``factor`` equals 255 is the result scaled by
    it and converted to ``uint8``; for any other ``factor`` the [0, 1] float
    image is returned as is.

    Args:
        image: Array-like image; the input is never modified.
        factor: ``255`` selects 8-bit output, anything else float output.

    Returns:
        numpy.ndarray: ``uint8`` image in [0, 255] or float image in [0, 1].
        A constant image yields all zeros.
    """
    image = np.asarray(image)
    # astype() copies, and promoting to a float type lets integer images go
    # through the in-place division below (it used to raise for them).
    image = image.astype(np.result_type(image.dtype, np.float32))
    # ``np.max(np.min(image), 0)`` in the original reduced to the plain
    # minimum (the 0 was taken as an axis), so that is what is subtracted.
    image -= np.min(image)
    peak = np.max(image)
    if peak > 0:  # constant image: avoid 0/0 and keep zeros
        image /= peak
    if factor == 255:
        image *= factor
        return np.uint8(image)
    return image


def convert_to_grayscale(im_as_arr):
    """Collapse the last axis of an image into a normalised magnitude map.

    Absolute values are summed over the last axis, shifted by the minimum,
    divided by (99th percentile - minimum) and clipped to [0, 1].

    Args:
        im_as_arr (numpy.ndarray): Image whose last axis is reduced.

    Returns:
        numpy.ndarray: Map with the same leading shape and a trailing axis of
        length 1.
    """
    magnitude = np.abs(im_as_arr).sum(axis=-1)
    upper = np.percentile(magnitude, 99)
    lower = magnitude.min()
    scaled = (magnitude - lower) / (upper - lower)
    scaled = np.clip(scaled, 0, 1)
    return scaled[..., np.newaxis]


def apply_gradient_images(gradient, file_name, is_save=False):
    """Min-max normalise a gradient map and optionally save it as a JPEG.

    Args:
        gradient (np arr): Gradient array; the input itself is not modified.
        file_name (str): Base name (without extension) of the exported file.
        is_save (bool): When True the image is written to
            ``../results/<file_name>.jpg`` and ``None`` is returned.

    Returns:
        The normalised gradient when ``is_save`` is False, otherwise ``None``.
    """
    # Normalize (the subtraction creates a new array, so the division below
    # does not touch the caller's data).
    gradient = gradient - gradient.min()
    peak = gradient.max()
    if peak != 0:  # constant input: keep zeros instead of dividing 0 by 0
        gradient /= peak
    if not is_save:
        return gradient
    # Create the output directory only when something is actually written;
    # exist_ok avoids the exists()/makedirs() race.
    os.makedirs('../results', exist_ok=True)
    path_to_file = os.path.join('../results', file_name + '.jpg')
    save_image(gradient, path_to_file)
    return None


# misc_function
import matplotlib.cm as mpl_color_map
import copy
import PIL


def gen_colormap(input_image, feature, gradient, factor=255):
    """Build a class-activation map from a feature map and its gradient.

    Each feature channel is weighted by the spatial mean of its gradient, the
    channels are summed, negative values are dropped, and the map is min-max
    normalised, quantised with ``factor`` and resized with PIL.

    Args:
        input_image: Array/tensor whose ``shape[2]`` and ``shape[3]`` give the
            target size.
        feature (torch.Tensor): Feature map [C,H,W], optionally with a leading
            batch axis of length 1.
        gradient (torch.Tensor): Gradient with the same layout as ``feature``.
        factor: Quantisation scale applied before and removed after resizing.

    Returns:
        numpy.ndarray: Resized activation map with values in [0, 1].
    """
    if feature.size(0) == 1:
        feature = feature.squeeze(0)
    if gradient.size(0) == 1:
        gradient = gradient.squeeze(0)
    gradient = gradient.cpu().data.numpy()  # [C,H,W]
    weight = np.mean(gradient, axis=(1, 2))  # [C]

    feature = feature.cpu().data.numpy()  # [C,H,W]

    cam = feature * weight[:, np.newaxis, np.newaxis]  # [C,H,W]
    cam = np.sum(cam, axis=0)  # [H,W]
    cam = np.maximum(cam, 0)  # ReLU

    # Normalize between 0-1; a constant map (e.g. all zeros after the ReLU)
    # would otherwise divide 0 by 0.
    low = np.min(cam)
    span = np.max(cam) - low
    cam = (cam - low) / span if span > 0 else np.zeros_like(cam)
    cam = np.uint8(cam * factor)  # Scale between 0-255 to visualize
    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
    # NOTE(review): PIL's resize expects (width, height); if input_image is
    # NCHW this passes (H, W) -- confirm for non-square inputs.
    target_size = (input_image.shape[2], input_image.shape[3])
    cam = np.uint8(Image.fromarray(cam).resize(target_size, Image.LANCZOS)) / factor
    return cam


def apply_colormap_on_image(org_im, activation, colormap_name):
    """Render an activation map as a heatmap and overlay it on an image.

    Args:
        org_im: Original image.  A NumPy array is multiplied by 255 and cast
            to ``uint8``; any other input is indexed as
            ``org_im[0, :3, ...]``, permuted to channel-last and treated the
            same way.
        activation (numpy arr): Activation map passed to the colormap.
        colormap_name (str): Name of the matplotlib colormap.

    Returns:
        tuple: ``(no_trans_heatmap, heatmap_on_image)`` as PIL images -- the
        opaque heatmap and the image blended with a 0.4-alpha heatmap.
    """
    if not isinstance(org_im, np.ndarray):
        org_im = org_im[0, :3, ...].permute(1, 2, 0)
        org_im = org_im.cpu().numpy()
    org_im = org_im * 255
    org_im = PIL.Image.fromarray(org_im.astype(np.uint8))

    # Get colormap.  ``cm.get_cmap`` is gone from recent matplotlib releases;
    # fall back to the colormap registry there.
    try:
        color_map = mpl_color_map.get_cmap(colormap_name)
    except AttributeError:
        import matplotlib
        color_map = matplotlib.colormaps[colormap_name]
    no_trans_heatmap = color_map(activation)
    # Change alpha channel in colormap to make sure original image is displayed
    heatmap = copy.copy(no_trans_heatmap)
    heatmap[..., 3] = 0.4
    heatmap = Image.fromarray((heatmap * 255).astype(np.uint8))
    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
    heatmap = heatmap.resize(org_im.size, Image.LANCZOS)
    no_trans_heatmap = Image.fromarray((no_trans_heatmap * 255).astype(np.uint8))

    # Apply heatmap on image
    heatmap_on_image = Image.new("RGBA", org_im.size)
    heatmap_on_image = Image.alpha_composite(heatmap_on_image, org_im.convert('RGBA'))
    heatmap_on_image = Image.alpha_composite(heatmap_on_image, heatmap)
    return no_trans_heatmap, heatmap_on_image


def get_positive_negative_saliency(gradient):
    """Split a gradient into positive and negative saliency maps.

    Args:
        gradient (numpy arr): Gradient of the operation to visualize.

    Returns:
        tuple: ``(pos_saliency, neg_saliency)`` -- the positive part divided
        by the gradient's maximum, and the negated negative part divided by
        the negated minimum.
    """
    positive_part = np.maximum(0, gradient)
    negative_part = np.maximum(0, -gradient)
    pos_saliency = positive_part / gradient.max()
    neg_saliency = negative_part / -gradient.min()
    return pos_saliency, neg_saliency


# hook_test/Viz/main

def gen_grad_cam(image, feature, gradient):
    """Compute a normalised Grad-CAM style map with torch operations.

    Every feature channel is scaled by the spatial mean of its gradient,
    rectified, and the channels are summed.  The map is then shifted to start
    at zero and divided by its range (plus 1e-8).

    Args:
        image: Unused.
        feature (torch.Tensor): Feature map [C,H,W], optionally with a leading
            batch axis of length 1.
        gradient (torch.Tensor): Gradient with the same layout as ``feature``.

    Returns:
        numpy.ndarray: Activation map [H,W].
    """
    if feature.size(0) == 1:
        feature = feature.squeeze(0)
    if gradient.size(0) == 1:
        gradient = gradient.squeeze(0)

    channel_weight = gradient.mean(dim=(1, 2))  # [C]
    weighted = feature * channel_weight[:, None, None]  # [C,H,W]
    # Rectify per channel first, then reduce over channels.
    cam = torch.clamp(weighted, min=0).sum(dim=0)  # [H,W]

    # Value normalisation
    cam -= cam.min()
    cam /= (cam.max() - cam.min() + 1e-8)

    return cam.cpu().data.numpy()


def apply_heatmap(image, mask, factor=255):
    """Generate a CAM overlay.

    :param image: [H,W,C] original image as a NumPy array; any other input is
        indexed as ``image[0, :3, ...]`` and permuted to channel-last
    :param mask: [H,W], values in 0~1
    :param factor: unused
    :return: tuple(cam, heatmap) -- ``cam`` is the heatmap added to the image
        and passed through ``norm_image(cam, 2048)``; ``heatmap`` is the
        ``uint8`` RGB colour map of ``mask``
    """
    if not isinstance(image, np.ndarray):
        image = image[0, :3, ...].permute(1, 2, 0)
        image = image.cpu().numpy()
    if not isinstance(mask, np.ndarray):
        mask = mask.cpu().numpy()

    # Convert the mask into a heatmap.
    heatmap = cv2.applyColorMap(np.uint8(255 * mask), cv2.COLORMAP_JET)
    heatmap = np.float32(heatmap) / 255
    heatmap = heatmap[..., ::-1]  # bgr to rgb

    # Blend the heatmap into the original image.  cv2.resize takes the target
    # size as (width, height); passing image.shape[:2] == (H, W) produced a
    # transposed size that cannot be added to a non-square image.
    height, width = image.shape[:2]
    cam = cv2.resize(heatmap, (width, height)) + np.float32(image)
    return norm_image(cam, 2048), (heatmap * 255).astype(np.uint8)


def showimage8(images, unnormlize=2047.0, first_channel=False):
    """Build a displayable 3-band composite from a multi-band image.

    Bands 0, 2 and 4 of the last axis are selected, rescaled, clamped to
    [0, 2047], contrast-stretched with ``linstretch`` and returned with the
    band order reversed.

    Args:
        images: Tensor (or array-like, converted with ``torch.as_tensor``).
            # NOTE(review): the assert only requires shape[1] >= 3 while the
            # band selection needs at least 5 entries on the last axis --
            # confirm the expected layout
        unnormlize: Scale applied when the data maximum is at most 1.0; data
            whose maximum exceeds 1.0 is left unscaled.
        first_channel: When True the input is permuted with ``(1, 2, 0)``
            before the bands are selected.

    Returns:
        numpy.ndarray: Stretched composite with reversed band order.
    """
    assert images.shape[1] >= 3, "input images format is not suitable"

    # Non-tensor input used to leave ``output`` unbound; convert it instead.
    if not isinstance(images, torch.Tensor):
        images = torch.as_tensor(images)

    # ``np.float`` was removed in NumPy 1.24; the builtin float is equivalent.
    scale = 1.0 if float(torch.max(images)) > 1.0 else unnormlize
    if first_channel:
        images = images.permute(1, 2, 0)
    # float64 0-dim scale, matching the dtype the original expression produced
    output = images[..., [0, 2, 4]] * torch.tensor(scale, dtype=torch.float64)
    output = torch.clamp(output, 0, 2047)
    output = output.cpu().detach().numpy()

    stretched = linstretch(output)
    return stretched[:, :, ::-1]


def linstretch(images, tol=None):
    '''Histogram-based linear contrast stretch, applied per channel.

    For every channel the rounded values are histogrammed with one bin per
    unit of range; the bin centres at which the cumulative count passes
    ``tol[0]`` / stays below ``tol[1]`` (fractions of the pixel count) become
    the clip bounds, and the clipped channel is rescaled to [0, 1].

    Ported from the following MATLAB reference:

    NM = N*M;
    for i=1:3
        b = reshape(double(uint16(ImageToView(:,:,i))),NM,1);
        [hb,levelb] = hist(b,max(b)-min(b));
        chb = cumsum(hb);  % cumulative sum along the first non-singleton dim
        t(1)=ceil(levelb(find(chb>NM*tol(i,1), 1 )));
        t(2)=ceil(levelb(find(chb<NM*tol(i,2), 1, 'last' )));
        %t(2) = 1;
        b(b<t(1))=t(1);
        b(b>t(2))=t(2);
        b = (b-t(1))/(t(2)-t(1));
        ImageToView(:,:,i) = reshape(b,N,M);
    end

    Args:
        images (numpy.ndarray): (H, W, C) or (H, W) image.  Floating-point
            arrays are stretched in place; other dtypes are first copied to
            float32 because they cannot hold the [0, 1] result.
        tol: ``[low, high]`` cumulative fractions; defaults to
            ``[0.01, 0.995]``.

    Returns:
        numpy.ndarray: The stretched image with singleton axes squeezed out.
        Channels that are constant or whose bounds collapse become all zeros.
    '''
    # images = np.random.randn(64, 64, 3) * 2047.0
    if tol is None:
        tol = [0.01, 0.995]
    if not np.issubdtype(images.dtype, np.floating):
        # Writing [0, 1] floats back into an integer array truncated them to 0.
        images = images.astype(np.float32)
    if images.ndim != 3:
        images = np.expand_dims(images, axis=-1)
    h, w, channels = images.shape
    N = h * w
    for c in range(channels):
        image = np.float32(np.round(images[:, :, c])).reshape(N, 1)
        n_bins = math.ceil(image.max() - image.min())
        if n_bins < 1:
            # Constant channel: np.histogram rejects bins=0.
            images[..., c] = 0
            continue
        hb, levelb = np.histogram(image, bins=n_bins)
        chb = np.cumsum(hb, 0)
        levelb_center = levelb[:-1] + (levelb[1] - levelb[0]) / 2
        lower = levelb_center[chb > N * tol[0]]
        upper = levelb_center[chb < N * tol[1]]
        if lower.size == 0 or upper.size == 0 or upper[-1] <= lower[0]:
            # No usable [low, high] interval (e.g. one bin holds nearly all
            # pixels): avoid IndexError / division by zero.
            images[..., c] = 0
            continue
        lbc_min, lbc_max = lower[0], upper[-1]
        image = np.clip(image, a_min=lbc_min, a_max=lbc_max)
        image = (image - lbc_min) / (lbc_max - lbc_min)
        images[..., c] = np.reshape(image, (h, w))

    images = np.squeeze(images)

    return images


def make_grid(
        tensor: Union[torch.Tensor, List[torch.Tensor]],
        mode: str = "grey",
        nrow: int = 8,
        padding: int = 2,
        normalize: bool = False,
        range: Optional[Tuple[int, int]] = None,
        scale_each: bool = False,
        pad_value: int = 0,
) -> torch.Tensor:
    """Arrange a batch of images into a single grid image.

    Args:
        tensor (Tensor or list): 4D mini-batch Tensor of shape (B x C x H x W)
            or a list of images all of the same size.  2D (H x W) and 3D
            inputs are promoted to a batch of one; a 2D image or a 3D image
            with one channel is expanded to three channels.
        mode (str, optional): Manually selected channel mode.  With ``"RGB"``
            a 4D single-channel batch is expanded to three channels; with any
            other value it is left single-channel. Default: ``"grey"``.
        nrow (int, optional): Number of images displayed in each row of the
            grid. The final grid size is ``(B / nrow, nrow)``. Default: ``8``.
        padding (int, optional): Amount of padding. Default: ``2``.
        normalize (bool, optional): If True, work on a clone and shift the
            image to the range (0, 1) using the min and max values specified
            by :attr:`range`. Default: ``False``.
        range (tuple, optional): Tuple (min, max) used to normalize the image.
            By default, min and max are computed from the tensor.
        scale_each (bool, optional): If ``True``, scale each image in the
            batch separately rather than with the (min, max) over all images.
            Default: ``False``.
        pad_value (float, optional): Value for the padded pixels.
            Default: ``0``.

    Returns:
        Tensor: For a batch of one, that image (C x H x W); otherwise the
        padded grid.

    Raises:
        TypeError: If ``tensor`` is neither a tensor nor a list of tensors.
    """
    is_tensor_list = isinstance(tensor, list) and all(torch.is_tensor(item) for item in tensor)
    if not (torch.is_tensor(tensor) or is_tensor_list):
        raise TypeError('tensor or list of tensors expected, got {}'.format(type(tensor)))

    # A list of tensors becomes one 4D mini-batch tensor.
    if isinstance(tensor, list):
        tensor = torch.stack(tensor, dim=0)

    if tensor.dim() == 2:  # single image H x W
        tensor = tensor.unsqueeze(0)
    if tensor.dim() == 3:  # single image
        if tensor.size(0) == 1:  # single channel -> three channels
            tensor = torch.cat([tensor] * 3, 0)
        tensor = tensor.unsqueeze(0)

    # Single-channel batches are only expanded when RGB output is requested.
    if tensor.dim() == 4 and tensor.size(1) == 1 and mode == "RGB":
        tensor = torch.cat([tensor] * 3, 1)

    if normalize is True:
        tensor = tensor.clone()  # keep the caller's tensor untouched
        if range is not None:
            assert isinstance(range, tuple), \
                "range has to be a tuple (min, max) if specified. min and max are numbers"

        def rescale_(img):
            # In-place clamp to [low, high] followed by a shift/scale to ~[0, 1].
            if range is not None:
                low, high = range[0], range[1]
            else:
                low, high = float(img.min()), float(img.max())
            img.clamp_(min=low, max=high)
            img.add_(-low).div_(high - low + 1e-5)

        if scale_each is True:
            for single in tensor:  # loop over mini-batch dimension
                rescale_(single)
        else:
            rescale_(tensor)

    if tensor.size(0) == 1:
        return tensor.squeeze(0)

    # Lay the mini-batch out on a padded canvas, row by row.
    nmaps = tensor.size(0)
    xmaps = min(nrow, nmaps)
    ymaps = int(math.ceil(float(nmaps) / xmaps))
    height, width = int(tensor.size(2) + padding), int(tensor.size(3) + padding)
    num_channels = tensor.size(1)
    grid = tensor.new_full((num_channels, height * ymaps + padding, width * xmaps + padding), pad_value)
    for index, single in enumerate(tensor):
        row, col = divmod(index, xmaps)
        # Tensor.copy_() is a valid method but seems to be missing from the stubs
        # https://pytorch.org/docs/stable/tensors.html#torch.Tensor.copy_
        cell = grid.narrow(1, row * height + padding, height - padding)  # type: ignore[attr-defined]
        cell = cell.narrow(2, col * width + padding, width - padding)
        cell.copy_(single)
    return grid


def tensor_save_image(
        tensor: Union[torch.Tensor, List[torch.Tensor]],
        fp: Union[Text, pathlib.Path, BinaryIO],
        nrow: int = 8,
        padding: int = 2,
        normalize: bool = False,
        range: Optional[Tuple[int, int]] = None,
        scale_each: bool = False,
        pad_value: int = 0,
        format: Optional[str] = None,
) -> None:
    """Write a tensor (or list of tensors) to an image file through PIL.

    The input is first handed to ``make_grid``; the resulting grid is scaled
    by 255, rounded to the nearest integer, clamped to [0, 255], converted to
    ``uint8`` on the CPU and saved with ``PIL.Image.save``.

    Args:
        tensor (Tensor or list): Image data forwarded unchanged to ``make_grid``.
        fp (string, pathlib.Path or file object): Destination passed to
            ``PIL.Image.save``.
        nrow, padding, normalize, range, scale_each, pad_value: Forwarded
            as keyword arguments to ``make_grid``.
        format (Optional[str]): Passed to ``PIL.Image.save`` as ``format``.
            When ``None`` PIL chooses the format itself (normally from the
            file name extension), so it should be given when ``fp`` is a
            file object.
    """
    from PIL import Image

    grid_options = dict(
        nrow=nrow,
        padding=padding,
        pad_value=pad_value,
        normalize=normalize,
        range=range,
        scale_each=scale_each,
    )
    grid = make_grid(tensor, **grid_options)

    # ``mul`` allocates a new tensor, so the in-place ops below never touch ``grid``.
    scaled = grid.mul(255)
    # Adding 0.5 before the uint8 cast rounds to the nearest integer.
    scaled.add_(0.5).clamp_(0, 255)
    # Move the leading axis last (the grid built by make_grid is channel-first).
    channel_last = scaled.permute(1, 2, 0)
    pixels = channel_last.to('cpu', torch.uint8).numpy()

    Image.fromarray(pixels).save(fp, format=format)


if __name__ == "__main__":
    # Ad-hoc smoke run when the module is executed directly: call linstretch
    # on a random 3x3 array. The return value is discarded.
    a = np.random.randn(3, 3)
    linstretch(a)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/Basis/python_sub_class.py
================================================
# GPL License
# Copyright (C) UESTC
# All Rights Reserved
# @Author  : Xiao Wu, LiangJian Deng
# @reference:
import torch
from UDL.pansharpening.evaluation.ps_evaluate import analysis_accu, save_results
from UDL.Basis.config import Config
import warnings
import scipy.io as sio

class TaskDispatcher(Config):
    """Name-keyed registry and factory for ``TaskDispatcher`` subclasses.

    Every subclass registers itself in the shared ``_task`` dict when it is
    created (see ``__init_subclass__``). Instances of the registered classes
    are then obtained either by calling ``TaskDispatcher(task=...)`` directly
    or through the more lenient ``TaskDispatcher.new(...)`` class method.
    """

    # Registry shared by the whole hierarchy: key -> registered subclass.
    # It is defined only here, so every subclass writes into the same dict.
    _task = dict()

    def __init_subclass__(cls, name='', **kwargs):
        """Register ``cls`` under ``name`` (or under its class name if ``name`` is empty).

        Args:
            name (str): Registry key, given as a class keyword argument, e.g.
                ``class Foo(TaskDispatcher, name='foo')``.
            **kwargs: Forwarded to the parent ``__init_subclass__``.
        """
        super().__init_subclass__(**kwargs)

        key = name if name != '' else cls.__name__
        cls._task[key] = cls
        cls._name = key

    def __new__(cls, *args, **kwargs):
        """Allocate an instance, dispatching on ``task`` when called on the base class.

        When ``TaskDispatcher`` itself is instantiated, the ``task`` keyword
        selects the registered subclass to allocate instead. Subclasses are
        allocated as-is.

        Raises:
            ValueError: If called on ``TaskDispatcher`` with a ``task`` that is
                not registered.
        """
        if cls is TaskDispatcher:
            task = kwargs.get('task')
            try:
                cls = cls._task[task]
            except KeyError:
                # The two f-string halves are concatenated, so the trailing
                # space is required to keep the message readable.
                raise ValueError(f'Got task={task} but expected '
                                 f'one of {cls._task.keys()}')

        return super().__new__(cls)

    @classmethod
    def new(cls, **kwargs):
        """Build a registered class selected by ``mode``, ``task`` or ``arch``.

        This entry point is used both when launching from outside and when
        launching from within a task, which pass different arguments. The
        lookup key is chosen in this order:

        1. ``mode`` if it is given and not ``None`` (``task``/``arch`` then stay
           in ``kwargs`` and are forwarded to the constructor);
        2. otherwise ``task`` if truthy (second call layer);
        3. otherwise ``arch`` if truthy (third call layer).

        The keyword used for the lookup is removed from ``kwargs`` before the
        selected class is called with the remaining keyword arguments.

        Returns:
            An instance of the selected class, or an empty ``Config()`` (after
            emitting a warning) when the key is not registered.
        """
        key = 'mode'
        # ``mode`` is only a selector and is never forwarded to the constructor.
        value = kwargs.pop('mode', None)

        if value is None:
            if kwargs.get('task', None):
                # Second call layer: dispatch on the task name.
                key = 'task'
                value = kwargs.pop('task')
            else:
                # Third call layer: dispatch on the architecture name. If it is
                # missing too, ``value`` stays None and the lookup below fails.
                key = 'arch'
                if kwargs.get('arch', None):
                    value = kwargs.pop('arch')

        try:
            target = cls._task[value]
        except KeyError:
            warning = f'Got {key}={value} but expected ' \
                      f'one of {cls._task.keys()}'
            warnings.warn(warning)
            return Config()

        return target(**kwargs)

class ModelDispatcher(object):
    """Name-keyed registry of task-level model wrappers plus a model factory.

    Direct subclasses register themselves in the shared ``_task`` dict when
    they are created. ``build_model`` uses that registry together with the
    ``_models`` registry of the selected task class to build a model from a
    config object.
    """

    # Registry shared by the whole hierarchy: key -> registered subclass.
    _task = dict()

    def __init_subclass__(cls, name='', **kwargs):
        """Register ``cls`` under ``name`` (or under its class name if ``name`` is empty)."""
        super().__init_subclass__(**kwargs)
        key = name if name != '' else cls.__name__
        cls._task[key] = cls
        cls._name = key

    def __new__(cls, *args, **kwargs):
        """Allocate an instance, dispatching on ``task`` when called on the base class.

        Raises:
            ValueError: If called on ``ModelDispatcher`` with a ``task`` that is
                not registered.
        """
        if cls is ModelDispatcher:
            task = kwargs.get('task')
            try:
                cls = cls._task[task]
            except KeyError:
                # The two f-string halves are concatenated, so the trailing
                # space is required to keep the message readable.
                raise ValueError(f'Got task={task} but expected '
                                 f'one of {cls._task.keys()}')

        return super().__new__(cls)

    @classmethod
    def build_model(cls, cfg):
        """Build the model, criterion, optimizer and scheduler described by ``cfg``.

        Steps:
            1. Look up ``cfg.task`` in ``_task`` and instantiate it with
               ``(None, None)`` to obtain the task-level dispatcher.
            2. Look up ``cfg.arch`` in that dispatcher's ``_models`` registry,
               instantiate it and call the instance with ``cfg``; the call must
               return ``(model, criterion, optimizer, scheduler)``.
            3. Wrap the model with the class registered in ``_task`` under
               ``cfg.model_style`` (falling back to ``cfg.task`` when it is
               ``None``), constructed as ``wrapper(model, criterion)``.

        Only the registry look-ups are translated into ``ValueError``; an
        exception raised by one of the constructors propagates unchanged so it
        is not misreported as an unknown key.

        Args:
            cfg: Object exposing ``arch``, ``task`` and ``model_style`` attributes.

        Returns:
            tuple: ``(wrapped_model, criterion, optimizer, scheduler)``.

        Raises:
            ValueError: If the task, arch or model_style key is not registered.
        """
        arch = cfg.arch
        task = cfg.task
        model_style = cfg.model_style

        # Step 1: resolve the task-level dispatcher (e.g. PanSharpeningModel).
        try:
            task_cls = cls._task[task]
        except KeyError:
            raise ValueError(f'Got task={task} but expected '
                             f'one of {cls._task.keys()} in {cls}')
        dispatcher = task_cls(None, None)

        # Step 2: resolve and run the concrete architecture builder.
        try:
            arch_cls = dispatcher._models[arch]
        except KeyError:
            raise ValueError(f'Got arch={arch} but expected '
                             f'one of {dispatcher._models.keys()} in {dispatcher}')
        builder = arch_cls()

        model, criterion, optimizer, scheduler = builder(cfg)

        # Step 3: wrap the raw model; by default the wrapper is the task class.
        if model_style is None:
            model_style = task

        try:
            wrapper_cls = dispatcher._task[model_style]
        except KeyError:
            # The lookup is done in ``_task``, so those are the valid keys.
            raise ValueError(f'Got model_style={model_style} but expected '
                             f'one of {dispatcher._task.keys()} in {dispatcher}')
        model = wrapper_cls(model, criterion)

        return model, criterion, optimizer, scheduler

class PanSharpeningModel(ModelDispatcher, name='pansharpening'):
    """Task-level wrapper for pansharpening networks.

    The class is registered in ``ModelDispatcher._task`` under
    ``'pansharpening'``. Its own subclasses are registered in ``_models``
    instead (see ``__init_subclass__``). An instance wraps a network and
    forwards ``train_step``/``val_step`` to it, adding optional L2
    regularisation and metric computation.
    """

    # Registry of concrete architectures: key -> subclass.
    _models = {}

    def __init__(self, model=None, criterion=None):
        """Store the wrapped network and criterion.

        ``self.reg`` mirrors ``model.reg`` when the network defines it and is
        ``False`` otherwise; it switches L2 regularisation on in ``train_step``.
        """
        super(PanSharpeningModel, self).__init__()
        self.model = model
        self.criterion = criterion
        self.reg = False
        if hasattr(self.model, 'reg'):
            self.reg = self.model.reg

    def __init_subclass__(cls, name='', **kwargs):
        """Register ``cls`` in ``_models`` under ``name`` (or its class name).

        The parent hook is deliberately not called here, so subclasses are
        added to ``_models`` only and not to ``ModelDispatcher._task``.
        """
        key = name if name != '' else cls.__name__
        cls._models[key] = cls
        cls._name = key

    def l2_regularization(self, loss_dict, weight_decay=1e-5, flag=False):
        """Add ``weight_decay * sum(w ** 2) / 2`` over conv weights to the loss.

        Only parameters whose name contains both ``'conv'`` and ``'weight'``
        contribute.

        Args:
            loss_dict: Either a dict with ``'loss'`` and ``'log_vars'`` entries
                (updated in place) or a bare loss value.
            weight_decay (float): Scale of the penalty.
            flag (bool): When true, print each parameter's penalty.

        Returns:
            The updated ``loss_dict`` (dict input) or the new loss value.
        """
        regularizations = []
        for k, v in self.model.named_parameters():
            if 'conv' in k and 'weight' in k:
                # NOTE(review): the penalty is computed from ``v.data``, so it
                # looks detached from autograd (it changes the loss value but
                # would contribute no gradient) - confirm this is intended.
                penalty = weight_decay * ((v.data ** 2).sum() / 2)
                regularizations.append(penalty)
                if flag:
                    print("{} : {}".format(k, penalty))

        if isinstance(loss_dict, dict):
            loss_dict['loss'] = loss_dict['loss'] + sum(regularizations)
            # NOTE(review): 'reg_loss' is logged as the total (regularised)
            # loss, not the penalty alone - confirm that is the intended metric.
            loss_dict['log_vars'].update(reg_loss=loss_dict['loss'])
        else:
            loss_dict = loss_dict + sum(regularizations)

        return loss_dict

    def train_step(self, *args, **kwargs):
        """Run the wrapped network's ``train_step`` on the first positional argument.

        Returns the network's loss output, passed through
        ``l2_regularization`` when ``self.reg`` is truthy.
        """
        loss_dict = self.model.train_step(args[0], **kwargs)

        if self.reg:
            return self.l2_regularization(loss_dict)

        return loss_dict

    def val_step(self, *args, **kwargs):
        """Run one validation step, compute metrics and optionally save the result.

        The wrapped network's ``val_step`` must return ``(sr, gt)``. ``sr`` is
        squeezed, permuted with ``(1, 2, 0)`` and clipped to [0, 1] before
        being compared with ``gt`` by ``analysis_accu``.

        Required keyword arguments: ``img_range``, ``save_fmt``, and when
        ``save_fmt`` is not ``None`` also ``idx``, ``save_dir`` and ``filename``.

        Returns:
            dict: ``{'log_vars': metrics}``.
        """
        sr, gt = self.model.val_step(*args, **kwargs)
        result_our = torch.squeeze(sr).permute(1, 2, 0)
        result_our = torch.clip(result_our, 0, 1)
        # Move the reference onto the prediction's device instead of assuming
        # CUDA, so validation also works on CPU and on non-default GPUs.
        # The literal 4 is presumably the resolution ratio - TODO confirm
        # against analysis_accu.
        metrics = analysis_accu(gt.to(result_our.device).squeeze(0), result_our, 4)
        result_our = result_our * kwargs['img_range']

        if kwargs['save_fmt'] is not None:
            save_results(kwargs['idx'], kwargs['save_dir'], kwargs['filename'], kwargs['save_fmt'], result_our)

        return {'log_vars': metrics}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/Basis/slurm_train.sh
================================================
#!/usr/bin/env bash
# Launch main.py through Slurm's srun with the "slurm" launcher.
# GPUS, GPUS_PER_NODE, CPUS_PER_TASK and SRUN_ARGS can be overridden from the
# environment; the partition, job name and node list are fixed below.

set -x

#cd projects/derain

PARTITION=defq
JOB_NAME=task
#CONFIG=$3
#WORK_DIR=$4
GPUS=${GPUS:-16}
GPUS_PER_NODE=${GPUS_PER_NODE:-8}
CPUS_PER_TASK=${CPUS_PER_TASK:-8}
SRUN_ARGS=${SRUN_ARGS:-""}
#NNODE=${NNODE:-'node[004]'}
#PY_ARGS=${@:5}

# NOTE(review): --gres=gpu:0 looks like it requests zero GPUs per node even
# though the task counts are named after GPUs - confirm this is intended for
# this cluster.
# The node list is quoted so the shell never treats [004-005] as a glob
# pattern. SRUN_ARGS is intentionally left unquoted so that it can expand to
# several separate srun options (or to nothing).
#PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
srun -p "${PARTITION}" \
    --job-name="${JOB_NAME}" \
    --gres=gpu:0 \
    --ntasks="${GPUS}" \
    --ntasks-per-node="${GPUS_PER_NODE}" \
    --cpus-per-task="${CPUS_PER_TASK}" \
    --kill-on-bad-exit=1 \
    --nodelist='node[004-005]' \
    ${SRUN_ARGS} \
    python -u main.py --launcher="slurm" #${PY_ARGS}

#srun -p defq -J test -n 2 --nodelist=node[004-005] --ntasks-per-node=2 --export=cuda_home python -u test_slurm.py
#srun --partition=defq --job-name=rain -n 1 --nodelist=node004 --gres=gpu:8 --ntasks-per-node=8 python -u derain_main.py --launcher="slurm
#sed -i "s/\r//" slurm_train.sh
# srun -p defq -J test -n 1 --nodelist=node[004] --ntasks-per-node=1 python -u derain_main.py --launcher slurm
#srun -p defq -J test -n 2 --nodelist=node[004-005] --ntasks-per-node=1 python -u test_slurm.py


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/Basis/snmn_d.sh
================================================
# Run main.py through the `launch` module (launch.py lives in this directory).
# Presumably starts 8 processes on this node with rendezvous port 27890 -
# confirm the flag semantics against launch.py.
python -m launch --nproc_per_node 8 --master_port 27890 main.py


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/Basis/variance_sacling_initializer.py
================================================
# GPL License
# Copyright (C) 2021 , UESTC
# All Rights Reserved
# @Author  : Xiao Wu, LiangJian Deng
# @reference:
import torch
import torch.nn as nn
import math


def truncated_normal_(tensor, mean=0.0, std=1.0):
    """Fill ``tensor`` in place with approximately truncated-normal samples.

    For every element four standard-normal candidates are drawn and the first
    one lying strictly inside (-2, 2) is kept. If none of the four qualifies,
    the first candidate is used as-is. The result is then scaled by ``std``
    and shifted by ``mean``.

    Args:
        tensor: Tensor to overwrite (written through ``tensor.data``).
        mean (float): Offset added after scaling.
        std (float): Scale applied to the selected samples.

    Returns:
        The same ``tensor`` object.
    """
    with torch.no_grad():
        # One extra trailing axis holding the four candidates per element.
        candidates = tensor.new_empty(tensor.shape + (4,)).normal_()
        inside = (candidates > -2) & (candidates < 2)
        # max over booleans yields the index of the first True (0 if all False).
        _, first_inside = inside.max(-1, keepdim=True)
        chosen = candidates.gather(-1, first_inside).squeeze(-1)
        tensor.data.copy_(chosen)
        tensor.data.mul_(std).add_(mean)
    return tensor


def variance_scaling_initializer(tensor):
    """Initialise a 4-D weight tensor in place with variance-scaled truncated-normal values.

    The tensor's axes are reversed with ``permute(3, 2, 1, 0)`` so that the
    fan computation sees the last two axes as (input, output) channels, e.g.
    ``64 x 9 x 3 x 3 -> 3 x 3 x 9 x 64``. The values are then drawn by
    ``truncated_normal_`` with mean 0 and standard deviation
    ``sqrt(1.3 * 2.0 / fan_in)``. Because ``permute`` returns a view, the
    samples are written straight into ``tensor``'s storage.

    Args:
        tensor: 4-D tensor to initialise (``permute`` is called with four axes).

    Returns:
        The same ``tensor`` object, now filled with the sampled values.
    """

    def calculate_fan(shape, factor=2.0, mode='FAN_IN', uniform=False):
        """Return ``(fan_in, fan_out, trunc_stddev)`` for a channels-last shape.

        Raises:
            ValueError: If ``mode`` is not 'FAN_IN', 'FAN_OUT' or 'FAN_AVG'.
            NotImplementedError: If ``uniform`` is requested.
        """
        # 64 9 3 3 -> 3 3 9 64
        # 64 64 3 3 -> 3 3 64 64
        if shape:
            fan_in = float(shape[-2]) if len(shape) > 1 else float(shape[-1])
            fan_out = float(shape[-1])
        else:
            fan_in = 1.0
            fan_out = 1.0
        # Every leading (kernel) axis multiplies both fans.
        for dim in shape[:-2]:
            fan_in *= float(dim)
            fan_out *= float(dim)

        if mode == 'FAN_IN':
            # Count only number of input connections.
            n = fan_in
        elif mode == 'FAN_OUT':
            # Count only number of output connections.
            n = fan_out
        elif mode == 'FAN_AVG':
            # Average number of inputs and output connections.
            n = (fan_in + fan_out) / 2.0
        else:
            raise ValueError(f"Unknown mode {mode!r}; expected 'FAN_IN', 'FAN_OUT' or 'FAN_AVG'")

        if uniform:
            # NotImplemented is a comparison sentinel, not an exception class;
            # raising it would produce a TypeError instead.
            raise NotImplementedError('uniform variance scaling is not implemented')

        # To get stddev = math.sqrt(factor / n) need to adjust for truncated.
        trunc_stddev = math.sqrt(1.3 * factor / n)
        return fan_in, fan_out, trunc_stddev

    # View with reversed axes; writing into it updates ``tensor`` itself.
    reversed_view = tensor.permute(3, 2, 1, 0)
    _, _, trunc_stddev = calculate_fan(reversed_view.shape)
    truncated_normal_(reversed_view, 0.0, trunc_stddev)

    return tensor


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/Data/pansharpening/test_data/readme-test.txt
================================================


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/Data/pansharpening/training_data/readme-test.txt
================================================


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/Data/pansharpening/validation_data/readme-test.txt
================================================


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/CITATION.cff
================================================
cff-version: 1.2.0
message: "If you use this software, please cite it as below."
authors:
  - name: "MMCV Contributors"
title: "OpenMMLab Computer Vision Foundation"
date-released: 2018-08-22
url: "https://github.com/open-mmlab/mmcv"
license: Apache-2.0


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/CONTRIBUTING.md
================================================
## Contributing to OpenMMLab

All kinds of contributions are welcome, including but not limited to the following.

- Fix typos or bugs
- Add documentation or translate the documentation into other languages
- Add new features and components

### Workflow

1. fork and pull the latest OpenMMLab repository
2. checkout a new branch (do not use master branch for PRs)
3. commit your changes
4. create a PR

```{note}
If you plan to add some new features that involve large changes, it is encouraged to open an issue for discussion first.
```
### Code style

#### Python

We adopt [PEP8](https://www.python.org/dev/peps/pep-0008/) as the preferred code style.

We use the following tools for linting and formatting:

- [flake8](http://flake8.pycqa.org/en/latest/): A wrapper around some linter tools.
- [yapf](https://github.com/google/yapf): A formatter for Python files.
- [isort](https://github.com/timothycrosley/isort): A Python utility to sort imports.
- [markdownlint](https://github.com/markdownlint/markdownlint): A linter to check markdown files and flag style issues.
- [docformatter](https://github.com/myint/docformatter): A formatter to format docstring.

Style configurations of yapf and isort can be found in [setup.cfg](./setup.cfg).

We use [pre-commit hook](https://pre-commit.com/) that checks and formats for `flake8`, `yapf`, `isort`, `trailing whitespaces`, `markdown files`,
fixes `end-of-files`, `double-quoted-strings`, `python-encoding-pragma`, `mixed-line-ending`, and sorts `requirements.txt` automatically on every commit.
The config for a pre-commit hook is stored in [.pre-commit-config](./.pre-commit-config.yaml).

After you clone the repository, you will need to install and initialize the pre-commit hook.

```shell
pip install -U pre-commit
```

From the repository folder

```shell
pre-commit install
```

Try the following steps to install ruby when you encounter an issue on installing markdownlint

```shell
# install rvm
curl -L https://get.rvm.io | bash -s -- --autolibs=read-fail
[[ -s "$HOME/.rvm/scripts/rvm" ]] && source "$HOME/.rvm/scripts/rvm"
rvm autolibs disable

# install ruby
rvm install 2.7.1
```

Or refer to [this repo](https://github.com/innerlee/setup) and use [`zzruby.sh`](https://github.com/innerlee/setup/blob/master/zzruby.sh) according to its instructions.

After this, the code linters and formatters will be enforced on every commit.

>Before you create a PR, make sure that your code lints and is formatted by yapf.

#### C++ and CUDA

We follow the [Google C++ Style Guide](https://google.github.io/styleguide/cppguide.html).


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/Dockerfile
================================================
# Base image: the python:3.7 image.
FROM python:3.7

# Use /mmcv as the working directory for the following instructions.
WORKDIR /mmcv

# Copy the whole build context into /mmcv.
COPY . /mmcv

# Install the copied project in editable mode.
RUN pip install -e .


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/Jenkinsfile
================================================
// Docker images the test stages run in. The three lists below are
// index-aligned: entry i of each list belongs to the same configuration.
def docker_images = ["registry.cn-hangzhou.aliyuncs.com/sensetime/openmmlab:cuda10.1-cudnn7-devel-ubuntu18.04-py37-pt1.3",
                     "registry.cn-hangzhou.aliyuncs.com/sensetime/openmmlab:cuda10.2-cudnn7-devel-ubuntu18.04-py37-pt1.5"]
// Version labels; in this file they are only used to build the stage tag.
def torch_versions = ["1.3.0", "1.5.0"]
def torchvision_versions = ["0.4.2", "0.6.0"]


// Build the closure that runs the whole CI sequence inside `docker_image`.
//   docker_image: image name passed to docker.image(...)
//   folder:       suffix appended to the workspace path to get a private copy
// Returns the closure; it is executed later by `parallel`.
def get_stages(docker_image, folder) {
    // Extra pip arguments selecting the Aliyun PyPI mirror.
    def pip_mirror = "-i https://mirrors.aliyun.com/pypi/simple"
    stages = {
        docker.image(docker_image).inside('-u root --gpus all --net host') {
            // Start from a clean private copy of the workspace.
            sh "rm -rf ${env.WORKSPACE}-${folder} ${env.WORKSPACE}-${folder}@tmp"
            sh "cp -r ${env.WORKSPACE} ${env.WORKSPACE}-${folder}"
            try {
                dir("${env.WORKSPACE}-${folder}") {
                    stage("before_install") {
                        sh "apt-get update && apt-get install -y ninja-build"
                    }
                    stage("dependencies") {
                        // torch and torchvision are pre-installed in dockers
                        sh "pip list | grep torch"
                        sh "apt-get install -y ffmpeg libturbojpeg"
                        sh "pip install pytest coverage lmdb PyTurboJPEG Cython ${pip_mirror}"
                    }
                    stage("build") {
                        // Editable install with MMCV_WITH_OPS=1 set in the environment.
                        sh "MMCV_WITH_OPS=1 pip install -e . ${pip_mirror}"
                    }
                    stage("test") {
                        // Run the test suite under coverage, then emit XML and text reports.
                        sh "coverage run --branch --source=mmcv -m pytest tests/"
                        sh "coverage xml"
                        sh "coverage report -m"
                    }
                }
            } finally {
                // Always remove the private workspace copy, even when a stage fails.
                sh "rm -rf ${env.WORKSPACE}-${folder} ${env.WORKSPACE}-${folder}@tmp"
            }
        }
    }
    return stages
}


// Pipeline entry point: check out the sources, build one stage closure per
// docker image and run all of them in parallel.
node('master') {
    // fetch latest change from SCM (Source Control Management)
    checkout scm

    // Map of tag -> closure, consumed by `parallel` below.
    def stages = [:]
    for (int i = 0; i < docker_images.size(); i++) {
        def docker_image = docker_images[i]
        def torch = torch_versions[i]
        def torchvision = torchvision_versions[i]
        // The tag only labels the parallel branch; the versions are not installed here.
        def tag = docker_image + '_' + torch + '_' + torchvision
        // The loop index doubles as the suffix of the private workspace copy.
        def folder = "${i}"
        stages[tag] = get_stages(docker_image, folder)
    }
    parallel stages
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/LICENSE
================================================
Copyright (c) OpenMMLab. All rights reserved

                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright 2018-2020 Open-MMLab. All rights reserved.

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/LICENSES.md
================================================
# Licenses for special operations

In this file, we list the operations with other licenses instead of Apache 2.0. Users should be careful about adopting these operations in any commercial matters.

|    Operation     |                                                                    Files                                                                              |    License     |
| :--------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------: | :------------: |
|    upfirdn2d     |          [mmcv/ops/csrc/pytorch/cuda/upfirdn2d_kernel.cu](https://github.com/open-mmlab/mmcv/blob/master/mmcv/ops/csrc/pytorch/cuda/upfirdn2d_kernel.cu)          | NVIDIA License |
| fused_leaky_relu | [mmcv/ops/csrc/pytorch/cuda/fused_bias_leakyrelu_cuda.cu](https://github.com/open-mmlab/mmcv/blob/master/mmcv/ops/csrc/pytorch/cuda/fused_bias_leakyrelu_cuda.cu) | NVIDIA License |


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/MANIFEST.in
================================================
include requirements/runtime.txt
include mmcv/model_zoo/open_mmlab.json mmcv/model_zoo/deprecated.json mmcv/model_zoo/mmcls.json
include mmcv/ops/csrc/common/cuda/*.cuh mmcv/ops/csrc/common/cuda/*.hpp mmcv/ops/csrc/common/*.hpp
include mmcv/ops/csrc/pytorch/*.cpp mmcv/ops/csrc/pytorch/cuda/*.cu mmcv/ops/csrc/pytorch/cuda/*.cpp mmcv/ops/csrc/pytorch/cpu/*.cpp
include mmcv/ops/csrc/parrots/*.h mmcv/ops/csrc/parrots/*.cpp


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/README_zh-CN.md
================================================
<div align="center">
  <img src="https://raw.githubusercontent.com/open-mmlab/mmcv/master/docs/en/mmcv-logo.png" width="300"/>
  <div>&nbsp;</div>
  <div align="center">
    <b><font size="5">OpenMMLab 官网</font></b>
    <sup>
      <a href="https://openmmlab.com">
        <i><font size="4">HOT</font></i>
      </a>
    </sup>
    &nbsp;&nbsp;&nbsp;&nbsp;
    <b><font size="5">OpenMMLab 开放平台</font></b>
    <sup>
      <a href="https://platform.openmmlab.com">
        <i><font size="4">TRY IT OUT</font></i>
      </a>
    </sup>
  </div>
  <div>&nbsp;</div>
</div>

[![docs](https://img.shields.io/badge/docs-latest-blue)](https://mmcv.readthedocs.io/zh_CN/latest/)
[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/mmcv)](https://pypi.org/project/mmcv/)
[![PyPI](https://img.shields.io/pypi/v/mmcv)](https://pypi.org/project/mmcv)
[![badge](https://github.com/open-mmlab/mmcv/workflows/build/badge.svg)](https://github.com/open-mmlab/mmcv/actions)
[![codecov](https://codecov.io/gh/open-mmlab/mmcv/branch/master/graph/badge.svg)](https://codecov.io/gh/open-mmlab/mmcv)
[![license](https://img.shields.io/github/license/open-mmlab/mmcv.svg)](https://github.com/open-mmlab/mmcv/blob/master/LICENSE)

[English](README.md) | 简体中文

## 简介

MMCV 是一个面向计算机视觉的基础库，它支持了很多开源项目，例如：

- [MIM](https://github.com/open-mmlab/mim): OpenMMLab 项目、算法、模型的统一入口
- [MMClassification](https://github.com/open-mmlab/mmclassification): OpenMMLab 图像分类工具箱与测试基准
- [MMDetection](https://github.com/open-mmlab/mmdetection): OpenMMLab 检测工具箱与测试基准
- [MMDetection3D](https://github.com/open-mmlab/mmdetection3d): OpenMMLab 新一代通用3D目标检测平台
- [MMSegmentation](https://github.com/open-mmlab/mmsegmentation): OpenMMLab 语义分割工具箱与测试基准
- [MMAction2](https://github.com/open-mmlab/mmaction2): OpenMMLab 新一代视频理解工具箱与测试基准
- [MMTracking](https://github.com/open-mmlab/mmtracking): OpenMMLab 一体化视频目标感知平台
- [MMPose](https://github.com/open-mmlab/mmpose): OpenMMLab 姿态估计工具箱与测试基准
- [MMEditing](https://github.com/open-mmlab/mmediting): OpenMMLab 图像视频编辑工具箱
- [MMOCR](https://github.com/open-mmlab/mmocr): OpenMMLab 全流程文字检测识别理解工具包
- [MMGeneration](https://github.com/open-mmlab/mmgeneration): OpenMMLab 新一代生成模型工具箱
- [MMFlow](https://github.com/open-mmlab/mmflow): OpenMMLab 光流估计工具箱与测试基准
- [MMFewShot](https://github.com/open-mmlab/mmfewshot): OpenMMLab 少样本学习工具箱与测试基准
- [MMHuman3D](https://github.com/open-mmlab/mmhuman3d): OpenMMLab 人体参数化模型工具箱与测试基准
- [MMSelfSup](https://github.com/open-mmlab/mmselfsup): OpenMMLab 自监督学习工具箱与测试基准
- [MMRazor](https://github.com/open-mmlab/mmrazor): OpenMMLab 模型压缩工具箱与测试基准
- [MMDeploy](https://github.com/open-mmlab/mmdeploy): OpenMMLab 模型部署框架

MMCV 提供了如下众多功能：

- 通用的 IO 接口
- 图像和视频处理
- 图像和标注结果可视化
- 常用小工具（进度条，计时器等）
- 基于 PyTorch 的通用训练框架
- 多种 CNN 网络结构
- 高质量实现的常见 CUDA 算子

如想了解更多特性和使用，请参考[文档](http://mmcv.readthedocs.io/zh_CN/latest)。

提示: MMCV 需要 Python 3.6 以上版本。

## 安装

MMCV 有两个版本：

- **mmcv-full**: 完整版，包含所有的特性以及丰富的开箱即用的 CUDA 算子。注意完整版本可能需要更长时间来编译。
- **mmcv**: 精简版，不包含 CUDA 算子但包含其余所有特性和功能，类似 MMCV 1.0 之前的版本。如果你不需要使用 CUDA 算子的话，精简版可以作为一个考虑选项。

**注意**: 请不要在同一个环境中安装两个版本，否则可能会遇到类似 `ModuleNotFound` 的错误。在安装一个版本之前，需要先卸载另一个。`如果CUDA可用，强烈推荐安装mmcv-full`。

a. 安装完整版

在安装 mmcv-full 之前，请确保 PyTorch 已经成功安装在环境中，可以参考 PyTorch [官方文档](https://pytorch.org/)。

我们提供了不同 PyTorch 和 CUDA 版本的 mmcv-full 预编译包，可以大大简化用户安装编译过程。强烈推荐通过预编译包来安装。另外，安装完成后可以运行 [check_installation.py](.dev_scripts/check_installation.py) 脚本检查 mmcv-full 是否安装成功。

i. 安装最新版本

如下是安装最新版 ``mmcv-full`` 的命令

```shell
pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/{cu_version}/{torch_version}/index.html
```

请将链接中的 ``{cu_version}`` 和 ``{torch_version}`` 根据自身需求替换成实际的版本号，例如想安装和 ``CUDA 11.1``、``PyTorch 1.9.0`` 兼容的最新版 ``mmcv-full``，使用如下替换过的命令

```shell
pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.9.0/index.html
```

**注意**: PyTorch 在 1.x.0 和 1.x.1 之间通常是兼容的，故 mmcv-full 只提供 1.x.0 的编译包。如果你的 PyTorch 版本是 1.x.1，你可以放心地安装在 1.x.0 版本编译的 mmcv-full。例如，如果你的 PyTorch 版本是 1.8.1、CUDA 版本是 11.1，你可以使用以下命令安装 mmcv-full。

```shell
pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.8.0/index.html
```

如果想知道更多 CUDA 和 PyTorch 版本的命令，可以参考下面的表格，将链接中的 ``=={mmcv_version}`` 删去即可。

ii. 安装特定的版本

如下是安装特定版本 ``mmcv-full`` 的命令

```shell
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/{cu_version}/{torch_version}/index.html
```

首先请参考版本发布信息找到想要安装的版本号，将 ``{mmcv_version}`` 替换成该版本号，例如 ``1.3.9``。
然后将链接中的 ``{cu_version}`` 和 ``{torch_version}`` 根据自身需求替换成实际的版本号，例如想安装和 ``CUDA 11.1``、``PyTorch 1.9.0`` 兼容的 ``mmcv-full`` 1.3.9 版本，使用如下替换过的命令

```shell
pip install mmcv-full==1.3.9 -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.9.0/index.html
```

对于更多的 PyTorch 和 CUDA 版本组合，请参考下表：

<table class="docutils">
  <tbody>
    <tr>
      <th width="80"> CUDA </th>
      <th valign="bottom" align="left" width="120">torch 1.10</th>
      <th valign="bottom" align="left" width="120">torch 1.9</th>
      <th valign="bottom" align="left" width="120">torch 1.8</th>
      <th valign="bottom" align="left" width="120">torch 1.7</th>
      <th valign="bottom" align="left" width="120">torch 1.6</th>
      <th valign="bottom" align="left" width="120">torch 1.5</th>
    </tr>
    <tr>
      <td align="left">11.3</td>
      <td align="left"><details><summary> 安装 </summary><pre><code>pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu113/torch1.10.0/index.html</code></pre> </details> </td>
      <td align="left"></td>
      <td align="left"> </td>
      <td align="left"> </td>
      <td align="left"> </td>
      <td align="left"> </td>
    </tr>
    <tr>
      <td align="left">11.1</td>
      <td align="left"><details><summary> 安装 </summary><pre><code>pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.10.0/index.html</code></pre> </details> </td>
      <td align="left"><details><summary> 安装 </summary><pre><code>pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.9.0/index.html</code></pre> </details> </td>
      <td align="left"><details><summary> 安装 </summary><pre><code>pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.8.0/index.html</code></pre> </details> </td>
      <td align="left"> </td>
      <td align="left"> </td>
      <td align="left"> </td>
    </tr>
    <tr>
      <td align="left">11.0</td>
      <td align="left"> </td>
      <td align="left"> </td>
      <td align="left"> </td>
      <td align="left"><details><summary> 安装 </summary><pre><code>pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu110/torch1.7.0/index.html</code></pre> </details> </td>
      <td align="left"> </td>
      <td align="left"> </td>
    </tr>
    <tr>
      <td align="left">10.2</td>
      <td align="left"><details><summary> 安装 </summary><pre><code>pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.10.0/index.html</code></pre> </details> </td>
      <td align="left"><details><summary> 安装 </summary><pre><code>pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.9.0/index.html</code></pre> </details> </td>
      <td align="left"><details><summary> 安装 </summary><pre><code>pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.8.0/index.html</code></pre> </details> </td>
      <td align="left"><details><summary> 安装 </summary><pre><code>pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.7.0/index.html</code></pre> </details> </td>
      <td align="left"><details><summary> 安装 </summary><pre><code>pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.6.0/index.html</code></pre> </details> </td>
      <td align="left"><details><summary> 安装 </summary><pre><code>pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.5.0/index.html</code></pre> </details> </td>
    </tr>
    <tr>
      <td align="left">10.1</td>
      <td align="left"> </td>
      <td align="left"> </td>
      <td align="left"><details><summary> 安装 </summary><pre><code> pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu101/torch1.8.0/index.html</code></pre> </details> </td>
      <td align="left"><details><summary> 安装 </summary><pre><code> pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu101/torch1.7.0/index.html</code></pre> </details> </td>
      <td align="left"><details><summary> 安装 </summary><pre><code> pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu101/torch1.6.0/index.html</code></pre> </details> </td>
      <td align="left"><details><summary> 安装 </summary><pre><code> pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu101/torch1.5.0/index.html</code></pre> </details> </td>
    </tr>
    <tr>
      <td align="left">9.2</td>
      <td align="left"> </td>
      <td align="left"> </td>
      <td align="left"> </td>
      <td align="left"><details><summary> 安装 </summary><pre><code> pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu92/torch1.7.0/index.html</code></pre> </details> </td>
      <td align="left"><details><summary> 安装 </summary><pre><code> pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu92/torch1.6.0/index.html</code></pre> </details> </td>
      <td align="left"><details><summary> 安装 </summary><pre><code> pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu92/torch1.5.0/index.html</code></pre> </details> </td>
    </tr>
    <tr>
      <td align="left">cpu</td>
      <td align="left"><details><summary> 安装 </summary><pre><code> pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.10.0/index.html</code></pre> </details> </td>
      <td align="left"><details><summary> 安装 </summary><pre><code> pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.9.0/index.html</code></pre> </details> </td>
       <td align="left"><details><summary> 安装 </summary><pre><code> pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.8.0/index.html</code></pre> </details> </td>
      <td align="left"><details><summary> 安装 </summary><pre><code> pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.7.0/index.html</code></pre> </details> </td>
      <td align="left"><details><summary> 安装 </summary><pre><code> pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.6.0/index.html</code></pre> </details> </td>
      <td align="left"><details><summary> 安装 </summary><pre><code> pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.5.0/index.html</code></pre> </details> </td>
    </tr>
  </tbody>
</table>

**注意**：以上提供的预编译包并不囊括所有的 mmcv-full 版本，你可以点击对应链接查看支持的版本。例如，点击 [cu102-torch1.8.0](https://download.openmmlab.com/mmcv/dist/cu102/torch1.8.0/index.html)，可以看到 `cu102-torch1.8.0` 只提供了 1.3.0 及以上的 mmcv-full 版本。另外，从 `mmcv v1.3.17` 开始，我们不再提供 `PyTorch 1.3 & 1.4` 对应的 mmcv-full 预编译包。你可以在 [这里](./docs/zh_cn/get_started/previous_versions.md) 找到 `PyTorch 1.3 & 1.4` 对应的预编译包。虽然我们不再提供 `PyTorch 1.3 & 1.4` 对应的预编译包，但是我们依然在 CI 中保证对它们的兼容持续到下一年。

除了使用预编译包之外，另一种方式是在本地进行编译，直接运行下述命令

```shell
pip install mmcv-full
```

但注意本地编译可能会耗时 10 分钟以上。

b. 安装精简版

```shell
pip install mmcv
```

c. 安装完整版并且编译 onnxruntime 的自定义算子

- 详细的指南请查看[这里](docs/zh_cn/deployment/onnxruntime_op.md)。

如果想从源码编译 MMCV，请参考[该文档](https://mmcv.readthedocs.io/zh_CN/latest/get_started/build.html)。

## FAQ

如果你遇到了安装问题，CUDA 相关的问题或者 RuntimeErrors，可以首先参考[问题解决页面](https://mmcv.readthedocs.io/zh_CN/latest/faq.html) 看是否已经有解决方案。

## 贡献指南

我们感谢所有的贡献者为改进和提升 MMCV 所作出的努力。请参考[贡献指南](CONTRIBUTING.md)来了解参与项目贡献的相关指引。

## 许可证

`MMCV` 目前以 Apache 2.0 的许可证发布，但是其中有一部分功能并不是使用的 Apache 2.0 许可证，我们在 [许可证](LICENSES.md) 中详细地列出了这些功能以及它们对应的许可证，如果您正在从事盈利性活动，请谨慎参考此文档。

## 欢迎加入 OpenMMLab 社区

扫描下方的二维码可关注 OpenMMLab 团队的 [知乎官方账号](https://www.zhihu.com/people/openmmlab)，加入 OpenMMLab 团队的 [官方交流 QQ 群](https://jq.qq.com/?_wv=1027&k=GJP18SjI)

<div align="center">
<img src="docs/en/_static/zhihu_qrcode.jpg" height="400" />  <img src="docs/en/_static/qq_group_qrcode.jpg" height="400" /> <img src="docs/en/_static/wechat_qrcode.jpg" height="400" />
</div>

我们会在 OpenMMLab 社区为大家

- 📢 分享 AI 框架的前沿核心技术
- 💻 解读 PyTorch 常用模块源码
- 📰 发布 OpenMMLab 的相关新闻
- 🚀 介绍 OpenMMLab 开发的前沿算法
- 🏃 获取更高效的问题答疑和意见反馈
- 🔥 提供与各行各业开发者充分交流的平台

干货满满 📘，等你来撩 💗，OpenMMLab 社区期待您的加入 👬


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/TERMINOLOGY.md
================================================
# English-Chinese terminology comparison (英汉术语对照)

This document is used as a reference for English-Chinese terminology translation.

该文档用作中英文翻译对照参考。

| English | 中文 |
| :-----: | :---:|
| annotation | 标注 |
| backbone | 主干网络 |
| benchmark | 基准测试 |
| checkpoint | 模型权重文件 |
| classifier | 分类器 |
| cls_head | 分类头 |
| decoder | 解码器 |
| detector | 检测器 |
| encoder | 编码器 |
| finetune | 微调 |
| ground truth | 真实标签 |
| hook | 钩子 |
| localizer | 定位器 |
| neck | 模型颈部 |
| pipeline | 流水线 |
| recognizer | 识别器 |
| register | 注册器 |
| schedule | 调整 |
| scheduler | 调度器 |
| segmentor | 分割器 |
| tensor | 张量 |
| training schedule | 训练策略 |


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/en/Makefile
================================================
# Minimal makefile for Sphinx documentation
#
# Every target is forwarded to $(SPHINXBUILD) using its "make mode" (-M)
# option; running plain "make" runs the "help" target defined below.

# You can set these variables from the command line.
# SPHINXOPTS:  extra options appended to every $(SPHINXBUILD) call (empty here).
# SPHINXBUILD: the command used to build the documentation.
# SOURCEDIR:   source directory argument passed to $(SPHINXBUILD).
# BUILDDIR:    build directory argument passed to $(SPHINXBUILD).
SPHINXOPTS    =
SPHINXBUILD   = sphinx-build
SOURCEDIR     = .
BUILDDIR      = _build

# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

# "help" and "Makefile" are not files to be produced; declaring them phony
# keeps make from trying to rebuild them.
.PHONY: help Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
# $@ expands to the requested target name, which is passed to -M.
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/en/_static/css/readthedocs.css
================================================
/* Header logo: renders the MMCV logo image inside a fixed 85px x 40px box. */
.header-logo {
    width: 85px;
    height: 40px;
    background-size: 85px 40px;
    background-image: url("../image/mmcv-logo.png");
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/en/api.rst
================================================
fileio
-------
.. automodule:: mmcv.fileio
    :members:

image
------
.. automodule:: mmcv.image
    :members:

video
------
.. automodule:: mmcv.video
    :members:

arraymisc
---------
.. automodule:: mmcv.arraymisc
    :members:

visualization
--------------
.. automodule:: mmcv.visualization
    :members:

utils
-----
.. automodule:: mmcv.utils
    :members:

cnn
----
.. automodule:: mmcv.cnn
    :members:

runner
------
.. automodule:: mmcv.runner
    :members:

engine
------
.. automodule:: mmcv.engine
    :members:

ops
------
.. automodule:: mmcv.ops
    :members:


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/en/community/pr.md
================================================
## Pull Request (PR)

### What is PR

`PR` is the abbreviation of `Pull Request`. Here's the definition of `PR` in the [official document](https://docs.github.com/en/github/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/about-pull-requests) of GitHub.

```
Pull requests let you tell others about changes you have pushed to a branch in a repository on GitHub. Once a pull request is opened, you can discuss and review the potential changes with collaborators and add follow-up commits before your changes are merged into the base branch.
```

### Basic Workflow

1. Get the most recent codebase
2. Checkout a new branch from the master branch
3. Commit your changes
4. Push your changes and create a PR
5. Discuss and review your code
6. Merge your branch to the master branch

### Procedures in detail

#### 1. Get the most recent codebase

+ When you work on your first PR

  Fork the OpenMMLab repository: click the **fork** button at the top right corner of the GitHub page
    ![avatar](../_static/community/1.png)

  Clone forked repository to local

  ```bash
  git clone git@github.com:XXX/mmcv.git
  ```

  Add source repository to upstream

  ```bash
  git remote add upstream git@github.com:open-mmlab/mmcv
  ```

+ After your first PR

    Checkout master branch of the local repository and pull the latest master branch of the source repository

    ```bash
    git checkout master
    git pull upstream master
    ```

#### 2. Checkout a new branch from the master branch

```bash
git checkout -b branchname
```

```{tip}
To keep the commit history clear, we strongly recommend that you check out the master branch before creating a new branch.
```

#### 3. Commit your changes

```bash
# coding
git add [files]
git commit -m 'messages'
```

#### 4. Push your changes to the forked repository and create a PR

+ Push the branch to your forked remote repository

    ```bash
    git push origin branchname
    ```

+ Create a PR
![avatar](../_static/community/2.png)

+ Revise the PR message template to describe your motivation and the modifications made in this PR. You can also link the related issue to the PR manually in the PR message (for more information, check out the [official guidance](https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue)).

#### 5. Discuss and review your code

+ After creating a pull request, you can ask a specific person to review the changes you've proposed
![avatar](../_static/community/3.png)

+ Modify your code according to the reviewers' suggestions and then push your changes

#### 6.  Merge your branch to the master branch and delete the branch

```bash
git branch -d branchname # delete local branch
git push origin --delete branchname # delete remote branch
```

### PR Specs

1. Use [pre-commit](https://pre-commit.com) hook to avoid issues of code style

2. One short-lived branch should be matched with only one PR

3. Accomplish one specific change per PR. Avoid large PRs

   + Bad: Support Faster R-CNN
   + Acceptable: Add a box head to Faster R-CNN
   + Good: Add a parameter to box head to support custom conv-layer number

4. Provide clear and meaningful commit messages

5. Provide clear and meaningful PR description

   + Task name should be clarified in title. The general format is: [Prefix] Short description of the PR (Suffix)
   + Prefix: add new feature [Feature], fix bug [Fix], related to documents [Docs], in developing [WIP] (which will not be reviewed temporarily)
   + Introduce main changes, results and influences on other modules in short description
   + Associate related issues and pull requests with a milestone


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/en/compatibility.md
================================================
### v1.3.18

Some ops have different implementations on different devices. Lots of macros and type checks are scattered in several files, which makes the code hard to maintain. For example:

```c++
  if (input.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
    CHECK_CUDA_INPUT(input);
    CHECK_CUDA_INPUT(rois);
    CHECK_CUDA_INPUT(output);
    CHECK_CUDA_INPUT(argmax_y);
    CHECK_CUDA_INPUT(argmax_x);

    roi_align_forward_cuda(input, rois, output, argmax_y, argmax_x,
                           aligned_height, aligned_width, spatial_scale,
                           sampling_ratio, pool_mode, aligned);
#else
    AT_ERROR("RoIAlign is not compiled with GPU support");
#endif
  } else {
    CHECK_CPU_INPUT(input);
    CHECK_CPU_INPUT(rois);
    CHECK_CPU_INPUT(output);
    CHECK_CPU_INPUT(argmax_y);
    CHECK_CPU_INPUT(argmax_x);
    roi_align_forward_cpu(input, rois, output, argmax_y, argmax_x,
                          aligned_height, aligned_width, spatial_scale,
                          sampling_ratio, pool_mode, aligned);
  }
```

Registry and dispatcher are added to manage these implementations.

```c++

void ROIAlignForwardCUDAKernelLauncher(Tensor input, Tensor rois, Tensor output,
                                       Tensor argmax_y, Tensor argmax_x,
                                       int aligned_height, int aligned_width,
                                       float spatial_scale, int sampling_ratio,
                                       int pool_mode, bool aligned);

void roi_align_forward_cuda(Tensor input, Tensor rois, Tensor output,
                            Tensor argmax_y, Tensor argmax_x,
                            int aligned_height, int aligned_width,
                            float spatial_scale, int sampling_ratio,
                            int pool_mode, bool aligned) {
  ROIAlignForwardCUDAKernelLauncher(
      input, rois, output, argmax_y, argmax_x, aligned_height, aligned_width,
      spatial_scale, sampling_ratio, pool_mode, aligned);
}

// register cuda implementation
void roi_align_forward_impl(Tensor input, Tensor rois, Tensor output,
                            Tensor argmax_y, Tensor argmax_x,
                            int aligned_height, int aligned_width,
                            float spatial_scale, int sampling_ratio,
                            int pool_mode, bool aligned);
REGISTER_DEVICE_IMPL(roi_align_forward_impl, CUDA, roi_align_forward_cuda);

// roi_align.cpp
// use the dispatcher to invoke different implementation depending on device type of input tensors.
void roi_align_forward_impl(Tensor input, Tensor rois, Tensor output,
                            Tensor argmax_y, Tensor argmax_x,
                            int aligned_height, int aligned_width,
                            float spatial_scale, int sampling_ratio,
                            int pool_mode, bool aligned) {
  DISPATCH_DEVICE_IMPL(roi_align_forward_impl, input, rois, output, argmax_y,
                       argmax_x, aligned_height, aligned_width, spatial_scale,
                       sampling_ratio, pool_mode, aligned);
}

```

### v1.3.11

In order to flexibly support more backends and hardware such as `NVIDIA GPUs` and `AMD GPUs`, the directory of `mmcv/ops/csrc` is refactored. Note that this refactoring will not affect the usage in API. For related information, please refer to [PR1206](https://github.com/open-mmlab/mmcv/pull/1206).

The original directory was organized as follows.

```
.
├── common_cuda_helper.hpp
├── ops_cuda_kernel.cuh
├── pytorch_cpp_helper.hpp
├── pytorch_cuda_helper.hpp
├── parrots_cpp_helper.hpp
├── parrots_cuda_helper.hpp
├── parrots_cudawarpfunction.cuh
├── onnxruntime
│   ├── onnxruntime_register.h
│   ├── onnxruntime_session_options_config_keys.h
│   ├── ort_mmcv_utils.h
│   ├── ...
│   ├── onnx_ops.h
│   └── cpu
│       ├── onnxruntime_register.cpp
│       ├── ...
│       └── onnx_ops_impl.cpp
├── parrots
│   ├── ...
│   ├── ops.cpp
│   ├── ops_cuda.cu
│   ├── ops_parrots.cpp
│   └── ops_pytorch.h
├── pytorch
│   ├── ...
│   ├── ops.cpp
│   ├── ops_cuda.cu
│   ├── pybind.cpp
└── tensorrt
    ├── trt_cuda_helper.cuh
    ├── trt_plugin_helper.hpp
    ├── trt_plugin.hpp
    ├── trt_serialize.hpp
    ├── ...
    ├── trt_ops.hpp
    └── plugins
        ├── trt_cuda_helper.cu
        ├── trt_plugin.cpp
        ├── ...
        ├── trt_ops.cpp
        └── trt_ops_kernel.cu
```

After refactoring, it is organized as follows.

```
.
├── common
│   ├── box_iou_rotated_utils.hpp
│   ├── parrots_cpp_helper.hpp
│   ├── parrots_cuda_helper.hpp
│   ├── pytorch_cpp_helper.hpp
│   ├── pytorch_cuda_helper.hpp
│   └── cuda
│       ├── common_cuda_helper.hpp
│       ├── parrots_cudawarpfunction.cuh
│       ├── ...
│       └── ops_cuda_kernel.cuh
├── onnxruntime
│   ├── onnxruntime_register.h
│   ├── onnxruntime_session_options_config_keys.h
│   ├── ort_mmcv_utils.h
│   ├── ...
│   ├── onnx_ops.h
│   └── cpu
│       ├── onnxruntime_register.cpp
│       ├── ...
│       └── onnx_ops_impl.cpp
├── parrots
│   ├── ...
│   ├── ops.cpp
│   ├── ops_parrots.cpp
│   └── ops_pytorch.h
├── pytorch
│   ├── info.cpp
│   ├── pybind.cpp
│   ├── ...
│   ├── ops.cpp
│   └── cuda
│       ├── ...
│       └── ops_cuda.cu
└── tensorrt
    ├── trt_cuda_helper.cuh
    ├── trt_plugin_helper.hpp
    ├── trt_plugin.hpp
    ├── trt_serialize.hpp
    ├── ...
    ├── trt_ops.hpp
    └── plugins
        ├── trt_cuda_helper.cu
        ├── trt_plugin.cpp
        ├── ...
        ├── trt_ops.cpp
        └── trt_ops_kernel.cu
```


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/en/conf.py
================================================
#
# Configuration file for the Sphinx documentation builder.
#
# This file does only contain a selection of the most common options. For a
# full list see the documentation:
# http://www.sphinx-doc.org/en/master/config

# -- Path setup --------------------------------------------------------------

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import os
import sys

import pytorch_sphinx_theme
from sphinx.builders.html import StandaloneHTMLBuilder

sys.path.insert(0, os.path.abspath('../..'))

version_file = '../../mmcv/version.py'
with open(version_file, 'r') as f:
    exec(compile(f.read(), version_file, 'exec'))
__version__ = locals()['__version__']

# -- Project information -----------------------------------------------------

project = 'mmcv'
copyright = '2018-2021, OpenMMLab'
author = 'MMCV Authors'

# The short X.Y version
version = __version__
# The full version, including alpha/beta/rc tags
release = __version__

# -- General configuration ---------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
#
# needs_sphinx = '1.0'

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.

# Extension modules to load, by module name: the ones bundled with Sphinx
# ('sphinx.ext.*') first, then the separately installed ones.
extensions = [
    'sphinx.ext.autodoc',
    'sphinx.ext.napoleon',
    'sphinx.ext.viewcode',
] + [
    'sphinx_markdown_tables',
    'myst_parser',
    'sphinx_copybutton',
]  # yapf: disable

# Modules that autodoc should mock instead of importing.
autodoc_mock_imports = ['mmcv._ext', 'mmcv.utils.ext_loader', 'torchvision']

# Directories containing templates, relative to this directory.
templates_path = ['_templates']

# Source filename suffixes and the file type each one is parsed as.
source_suffix = {'.rst': 'restructuredtext', '.md': 'markdown'}

# Name of the document holding the master toctree.
master_doc = 'index'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
#
# Set to an explicit language code rather than ``None``: Sphinx 5.0 and later
# report ``language = None`` as an invalid configuration value and fall back
# to 'en', so stating 'en' keeps the output the same without the warning.
language = 'en'

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'

# -- Options for HTML output -------------------------------------------------

# The theme to use for HTML and HTML Help pages.  See the documentation for
# a list of builtin themes.
#
# html_theme = 'sphinx_rtd_theme'
# Ask the theme package where its files are installed and select that theme
# for HTML and HTML Help pages.
html_theme_path = [pytorch_sphinx_theme.get_html_theme_path()]
html_theme = 'pytorch_sphinx_theme'

# Theme options are theme-specific and customize the look and feel of a theme
# further.  For a list of options available for each theme, see the
# documentation.
#
html_theme_options = dict(
    # Entries of the theme's menu; each entry is a display name plus a URL.
    menu=[
        dict(name='GitHub', url='https://github.com/open-mmlab/mmcv'),
    ],
    # Specify the language of shared menu
    menu_lang='en',
)

# Directories with custom static files (such as style sheets), relative to
# this directory. They are copied after the builtin static files, so a file
# named "default.css" here overwrites the builtin "default.css".
html_static_path = ['_static']

# Extra style sheets to include in the HTML pages.
html_css_files = ['css/readthedocs.css']

# Custom sidebar templates, must be a dictionary that maps document names
# to template names.
#
# The default sidebars (for documents that don't match any pattern) are
# defined by theme itself.  Builtin themes are using these templates by
# default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
# 'searchbox.html']``.
#
# html_sidebars = {}

# -- Options for HTMLHelp output ---------------------------------------------

# Output file base name for HTML help builder.
htmlhelp_basename = 'mmcvdoc'

# -- Options for LaTeX output ------------------------------------------------

# No LaTeX overrides are set. Keys that may be added here, with sample values:
#
#   'papersize': 'letterpaper'   -- the paper size ('letterpaper' or 'a4paper')
#   'pointsize': '10pt'          -- the font size ('10pt', '11pt' or '12pt')
#   'preamble': ''               -- additional stuff for the LaTeX preamble
#   'figure_align': 'htbp'       -- LaTeX figure (float) alignment
latex_elements = dict()

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
#  author, documentclass [howto, manual, or own class]).
latex_documents = [(
    master_doc,            # source start file
    'mmcv.tex',            # target name
    'mmcv Documentation',  # title
    'MMCV Contributors',   # author
    'manual',              # documentclass [howto, manual, or own class]
)]

# -- Options for manual page output ------------------------------------------

# One tuple per manual page, in the order
# (source start file, name, description, authors, manual section).
man_pages = [
    (
        master_doc,
        'mmcv',
        'mmcv Documentation',
        [author],
        1,
    ),
]

# -- Options for Texinfo output ----------------------------------------------

# One tuple per Texinfo file, in the order
# (source start file, target name, title, author,
#  dir menu entry, description, category).
texinfo_documents = [(
    master_doc,
    'mmcv',
    'mmcv Documentation',
    author,
    'mmcv',
    'One line description of project.',
    'Miscellaneous',
)]

# -- Options for Epub output -------------------------------------------------

# Bibliographic Dublin Core info.
# The Epub title reuses the project name.
epub_title = project

# Optional identifiers, left unset:
#
# The unique identifier of the text. This can be a ISBN number
# or the project homepage.
# epub_identifier = ''
#
# A unique identification for the text.
# epub_uid = ''

# Files that should not be packed into the epub file.
epub_exclude_files = ['search.html']

# Image types of the standalone HTML builder, in priority order.
StandaloneHTMLBuilder.supported_image_types = [
    'image/svg+xml',
    'image/gif',
    'image/png',
    'image/jpeg',
]
# -- Extension configuration -------------------------------------------------
# Copy-button settings: the prompt text is a regular expression matching the
# ``>>> `` and ``... `` prompts so they are ignored when copying code.
copybutton_prompt_is_regexp = True
copybutton_prompt_text = r'>>> |\.\.\. '


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/en/deployment/mmcv_ops_definition.md
================================================
# MMCV Operators

To make custom operators in MMCV more standard, precise definitions of each operator are listed in this document.

<!-- TOC -->
- [MMCV Operators](#mmcv-operators)
  - [MMCVBorderAlign](#mmcvborderalign)
    - [Description](#description)
    - [Parameters](#parameters)
    - [Inputs](#inputs)
    - [Outputs](#outputs)
    - [Type Constraints](#type-constraints)
  - [MMCVCARAFE](#mmcvcarafe)
    - [Description](#description-1)
    - [Parameters](#parameters-1)
    - [Inputs](#inputs-1)
    - [Outputs](#outputs-1)
    - [Type Constraints](#type-constraints-1)
  - [MMCVCAWeight](#mmcvcaweight)
    - [Description](#description-2)
    - [Parameters](#parameters-2)
    - [Inputs](#inputs-2)
    - [Outputs](#outputs-2)
    - [Type Constraints](#type-constraints-2)
  - [MMCVCAMap](#mmcvcamap)
    - [Description](#description-3)
    - [Parameters](#parameters-3)
    - [Inputs](#inputs-3)
    - [Outputs](#outputs-3)
    - [Type Constraints](#type-constraints-3)
  - [MMCVCornerPool](#mmcvcornerpool)
    - [Description](#description-4)
    - [Parameters](#parameters-4)
    - [Inputs](#inputs-4)
    - [Outputs](#outputs-4)
    - [Type Constraints](#type-constraints-4)
  - [MMCVDeformConv2d](#mmcvdeformconv2d)
    - [Description](#description-5)
    - [Parameters](#parameters-5)
    - [Inputs](#inputs-5)
    - [Outputs](#outputs-5)
    - [Type Constraints](#type-constraints-5)
  - [MMCVModulatedDeformConv2d](#mmcvmodulateddeformconv2d)
    - [Description](#description-6)
    - [Parameters](#parameters-6)
    - [Inputs](#inputs-6)
    - [Outputs](#outputs-6)
    - [Type Constraints](#type-constraints-6)
  - [MMCVDeformRoIPool](#mmcvdeformroipool)
    - [Description](#description-7)
    - [Parameters](#parameters-7)
    - [Inputs](#inputs-7)
    - [Outputs](#outputs-7)
    - [Type Constraints](#type-constraints-7)
  - [MMCVMaskedConv2d](#mmcvmaskedconv2d)
    - [Description](#description-8)
    - [Parameters](#parameters-8)
    - [Inputs](#inputs-8)
    - [Outputs](#outputs-8)
    - [Type Constraints](#type-constraints-8)
  - [MMCVPSAMask](#mmcvpsamask)
    - [Description](#description-9)
    - [Parameters](#parameters-9)
    - [Inputs](#inputs-9)
    - [Outputs](#outputs-9)
    - [Type Constraints](#type-constraints-9)
  - [NonMaxSuppression](#nonmaxsuppression)
    - [Description](#description-10)
    - [Parameters](#parameters-10)
    - [Inputs](#inputs-10)
    - [Outputs](#outputs-10)
    - [Type Constraints](#type-constraints-10)
  - [MMCVRoIAlign](#mmcvroialign)
    - [Description](#description-11)
    - [Parameters](#parameters-11)
    - [Inputs](#inputs-11)
    - [Outputs](#outputs-11)
    - [Type Constraints](#type-constraints-11)
  - [MMCVRoIAlignRotated](#mmcvroialignrotated)
    - [Description](#description-12)
    - [Parameters](#parameters-12)
    - [Inputs](#inputs-12)
    - [Outputs](#outputs-12)
    - [Type Constraints](#type-constraints-12)
  - [grid_sampler*](#grid_sampler)
    - [Description](#description-13)
    - [Parameters](#parameters-13)
    - [Inputs](#inputs-13)
    - [Outputs](#outputs-13)
    - [Type Constraints](#type-constraints-13)
  - [cummax*](#cummax)
    - [Description](#description-14)
    - [Parameters](#parameters-14)
    - [Inputs](#inputs-14)
    - [Outputs](#outputs-14)
    - [Type Constraints](#type-constraints-14)
  - [cummin*](#cummin)
    - [Description](#description-15)
    - [Parameters](#parameters-15)
    - [Inputs](#inputs-15)
    - [Outputs](#outputs-15)
    - [Type Constraints](#type-constraints-15)
  - [Reminders](#reminders)
<!-- TOC -->

## MMCVBorderAlign

### Description

Applies `border_align` over the input feature based on predicted bboxes.

For each border line (e.g. top, left, bottom or right) of each box,
border_align does the following:

- uniformly samples `pool_size`+1 positions on this line, involving the start and end points.
- the corresponding features on these points are computed by bilinear interpolation.
- max pooling over all the `pool_size`+1 positions is used for computing the pooled feature.

Read [BorderDet: Border Feature for Dense Object Detection](https://arxiv.org/abs/2007.11056) for more detailed information.

### Parameters

| Type  | Parameter   | Description                                                                         |
|-------|-------------|-------------------------------------------------------------------------------------|
| `int` | `pool_size` | number of positions sampled over the boxes' borders(e.g. top, bottom, left, right). |

### Inputs

<dl>
<dt><tt>input</tt>: T</dt>
<dd>Features with shape [N,4C,H,W]. Channels ranged in [0,C), [C,2C), [2C,3C), [3C,4C) represent the top, left, bottom, right features respectively</dd>
<dt><tt>boxes</tt>: T</dt>
<dd>Boxes with shape [N,H*W,4]. Coordinate format (x1,y1,x2,y2).</dd>
</dl>

### Outputs

<dl>
<dt><tt>output</tt>: T</dt>
<dd>Pooled features with shape [N,C,H*W,4]. The order is (top, left, bottom, right) for the last dimension.</dd>
</dl>

### Type Constraints

- T:tensor(float32)

## MMCVCARAFE

### Description

CARAFE operator performs feature upsampling.

Read [CARAFE: Content-Aware ReAssembly of FEatures](https://arxiv.org/abs/1905.02188) for more detailed information.

### Parameters

| Type    | Parameter      | Description                                   |
|---------|----------------|-----------------------------------------------|
| `int`   | `kernel_size`  | reassemble kernel size, should be odd integer |
| `int`   | `group_size`   | reassemble group size                         |
| `float` | `scale_factor` | upsample ratio(>=1)                           |

### Inputs

<dl>
<dt><tt>features</tt>: T</dt>
<dd>Input features. 4-D tensor of shape (N, C, H, W). N is the batch size.</dd>
<dt><tt>masks</tt>: T</dt>
<dd>The input mask</dd>
</dl>

### Outputs

<dl>
<dt><tt>output</tt>: T</dt>
<dd>The upsampled features. 4-D tensor of shape (N, C, H * scale_factor, W * scale_factor). N is the batch size.</dd>
</dl>

### Type Constraints

- T:tensor(float32)

## MMCVCAWeight

### Description

Operator for Criss-Cross Attention.

Read [CCNet: Criss-Cross Attention for Semantic Segmentation](https://arxiv.org/pdf/1811.11721.pdf) for more detailed information.

### Parameters

None

### Inputs

<dl>
<dt><tt>t</tt>: T</dt>
<dd>The query matrix of shape (N, C', H, W).</dd>
<dt><tt>f</tt>: T</dt>
<dd>The key matrix of shape (N, C', H, W).</dd>
</dl>

### Outputs

<dl>
<dt><tt>weight</tt>: T</dt>
<dd>The attention map of shape (N, H+W-1, H, W).</dd>
</dl>

### Type Constraints

- T:tensor(float32)

## MMCVCAMap

### Description

Operator for Criss-Cross Attention.

Read [CCNet: Criss-Cross Attention for Semantic Segmentation](https://arxiv.org/pdf/1811.11721.pdf) for more detailed information.

### Parameters

None

### Inputs

<dl>
<dt><tt>weight</tt>: T</dt>
<dd>Output from the operator MMCVCAWeight.</dd>
<dt><tt>value</tt>: T</dt>
<dd>The value matrix of shape (N, C, H, W).</dd>
</dl>

### Outputs

<dl>
<dt><tt>output</tt>: T</dt>
<dd>Output tensor of aggregated contextual information</dd>
</dl>

### Type Constraints

- T:tensor(float32)


## MMCVCornerPool

### Description

Perform CornerPool on `input` features. Read [CornerNet -- Detecting Objects as Paired Keypoints](https://arxiv.org/abs/1808.01244) for more details.

### Parameters

| Type  | Parameter | Description                                                      |
|-------|-----------|------------------------------------------------------------------|
| `int` | `mode`    | corner pool mode, (0: `top`, 1: `bottom`, 2: `left`, 3: `right`) |

### Inputs

<dl>
<dt><tt>input</tt>: T</dt>
<dd>Input features. 4-D tensor of shape (N, C, H, W). N is the batch size.</dd>
</dl>

### Outputs

<dl>
<dt><tt>output</tt>: T</dt>
<dd>The pooled features. 4-D tensor of shape (N, C, H, W).</dd>
</dl>

### Type Constraints

- T:tensor(float32)

## MMCVDeformConv2d

### Description

Applies a deformable 2D convolution over an input signal composed of several input planes.

Read [Deformable Convolutional Networks](https://arxiv.org/pdf/1703.06211.pdf) for detail.

### Parameters

| Type           | Parameter           | Description                                                                                                       |
|----------------|---------------------|-------------------------------------------------------------------------------------------------------------------|
| `list of ints` | `stride`            | The stride of the convolving kernel, (sH, sW). Defaults to `(1, 1)`.                                              |
| `list of ints` | `padding`           | Paddings on both sides of the input, (padH, padW).  Defaults to `(0, 0)`.                                         |
| `list of ints` | `dilation`          | The spacing between kernel elements (dH, dW). Defaults to `(1, 1)`.                                               |
| `int`          | `groups`            | Split input into groups. `input_channel` should be divisible by the number of groups. Defaults to `1`.            |
| `int`          | `deformable_groups` | Groups of deformable offset. Defaults to `1`.                                                                     |
| `int`          | `bias`              | Whether to add a learnable bias to the output. `0` stands for `False` and `1` stands for `True`. Defaults to `0`. |
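| `int`          | `im2col_step`       | Number of samples processed per im2col step; the input and offset are split into chunks of this size to reduce the memory used by the column buffer. Defaults to `32`. |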

### Inputs

<dl>
<dt><tt>input</tt>: T</dt>
<dd>Input feature; 4-D tensor of shape (N, C, inH, inW), where N is the batch size, C is the number of channels, inH and inW are the height and width of the data.</dd>
<dt><tt>offset</tt>: T</dt>
<dd>Input offset; 4-D tensor of shape (N, deformable_group* 2* kH* kW, outH, outW), where kH and kW are the height and width of weight, outH and outW are the height and width of offset and output.</dd>
<dt><tt>weight</tt>: T</dt>
<dd>Input weight; 4-D tensor of shape (output_channel, input_channel, kH, kW).</dd>
</dl>

### Outputs

<dl>
<dt><tt>output</tt>: T</dt>
<dd>Output feature; 4-D tensor of shape (N, output_channel, outH, outW).</dd>
</dl>

### Type Constraints

- T:tensor(float32, Linear)

## MMCVModulatedDeformConv2d

### Description

Perform Modulated Deformable Convolution on input feature, read [Deformable ConvNets v2: More Deformable, Better Results](https://arxiv.org/abs/1811.11168?from=timeline) for detail.

### Parameters

| Type           | Parameter           | Description                                                                           |
|----------------|---------------------|---------------------------------------------------------------------------------------|
| `list of ints` | `stride`            | The stride of the convolving kernel. (sH, sW)                                         |
| `list of ints` | `padding`           | Paddings on both sides of the input. (padH, padW)                                     |
| `list of ints` | `dilation`          | The spacing between kernel elements. (dH, dW)                                         |
| `int`          | `deformable_groups` | Groups of deformable offset.                                                          |
| `int`          | `groups`            | Split input into groups. `input_channel` should be divisible by the number of groups. |

### Inputs

<dl>
<dt><tt>feature</tt>: T</dt>
<dd>Input feature; 4-D tensor of shape (N, C, inH, inW), where N is the batch size, C is the number of channels, inH and inW are the height and width of the data.</dd>
<dt><tt>offset</tt>: T</dt>
<dd>Input offset; 4-D tensor of shape (N, deformable_group* 2* kH* kW, outH, outW), where kH and kW are the height and width of weight, outH and outW are the height and width of offset and output.</dd>
<dt><tt>mask</tt>: T</dt>
<dd>Input mask; 4-D tensor of shape (N, deformable_group* kH* kW, outH, outW), where kH and kW are the height and width of weight, outH and outW are the height and width of offset and output.</dd>
<dt><tt>weight</tt>: T</dt>
<dd>Input weight; 4-D tensor of shape (output_channel, input_channel, kH, kW).</dd>
<dt><tt>bias</tt>: T, optional</dt>
<dd>Input bias; 1-D tensor of shape (output_channel).</dd>
</dl>

### Outputs

<dl>
<dt><tt>output</tt>: T</dt>
<dd>Output feature; 4-D tensor of shape (N, output_channel, outH, outW).</dd>
</dl>

### Type Constraints

- T:tensor(float32, Linear)

## MMCVDeformRoIPool

### Description

Deformable roi pooling layer

### Parameters

| Type    | Parameter        | Description                                                                                                   |
|---------|------------------|---------------------------------------------------------------------------------------------------------------|
| `int`   | `output_height`  | height of output roi                                                                                          |
| `int`   | `output_width`   | width of output roi                                                                                           |
| `float` | `spatial_scale`  | used to scale the input boxes                                                                                 |
| `int`   | `sampling_ratio` | number of input samples to take for each output sample. `0` means to take samples densely for current models. |
| `float` | `gamma`          | gamma                                                                                                         |

### Inputs

<dl>
<dt><tt>input</tt>: T</dt>
<dd>Input feature map; 4D tensor of shape (N, C, H, W), where N is the batch size, C is the numbers of channels, H and W are the height and width of the data.</dd>
<dt><tt>rois</tt>: T</dt>
<dd>RoIs (Regions of Interest) to pool over; 2-D tensor of shape (num_rois, 5) given as [[batch_index, x1, y1, x2, y2], ...]. The RoIs' coordinates are the coordinate system of input.</dd>
<dt><tt>offset</tt>: T</dt>
<dd>offset of height and width. Defaults to a tensor of zero</dd>
</dl>

### Outputs

<dl>
<dt><tt>feat</tt>: T</dt>
<dd>RoI pooled output, 4-D tensor of shape (num_rois, C, output_height, output_width). The r-th batch element feat[r-1] is a pooled feature map corresponding to the r-th RoI RoIs[r-1].</dd>
</dl>

### Type Constraints

- T:tensor(float32)

## MMCVMaskedConv2d

### Description

Performs a masked 2D convolution from PixelRNN.

Read [Pixel Recurrent Neural Networks](https://arxiv.org/abs/1601.06759) for more detailed information.

### Parameters

| Type           | Parameter | Description                                                                      |
|----------------|-----------|----------------------------------------------------------------------------------|
| `list of ints` | `stride`  | The stride of the convolving kernel. (sH, sW). **Only support stride=1 in mmcv** |
| `list of ints` | `padding` | Paddings on both sides of the input. (padH, padW). Defaults to `(0, 0)`.         |

### Inputs

<dl>
<dt><tt>features</tt>: T</dt>
<dd>Input features; 4D tensor of shape (N, C, H, W), where N is the batch size, C is the numbers of channels, H and W are the height and width of the data.</dd>
<dt><tt>mask</tt>: T</dt>
<dd>Input mask; 3D tensor of shape (N, H, W)</dd>
<dt><tt>weight</tt>: T</dt>
<dd>The learnable weights of the module</dd>
<dt><tt>bias</tt>: T</dt>
<dd>The learnable bias of the module</dd>
</dl>

### Outputs

<dl>
<dt><tt>output</tt>: T</dt>
<dd>The output convolved feature</dd>
</dl>

### Type Constraints

- T:tensor(float32)

## MMCVPSAMask

### Description

An operator from PSANet.

Read [PSANet: Point-wise Spatial Attention Network for Scene Parsing](https://hszhao.github.io/papers/eccv18_psanet.pdf) for more detailed information.

### Parameters

| Type           | Parameter   | Description                                  |
|----------------|-------------|----------------------------------------------|
| `int`          | `psa_type`  | `0` means collect and `1` means `distribute` |
| `list of ints` | `mask_size` | The size of mask                             |

### Inputs

<dl>
<dt><tt>input</tt>: T</dt>
<dd>Input feature; 4D tensor of shape (N, C, H, W), where N is the batch size, C is the numbers of channels, H and W are the height and width of the data.</dd>
</dl>

### Outputs

<dl>
<dt><tt>output</tt>: T</dt>
<dd>Output tensor of shape (N, H * W, H, W)</dd>
</dl>

### Type Constraints

- T:tensor(float32)

## NonMaxSuppression

### Description

Filter out boxes that have a high IoU overlap with previously selected boxes or that have a low score. Output the indices of valid boxes.

Note that this definition is slightly different from [onnx: NonMaxSuppression](https://github.com/onnx/onnx/blob/master/docs/Operators.md#nonmaxsuppression).

### Parameters

| Type    | Parameter                    | Description                                                                                                                          |
|---------|------------------------------|--------------------------------------------------------------------------------------------------------------------------------------|
| `int`   | `center_point_box`           | 0 - the box data is supplied as [y1, x1, y2, x2], 1 - the box data is supplied as [x_center, y_center, width, height].               |
| `int`   | `max_output_boxes_per_class` | The maximum number of boxes to be selected per batch per class. Default to 0, number of output boxes equal to number of input boxes. |
| `float` | `iou_threshold`              | The threshold for deciding whether boxes overlap too much with respect to IoU. Value range [0, 1]. Default to 0.                     |
| `float` | `score_threshold`            | The threshold for deciding when to remove boxes based on score.                                                                      |
| `int`   | `offset`                     | 0 or 1, boxes' width or height is (x2 - x1 + offset).                                                                                |

### Inputs

<dl>
<dt><tt>boxes</tt>: T</dt>
<dd>Input boxes. 3-D tensor of shape (num_batches, spatial_dimension, 4).</dd>
<dt><tt>scores</tt>: T</dt>
<dd>Input scores. 3-D tensor of shape (num_batches, num_classes, spatial_dimension).</dd>
</dl>

### Outputs

<dl>
<dt><tt>indices</tt>: tensor(int32, Linear)</dt>
<dd>Selected indices. 2-D tensor of shape (num_selected_indices, 3) as [[batch_index, class_index, box_index], ...].</dd>
<dd>num_selected_indices=num_batches* num_classes* min(max_output_boxes_per_class, spatial_dimension).</dd>
<dd>All invalid indices will be filled with -1.</dd>
</dl>

### Type Constraints

- T:tensor(float32, Linear)

## MMCVRoIAlign

### Description

Perform RoIAlign on output feature, used in bbox_head of most two-stage detectors.

### Parameters

| Type    | Parameter        | Description                                                                                                   |
|---------|------------------|---------------------------------------------------------------------------------------------------------------|
| `int`   | `output_height`  | height of output roi                                                                                          |
| `int`   | `output_width`   | width of output roi                                                                                           |
| `float` | `spatial_scale`  | used to scale the input boxes                                                                                 |
| `int`   | `sampling_ratio` | number of input samples to take for each output sample. `0` means to take samples densely for current models. |
| `str`   | `mode`           | pooling mode in each bin. `avg` or `max`                                                                      |
| `int`   | `aligned`        | If `aligned=0`, use the legacy implementation in MMDetection. Else, align the results more perfectly.         |

### Inputs

<dl>
<dt><tt>input</tt>: T</dt>
<dd>Input feature map; 4D tensor of shape (N, C, H, W), where N is the batch size, C is the numbers of channels, H and W are the height and width of the data.</dd>
<dt><tt>rois</tt>: T</dt>
<dd>RoIs (Regions of Interest) to pool over; 2-D tensor of shape (num_rois, 5) given as [[batch_index, x1, y1, x2, y2], ...]. The RoIs' coordinates are the coordinate system of input.</dd>
</dl>

### Outputs

<dl>
<dt><tt>feat</tt>: T</dt>
<dd>RoI pooled output, 4-D tensor of shape (num_rois, C, output_height, output_width). The r-th batch element feat[r-1] is a pooled feature map corresponding to the r-th RoI RoIs[r-1].</dd>
</dl>

### Type Constraints

- T:tensor(float32)

## MMCVRoIAlignRotated

### Description

Perform RoI align pooling for rotated proposals

### Parameters

| Type    | Parameter        | Description                                                                                                   |
|---------|------------------|---------------------------------------------------------------------------------------------------------------|
| `int`   | `output_height`  | height of output roi                                                                                          |
| `int`   | `output_width`   | width of output roi                                                                                           |
| `float` | `spatial_scale`  | used to scale the input boxes                                                                                 |
| `int`   | `sampling_ratio` | number of input samples to take for each output sample. `0` means to take samples densely for current models. |
| `str`   | `mode`           | pooling mode in each bin. `avg` or `max`                                                                      |
| `int`   | `aligned`        | If `aligned=0`, use the legacy implementation in MMDetection. Else, align the results more perfectly.         |
| `int`   | `clockwise`      | If `clockwise=1`, the angle in each proposal follows a clockwise fashion in image space. Otherwise, the angle is counterclockwise. |

### Inputs

<dl>
<dt><tt>features</tt>: T</dt>
<dd>Input feature map; 4D tensor of shape (N, C, H, W)</dd>
<dt><tt>rois</tt>: T</dt>
<dd>RoIs (Regions of Interest) to pool over; 2-D tensor of shape (num_rois, 6) given as [[batch_index, center_x, center_y, w, h, angle], ...], where the angle is in radians. The RoIs' coordinates are the coordinate system of input.</dd>
</dl>

### Outputs

<dl>
<dt><tt>feat</tt>: T</dt>
<dd>RoI pooled output, 4-D tensor of shape (num_rois, C, output_height, output_width). The r-th batch element feat[r-1] is a pooled feature map corresponding to the r-th RoI RoIs[r-1].</dd>
</dl>

### Type Constraints

- T:tensor(float32)

## grid_sampler*

### Description

Samples values from `input` at the pixel locations given by `grid`.

Check [torch.nn.functional.grid_sample](https://pytorch.org/docs/stable/generated/torch.nn.functional.grid_sample.html?highlight=grid_sample#torch.nn.functional.grid_sample) for more information.

### Parameters

| Type  | Parameter            | Description                                                                                                                                                                                                                                                                                     |
|-------|----------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `int` | `interpolation_mode` | Interpolation mode to calculate output values. (0: `bilinear` , 1: `nearest`)                                                                                                                                                                                                                   |
| `int` | `padding_mode`       | Padding mode for outside grid values. (0: `zeros`, 1: `border`, 2: `reflection`)                                                                                                                                                                                                                |
| `int` | `align_corners`      | If `align_corners=1`, the extrema (`-1` and `1`) are considered as referring to the center points of the input's corner pixels. If `align_corners=0`, they are instead considered as referring to the corner points of the input's corner pixels, making the sampling more resolution agnostic. |

### Inputs

<dl>
<dt><tt>input</tt>: T</dt>
<dd>Input feature; 4-D tensor of shape (N, C, inH, inW), where N is the batch size, C is the numbers of channels, inH and inW are the height and width of the data.</dd>
<dt><tt>grid</tt>: T</dt>
<dd>Input offset; 4-D tensor of shape (N, outH, outW, 2), where outH and outW are the height and width of offset and output. </dd>
</dl>

### Outputs

<dl>
<dt><tt>output</tt>: T</dt>
<dd>Output feature; 4-D tensor of shape (N, C, outH, outW).</dd>
</dl>

### Type Constraints

- T:tensor(float32, Linear)

## cummax*

### Description

Returns a tuple (`values`, `indices`) where `values` is the cumulative maximum elements of `input` in the dimension `dim`. And `indices` is the index location of each maximum value found in the dimension `dim`. Read [torch.cummax](https://pytorch.org/docs/stable/generated/torch.cummax.html) for more details.

### Parameters

| Type  | Parameter | Description                            |
|-------|-----------|----------------------------------------|
| `int` | `dim`     | the dimension to do the operation over |

### Inputs

<dl>
<dt><tt>input</tt>: T</dt>
<dd>The input tensor with various shapes. Tensor with empty element is also supported.</dd>
</dl>

### Outputs

<dl>
<dt><tt>output</tt>: T</dt>
<dd>Output the cumulative maximum elements of `input` in the dimension `dim`, with the same shape and dtype as `input`.</dd>
<dt><tt>indices</tt>: tensor(int64)</dt>
<dd>Output the index location of each cumulative maximum value found in the dimension `dim`, with the same shape as `input`.</dd>
</dl>

### Type Constraints

- T:tensor(float32)

## cummin*

### Description

Returns a tuple (`values`, `indices`) where `values` is the cumulative minimum elements of `input` in the dimension `dim`. And `indices` is the index location of each minimum value found in the dimension `dim`. Read [torch.cummin](https://pytorch.org/docs/stable/generated/torch.cummin.html) for more details.

### Parameters

| Type  | Parameter | Description                            |
|-------|-----------|----------------------------------------|
| `int` | `dim`     | the dimension to do the operation over |

### Inputs

<dl>
<dt><tt>input</tt>: T</dt>
<dd>The input tensor with various shapes. Tensor with empty element is also supported.</dd>
</dl>

### Outputs

<dl>
<dt><tt>output</tt>: T</dt>
<dd>Output the cumulative minimum elements of `input` in the dimension `dim`, with the same shape and dtype as `input`.</dd>
<dt><tt>indices</tt>: tensor(int64)</dt>
<dd>Output the index location of each cumulative minimum value found in the dimension `dim`, with the same shape as `input`.</dd>
</dl>

### Type Constraints

- T:tensor(float32)

## Reminders

- Operators ending with `*` are defined in Torch and are included here for the conversion to ONNX.


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/en/deployment/onnx.md
================================================
## Introduction of mmcv.onnx module

### register_extra_symbolics

Some extra symbolic functions need to be registered before exporting a PyTorch model to ONNX.

#### Example

```python
import mmcv
from mmcv.onnx import register_extra_symbolics

opset_version = 11
register_extra_symbolics(opset_version)
```

#### Reminder

- *Please note that this feature is experimental and may change in the future.*

#### FAQs

- None


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/en/deployment/onnxruntime_custom_ops.md
================================================
## ONNX Runtime Custom Ops

<!-- TOC -->

- [ONNX Runtime Custom Ops](#onnx-runtime-custom-ops)
  - [SoftNMS](#softnms)
    - [Description](#description)
    - [Parameters](#parameters)
    - [Inputs](#inputs)
    - [Outputs](#outputs)
    - [Type Constraints](#type-constraints)
  - [RoIAlign](#roialign)
    - [Description](#description-1)
    - [Parameters](#parameters-1)
    - [Inputs](#inputs-1)
    - [Outputs](#outputs-1)
    - [Type Constraints](#type-constraints-1)
  - [NMS](#nms)
    - [Description](#description-2)
    - [Parameters](#parameters-2)
    - [Inputs](#inputs-2)
    - [Outputs](#outputs-2)
    - [Type Constraints](#type-constraints-2)
  - [grid_sampler](#grid_sampler)
    - [Description](#description-3)
    - [Parameters](#parameters-3)
    - [Inputs](#inputs-3)
    - [Outputs](#outputs-3)
    - [Type Constraints](#type-constraints-3)
  - [CornerPool](#cornerpool)
    - [Description](#description-4)
    - [Parameters](#parameters-4)
    - [Inputs](#inputs-4)
    - [Outputs](#outputs-4)
    - [Type Constraints](#type-constraints-4)
  - [cummax](#cummax)
    - [Description](#description-5)
    - [Parameters](#parameters-5)
    - [Inputs](#inputs-5)
    - [Outputs](#outputs-5)
    - [Type Constraints](#type-constraints-5)
  - [cummin](#cummin)
    - [Description](#description-6)
    - [Parameters](#parameters-6)
    - [Inputs](#inputs-6)
    - [Outputs](#outputs-6)
    - [Type Constraints](#type-constraints-6)
  - [MMCVModulatedDeformConv2d](#mmcvmodulateddeformconv2d)
    - [Description](#description-7)
    - [Parameters](#parameters-7)
    - [Inputs](#inputs-7)
    - [Outputs](#outputs-7)
    - [Type Constraints](#type-constraints-7)
  - [MMCVDeformConv2d](#mmcvdeformconv2d)
    - [Description](#description-8)
    - [Parameters](#parameters-8)
    - [Inputs](#inputs-8)
    - [Outputs](#outputs-8)
    - [Type Constraints](#type-constraints-8)

<!-- TOC -->

### SoftNMS

#### Description

Perform soft NMS on `boxes` with `scores`. Read [Soft-NMS -- Improving Object Detection With One Line of Code](https://arxiv.org/abs/1704.04503) for detail.

#### Parameters

| Type    | Parameter       | Description                                                    |
|---------|-----------------|----------------------------------------------------------------|
| `float` | `iou_threshold` | IoU threshold for NMS                                          |
| `float` | `sigma`         | hyperparameter for gaussian method                             |
| `float` | `min_score`     | score filter threshold                                         |
| `int`   | `method`        | method to do the nms, (0: `naive`, 1: `linear`, 2: `gaussian`) |
| `int`   | `offset`        | `boxes` width or height is (x2 - x1 + offset). (0 or 1)        |

#### Inputs

<dl>
<dt><tt>boxes</tt>: T</dt>
<dd>Input boxes. 2-D tensor of shape (N, 4). N is the number of boxes.</dd>
<dt><tt>scores</tt>: T</dt>
<dd>Input scores. 1-D tensor of shape (N, ).</dd>
</dl>

#### Outputs

<dl>
<dt><tt>dets</tt>: T</dt>
<dd>Output boxes and scores. 2-D tensor of shape (num_valid_boxes, 5), [[x1, y1, x2, y2, score], ...]. num_valid_boxes is the number of valid boxes.</dd>
<dt><tt>indices</tt>: tensor(int64)</dt>
<dd>Output indices. 1-D tensor of shape (num_valid_boxes, ).</dd>
</dl>

#### Type Constraints

- T:tensor(float32)

### RoIAlign

#### Description

Perform RoIAlign on output feature, used in bbox_head of most two-stage detectors.

#### Parameters

| Type    | Parameter        | Description                                                                                                   |
|---------|------------------|---------------------------------------------------------------------------------------------------------------|
| `int`   | `output_height`  | height of output roi                                                                                          |
| `int`   | `output_width`   | width of output roi                                                                                           |
| `float` | `spatial_scale`  | used to scale the input boxes                                                                                 |
| `int`   | `sampling_ratio` | number of input samples to take for each output sample. `0` means to take samples densely for current models. |
| `str`   | `mode`           | pooling mode in each bin. `avg` or `max`                                                                      |
| `int`   | `aligned`        | If `aligned=0`, use the legacy implementation in MMDetection. Else, align the results more perfectly.         |

#### Inputs

<dl>
<dt><tt>input</tt>: T</dt>
<dd>Input feature map; 4D tensor of shape (N, C, H, W), where N is the batch size, C is the number of channels, H and W are the height and width of the data.</dd>
<dt><tt>rois</tt>: T</dt>
<dd>RoIs (Regions of Interest) to pool over; 2-D tensor of shape (num_rois, 5) given as [[batch_index, x1, y1, x2, y2], ...]. The RoIs' coordinates are the coordinate system of input.</dd>
</dl>

#### Outputs

<dl>
<dt><tt>feat</tt>: T</dt>
<dd>RoI pooled output, 4-D tensor of shape (num_rois, C, output_height, output_width). The r-th batch element feat[r-1] is a pooled feature map corresponding to the r-th RoI RoIs[r-1].</dd>
</dl>

#### Type Constraints

- T:tensor(float32)

### NMS

#### Description

Filter out boxes that have high IoU overlap with previously selected boxes.

#### Parameters

| Type    | Parameter       | Description                                                                                                      |
|---------|-----------------|------------------------------------------------------------------------------------------------------------------|
| `float` | `iou_threshold` | The threshold for deciding whether boxes overlap too much with respect to IoU. Value range [0, 1]. Default to 0. |
| `int`   | `offset`        | 0 or 1, boxes' width or height is (x2 - x1 + offset).                                                            |

#### Inputs

<dl>
<dt><tt>bboxes</tt>: T</dt>
<dd>Input boxes. 2-D tensor of shape (num_boxes, 4). num_boxes is the number of input boxes.</dd>
<dt><tt>scores</tt>: T</dt>
<dd>Input scores. 1-D tensor of shape (num_boxes, ).</dd>
</dl>

#### Outputs

<dl>
<dt><tt>indices</tt>: tensor(int32, Linear)</dt>
<dd>Selected indices. 1-D tensor of shape (num_valid_boxes, ). num_valid_boxes is the number of valid boxes.</dd>
</dl>

#### Type Constraints

- T:tensor(float32)

### grid_sampler

#### Description

Perform sampling from `input` with pixel locations from `grid`.

#### Parameters

| Type  | Parameter            | Description                                                                                                                                                                                                                                                                                     |
|-------|----------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `int` | `interpolation_mode` | Interpolation mode to calculate output values. (0: `bilinear` , 1: `nearest`)                                                                                                                                                                                                                   |
| `int` | `padding_mode`       | Padding mode for outside grid values. (0: `zeros`, 1: `border`, 2: `reflection`)                                                                                                                                                                                                                |
| `int` | `align_corners`      | If `align_corners=1`, the extrema (`-1` and `1`) are considered as referring to the center points of the input's corner pixels. If `align_corners=0`, they are instead considered as referring to the corner points of the input's corner pixels, making the sampling more resolution agnostic. |

#### Inputs

<dl>
<dt><tt>input</tt>: T</dt>
<dd>Input feature; 4-D tensor of shape (N, C, inH, inW), where N is the batch size, C is the number of channels, inH and inW are the height and width of the data.</dd>
<dt><tt>grid</tt>: T</dt>
<dd>Input offset; 4-D tensor of shape (N, outH, outW, 2), where outH and outW are the height and width of offset and output.</dd>
</dl>

#### Outputs

<dl>
<dt><tt>output</tt>: T</dt>
<dd>Output feature; 4-D tensor of shape (N, C, outH, outW).</dd>
</dl>

#### Type Constraints

- T:tensor(float32, Linear)

### CornerPool

#### Description

Perform CornerPool on `input` features. Read [CornerNet -- Detecting Objects as Paired Keypoints](https://arxiv.org/abs/1808.01244) for more details.

#### Parameters

| Type  | Parameter | Description                                                      |
|-------|-----------|------------------------------------------------------------------|
| `int` | `mode`    | corner pool mode, (0: `top`, 1: `bottom`, 2: `left`, 3: `right`) |

#### Inputs

<dl>
<dt><tt>input</tt>: T</dt>
<dd>Input features. 4-D tensor of shape (N, C, H, W). N is the batch size.</dd>
</dl>

#### Outputs

<dl>
<dt><tt>output</tt>: T</dt>
<dd>Output the pooled features. 4-D tensor of shape (N, C, H, W).</dd>
</dl>

#### Type Constraints

- T:tensor(float32)

### cummax

#### Description

Returns a tuple (`values`, `indices`) where `values` is the cumulative maximum elements of `input` in the dimension `dim`. And `indices` is the index location of each maximum value found in the dimension `dim`. Read [torch.cummax](https://pytorch.org/docs/stable/generated/torch.cummax.html) for more details.

#### Parameters

| Type  | Parameter | Description                            |
|-------|-----------|----------------------------------------|
| `int` | `dim`     | the dimension to do the operation over |

#### Inputs

<dl>
<dt><tt>input</tt>: T</dt>
<dd>The input tensor with various shapes. Tensor with empty element is also supported.</dd>
</dl>

#### Outputs

<dl>
<dt><tt>output</tt>: T</dt>
<dd>Output the cumulative maximum elements of `input` in the dimension `dim`, with the same shape and dtype as `input`.</dd>
<dt><tt>indices</tt>: tensor(int64)</dt>
<dd>Output the index location of each cumulative maximum value found in the dimension `dim`, with the same shape as `input`.</dd>
</dl>

#### Type Constraints

- T:tensor(float32)

### cummin

#### Description

Returns a tuple (`values`, `indices`) where `values` is the cumulative minimum elements of `input` in the dimension `dim`. And `indices` is the index location of each minimum value found in the dimension `dim`. Read [torch.cummin](https://pytorch.org/docs/stable/generated/torch.cummin.html) for more details.

#### Parameters

| Type  | Parameter | Description                            |
|-------|-----------|----------------------------------------|
| `int` | `dim`     | the dimension to do the operation over |

#### Inputs

<dl>
<dt><tt>input</tt>: T</dt>
<dd>The input tensor with various shapes. Tensor with empty element is also supported.</dd>
</dl>

#### Outputs

<dl>
<dt><tt>output</tt>: T</dt>
<dd>Output the cumulative minimum elements of `input` in the dimension `dim`, with the same shape and dtype as `input`.</dd>
<dt><tt>indices</tt>: tensor(int64)</dt>
<dd>Output the index location of each cumulative minimum value found in the dimension `dim`, with the same shape as `input`.</dd>
</dl>

#### Type Constraints

- T:tensor(float32)

### MMCVModulatedDeformConv2d

#### Description

Perform Modulated Deformable Convolution on input feature, read [Deformable ConvNets v2: More Deformable, Better Results](https://arxiv.org/abs/1811.11168?from=timeline) for detail.

#### Parameters

| Type           | Parameter           | Description                                                                           |
|----------------|---------------------|---------------------------------------------------------------------------------------|
| `list of ints` | `stride`            | The stride of the convolving kernel. (sH, sW)                                         |
| `list of ints` | `padding`           | Paddings on both sides of the input. (padH, padW)                                     |
| `list of ints` | `dilation`          | The spacing between kernel elements. (dH, dW)                                         |
| `int`          | `deformable_groups` | Groups of deformable offset.                                                          |
| `int`          | `groups`            | Split input into groups. `input_channel` should be divisible by the number of groups. |

#### Inputs

<dl>
<dt><tt>inputs[0]</tt>: T</dt>
<dd>Input feature; 4-D tensor of shape (N, C, inH, inW), where N is the batch size, C is the number of channels, inH and inW are the height and width of the data.</dd>
<dt><tt>inputs[1]</tt>: T</dt>
<dd>Input offset; 4-D tensor of shape (N, deformable_group* 2* kH* kW, outH, outW), where kH and kW are the height and width of weight, outH and outW are the height and width of offset and output.</dd>
<dt><tt>inputs[2]</tt>: T</dt>
<dd>Input mask; 4-D tensor of shape (N, deformable_group* kH* kW, outH, outW), where kH and kW are the height and width of weight, outH and outW are the height and width of offset and output.</dd>
<dt><tt>inputs[3]</tt>: T</dt>
<dd>Input weight; 4-D tensor of shape (output_channel, input_channel, kH, kW).</dd>
<dt><tt>inputs[4]</tt>: T, optional</dt>
<dd>Input bias; 1-D tensor of shape (output_channel).</dd>
</dl>

#### Outputs

<dl>
<dt><tt>outputs[0]</tt>: T</dt>
<dd>Output feature; 4-D tensor of shape (N, output_channel, outH, outW).</dd>
</dl>

#### Type Constraints

- T:tensor(float32, Linear)

### MMCVDeformConv2d

#### Description

Perform Deformable Convolution on input feature, read [Deformable Convolutional Network](https://arxiv.org/abs/1703.06211) for detail.

#### Parameters

| Type           | Parameter          | Description                                                                                                                       |
|----------------|--------------------|-----------------------------------------------------------------------------------------------------------------------------------|
| `list of ints` | `stride`           | The stride of the convolving kernel. (sH, sW)                                                                                     |
| `list of ints` | `padding`          | Paddings on both sides of the input. (padH, padW)                                                                                 |
| `list of ints` | `dilation`         | The spacing between kernel elements. (dH, dW)                                                                                     |
| `int`          | `deformable_group` | Groups of deformable offset.                                                                                                      |
| `int`          | `group`            | Split input into groups. `input_channel` should be divisible by the number of groups.                                             |
| `int`          | `im2col_step`      | DeformableConv2d use im2col to compute convolution. im2col_step is used to split input and offset, reduce memory usage of column. |

#### Inputs

<dl>
<dt><tt>inputs[0]</tt>: T</dt>
<dd>Input feature; 4-D tensor of shape (N, C, inH, inW), where N is the batch size, C is the number of channels, inH and inW are the height and width of the data.</dd>
<dt><tt>inputs[1]</tt>: T</dt>
<dd>Input offset; 4-D tensor of shape (N, deformable_group* 2* kH* kW, outH, outW), where kH and kW are the height and width of weight, outH and outW are the height and width of offset and output.</dd>
<dt><tt>inputs[2]</tt>: T</dt>
<dd>Input weight; 4-D tensor of shape (output_channel, input_channel, kH, kW).</dd>
</dl>

#### Outputs

<dl>
<dt><tt>outputs[0]</tt>: T</dt>
<dd>Output feature; 4-D tensor of shape (N, output_channel, outH, outW).</dd>
</dl>

#### Type Constraints

- T:tensor(float32, Linear)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/en/deployment/onnxruntime_op.md
================================================
## ONNX Runtime Deployment

### Introduction of ONNX Runtime

**ONNX Runtime** is a cross-platform inferencing and training accelerator compatible with many popular ML/DNN frameworks. Check its [github](https://github.com/microsoft/onnxruntime) for more information.

### Introduction of ONNX

**ONNX** stands for **Open Neural Network Exchange**, which acts as *Intermediate Representation(IR)* for ML/DNN models from many frameworks. Check its [github](https://github.com/onnx/onnx) for more information.

### Why include custom operators for ONNX Runtime in MMCV

- To verify the correctness of exported ONNX models in ONNX Runtime.
- To ease the deployment of ONNX models with custom operators from `mmcv.ops` in ONNX Runtime.

### List of operators for ONNX Runtime supported in MMCV

| Operator                                               | CPU | GPU | MMCV Releases |
|:-------------------------------------------------------|:---:|:---:|:-------------:|
| [SoftNMS](onnxruntime_custom_ops.md#softnms)           |  Y  |  N  |     1.2.3     |
| [RoIAlign](onnxruntime_custom_ops.md#roialign)         |  Y  |  N  |     1.2.5     |
| [NMS](onnxruntime_custom_ops.md#nms)                   |  Y  |  N  |     1.2.7     |
| [grid_sampler](onnxruntime_custom_ops.md#grid_sampler) |  Y  |  N  |     1.3.1     |
| [CornerPool](onnxruntime_custom_ops.md#cornerpool)     |  Y  |  N  |     1.3.4     |
| [cummax](onnxruntime_custom_ops.md#cummax)             |  Y  |  N  |     1.3.4     |
| [cummin](onnxruntime_custom_ops.md#cummin)             |  Y  |  N  |     1.3.4     |

### How to build custom operators for ONNX Runtime

*Please note that only the CPU version of **onnxruntime>=1.8.1** on the Linux platform has been tested so far.*

#### Prerequisite

- Clone repository

```bash
git clone https://github.com/open-mmlab/mmcv.git
```

- Download `onnxruntime-linux` from ONNX Runtime [releases](https://github.com/microsoft/onnxruntime/releases/tag/v1.8.1), extract it, expose `ONNXRUNTIME_DIR` and finally add the lib path to `LD_LIBRARY_PATH` as below:

```bash
wget https://github.com/microsoft/onnxruntime/releases/download/v1.8.1/onnxruntime-linux-x64-1.8.1.tgz

tar -zxvf onnxruntime-linux-x64-1.8.1.tgz
cd onnxruntime-linux-x64-1.8.1
export ONNXRUNTIME_DIR=$(pwd)
export LD_LIBRARY_PATH=$ONNXRUNTIME_DIR/lib:$LD_LIBRARY_PATH
```

#### Build on Linux

```bash
cd mmcv ## to MMCV root directory
MMCV_WITH_OPS=1 MMCV_WITH_ORT=1 python setup.py develop
```

### How to do inference using exported ONNX models with custom operators in ONNX Runtime in python

Install ONNX Runtime with `pip`

```bash
pip install onnxruntime==1.8.1
```

Inference Demo

```python
import os

import numpy as np
import onnxruntime as ort

from mmcv.ops import get_onnxruntime_op_path

ort_custom_op_path = get_onnxruntime_op_path()
assert os.path.exists(ort_custom_op_path)
session_options = ort.SessionOptions()
session_options.register_custom_ops_library(ort_custom_op_path)
## exported ONNX model with custom operators
onnx_file = 'sample.onnx'
input_data = np.random.randn(1, 3, 224, 224).astype(np.float32)
sess = ort.InferenceSession(onnx_file, session_options)
onnx_results = sess.run(None, {'input' : input_data})
```

### How to add a new custom operator for ONNX Runtime in MMCV

#### Reminder

- *Please note that this feature is experimental and may change in the future. We strongly suggest that users always try the latest master branch.*

- The custom operator is not included in [supported operator list](https://github.com/microsoft/onnxruntime/blob/master/docs/OperatorKernels.md) in ONNX Runtime.
- The custom operator should be able to be exported to ONNX.

#### Main procedures

Take custom operator `soft_nms` for example.

1. Add header `soft_nms.h` to ONNX Runtime include directory `mmcv/ops/csrc/onnxruntime/`
2. Add source `soft_nms.cpp` to ONNX Runtime source directory `mmcv/ops/csrc/onnxruntime/cpu/`
3. Register `soft_nms` operator in [onnxruntime_register.cpp](../../mmcv/ops/csrc/onnxruntime/cpu/onnxruntime_register.cpp)

    ```c++
    #include "soft_nms.h"

    SoftNmsOp c_SoftNmsOp;

    if (auto status = ortApi->CustomOpDomain_Add(domain, &c_SoftNmsOp)) {
    return status;
    }
    ```

4. Add unit test into `tests/test_ops/test_onnx.py`
   Check [here](../../tests/test_ops/test_onnx.py) for examples.

**Finally, welcome to send us PR of adding custom operators for ONNX Runtime in MMCV.** :nerd_face:

### Known Issues

- "RuntimeError: tuple appears in op that does not forward tuples, unsupported kind: `prim::PythonOp`."
   1. Note that `cummax` or `cummin` is generally exportable to ONNX as long as the torch version is >= 1.5.0, since `torch.cummax` is only supported with torch >= 1.5.0. But when `cummax` or `cummin` serves as an intermediate component whose outputs are used as inputs for other modules, the torch version must be >= 1.7.0. Otherwise the above error might arise when running the exported ONNX model with onnxruntime.
   2. Solution: update the torch version to 1.7.0 or higher.

### References

- [How to export Pytorch model with custom op to ONNX and run it in ONNX Runtime](https://github.com/onnx/tutorials/blob/master/PyTorchCustomOperator/README.md)
- [How to add a custom operator/kernel in ONNX Runtime](https://github.com/microsoft/onnxruntime/blob/master/docs/AddingCustomOp.md)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/en/deployment/tensorrt_custom_ops.md
================================================
## TensorRT Custom Ops

<!-- TOC -->

- [TensorRT Custom Ops](#tensorrt-custom-ops)
  - [MMCVRoIAlign](#mmcvroialign)
    - [Description](#description)
    - [Parameters](#parameters)
    - [Inputs](#inputs)
    - [Outputs](#outputs)
    - [Type Constraints](#type-constraints)
  - [ScatterND](#scatternd)
    - [Description](#description-1)
    - [Parameters](#parameters-1)
    - [Inputs](#inputs-1)
    - [Outputs](#outputs-1)
    - [Type Constraints](#type-constraints-1)
  - [NonMaxSuppression](#nonmaxsuppression)
    - [Description](#description-2)
    - [Parameters](#parameters-2)
    - [Inputs](#inputs-2)
    - [Outputs](#outputs-2)
    - [Type Constraints](#type-constraints-2)
  - [MMCVDeformConv2d](#mmcvdeformconv2d)
    - [Description](#description-3)
    - [Parameters](#parameters-3)
    - [Inputs](#inputs-3)
    - [Outputs](#outputs-3)
    - [Type Constraints](#type-constraints-3)
  - [grid_sampler](#grid_sampler)
    - [Description](#description-4)
    - [Parameters](#parameters-4)
    - [Inputs](#inputs-4)
    - [Outputs](#outputs-4)
    - [Type Constraints](#type-constraints-4)
  - [cummax](#cummax)
    - [Description](#description-5)
    - [Parameters](#parameters-5)
    - [Inputs](#inputs-5)
    - [Outputs](#outputs-5)
    - [Type Constraints](#type-constraints-5)
  - [cummin](#cummin)
    - [Description](#description-6)
    - [Parameters](#parameters-6)
    - [Inputs](#inputs-6)
    - [Outputs](#outputs-6)
    - [Type Constraints](#type-constraints-6)
  - [MMCVInstanceNormalization](#mmcvinstancenormalization)
    - [Description](#description-7)
    - [Parameters](#parameters-7)
    - [Inputs](#inputs-7)
    - [Outputs](#outputs-7)
    - [Type Constraints](#type-constraints-7)
  - [MMCVModulatedDeformConv2d](#mmcvmodulateddeformconv2d)
    - [Description](#description-8)
    - [Parameters](#parameters-8)
    - [Inputs](#inputs-8)
    - [Outputs](#outputs-8)
    - [Type Constraints](#type-constraints-8)

<!-- TOC -->

### MMCVRoIAlign

#### Description

Perform RoIAlign on output feature, used in bbox_head of most two stage
detectors.

#### Parameters

| Type    | Parameter        | Description                                                                                                   |
| ------- | ---------------- | ------------------------------------------------------------------------------------------------------------- |
| `int`   | `output_height`  | height of output roi                                                                                          |
| `int`   | `output_width`   | width of output roi                                                                                           |
| `float` | `spatial_scale`  | used to scale the input boxes                                                                                 |
| `int`   | `sampling_ratio` | number of input samples to take for each output sample. `0` means to take samples densely for current models. |
| `str`   | `mode`           | pooling mode in each bin. `avg` or `max`                                                                      |
| `int`   | `aligned`        | If `aligned=0`, use the legacy implementation in MMDetection. Else, align the results more perfectly.         |

#### Inputs

<dl>
<dt><tt>inputs[0]</tt>: T</dt>
<dd>Input feature map; 4D tensor of shape (N, C, H, W), where N is the batch size, C is the number of channels, H and W are the height and width of the data.</dd>
<dt><tt>inputs[1]</tt>: T</dt>
<dd>RoIs (Regions of Interest) to pool over; 2-D tensor of shape (num_rois, 5) given as [[batch_index, x1, y1, x2, y2], ...]. The RoIs' coordinates are the coordinate system of inputs[0].</dd>
</dl>

#### Outputs

<dl>
<dt><tt>outputs[0]</tt>: T</dt>
<dd>RoI pooled output, 4-D tensor of shape (num_rois, C, output_height, output_width). The r-th batch element outputs[0][r-1] is a pooled feature map corresponding to the r-th RoI inputs[1][r-1].</dd>
</dl>

#### Type Constraints

- T:tensor(float32, Linear)

### ScatterND

#### Description

ScatterND takes three inputs `data` tensor of rank r >= 1, `indices` tensor of rank q >= 1, and `updates` tensor of rank q + r - indices.shape[-1] - 1. The output of the operation is produced by creating a copy of the input `data`, and then updating its value to values specified by updates at specific index positions specified by `indices`. Its output shape is the same as the shape of `data`. Note that `indices` should not have duplicate entries. That is, two or more updates for the same index-location is not supported.

The `output` is calculated via the following equation:

```python
  output = np.copy(data)
  update_indices = indices.shape[:-1]
  for idx in np.ndindex(update_indices):
      output[indices[idx]] = updates[idx]
```

#### Parameters

None

#### Inputs

<dl>
<dt><tt>inputs[0]</tt>: T</dt>
<dd>Tensor of rank r>=1.</dd>

<dt><tt>inputs[1]</tt>: tensor(int32, Linear)</dt>
<dd>Tensor of rank q>=1.</dd>

<dt><tt>inputs[2]</tt>: T</dt>
<dd>Tensor of rank q + r - indices.shape[-1] - 1.</dd>
</dl>

#### Outputs

<dl>
<dt><tt>outputs[0]</tt>: T</dt>
<dd>Tensor of rank r >= 1.</dd>
</dl>

#### Type Constraints

- T:tensor(float32, Linear), tensor(int32, Linear)

### NonMaxSuppression

#### Description

Filter out boxes that have high IoU overlap with previously selected boxes or low scores. Output the indices of valid boxes. Indices of invalid boxes will be filled with -1.

#### Parameters

| Type    | Parameter                    | Description                                                                                                                          |
| ------- | ---------------------------- | ------------------------------------------------------------------------------------------------------------------------------------ |
| `int`   | `center_point_box`           | 0 - the box data is supplied as [y1, x1, y2, x2], 1 - the box data is supplied as [x_center, y_center, width, height].               |
| `int`   | `max_output_boxes_per_class` | The maximum number of boxes to be selected per batch per class. Default to 0, number of output boxes equal to number of input boxes. |
| `float` | `iou_threshold`              | The threshold for deciding whether boxes overlap too much with respect to IoU. Value range [0, 1]. Default to 0.                     |
| `float` | `score_threshold`            | The threshold for deciding when to remove boxes based on score.                                                                      |
| `int`   | `offset`                     | 0 or 1, boxes' width or height is (x2 - x1 + offset).                                                                                |

#### Inputs

<dl>
<dt><tt>inputs[0]</tt>: T</dt>
<dd>Input boxes. 3-D tensor of shape (num_batches, spatial_dimension, 4).</dd>
<dt><tt>inputs[1]</tt>: T</dt>
<dd>Input scores. 3-D tensor of shape (num_batches, num_classes, spatial_dimension).</dd>
</dl>

#### Outputs

<dl>
<dt><tt>outputs[0]</tt>: tensor(int32, Linear)</dt>
<dd>Selected indices. 2-D tensor of shape (num_selected_indices, 3) as [[batch_index, class_index, box_index], ...].</dd>
<dd>num_selected_indices=num_batches* num_classes* min(max_output_boxes_per_class, spatial_dimension).</dd>
<dd>All invalid indices will be filled with -1.</dd>
</dl>

#### Type Constraints

- T:tensor(float32, Linear)

### MMCVDeformConv2d

#### Description

Perform Deformable Convolution on input feature, read [Deformable Convolutional Network](https://arxiv.org/abs/1703.06211) for detail.

#### Parameters

| Type           | Parameter          | Description                                                                                                                       |
| -------------- | ------------------ | --------------------------------------------------------------------------------------------------------------------------------- |
| `list of ints` | `stride`           | The stride of the convolving kernel. (sH, sW)                                                                                     |
| `list of ints` | `padding`          | Paddings on both sides of the input. (padH, padW)                                                                                 |
| `list of ints` | `dilation`         | The spacing between kernel elements. (dH, dW)                                                                                     |
| `int`          | `deformable_group` | Groups of deformable offset.                                                                                                      |
| `int`          | `group`            | Split input into groups. `input_channel` should be divisible by the number of groups.                                             |
| `int`          | `im2col_step`      | DeformableConv2d use im2col to compute convolution. im2col_step is used to split input and offset, reduce memory usage of column. |

#### Inputs

<dl>
<dt><tt>inputs[0]</tt>: T</dt>
<dd>Input feature; 4-D tensor of shape (N, C, inH, inW), where N is the batch size, C is the number of channels, inH and inW are the height and width of the data.</dd>
<dt><tt>inputs[1]</tt>: T</dt>
<dd>Input offset; 4-D tensor of shape (N, deformable_group* 2* kH* kW, outH, outW), where kH and kW are the height and width of weight, outH and outW are the height and width of offset and output.</dd>
<dt><tt>inputs[2]</tt>: T</dt>
<dd>Input weight; 4-D tensor of shape (output_channel, input_channel, kH, kW).</dd>
</dl>

#### Outputs

<dl>
<dt><tt>outputs[0]</tt>: T</dt>
<dd>Output feature; 4-D tensor of shape (N, output_channel, outH, outW).</dd>
</dl>

#### Type Constraints

- T:tensor(float32, Linear)

### grid_sampler

#### Description

Perform sample from `input` with pixel locations from `grid`.

#### Parameters

| Type  | Parameter            | Description                                                                                                                                                                                                                                                                                     |
| ----- | -------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `int` | `interpolation_mode` | Interpolation mode to calculate output values. (0: `bilinear` , 1: `nearest`)                                                                                                                                                                                                                   |
| `int` | `padding_mode`       | Padding mode for outside grid values. (0: `zeros`, 1: `border`, 2: `reflection`)                                                                                                                                                                                                                |
| `int` | `align_corners`      | If `align_corners=1`, the extrema (`-1` and `1`) are considered as referring to the center points of the input's corner pixels. If `align_corners=0`, they are instead considered as referring to the corner points of the input's corner pixels, making the sampling more resolution agnostic. |

#### Inputs

<dl>
<dt><tt>inputs[0]</tt>: T</dt>
<dd>Input feature; 4-D tensor of shape (N, C, inH, inW), where N is the batch size, C is the number of channels, inH and inW are the height and width of the data.</dd>
<dt><tt>inputs[1]</tt>: T</dt>
<dd>Input grid; 4-D tensor of shape (N, outH, outW, 2), where outH and outW are the height and width of the grid and the output.</dd>
</dl>

#### Outputs

<dl>
<dt><tt>outputs[0]</tt>: T</dt>
<dd>Output feature; 4-D tensor of shape (N, C, outH, outW).</dd>
</dl>

#### Type Constraints

- T:tensor(float32, Linear)

### cummax

#### Description

Returns a namedtuple (`values`, `indices`) where `values` is the cumulative maximum of elements of `input` in the dimension `dim`. And `indices` is the index location of each maximum value found in the dimension `dim`.

#### Parameters

| Type  | Parameter | Description                             |
| ----- | --------- | --------------------------------------- |
| `int` | `dim`     | The dimension to do the operation over. |

#### Inputs

<dl>
<dt><tt>inputs[0]</tt>: T</dt>
<dd>The input tensor.</dd>
</dl>

#### Outputs

<dl>
<dt><tt>outputs[0]</tt>: T</dt>
<dd>Output values.</dd>
<dt><tt>outputs[1]</tt>: (int32, Linear)</dt>
<dd>Output indices.</dd>
</dl>

#### Type Constraints

- T:tensor(float32, Linear)

### cummin

#### Description

Returns a namedtuple (`values`, `indices`) where `values` is the cumulative minimum of elements of `input` in the dimension `dim`. And `indices` is the index location of each minimum value found in the dimension `dim`.

#### Parameters

| Type  | Parameter | Description                             |
| ----- | --------- | --------------------------------------- |
| `int` | `dim`     | The dimension to do the operation over. |

#### Inputs

<dl>
<dt><tt>inputs[0]</tt>: T</dt>
<dd>The input tensor.</dd>
</dl>

#### Outputs

<dl>
<dt><tt>outputs[0]</tt>: T</dt>
<dd>Output values.</dd>
<dt><tt>outputs[1]</tt>: (int32, Linear)</dt>
<dd>Output indices.</dd>
</dl>

#### Type Constraints

- T:tensor(float32, Linear)

### MMCVInstanceNormalization

#### Description

Carries out instance normalization as described in the paper https://arxiv.org/abs/1607.08022.

y = scale * (x - mean) / sqrt(variance + epsilon) + B, where mean and variance are computed per instance per channel.

#### Parameters

| Type    | Parameter | Description                                                          |
| ------- | --------- | -------------------------------------------------------------------- |
| `float` | `epsilon` | The epsilon value to use to avoid division by zero. Default is 1e-05 |

#### Inputs

<dl>
<dt><tt>input</tt>: T</dt>
<dd>Input data tensor from the previous operator; dimensions for image case are (N x C x H x W), where N is the batch size, C is the number of channels, and H and W are the height and the width of the data. For non image case, the dimensions are in the form of (N x C x D1 x D2 ... Dn), where N is the batch size.</dd>
<dt><tt>scale</tt>: T</dt>
<dd>The input 1-dimensional scale tensor of size C.</dd>
<dt><tt>B</tt>: T</dt>
<dd>The input 1-dimensional bias tensor of size C.</dd>
</dl>

#### Outputs

<dl>
<dt><tt>output</tt>: T</dt>
<dd>The output tensor of the same shape as input.</dd>
</dl>

#### Type Constraints

- T:tensor(float32, Linear)

### MMCVModulatedDeformConv2d

#### Description

Perform Modulated Deformable Convolution on input feature, read [Deformable ConvNets v2: More Deformable, Better Results](https://arxiv.org/abs/1811.11168?from=timeline) for detail.

#### Parameters

| Type           | Parameter          | Description                                                                           |
| -------------- | ------------------ | ------------------------------------------------------------------------------------- |
| `list of ints` | `stride`           | The stride of the convolving kernel. (sH, sW)                                         |
| `list of ints` | `padding`          | Paddings on both sides of the input. (padH, padW)                                     |
| `list of ints` | `dilation`         | The spacing between kernel elements. (dH, dW)                                         |
| `int`          | `deformable_group` | Groups of deformable offset.                                                          |
| `int`          | `group`            | Split input into groups. `input_channel` should be divisible by the number of groups. |

#### Inputs

<dl>
<dt><tt>inputs[0]</tt>: T</dt>
<dd>Input feature; 4-D tensor of shape (N, C, inH, inW), where N is the batch size, C is the number of channels, inH and inW are the height and width of the data.</dd>
<dt><tt>inputs[1]</tt>: T</dt>
<dd>Input offset; 4-D tensor of shape (N, deformable_group* 2* kH* kW, outH, outW), where kH and kW is the height and width of weight, outH and outW is the height and width of offset and output.</dd>
<dt><tt>inputs[2]</tt>: T</dt>
<dd>Input mask; 4-D tensor of shape (N, deformable_group* kH* kW, outH, outW), where kH and kW is the height and width of weight, outH and outW is the height and width of offset and output.</dd>
<dt><tt>inputs[3]</tt>: T</dt>
<dd>Input weight; 4-D tensor of shape (output_channel, input_channel, kH, kW).</dd>
<dt><tt>inputs[4]</tt>: T, optional</dt>
<dd>Input bias; 1-D tensor of shape (output_channel).</dd>
</dl>

#### Outputs

<dl>
<dt><tt>outputs[0]</tt>: T</dt>
<dd>Output feature; 4-D tensor of shape (N, output_channel, outH, outW).</dd>
</dl>

#### Type Constraints

- T:tensor(float32, Linear)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/en/deployment/tensorrt_plugin.md
================================================
## TensorRT Deployment

<!-- TOC -->

- [TensorRT Deployment](#tensorrt-deployment)
  - [Introduction](#introduction)
  - [List of TensorRT plugins supported in MMCV](#list-of-tensorrt-plugins-supported-in-mmcv)
  - [How to build TensorRT plugins in MMCV](#how-to-build-tensorrt-plugins-in-mmcv)
    - [Prerequisite](#prerequisite)
    - [Build on Linux](#build-on-linux)
  - [Create TensorRT engine and run inference in python](#create-tensorrt-engine-and-run-inference-in-python)
  - [How to add a TensorRT plugin for custom op in MMCV](#how-to-add-a-tensorrt-plugin-for-custom-op-in-mmcv)
    - [Main procedures](#main-procedures)
    - [Reminders](#reminders)
  - [Known Issues](#known-issues)
  - [References](#references)

<!-- TOC -->

### Introduction

**NVIDIA TensorRT** is a software development kit(SDK) for high-performance inference of deep learning models. It includes a deep learning inference optimizer and runtime that delivers low latency and high-throughput for deep learning inference applications. Please check its [developer's website](https://developer.nvidia.com/tensorrt) for more information.
To ease the deployment of trained models with custom operators from `mmcv.ops` using TensorRT, a series of TensorRT plugins are included in MMCV.

### List of TensorRT plugins supported in MMCV

| ONNX Operator             | TensorRT Plugin                                                                 | MMCV Releases |
|:--------------------------|:--------------------------------------------------------------------------------|:-------------:|
| MMCVRoiAlign              | [MMCVRoiAlign](./tensorrt_custom_ops.md#mmcvroialign)                           |     1.2.6     |
| ScatterND                 | [ScatterND](./tensorrt_custom_ops.md#scatternd)                                 |     1.2.6     |
| NonMaxSuppression         | [NonMaxSuppression](./tensorrt_custom_ops.md#nonmaxsuppression)                 |     1.3.0     |
| MMCVDeformConv2d          | [MMCVDeformConv2d](./tensorrt_custom_ops.md#mmcvdeformconv2d)                   |     1.3.0     |
| grid_sampler              | [grid_sampler](./tensorrt_custom_ops.md#grid-sampler)                           |     1.3.1     |
| cummax                    | [cummax](./tensorrt_custom_ops.md#cummax)                                       |     1.3.5     |
| cummin                    | [cummin](./tensorrt_custom_ops.md#cummin)                                       |     1.3.5     |
| MMCVInstanceNormalization | [MMCVInstanceNormalization](./tensorrt_custom_ops.md#mmcvinstancenormalization) |     1.3.5     |
| MMCVModulatedDeformConv2d | [MMCVModulatedDeformConv2d](./tensorrt_custom_ops.md#mmcvmodulateddeformconv2d) |     1.3.8     |

Notes

- All plugins listed above are developed on TensorRT-7.2.1.6.Ubuntu-16.04.x86_64-gnu.cuda-10.2.cudnn8.0

### How to build TensorRT plugins in MMCV

#### Prerequisite

- Clone repository

```bash
git clone https://github.com/open-mmlab/mmcv.git
```

- Install TensorRT

Download the corresponding TensorRT build from [NVIDIA Developer Zone](https://developer.nvidia.com/nvidia-tensorrt-download).

For example, for Ubuntu 16.04 on x86-64 with cuda-10.2, the downloaded file is `TensorRT-7.2.1.6.Ubuntu-16.04.x86_64-gnu.cuda-10.2.cudnn8.0.tar.gz`.

Then, install as below:

```bash
cd ~/Downloads
tar -xvzf TensorRT-7.2.1.6.Ubuntu-16.04.x86_64-gnu.cuda-10.2.cudnn8.0.tar.gz
export TENSORRT_DIR=`pwd`/TensorRT-7.2.1.6
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$TENSORRT_DIR/lib
```

Install python packages: tensorrt, graphsurgeon, onnx-graphsurgeon

```bash
pip install $TENSORRT_DIR/python/tensorrt-7.2.1.6-cp37-none-linux_x86_64.whl
pip install $TENSORRT_DIR/onnx_graphsurgeon/onnx_graphsurgeon-0.2.6-py2.py3-none-any.whl
pip install $TENSORRT_DIR/graphsurgeon/graphsurgeon-0.4.5-py2.py3-none-any.whl
```

For more detailed information on installing TensorRT using tar, please refer to [NVIDIA's website](https://docs.nvidia.com/deeplearning/tensorrt/archives/tensorrt-721/install-guide/index.html#installing-tar).

#### Build on Linux

```bash
cd mmcv ## to MMCV root directory
MMCV_WITH_OPS=1 MMCV_WITH_TRT=1 pip install -e .
```

### Create TensorRT engine and run inference in python

Here is an example.

```python
import torch
import onnx

from mmcv.tensorrt import (TRTWrapper, onnx2trt, save_trt_engine,
                                   is_tensorrt_plugin_loaded)

assert is_tensorrt_plugin_loaded(), 'Requires to complie TensorRT plugins in mmcv'

onnx_file = 'sample.onnx'
trt_file = 'sample.trt'
onnx_model = onnx.load(onnx_file)

## Model input
inputs = torch.rand(1, 3, 224, 224).cuda()
## Model input shape info
opt_shape_dict = {
    'input': [list(inputs.shape),
              list(inputs.shape),
              list(inputs.shape)]
}

## Create TensorRT engine
max_workspace_size = 1 << 30
trt_engine = onnx2trt(
    onnx_model,
    opt_shape_dict,
    max_workspace_size=max_workspace_size)

## Save TensorRT engine
save_trt_engine(trt_engine, trt_file)

## Run inference with TensorRT
trt_model = TRTWrapper(trt_file, ['input'], ['output'])

with torch.no_grad():
    trt_outputs = trt_model({'input': inputs})
    output = trt_outputs['output']

```

### How to add a TensorRT plugin for custom op in MMCV

#### Main procedures

Below are the main steps:

1. Add c++ header file
2. Add c++ source file
3. Add cuda kernel file
4. Register plugin in `trt_plugin.cpp`
5. Add unit test in `tests/test_ops/test_tensorrt.py`

**Take RoIAlign plugin `roi_align` for example.**

1. Add header `trt_roi_align.hpp` to TensorRT include directory `mmcv/ops/csrc/tensorrt/`
2. Add source `trt_roi_align.cpp` to TensorRT source directory `mmcv/ops/csrc/tensorrt/plugins/`
3. Add cuda kernel `trt_roi_align_kernel.cu` to TensorRT source directory `mmcv/ops/csrc/tensorrt/plugins/`
4. Register `roi_align` plugin in [trt_plugin.cpp](https://github.com/open-mmlab/mmcv/blob/master/mmcv/ops/csrc/tensorrt/plugins/trt_plugin.cpp)

    ```c++
    #include "trt_plugin.hpp"

    #include "trt_roi_align.hpp"

    REGISTER_TENSORRT_PLUGIN(RoIAlignPluginDynamicCreator);

    extern "C" {
    bool initLibMMCVInferPlugins() { return true; }
    }  // extern "C"
    ```

5. Add unit test into `tests/test_ops/test_tensorrt.py`
   Check [here](https://github.com/open-mmlab/mmcv/blob/master/tests/test_ops/test_tensorrt.py) for examples.

#### Reminders

- *Please note that this feature is experimental and may change in the future. Strongly suggest users always try with the latest master branch.*

- Some of the [custom ops](https://mmcv.readthedocs.io/en/latest/ops.html) in `mmcv` have their own CUDA implementations, which can be used as references.

### Known Issues

- None

### References

- [Developer guide of Nvidia TensorRT](https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html)
- [TensorRT Open Source Software](https://github.com/NVIDIA/TensorRT)
- [onnx-tensorrt](https://github.com/onnx/onnx-tensorrt)
- [TensorRT python API](https://docs.nvidia.com/deeplearning/tensorrt/api/python_api/index.html)
- [TensorRT c++ plugin API](https://docs.nvidia.com/deeplearning/tensorrt/api/c_api/classnvinfer1_1_1_i_plugin.html)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/en/faq.md
================================================
## Frequently Asked Questions

We list some common troubles faced by many users and their corresponding solutions here.
Feel free to enrich the list if you find any frequent issues and have ways to help others to solve them.

### Installation

- KeyError: "xxx: 'yyy is not in the zzz registry'"

    The registry mechanism will be triggered only when the file of the module is imported.
    So you need to import that file somewhere. More details can be found at https://github.com/open-mmlab/mmdetection/issues/5974.

- "No module named 'mmcv.ops'"; "No module named 'mmcv._ext'"

    1. Uninstall existing mmcv in the environment using `pip uninstall mmcv`
    2. Install mmcv-full following the [installation instruction](https://mmcv.readthedocs.io/en/latest/get_started/installation.html) or [Build MMCV from source](https://mmcv.readthedocs.io/en/latest/get_started/build.html)

- "invalid device function" or "no kernel image is available for execution"

    1. Check the CUDA compute capability of your GPU
    2. Run `python mmdet/utils/collect_env.py` to check whether PyTorch, torchvision, and MMCV are built for the correct GPU architecture. You may need to set `TORCH_CUDA_ARCH_LIST` to reinstall MMCV. The compatibility issue could happen when using old GPUs, e.g., Tesla K80 (3.7) on colab.
    3. Check whether the running environment is the same as that when mmcv/mmdet is compiled. For example, you may compile mmcv using CUDA 10.0 but run it on CUDA 9.0 environments

- "undefined symbol" or "cannot open xxx.so"

    1. If those symbols are CUDA/C++ symbols (e.g., libcudart.so or GLIBCXX), check
       whether the CUDA/GCC runtimes are the same as those used for compiling mmcv
    2. If those symbols are Pytorch symbols (e.g., symbols containing caffe, aten, and TH), check whether the Pytorch version is the same as that used for compiling mmcv
    3. Run `python mmdet/utils/collect_env.py` to check whether PyTorch, torchvision, and MMCV are built by and running on the same environment

- "RuntimeError: CUDA error: invalid configuration argument"

    This error may be caused by the poor performance of GPU. Try to decrease the value of [THREADS_PER_BLOCK](https://github.com/open-mmlab/mmcv/blob/cac22f8cf5a904477e3b5461b1cc36856c2793da/mmcv/ops/csrc/common_cuda_helper.hpp#L10)
    and recompile mmcv.

- "RuntimeError: nms is not compiled with GPU support"

    This error is because your CUDA environment is not installed correctly.
    You may try to re-install your CUDA environment and then delete the build/ folder before re-compile mmcv.

- "Segmentation fault"

    1. Check your GCC version and use GCC >= 5.4. This is usually caused by the incompatibility between PyTorch and the environment (e.g., GCC < 4.9 for PyTorch). We also recommend that users avoid GCC 5.5 because many users report that GCC 5.5 will cause "segmentation fault" and simply changing it to GCC 5.4 could solve the problem
    2. Check whether PyTorch is correctly installed and could use CUDA op, e.g. type the following command in your terminal and see whether they could correctly output results
        ```shell
        python -c 'import torch; print(torch.cuda.is_available())'
        ```
    3. If PyTorch is correctly installed, check whether MMCV is correctly installed. If MMCV is correctly installed, then there will be no issue of the command
        ```shell
        python -c 'import mmcv; import mmcv.ops'
        ```
    4. If MMCV and PyTorch are correctly installed, you can use `ipdb` to set breakpoints or directly add `print` to debug and see which part leads the `segmentation fault`

- "libtorch_cuda_cu.so: cannot open shared object file"

    `mmcv-full` depends on the shared object but it cannot be found. We can check whether the object exists in `~/miniconda3/envs/{environment-name}/lib/python3.7/site-packages/torch/lib` or try to re-install PyTorch.

- "fatal error C1189: #error:  -- unsupported Microsoft Visual Studio version!"

  If you are building mmcv-full on Windows and the version of CUDA is 9.2, you will probably encounter the error `"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.2\include\crt/host_config.h(133): fatal error C1189: #error:  -- unsupported Microsoft Visual Studio version! Only the versions 2012, 2013, 2015 and 2017 are supported!"`, in which case you can use a lower version of Microsoft Visual Studio like vs2017.

- "error: member "torch::jit::detail::ModulePolicy::all_slots" may not be initialized"

  If your version of PyTorch is 1.5.0 and you are building mmcv-full on Windows, you will probably encounter the error `- torch/csrc/jit/api/module.h(474): error: member "torch::jit::detail::ModulePolicy::all_slots" may not be initialized`. The way to solve the error is to replace all the `static constexpr bool all_slots = false;` with `static bool all_slots = false;` at this file `https://github.com/pytorch/pytorch/blob/v1.5.0/torch/csrc/jit/api/module.h`. More details can be found at https://github.com/pytorch/pytorch/issues/39394.

- "error: a member with an in-class initializer must be const"

  If your version of PyTorch is 1.6.0 and you are building mmcv-full on Windows, you will probably encounter the error `"- torch/include\torch/csrc/jit/api/module.h(483): error: a member with an in-class initializer must be const"`. The way to solve the error is to replace all the `CONSTEXPR_EXCEPT_WIN_CUDA ` with `const` at `torch/include\torch/csrc/jit/api/module.h`. More details can be found at https://github.com/open-mmlab/mmcv/issues/575.

- "error: member "torch::jit::ProfileOptionalOp::Kind" may not be initialized"

  If your version of PyTorch is 1.7.0 and you are building mmcv-full on Windows, you will probably encounter the error `torch/include\torch/csrc/jit/ir/ir.h(1347): error: member "torch::jit::ProfileOptionalOp::Kind" may not be initialized`. The way to solve the error needs to modify several local files of PyTorch:

  - delete `static constexpr Symbol Kind = ::c10::prim::profile;` and `static constexpr Symbol Kind = ::c10::prim::profile_optional;` at `torch/include\torch/csrc/jit/ir/ir.h`
  - replace `explicit operator type&() { return *(this->value); }` with `explicit operator type&() { return *((type*)this->value); }` at `torch\include\pybind11\cast.h`
  - replace all the `CONSTEXPR_EXCEPT_WIN_CUDA` with `const` at `torch/include\torch/csrc/jit/api/module.h`

- Compatibility issue between MMCV and MMDetection; "ConvWS is already registered in conv layer"

    Please install the correct version of MMCV for the version of your MMDetection following the [installation instruction](https://mmdetection.readthedocs.io/en/latest/get_started.html#installation). More details can be found at https://github.com/pytorch/pytorch/pull/45956.

### Usage

- "RuntimeError: Expected to have finished reduction in the prior iteration before starting a new one"

    1. This error indicates that your module has parameters that were not used in producing loss. This phenomenon may be caused by running different branches in your code in DDP mode. More details at https://github.com/pytorch/pytorch/issues/55582
    2. You can set `find_unused_parameters = True` in the config to solve the above problems or find those unused parameters manually

- "RuntimeError: Trying to backward through the graph a second time"

   `GradientCumulativeOptimizerHook` and `OptimizerHook` are both set which causes the `loss.backward()` to be called twice so `RuntimeError` was raised. We can only use one of these. More details at https://github.com/open-mmlab/mmcv/issues/1379.


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/en/get_started/build.md
================================================
## Build MMCV from source

### Build on Linux or macOS

After cloning the repo with

```bash
git clone https://github.com/open-mmlab/mmcv.git
cd mmcv
```

It is recommended to install `ninja` to speed up the compilation

```bash
pip install -r requirements/optional.txt
```

You can either

- install the lite version

  ```bash
  pip install -e .
  ```

- install the full version

  ```bash
  MMCV_WITH_OPS=1 pip install -e .
  ```

If you are on macOS, add the following environment variables before the installing command.

```bash
CC=clang CXX=clang++ CFLAGS='-stdlib=libc++'
```

e.g.,

```bash
CC=clang CXX=clang++ CFLAGS='-stdlib=libc++' MMCV_WITH_OPS=1 pip install -e .
```

```{note}
If you would like to use `opencv-python-headless` instead of `opencv-python`,
e.g., in a minimum container environment or servers without GUI,
you can first install it before installing MMCV to skip the installation of `opencv-python`.
```
### Build on Windows

Building MMCV on Windows is a bit more complicated than that on Linux.
The following instructions show how to get this accomplished.

#### Prerequisite

The following software is required for building MMCV on windows.
Install them first.

- [Git](https://git-scm.com/download/win)
  - During installation, tick **add git to Path**.
- [Visual Studio Community 2019](https://visualstudio.microsoft.com)
  - A compiler for C++ and CUDA codes.
- [Miniconda](https://docs.conda.io/en/latest/miniconda.html)
  - Official distributions of Python should work too.
- [CUDA 10.2](https://developer.nvidia.com/cuda-10.2-download-archive)
  - Not required for building CPU version.
  - Customize the installation if necessary. As a recommendation, skip the driver installation if a newer version is already installed.

```{note}
You should know how to set up environment variables, especially `Path`, on Windows. The following instruction relies heavily on this skill.
```

#### Setup Python Environment

1. Launch Anaconda prompt from Windows Start menu

    Do not use raw `cmd.exe`, since the instructions below are based on PowerShell syntax.

2. Create a new conda environment

    ```shell
    conda create --name mmcv python=3.7  # 3.6, 3.7, 3.8 should work too as tested
    conda activate mmcv  # make sure to activate environment before any operation
    ```

3. Install PyTorch. Choose a version based on your need.

    ```shell
    conda install pytorch torchvision cudatoolkit=10.2 -c pytorch
    ```

    We only tested PyTorch version >= 1.6.0.

4. Prepare MMCV source code

    ```shell
    git clone https://github.com/open-mmlab/mmcv.git
    cd mmcv
    ```

5. Install required Python packages

    ```shell
    pip3 install -r requirements/runtime.txt
    ```

6. It is recommended to install `ninja` to speed up the compilation

    ```bash
    pip install -r requirements/optional.txt
    ```

#### Build and install MMCV

MMCV can be built in three ways:

1. Lite version (without ops)

   In this way, no custom ops are compiled and mmcv is a pure python package.

2. Full version (CPU ops)

   Module `ops` will be compiled as a pytorch extension, but only x86 code will be compiled. The compiled ops can be executed on CPU only.

3. Full version (CUDA ops)

   Both x86 and CUDA codes of `ops` module will be compiled. The compiled version can be run on both CPU and CUDA-enabled GPU (if implemented).

##### Common steps

1. Set up MSVC compiler

    Set Environment variable, add `C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Tools\MSVC\14.27.29110\bin\Hostx86\x64` to `PATH`, so that `cl.exe` will be available in prompt, as shown below.

    ```none
    (base) PS C:\Users\xxx> cl
    Microsoft (R) C/C++ Optimizing  Compiler Version 19.27.29111 for x64
    Copyright (C) Microsoft Corporation.   All rights reserved.

    usage: cl [ option... ] filename... [ / link linkoption... ]
    ```

    For compatibility, we use the x86-hosted and x64-targeted compiler. Note `Hostx86\x64` in the path.

    You may want to change the system language to English because pytorch will parse text output from `cl.exe` to check its version. However only utf-8 is recognized. Navigate to Control Panel -> Region -> Administrative -> Language for Non-Unicode programs and change it to English.

##### Option 1: Build MMCV (lite version)

After finishing above common steps, launch Anaconda shell from Start menu and issue the following commands:

```shell
# activate environment
conda activate mmcv
# change directory
cd mmcv
# install
python setup.py develop
# check
pip list
```

##### Option 2: Build MMCV (full version with CPU)

1. Finish above common steps
2. Set up environment variables

    ```shell
    $env:MMCV_WITH_OPS = 1
    $env:MAX_JOBS = 8  # based on your available number of CPU cores and amount of memory
    ```

3. Following build steps of the lite version

    ```shell
    # activate environment
    conda activate mmcv
    # change directory
    cd mmcv
    # build
    python setup.py build_ext # if success, cl will be launched to compile ops
    # install
    python setup.py develop
    # check
    pip list
    ```

##### Option 3: Build MMCV (full version with CUDA)

1. Finish above common steps
2. Make sure `CUDA_PATH` or `CUDA_HOME` is already set in `envs` via `ls env:`, desired output is shown as below:

   ```none
   (base) PS C:\Users\WRH> ls env:

   Name                           Value
   ----                           -----
   <... omit some lines ...>
   CUDA_PATH                      C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2
   CUDA_PATH_V10_1                C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.1
   CUDA_PATH_V10_2                C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2
   <... omit some lines ...>
   ```

   This should already be done by the CUDA installer. If not, or if you have multiple versions of the CUDA toolkit installed, set it with

   ```shell
   $env:CUDA_HOME = "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2"
   # OR
   $env:CUDA_HOME = $env:CUDA_PATH_V10_2 # if CUDA_PATH_V10_2 is in envs:
   ```

3. Set CUDA target arch

   ```shell
   # Suppose you are using GTX 1080, which is of capability 6.1
   $env:TORCH_CUDA_ARCH_LIST="6.1"
   # OR build all supported arch, will be slow
   $env:TORCH_CUDA_ARCH_LIST="3.5 3.7 5.0 5.2 6.0 6.1 7.0 7.5"
   ```

```{note}
Check the compute capability of your GPU from [here](https://developer.nvidia.com/cuda-gpus).
```

4. Launch compiling the same way as CPU

   ```shell
   $env:MMCV_WITH_OPS = 1
   $env:MAX_JOBS = 8  # based on available number of CPU cores and amount of memory
   # activate environment
   conda activate mmcv
   # change directory
   cd mmcv
   # build
   python setup.py build_ext # if success, cl will be launched to compile ops
   # install
   python setup.py develop
   # check
   pip list
   ```

```{note}
If you are compiling against PyTorch 1.6.0, you might meet some errors from PyTorch as described in [this issue](https://github.com/pytorch/pytorch/issues/42467). Follow [this pull request](https://github.com/pytorch/pytorch/pull/43380/files) to modify the source code in your local PyTorch installation.
```

If you meet issues when running or compiling mmcv, we list some common issues in [Frequently Asked Question](../faq.html).


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/en/get_started/installation.md
================================================
## Installation

There are two versions of MMCV:

- **mmcv-full**: comprehensive, with full features and various CUDA ops out of box. It takes longer time to build.
- **mmcv**: lite, without CUDA ops but all other features, similar to mmcv<1.0.0. It is useful when you do not need those CUDA ops.

```{warning}
Do not install both versions in the same environment, otherwise you may encounter errors like `ModuleNotFound`. You need to uninstall one before installing the other. `Installing the full version is highly recommended if CUDA is available`.
```

a. Install the full version.

Before installing mmcv-full, make sure that PyTorch has been successfully installed following the [official guide](https://pytorch.org/).

We provide pre-built mmcv packages (recommended) with different PyTorch and CUDA versions to simplify the building. In addition, you can run [check_installation.py](.dev_scripts/check_installation.py) to check the installation of mmcv-full after running the installation commands.

i. Install the latest version.

The rule for installing the latest ``mmcv-full`` is as follows:

```shell
pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/{cu_version}/{torch_version}/index.html
```

Please replace ``{cu_version}`` and ``{torch_version}`` in the url to your desired one. For example,
to install the latest ``mmcv-full`` with ``CUDA 11.1`` and ``PyTorch 1.9.0``, use the following command:

```shell
pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.9.0/index.html
```

For more details, please refer to the following tables and delete ``=={mmcv_version}``.

ii. Install a specified version.

The rule for installing a specified ``mmcv-full`` is as follows:

```shell
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/{cu_version}/{torch_version}/index.html
```

First of all, please refer to the Releases and replace ``{mmcv_version}`` with a specified one, e.g. ``1.3.9``.
Then replace ``{cu_version}`` and ``{torch_version}`` in the url to your desired versions. For example,
to install ``mmcv-full==1.3.9`` with ``CUDA 11.1`` and ``PyTorch 1.9.0``, use the following command:

```shell
pip install mmcv-full==1.3.9 -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.9.0/index.html
```

```{note}
mmcv-full is only compiled on PyTorch 1.x.0 because the compatibility
usually holds between 1.x.0 and 1.x.1. If your PyTorch version is 1.x.1, you
can install mmcv-full compiled with PyTorch 1.x.0 and it usually works well.
For example, if your PyTorch version is 1.8.1 and CUDA version is 11.1, you
can use the following command to install mmcv-full.

`pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.8.0/index.html`
```

For more details, please refer to the following tables.

<table class="docutils">
  <tbody>
    <tr>
      <th width="80"> CUDA </th>
      <th valign="bottom" align="left" style="min-width: 100px">torch 1.10</th>
      <th valign="bottom" align="left" style="min-width: 100px">torch 1.9</th>
      <th valign="bottom" align="left" style="min-width: 100px">torch 1.8</th>
      <th valign="bottom" align="left" style="min-width: 100px">torch 1.7</th>
      <th valign="bottom" align="left" style="min-width: 100px">torch 1.6</th>
      <th valign="bottom" align="left" style="min-width: 100px">torch 1.5</th>
    </tr>
    <tr>
      <td align="left">11.3</td>
      <td align="left"><details><summary> install </summary><pre><code>pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu113/torch1.10.0/index.html</code></pre> </details> </td>
      <td align="left"></td>
      <td align="left"> </td>
      <td align="left"> </td>
      <td align="left"> </td>
      <td align="left"> </td>
    </tr>
    <tr>
      <td align="left">11.1</td>
      <td align="left"><details><summary> install </summary><pre><code>pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.10.0/index.html</code></pre> </details> </td>
      <td align="left"><details><summary> install </summary><pre><code>pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.9.0/index.html</code></pre> </details> </td>
      <td align="left"><details><summary> install </summary><pre><code>pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.8.0/index.html</code></pre> </details> </td>
      <td align="left"> </td>
      <td align="left"> </td>
      <td align="left"> </td>
    </tr>
    <tr>
      <td align="left">11.0</td>
      <td align="left"> </td>
      <td align="left"> </td>
      <td align="left"> </td>
      <td align="left"><details><summary> install </summary><pre><code>pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu110/torch1.7.0/index.html</code></pre> </details> </td>
      <td align="left"> </td>
      <td align="left"> </td>
    </tr>
    <tr>
      <td align="left">10.2</td>
      <td align="left"><details><summary> install </summary><pre><code>pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.10.0/index.html</code></pre> </details> </td>
      <td align="left"><details><summary> install </summary><pre><code>pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.9.0/index.html</code></pre> </details> </td>
      <td align="left"><details><summary> install </summary><pre><code>pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.8.0/index.html</code></pre> </details> </td>
      <td align="left"><details><summary> install </summary><pre><code>pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.7.0/index.html</code></pre> </details> </td>
      <td align="left"><details><summary> install </summary><pre><code>pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.6.0/index.html</code></pre> </details> </td>
      <td align="left"><details><summary> install </summary><pre><code>pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.5.0/index.html</code></pre> </details> </td>
    </tr>
    <tr>
      <td align="left">10.1</td>
      <td align="left"> </td>
      <td align="left"> </td>
      <td align="left"><details><summary> install </summary><pre><code> pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu101/torch1.8.0/index.html</code></pre> </details> </td>
      <td align="left"><details><summary> install </summary><pre><code> pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu101/torch1.7.0/index.html</code></pre> </details> </td>
      <td align="left"><details><summary> install </summary><pre><code> pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu101/torch1.6.0/index.html</code></pre> </details> </td>
      <td align="left"><details><summary> install </summary><pre><code> pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu101/torch1.5.0/index.html</code></pre> </details> </td>
    </tr>
    <tr>
      <td align="left">9.2</td>
      <td align="left"> </td>
      <td align="left"> </td>
      <td align="left"> </td>
      <td align="left"><details><summary> install </summary><pre><code> pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu92/torch1.7.0/index.html</code></pre> </details> </td>
      <td align="left"><details><summary> install </summary><pre><code> pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu92/torch1.6.0/index.html</code></pre> </details> </td>
      <td align="left"><details><summary> install </summary><pre><code> pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu92/torch1.5.0/index.html</code></pre> </details> </td>
    </tr>
    <tr>
      <td align="left">cpu</td>
      <td align="left"><details><summary> install </summary><pre><code> pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.10.0/index.html</code></pre> </details> </td>
      <td align="left"><details><summary> install </summary><pre><code> pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.9.0/index.html</code></pre> </details> </td>
      <td align="left"><details><summary> install </summary><pre><code> pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.8.0/index.html</code></pre> </details> </td>
      <td align="left"><details><summary> install </summary><pre><code> pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.7.0/index.html</code></pre> </details> </td>
      <td align="left"><details><summary> install </summary><pre><code> pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.6.0/index.html</code></pre> </details> </td>
      <td align="left"><details><summary> install </summary><pre><code> pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.5.0/index.html</code></pre> </details> </td>
    </tr>
  </tbody>
</table>

```{note}
The pre-built packages provided above do not include all versions of mmcv-full, you can click on the corresponding links to see the supported versions. For example, if you click [cu102-torch1.8.0](https://download.openmmlab.com/mmcv/dist/cu102/torch1.8.0/index.html), you can see that `cu102-torch1.8.0` only provides 1.3.0 and above versions of mmcv-full. In addition, we no longer provide `mmcv-full` pre-built packages compiled with `PyTorch 1.3 & 1.4` since v1.3.17. You can find previous versions that were compiled with PyTorch 1.3 & 1.4 [here](./previous_versions.md). The compatibility is still ensured in our CI, but we will discard the support of PyTorch 1.3 & 1.4 next year.
```

Another way is to compile locally by running

```shell
pip install mmcv-full
```

Note that the local compiling may take up to 10 mins.

b. Install the lite version.

```shell
pip install mmcv
```

c. Install full version with custom operators for onnxruntime

- Check [here](https://mmcv.readthedocs.io/en/latest/deployment/onnxruntime_custom_ops.html) for detailed instruction.

If you would like to build MMCV from source, please refer to the [guide](https://mmcv.readthedocs.io/en/latest/get_started/build.html).


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/en/get_started/introduction.md
================================================
## Introduction

MMCV is a foundational library for computer vision research and supports many
research projects as below:

- [MIM](https://github.com/open-mmlab/mim): MIM Installs OpenMMLab Packages.
- [MMClassification](https://github.com/open-mmlab/mmclassification): OpenMMLab image classification toolbox and benchmark.
- [MMDetection](https://github.com/open-mmlab/mmdetection): OpenMMLab detection toolbox and benchmark.
- [MMDetection3D](https://github.com/open-mmlab/mmdetection3d): OpenMMLab's next-generation platform for general 3D object detection.
- [MMSegmentation](https://github.com/open-mmlab/mmsegmentation): OpenMMLab semantic segmentation toolbox and benchmark.
- [MMAction2](https://github.com/open-mmlab/mmaction2): OpenMMLab's next-generation action understanding toolbox and benchmark.
- [MMTracking](https://github.com/open-mmlab/mmtracking): OpenMMLab video perception toolbox and benchmark.
- [MMPose](https://github.com/open-mmlab/mmpose): OpenMMLab pose estimation toolbox and benchmark.
- [MMEditing](https://github.com/open-mmlab/mmediting): OpenMMLab image and video editing toolbox.
- [MMOCR](https://github.com/open-mmlab/mmocr): A Comprehensive Toolbox for Text Detection, Recognition and Understanding.
- [MMGeneration](https://github.com/open-mmlab/mmgeneration): OpenMMLab image and video generative models toolbox.
- [MMFlow](https://github.com/open-mmlab/mmflow): OpenMMLab optical flow toolbox and benchmark.
- [MMFewShot](https://github.com/open-mmlab/mmfewshot): OpenMMLab FewShot Learning Toolbox and Benchmark.
- [MMHuman3D](https://github.com/open-mmlab/mmhuman3d): OpenMMLab 3D human parametric model toolbox and benchmark.
- [MMSelfSup](https://github.com/open-mmlab/mmselfsup): OpenMMLab self-supervised learning Toolbox and Benchmark.
- [MMRazor](https://github.com/open-mmlab/mmrazor): OpenMMLab Model Compression Toolbox and Benchmark.
- [MMDeploy](https://github.com/open-mmlab/mmdeploy): OpenMMLab Model Deployment Framework.

It provides the following functionalities.

- Universal IO APIs
- Image/Video processing
- Image and annotation visualization
- Useful utilities (progress bar, timer, ...)
- PyTorch runner with hooking mechanism
- Various CNN architectures
- High-quality implementation of common CUDA ops

```{note}
MMCV requires Python 3.6+.
```


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/en/get_started/previous_versions.md
================================================
## OTHER VERSIONS OF PYTORCH BUILT FOR MMCV-FULL

We no longer provide `mmcv-full` packages compiled under lower versions of `PyTorch`, but for your convenience, you can find them below.

### PyTorch 1.4

| 1.0.0 <= mmcv_version <= 1.2.1

#### CUDA 10.1

```bash
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu101/torch1.4.0/index.html
```

#### CUDA 9.2

```bash
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu92/torch1.4.0/index.html
```

#### CPU

```bash
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.4.0/index.html
```

### PyTorch v1.3

| 1.0.0 <= mmcv_version <= 1.3.16

#### CUDA 10.1

```bash
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu101/torch1.3.0/index.html
```

#### CUDA 9.2

```bash
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu92/torch1.3.0/index.html
```

#### CPU

```bash
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.3.0/index.html
```


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/en/index.rst
================================================
Welcome to MMCV's documentation!
================================

You can switch between Chinese and English documents in the lower-left corner of the layout.

.. toctree::
   :maxdepth: 2
   :caption: Get Started

   get_started/introduction.md
   get_started/installation.md
   get_started/build.md

.. toctree::
   :maxdepth: 2
   :caption: Understand MMCV

   understand_mmcv/config.md
   understand_mmcv/registry.md
   understand_mmcv/runner.md
   understand_mmcv/io.md
   understand_mmcv/data_process.md
   understand_mmcv/visualization.md
   understand_mmcv/cnn.md
   understand_mmcv/ops.md
   understand_mmcv/utils.md

.. toctree::
   :maxdepth: 2
   :caption: Deployment

   deployment/mmcv_ops_definition.md
   deployment/onnx.md
   deployment/onnxruntime_op.md
   deployment/tensorrt_plugin.md

.. toctree::
   :maxdepth: 2
   :caption: Compatibility

   compatibility.md

.. toctree::
   :maxdepth: 2
   :caption: FAQ

   faq.md

.. toctree::
   :maxdepth: 2
   :caption: Community

   community/contributing.md
   community/pr.md

.. toctree::
   :maxdepth: 2
   :caption: API Reference

   api.rst

Indices and tables
==================

* :ref:`genindex`
* :ref:`search`


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/en/make.bat
================================================
@ECHO OFF

REM Switch to the directory that contains this script so the relative
REM SOURCEDIR and BUILDDIR values below resolve against it. The matching
REM popd at the end label restores the caller's directory.
pushd %~dp0

REM Command file for Sphinx documentation

REM Default to sphinx-build unless the caller already set SPHINXBUILD.
if "%SPHINXBUILD%" == "" (
	set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=.
set BUILDDIR=_build

REM Without a target argument, fall through to the Sphinx make-mode help.
if "%1" == "" goto help

REM Probe the configured command with all output discarded. An errorlevel of
REM 9009 or higher is treated as "command not found": print guidance and exit
REM with status 1. NOTE: this early exit does not pass through the end label,
REM so popd is not executed on that path.
%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
	echo.
	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
	echo.installed, then set the SPHINXBUILD environment variable to point
	echo.to the full path of the 'sphinx-build' executable. Alternatively you
	echo.may add the Sphinx directory to PATH.
	echo.
	echo.If you don't have Sphinx installed, grab it from
	echo.http://sphinx-doc.org/
	exit /b 1
)

REM Forward the first argument as the make-mode target, together with any
REM extra options supplied through SPHINXOPTS.
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
goto end

REM Reached only when no target was given: list the available targets.
:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%

REM Common exit point: undo the pushd performed at the top of the script.
:end
popd


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/en/understand_mmcv/cnn.md
================================================
## CNN

We provide some building bricks for CNNs, including layer building, module bundles and weight initialization.

### Layer building

We may need to try different layers of the same type when running experiments,
but do not want to modify the code from time to time.
Here we provide some layer building methods to construct layers from a dict,
which can be written in configs or specified via command line arguments.

#### Usage

The simplest example is

```python
cfg = dict(type='Conv3d')
layer = build_conv_layer(cfg, in_channels=3, out_channels=8, kernel_size=3)
```

- `build_conv_layer`: Supported types are Conv1d, Conv2d, Conv3d, Conv (alias for Conv2d).
- `build_norm_layer`: Supported types are BN1d, BN2d, BN3d, BN (alias for BN2d), SyncBN, GN, LN, IN1d, IN2d, IN3d, IN (alias for IN2d).
- `build_activation_layer`: Supported types are ReLU, LeakyReLU, PReLU, RReLU, ReLU6, ELU, Sigmoid, Tanh, GELU.
- `build_upsample_layer`: Supported types are nearest, bilinear, deconv, pixel_shuffle.
- `build_padding_layer`: Supported types are zero, reflect, replicate.

#### Extension

We also allow extending the building methods with custom layers and operators.

1. Write and register your own module.

    ```python
    from mmcv.cnn import UPSAMPLE_LAYERS

    @UPSAMPLE_LAYERS.register_module()
    class MyUpsample:

        def __init__(self, scale_factor):
            pass

        def forward(self, x):
            pass
    ```

2. Import `MyUpsample` somewhere (e.g., in `__init__.py`) and then use it.

    ```python
    cfg = dict(type='MyUpsample', scale_factor=2)
    layer = build_upsample_layer(cfg)
    ```

### Module bundles

We also provide common module bundles to facilitate the network construction.
`ConvModule` is a bundle of convolution, normalization and activation layers,
please refer to the [api](api.html#mmcv.cnn.ConvModule) for details.

```python
# conv + bn + relu
conv = ConvModule(3, 8, 2, norm_cfg=dict(type='BN'))
# conv + gn + relu
conv = ConvModule(3, 8, 2, norm_cfg=dict(type='GN', num_groups=2))
# conv + relu
conv = ConvModule(3, 8, 2)
# conv
conv = ConvModule(3, 8, 2, act_cfg=None)
# conv + leaky relu
conv = ConvModule(3, 8, 3, padding=1, act_cfg=dict(type='LeakyReLU'))
# bn + conv + relu
conv = ConvModule(
    3, 8, 2, norm_cfg=dict(type='BN'), order=('norm', 'conv', 'act'))
```

### Weight initialization

> Implementation details are available at [mmcv/cnn/utils/weight_init.py](../../mmcv/cnn/utils/weight_init.py)

During training, a proper initialization strategy is beneficial to speed up the
training or obtain a higher performance. In MMCV, we provide some commonly used
methods for initializing modules like `nn.Conv2d`. Of course, we also provide
high-level APIs for initializing models containing one or more
modules.

#### Initialization functions

Initialize a `nn.Module` such as `nn.Conv2d`, `nn.Linear` in a functional way.

We provide the following initialization methods.

- constant_init

  Initialize module parameters with constant values.

    ```python
    >>> import torch.nn as nn
    >>> from mmcv.cnn import constant_init
    >>> conv1 = nn.Conv2d(3, 3, 1)
    >>> # constant_init(module, val, bias=0)
    >>> constant_init(conv1, 1, 0)
    >>> conv1.weight
    ```

- xavier_init

  Initialize module parameters with values according to the method
  described in [Understanding the difficulty of training deep feedforward neural networks - Glorot, X. & Bengio, Y. (2010)](http://proceedings.mlr.press/v9/glorot10a/glorot10a.pdf)

    ```python
    >>> import torch.nn as nn
    >>> from mmcv.cnn import xavier_init
    >>> conv1 = nn.Conv2d(3, 3, 1)
    >>> # xavier_init(module, gain=1, bias=0, distribution='normal')
    >>> xavier_init(conv1, distribution='normal')
    ```

- normal_init

  Initialize module parameters with the values drawn from a normal distribution.

    ```python
    >>> import torch.nn as nn
    >>> from mmcv.cnn import normal_init
    >>> conv1 = nn.Conv2d(3, 3, 1)
    >>> # normal_init(module, mean=0, std=1, bias=0)
    >>> normal_init(conv1, std=0.01, bias=0)
    ```

- uniform_init

  Initialize module parameters with values drawn from a uniform distribution.

    ```python
    >>> import torch.nn as nn
    >>> from mmcv.cnn import uniform_init
    >>> conv1 = nn.Conv2d(3, 3, 1)
    >>> # uniform_init(module, a=0, b=1, bias=0)
    >>> uniform_init(conv1, a=0, b=1)
    ```

- kaiming_init

  Initialize module parameters with the values according to the method
  described in [Delving deep into rectifiers: Surpassing human-level
  performance on ImageNet classification - He, K. et al. (2015)](https://www.cv-foundation.org/openaccess/content_iccv_2015/papers/He_Delving_Deep_into_ICCV_2015_paper.pdf)

    ```python
    >>> import torch.nn as nn
    >>> from mmcv.cnn import kaiming_init
    >>> conv1 = nn.Conv2d(3, 3, 1)
    >>> # kaiming_init(module, a=0, mode='fan_out', nonlinearity='relu', bias=0, distribution='normal')
    >>> kaiming_init(conv1)
    ```

- caffe2_xavier_init

  The xavier initialization is implemented in caffe2, which corresponds to `kaiming_uniform_` in PyTorch.

    ```python
    >>> import torch.nn as nn
    >>> from mmcv.cnn import caffe2_xavier_init
    >>> conv1 = nn.Conv2d(3, 3, 1)
    >>> # caffe2_xavier_init(module, bias=0)
    >>> caffe2_xavier_init(conv1)
    ```

- bias_init_with_prob

  Initialize conv/fc bias value according to a given probability, as proposed in [Focal Loss for Dense Object Detection](https://arxiv.org/pdf/1708.02002.pdf).

    ```python
    >>> from mmcv.cnn import bias_init_with_prob
    >>> # bias_init_with_prob is proposed in Focal Loss
    >>> bias = bias_init_with_prob(0.01)
    >>> bias
    -4.59511985013459
    ```

#### Initializers and configs

On the basis of the initialization methods, we define the corresponding initialization classes and register them to `INITIALIZERS`, so we can
use the configuration to initialize the model.

We provide the following initialization classes.

- ConstantInit
- XavierInit
- NormalInit
- UniformInit
- KaimingInit
- Caffe2XavierInit
- PretrainedInit

Let us introduce the usage of `initialize` in detail.

1. Initialize model by `layer` key

    If we only define `layer`, only the layers listed in the `layer` key are initialized.

    NOTE: The value of the `layer` key is the class name of a PyTorch layer that has `weight` and `bias` attributes, so the `MultiheadAttention` layer is not supported.

- Define `layer` key for initializing module with same configuration.

  ```python
  import torch.nn as nn
  from mmcv.cnn import initialize

  class FooNet(nn.Module):
      def __init__(self):
          super().__init__()
          self.feat = nn.Conv1d(3, 1, 3)
          self.reg = nn.Conv2d(3, 3, 3)
          self.cls = nn.Linear(1, 2)

  model = FooNet()
  init_cfg = dict(type='Constant', layer=['Conv1d', 'Conv2d', 'Linear'], val=1)
  # initialize whole module with same configuration
  initialize(model, init_cfg)
  # model.feat.weight
  # Parameter containing:
  # tensor([[[1., 1., 1.],
  #          [1., 1., 1.],
  #          [1., 1., 1.]]], requires_grad=True)
  ```

- Define `layer` key for initializing layer with different configurations.

  ```python
  import torch.nn as nn
  from mmcv.cnn.utils import initialize

  class FooNet(nn.Module):
      def __init__(self):
          super().__init__()
          self.feat = nn.Conv1d(3, 1, 3)
          self.reg = nn.Conv2d(3, 3, 3)
          self.cls = nn.Linear(1,2)

  model = FooNet()
  init_cfg = [dict(type='Constant', layer='Conv1d', val=1),
              dict(type='Constant', layer='Conv2d', val=2),
              dict(type='Constant', layer='Linear', val=3)]
  # nn.Conv1d will be initialized with dict(type='Constant', val=1)
  # nn.Conv2d will be initialized with dict(type='Constant', val=2)
  # nn.Linear will be initialized with dict(type='Constant', val=3)
  initialize(model, init_cfg)
  # model.reg.weight
  # Parameter containing:
  # tensor([[[[2., 2., 2.],
  #           [2., 2., 2.],
  #           [2., 2., 2.]],
  #          ...,
  #          [[2., 2., 2.],
  #           [2., 2., 2.],
  #           [2., 2., 2.]]]], requires_grad=True)
  ```

2. Initialize model by `override` key

- When initializing some specific part with its attribute name, we can use the `override` key, and the values in `override` take precedence over the values in `init_cfg`.

    ```python
    import torch.nn as nn
    from mmcv.cnn import initialize

    class FooNet(nn.Module):
        def __init__(self):
            super().__init__()
            self.feat = nn.Conv1d(3, 1, 3)
            self.reg = nn.Conv2d(3, 3, 3)
            self.cls = nn.Sequential(nn.Conv1d(3, 1, 3), nn.Linear(1,2))

    # if we would like to initialize model's weights as 1 and bias as 2
    # but weight in `reg` as 3 and bias 4, we can use override key
    model = FooNet()
    init_cfg = dict(type='Constant', layer=['Conv1d','Conv2d'], val=1, bias=2,
                    override=dict(type='Constant', name='reg', val=3, bias=4))
    # self.feat and self.cls will be initialized with dict(type='Constant', val=1, bias=2)
    # The module called 'reg' will be initialized with dict(type='Constant', val=3, bias=4)
    initialize(model, init_cfg)
    # model.reg.weight
    # Parameter containing:
    # tensor([[[[3., 3., 3.],
    #           [3., 3., 3.],
    #           [3., 3., 3.]],
    #           ...,
    #           [[3., 3., 3.],
    #            [3., 3., 3.],
    #            [3., 3., 3.]]]], requires_grad=True)
    ```

- If `layer` is None in init_cfg, only sub-module with the name in override will be initialized, and type and other args in override can be omitted.

    ```python
    model = FooNet()
    init_cfg = dict(type='Constant', val=1, bias=2, override=dict(name='reg'))
    # self.feat and self.cls will be initialized by Pytorch
    # The module called 'reg' will be initialized with dict(type='Constant', val=1, bias=2)
    initialize(model, init_cfg)
    # model.reg.weight
    # Parameter containing:
    # tensor([[[[1., 1., 1.],
    #           [1., 1., 1.],
    #           [1., 1., 1.]],
    #           ...,
    #           [[1., 1., 1.],
    #            [1., 1., 1.],
    #            [1., 1., 1.]]]], requires_grad=True)
    ```

- If we don't define `layer` key or `override` key, it will not initialize anything.

- Invalid usage

   ```python
   # It is invalid for override not to have a name key
   init_cfg = dict(type='Constant', layer=['Conv1d','Conv2d'],
                   val=1, bias=2,
                   override=dict(type='Constant', val=3, bias=4))

   # It is also invalid that override has name and other args except type
   init_cfg = dict(type='Constant', layer=['Conv1d','Conv2d'],
                   val=1, bias=2,
                   override=dict(name='reg', val=3, bias=4))
   ```

3. Initialize model with the pretrained model

    ```python
    import torch.nn as nn
    import torchvision.models as models
    from mmcv.cnn import initialize

    # initialize model with pretrained model
    model = models.resnet50()
    # model.conv1.weight
    # Parameter containing:
    # tensor([[[[-6.7435e-03, -2.3531e-02, -9.0143e-03,  ..., -2.1245e-03,
    #            -1.8077e-03,  3.0338e-03],
    #           [-1.2603e-02, -2.7831e-02,  2.3187e-02,  ..., -1.5793e-02,
    #             1.1655e-02,  4.5889e-03],
    #           [-3.7916e-02,  1.2014e-02,  1.3815e-02,  ..., -4.2651e-03,
    #             1.7314e-02, -9.9998e-03],
    #           ...,

    init_cfg = dict(type='Pretrained',
                    checkpoint='torchvision://resnet50')
    initialize(model, init_cfg)
    # model.conv1.weight
    # Parameter containing:
    # tensor([[[[ 1.3335e-02,  1.4664e-02, -1.5351e-02,  ..., -4.0896e-02,
    #            -4.3034e-02, -7.0755e-02],
    #           [ 4.1205e-03,  5.8477e-03,  1.4948e-02,  ...,  2.2060e-03,
    #            -2.0912e-02, -3.8517e-02],
    #           [ 2.2331e-02,  2.3595e-02,  1.6120e-02,  ...,  1.0281e-01,
    #             6.2641e-02,  5.1977e-02],
    #           ...,

    # initialize weights of a sub-module with the specific part of a pretrained model by using 'prefix'
    model = models.resnet50()
    url = 'http://download.openmmlab.com/mmdetection/v2.0/retinanet/'\
          'retinanet_r50_fpn_1x_coco/'\
          'retinanet_r50_fpn_1x_coco_20200130-c2398f9e.pth'
    init_cfg = dict(type='Pretrained',
                    checkpoint=url, prefix='backbone.')
    initialize(model, init_cfg)
    ```

4. Initialize model inherited from BaseModule, Sequential, ModuleList, ModuleDict

    `BaseModule` is inherited from `torch.nn.Module`, and the only difference between them is that `BaseModule` implements `init_weights`.

    `Sequential` is inherited from `BaseModule` and `torch.nn.Sequential`.

    `ModuleList` is inherited from `BaseModule` and `torch.nn.ModuleList`.

    `ModuleDict` is inherited from `BaseModule` and `torch.nn.ModuleDict`.

    `````python
    import torch.nn as nn
    from mmcv.runner import BaseModule, Sequential, ModuleList, ModuleDict

    class FooConv1d(BaseModule):

        def __init__(self, init_cfg=None):
            super().__init__(init_cfg)
            self.conv1d = nn.Conv1d(4, 1, 4)

        def forward(self, x):
            return self.conv1d(x)

    class FooConv2d(BaseModule):

        def __init__(self, init_cfg=None):
            super().__init__(init_cfg)
            self.conv2d = nn.Conv2d(3, 1, 3)

        def forward(self, x):
            return self.conv2d(x)

    # BaseModule
    init_cfg = dict(type='Constant', layer='Conv1d', val=0., bias=1.)
    model = FooConv1d(init_cfg)
    model.init_weights()
    # model.conv1d.weight
    # Parameter containing:
    # tensor([[[0., 0., 0., 0.],
    #        [0., 0., 0., 0.],
    #        [0., 0., 0., 0.],
    #        [0., 0., 0., 0.]]], requires_grad=True)

    # Sequential
    init_cfg1 = dict(type='Constant', layer='Conv1d', val=0., bias=1.)
    init_cfg2 = dict(type='Constant', layer='Conv2d', val=2., bias=3.)
    model1 = FooConv1d(init_cfg1)
    model2 = FooConv2d(init_cfg2)
    seq_model = Sequential(model1, model2)
    seq_model.init_weights()
    # seq_model[0].conv1d.weight
    # Parameter containing:
    # tensor([[[0., 0., 0., 0.],
    #         [0., 0., 0., 0.],
    #         [0., 0., 0., 0.],
    #         [0., 0., 0., 0.]]], requires_grad=True)
    # seq_model[1].conv2d.weight
    # Parameter containing:
    # tensor([[[[2., 2., 2.],
    #           [2., 2., 2.],
    #           [2., 2., 2.]],
    #         ...,
    #          [[2., 2., 2.],
    #           [2., 2., 2.],
    #           [2., 2., 2.]]]], requires_grad=True)

    # inner init_cfg has higher priority
    model1 = FooConv1d(init_cfg1)
    model2 = FooConv2d(init_cfg2)
    init_cfg = dict(type='Constant', layer=['Conv1d', 'Conv2d'], val=4., bias=5.)
    seq_model = Sequential(model1, model2, init_cfg=init_cfg)
    seq_model.init_weights()
    # seq_model[0].conv1d.weight
    # Parameter containing:
    # tensor([[[0., 0., 0., 0.],
    #         [0., 0., 0., 0.],
    #         [0., 0., 0., 0.],
    #         [0., 0., 0., 0.]]], requires_grad=True)
    # seq_model[1].conv2d.weight
    # Parameter containing:
    # tensor([[[[2., 2., 2.],
    #           [2., 2., 2.],
    #           [2., 2., 2.]],
    #         ...,
    #          [[2., 2., 2.],
    #           [2., 2., 2.],
    #           [2., 2., 2.]]]], requires_grad=True)

    # ModuleList
    model1 = FooConv1d(init_cfg1)
    model2 = FooConv2d(init_cfg2)
    modellist = ModuleList([model1, model2])
    modellist.init_weights()
    # modellist[0].conv1d.weight
    # Parameter containing:
    # tensor([[[0., 0., 0., 0.],
    #         [0., 0., 0., 0.],
    #         [0., 0., 0., 0.],
    #         [0., 0., 0., 0.]]], requires_grad=True)
    # modellist[1].conv2d.weight
    # Parameter containing:
    # tensor([[[[2., 2., 2.],
    #           [2., 2., 2.],
    #           [2., 2., 2.]],
    #         ...,
    #          [[2., 2., 2.],
    #           [2., 2., 2.],
    #           [2., 2., 2.]]]], requires_grad=True)

    # inner init_cfg has higher priority
    model1 = FooConv1d(init_cfg1)
    model2 = FooConv2d(init_cfg2)
    init_cfg = dict(type='Constant', layer=['Conv1d', 'Conv2d'], val=4., bias=5.)
    modellist = ModuleList([model1, model2], init_cfg=init_cfg)
    modellist.init_weights()
    # modellist[0].conv1d.weight
    # Parameter containing:
    # tensor([[[0., 0., 0., 0.],
    #         [0., 0., 0., 0.],
    #         [0., 0., 0., 0.],
    #         [0., 0., 0., 0.]]], requires_grad=True)
    # modellist[1].conv2d.weight
    # Parameter containing:
    # tensor([[[[2., 2., 2.],
    #           [2., 2., 2.],
    #           [2., 2., 2.]],
    #         ...,
    #          [[2., 2., 2.],
    #           [2., 2., 2.],
    #           [2., 2., 2.]]]], requires_grad=True)

    # ModuleDict
    model1 = FooConv1d(init_cfg1)
    model2 = FooConv2d(init_cfg2)
    modeldict = ModuleDict(dict(model1=model1, model2=model2))
    modeldict.init_weights()
    # modeldict['model1'].conv1d.weight
    # Parameter containing:
    # tensor([[[0., 0., 0., 0.],
    #         [0., 0., 0., 0.],
    #         [0., 0., 0., 0.],
    #         [0., 0., 0., 0.]]], requires_grad=True)
    # modeldict['model2'].conv2d.weight
    # Parameter containing:
    # tensor([[[[2., 2., 2.],
    #           [2., 2., 2.],
    #           [2., 2., 2.]],
    #         ...,
    #          [[2., 2., 2.],
    #           [2., 2., 2.],
    #           [2., 2., 2.]]]], requires_grad=True)

    # inner init_cfg has higher priority
    model1 = FooConv1d(init_cfg1)
    model2 = FooConv2d(init_cfg2)
    init_cfg = dict(type='Constant', layer=['Conv1d', 'Conv2d'], val=4., bias=5.)
    modeldict = ModuleDict(dict(model1=model1, model2=model2), init_cfg=init_cfg)
    modeldict.init_weights()
    # modeldict['model1'].conv1d.weight
    # Parameter containing:
    # tensor([[[0., 0., 0., 0.],
    #         [0., 0., 0., 0.],
    #         [0., 0., 0., 0.],
    #         [0., 0., 0., 0.]]], requires_grad=True)
    # modeldict['model2'].conv2d.weight
    # Parameter containing:
    # tensor([[[[2., 2., 2.],
    #           [2., 2., 2.],
    #           [2., 2., 2.]],
    #         ...,
    #          [[2., 2., 2.],
    #           [2., 2., 2.],
    #           [2., 2., 2.]]]], requires_grad=True)
    `````

### Model Zoo

Besides torchvision pre-trained models, we also provide pre-trained models of following CNN:

- VGG Caffe
- ResNet Caffe
- ResNeXt
- ResNet with Group Normalization
- ResNet with Group Normalization and Weight Standardization
- HRNetV2
- Res2Net
- RegNet

#### Model URLs in JSON

The model zoo links in MMCV are managed by JSON files.
The JSON file consists of key-value pairs, each mapping a model name to its URL or path.
An example json file could be like:

```json
{
    "model_a": "https://example.com/models/model_a_9e5bac.pth",
    "model_b": "pretrain/model_b_ab3ef2c.pth"
}
```

The default links of the pre-trained models hosted on OpenMMLab AWS could be found [here](https://github.com/open-mmlab/mmcv/blob/master/mmcv/model_zoo/open_mmlab.json).

You may override default links by putting `open-mmlab.json` under `MMCV_HOME`. If `MMCV_HOME` is not found in the environment, `~/.cache/mmcv` will be used by default. You may `export MMCV_HOME=/your/path` to use your own path.

The external json files will be merged into the default one. If the same key is present in both the external json and the default json, the external one will be used.

#### Load Checkpoint

The following types are supported for `filename` argument of `mmcv.load_checkpoint()`.

- filepath: The filepath of the checkpoint.
- `http://xxx` and `https://xxx`: The link to download the checkpoint. The `SHA256` postfix should be contained in the filename.
- `torchvision://xxx`: The model links in `torchvision.models`. Please refer to [torchvision](https://pytorch.org/docs/stable/torchvision/models.html) for details.
- `open-mmlab://xxx`: The model links or filepath provided in default and additional json files.


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/en/understand_mmcv/config.md
================================================
## Config

`Config` class is used for manipulating config and config files. It supports
loading configs from multiple file formats including **python**, **json** and **yaml**.
It provides dict-like apis to get and set values.

Here is an example of the config file `test.py`.

```python
a = 1
b = dict(b1=[0, 1, 2], b2=None)
c = (1, 2)
d = 'string'
```

To load and use configs

```python
>>> cfg = Config.fromfile('test.py')
>>> print(cfg)
>>> dict(a=1,
...      b=dict(b1=[0, 1, 2], b2=None),
...      c=(1, 2),
...      d='string')
```

For all format configs, some predefined variables are supported. It will replace the variable in `{{ var }}` with its real value.

Currently, it supports four predefined variables:

`{{ fileDirname }}` - the current opened file's dirname, e.g. /home/your-username/your-project/folder

`{{ fileBasename }}` - the current opened file's basename, e.g. file.ext

`{{ fileBasenameNoExtension }}` - the current opened file's basename with no file extension, e.g. file

`{{ fileExtname }}` - the current opened file's extension, e.g. .ext

These variable names are borrowed from [VS Code](https://code.visualstudio.com/docs/editor/variables-reference).

Here is an example of a config with predefined variables.

`config_a.py`

```python
a = 1
b = './work_dir/{{ fileBasenameNoExtension }}'
c = '{{ fileExtname }}'
```

```python
>>> cfg = Config.fromfile('./config_a.py')
>>> print(cfg)
>>> dict(a=1,
...      b='./work_dir/config_a',
...      c='.py')
```

For all format configs, inheritance is supported. To reuse fields in other config files,
specify `_base_='./config_a.py'` or a list of configs `_base_=['./config_a.py', './config_b.py']`.
Here are 4 examples of config inheritance.

`config_a.py`

```python
a = 1
b = dict(b1=[0, 1, 2], b2=None)
```

### Inherit from base config without overlapped keys

`config_b.py`

```python
_base_ = './config_a.py'
c = (1, 2)
d = 'string'
```

```python
>>> cfg = Config.fromfile('./config_b.py')
>>> print(cfg)
>>> dict(a=1,
...      b=dict(b1=[0, 1, 2], b2=None),
...      c=(1, 2),
...      d='string')
```

New fields in `config_b.py` are combined with old fields in `config_a.py`

### Inherit from base config with overlapped keys

`config_c.py`

```python
_base_ = './config_a.py'
b = dict(b2=1)
c = (1, 2)
```

```python
>>> cfg = Config.fromfile('./config_c.py')
>>> print(cfg)
>>> dict(a=1,
...      b=dict(b1=[0, 1, 2], b2=1),
...      c=(1, 2))
```

`b.b2=None` in `config_a` is replaced with `b.b2=1` in `config_c.py`.

### Inherit from base config with ignored fields

`config_d.py`

```python
_base_ = './config_a.py'
b = dict(_delete_=True, b2=None, b3=0.1)
c = (1, 2)
```

```python
>>> cfg = Config.fromfile('./config_d.py')
>>> print(cfg)
>>> dict(a=1,
...      b=dict(b2=None, b3=0.1),
...      c=(1, 2))
```

You may also set `_delete_=True` to ignore some fields in base configs. All old keys `b1, b2` in `b` are replaced with new keys `b2, b3`.

### Inherit from multiple base configs (the base configs should not contain the same keys)

`config_e.py`

```python
c = (1, 2)
d = 'string'
```

`config_f.py`

```python
_base_ = ['./config_a.py', './config_e.py']
```

```python
>>> cfg = Config.fromfile('./config_f.py')
>>> print(cfg)
>>> dict(a=1,
...      b=dict(b1=[0, 1, 2], b2=None),
...      c=(1, 2),
...      d='string')
```

### Reference variables from base

You can reference variables defined in base using the following grammar.

`base.py`

```python
item1 = 'a'
item2 = dict(item3 = 'b')
```

`config_g.py`

```python
_base_ = ['./base.py']
item = dict(a = {{ _base_.item1 }}, b = {{ _base_.item2.item3 }})
```

```python
>>> cfg = Config.fromfile('./config_g.py')
>>> print(cfg.pretty_text)
item1 = 'a'
item2 = dict(item3='b')
item = dict(a='a', b='b')
```

### Add deprecation information in configs

Deprecation information can be added in a config file, which will trigger a `UserWarning` when this config file is loaded.

`deprecated_cfg.py`

```python
_base_ = 'expected_cfg.py'

_deprecation_ = dict(
    expected = 'expected_cfg.py',  # optional to show expected config path in the warning information
    reference = 'url to related PR'  # optional to show reference link in the warning information
)
```

```python
>>> cfg = Config.fromfile('./deprecated_cfg.py')

UserWarning: The config file deprecated_cfg.py will be deprecated in the future. Please use expected_cfg.py instead. More information can be found at https://github.com/open-mmlab/mmcv/pull/1275
```


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/en/understand_mmcv/data_process.md
================================================
## Data Process

### Image

This module provides some image processing methods, which requires `opencv` to be installed.

#### Read/Write/Show

To read or write image files, use `imread` or `imwrite`.

```python
import mmcv

img = mmcv.imread('test.jpg')
img = mmcv.imread('test.jpg', flag='grayscale')
img_ = mmcv.imread(img)  # nothing will happen, img_ = img
mmcv.imwrite(img, 'out.jpg')
```

To read images from bytes

```python
with open('test.jpg', 'rb') as f:
    data = f.read()
img = mmcv.imfrombytes(data)
```

To show an image file or a loaded image

```python
mmcv.imshow('tests/data/color.jpg')
# this is equivalent to

for i in range(10):
    img = np.random.randint(256, size=(100, 100, 3), dtype=np.uint8)
    mmcv.imshow(img, win_name='test image', wait_time=200)
```

#### Color space conversion

Supported conversion methods:

- bgr2gray
- gray2bgr
- bgr2rgb
- rgb2bgr
- bgr2hsv
- hsv2bgr

```python
img = mmcv.imread('tests/data/color.jpg')
img1 = mmcv.bgr2rgb(img)
img2 = mmcv.rgb2gray(img1)
img3 = mmcv.bgr2hsv(img)
```

#### Resize

There are three resize methods. All `imresize_*` methods have an argument `return_scale`,
if this argument is `False`, then the return value is merely the resized image; otherwise
it is a tuple `(resized_img, scale)`.

```python
# resize to a given size
mmcv.imresize(img, (1000, 600), return_scale=True)

# resize to the same size of another image
mmcv.imresize_like(img, dst_img, return_scale=False)

# resize by a ratio
mmcv.imrescale(img, 0.5)

# resize so that the max edge no longer than 1000, short edge no longer than 800
# without changing the aspect ratio
mmcv.imrescale(img, (1000, 800))
```

#### Rotate

To rotate an image by some angle, use `imrotate`. The center can be specified,
which is the center of original image by default. There are two modes of rotating,
one is to keep the image size unchanged so that some parts of the image will be
cropped after rotating, the other is to extend the image size to fit the rotated
image.

```python
img = mmcv.imread('tests/data/color.jpg')

# rotate the image clockwise by 30 degrees.
img_ = mmcv.imrotate(img, 30)

# rotate the image counterclockwise by 90 degrees.
img_ = mmcv.imrotate(img, -90)

# rotate the image clockwise by 30 degrees, and rescale it by 1.5x at the same time.
img_ = mmcv.imrotate(img, 30, scale=1.5)

# rotate the image clockwise by 30 degrees, with (100, 100) as the center.
img_ = mmcv.imrotate(img, 30, center=(100, 100))

# rotate the image clockwise by 30 degrees, and extend the image size.
img_ = mmcv.imrotate(img, 30, auto_bound=True)
```

#### Flip

To flip an image, use `imflip`.

```python
img = mmcv.imread('tests/data/color.jpg')

# flip the image horizontally
mmcv.imflip(img)

# flip the image vertically
mmcv.imflip(img, direction='vertical')
```

#### Crop

`imcrop` can crop the image with one or some regions, represented as (x1, y1, x2, y2).

```python
import mmcv
import numpy as np

img = mmcv.imread('tests/data/color.jpg')

# crop the region (10, 10, 100, 120)
bboxes = np.array([10, 10, 100, 120])
patch = mmcv.imcrop(img, bboxes)

# crop two regions (10, 10, 100, 120) and (0, 0, 50, 50)
bboxes = np.array([[10, 10, 100, 120], [0, 0, 50, 50]])
patches = mmcv.imcrop(img, bboxes)

# crop two regions, and rescale the patches by 1.2x
patches = mmcv.imcrop(img, bboxes, scale_ratio=1.2)
```

#### Padding

There are two methods `impad` and `impad_to_multiple` to pad an image to the
specific size with given values.

```python
img = mmcv.imread('tests/data/color.jpg')

# pad the image to (1000, 1200) with all zeros
img_ = mmcv.impad(img, shape=(1000, 1200), pad_val=0)

# pad the image to (1000, 1200) with different values for three channels.
img_ = mmcv.impad(img, shape=(1000, 1200), pad_val=[100, 50, 200])

# pad the image on left, right, top, bottom borders with all zeros
img_ = mmcv.impad(img, padding=(10, 20, 30, 40), pad_val=0)

# pad the image on left, right, top, bottom borders with different values
# for three channels.
img_ = mmcv.impad(img, padding=(10, 20, 30, 40), pad_val=[100, 50, 200])

# pad an image so that each edge is a multiple of some value.
img_ = mmcv.impad_to_multiple(img, 32)
```

### Video

This module provides the following functionalities.

- A `VideoReader` class with friendly apis to read and convert videos.
- Some methods for editing (cut, concat, resize) videos.
- Optical flow read/write/warp.

#### VideoReader

The `VideoReader` class provides sequence-like APIs to access video frames.
It will internally cache the frames which have been visited.

```python
video = mmcv.VideoReader('test.mp4')

# obtain basic information
print(len(video))
print(video.width, video.height, video.resolution, video.fps)

# iterate over all frames
for frame in video:
    print(frame.shape)

# read the next frame
img = video.read()

# read a frame by index
img = video[100]

# read some frames
img = video[5:10]
```

To convert a video to images or generate a video from an image directory.

```python
# split a video into frames and save to a folder
video = mmcv.VideoReader('test.mp4')
video.cvt2frames('out_dir')

# generate video from frames
mmcv.frames2video('out_dir', 'test.avi')
```

#### Editing utils

There are also some methods for editing videos, which wrap the commands of ffmpeg.

```python
# cut a video clip
mmcv.cut_video('test.mp4', 'clip1.mp4', start=3, end=10, vcodec='h264')

# join a list of video clips
mmcv.concat_video(['clip1.mp4', 'clip2.mp4'], 'joined.mp4', log_level='quiet')

# resize a video with the specified size
mmcv.resize_video('test.mp4', 'resized1.mp4', (360, 240))

# resize a video with a scaling ratio of 2
mmcv.resize_video('test.mp4', 'resized2.mp4', ratio=2)
```

#### Optical flow

`mmcv` provides the following methods to operate on optical flows.

- IO
- Visualization
- Flow warping

We provide two options to dump optical flow files: uncompressed and compressed.
The uncompressed way just dumps the floating numbers to a binary file. It is
lossless but the dumped file has a larger size.
The compressed way quantizes the optical flow to 0-255 and dumps it as a
jpeg image. The flow of x-dim and y-dim will be concatenated into a single image.

1. IO

```python
flow = np.random.rand(800, 600, 2).astype(np.float32)
# dump the flow to a flo file (~3.7M)
mmcv.flowwrite(flow, 'uncompressed.flo')
# dump the flow to a jpeg file (~230K)
# the shape of the dumped image is (800, 1200)
mmcv.flowwrite(flow, 'compressed.jpg', quantize=True, concat_axis=1)

# read the flow file, the shape of loaded flow is (800, 600, 2) for both ways
flow = mmcv.flowread('uncompressed.flo')
flow = mmcv.flowread('compressed.jpg', quantize=True, concat_axis=1)
```

2. Visualization

It is possible to visualize optical flows with `mmcv.flowshow()`.

```python
mmcv.flowshow(flow)
```

![progress](../_static/flow_visualization.png)

3. Flow warping

```python
img1 = mmcv.imread('img1.jpg')
flow = mmcv.flowread('flow.flo')
warpped_img2 = mmcv.flow_warp(img1, flow)
```

img1 (left) and img2 (right)

![raw images](../_static/flow_raw_images.png)

optical flow (img2 -> img1)

![optical flow](../_static/flow_img2toimg1.png)

warped image and difference with ground truth

![warpped image](../_static/flow_warp_diff.png)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/en/understand_mmcv/io.md
================================================
## File IO

This module provides two universal APIs to load and dump files of different formats.

```{note}
Since v1.3.16, the IO modules support loading (dumping) data from (to) different backends, respectively. More details are in PR [#1330](https://github.com/open-mmlab/mmcv/pull/1330).
```

### Load and dump data

`mmcv` provides a universal api for loading and dumping data, currently
supported formats are json, yaml and pickle.

#### Load from disk or dump to disk

```python
import mmcv

# load data from a file
data = mmcv.load('test.json')
data = mmcv.load('test.yaml')
data = mmcv.load('test.pkl')
# load data from a file-like object
with open('test.json', 'r') as f:
    data = mmcv.load(f, file_format='json')

# dump data to a string
json_str = mmcv.dump(data, file_format='json')

# dump data to a file with a filename (infer format from file extension)
mmcv.dump(data, 'out.pkl')

# dump data to a file with a file-like object
with open('test.yaml', 'w') as f:
    data = mmcv.dump(data, f, file_format='yaml')
```

#### Load from other backends or dump to other backends

```python
import mmcv

# load data from a file
data = mmcv.load('s3://bucket-name/test.json')
data = mmcv.load('s3://bucket-name/test.yaml')
data = mmcv.load('s3://bucket-name/test.pkl')

# dump data to a file with a filename (infer format from file extension)
mmcv.dump(data, 's3://bucket-name/out.pkl')
```

It is also very convenient to extend the api to support more file formats.
All you need to do is to write a file handler inherited from `BaseFileHandler`
and register it with one or several file formats.

You need to implement at least 3 methods.

```python
import mmcv

# To register multiple file formats, a list can be used as the argument.
# @mmcv.register_handler(['txt', 'log'])
@mmcv.register_handler('txt')
class TxtHandler1(mmcv.BaseFileHandler):

    def load_from_fileobj(self, file):
        return file.read()

    def dump_to_fileobj(self, obj, file):
        file.write(str(obj))

    def dump_to_str(self, obj, **kwargs):
        return str(obj)
```

Here is an example of `PickleHandler`.

```python
import pickle

class PickleHandler(mmcv.BaseFileHandler):

    def load_from_fileobj(self, file, **kwargs):
        return pickle.load(file, **kwargs)

    def load_from_path(self, filepath, **kwargs):
        return super(PickleHandler, self).load_from_path(
            filepath, mode='rb', **kwargs)

    def dump_to_str(self, obj, **kwargs):
        kwargs.setdefault('protocol', 2)
        return pickle.dumps(obj, **kwargs)

    def dump_to_fileobj(self, obj, file, **kwargs):
        kwargs.setdefault('protocol', 2)
        pickle.dump(obj, file, **kwargs)

    def dump_to_path(self, obj, filepath, **kwargs):
        super(PickleHandler, self).dump_to_path(
            obj, filepath, mode='wb', **kwargs)
```

### Load a text file as a list or dict

For example `a.txt` is a text file with 5 lines.

```
a
b
c
d
e
```

#### Load from disk

Use `list_from_file` to load the list from a.txt.

```python
>>> mmcv.list_from_file('a.txt')
['a', 'b', 'c', 'd', 'e']
>>> mmcv.list_from_file('a.txt', offset=2)
['c', 'd', 'e']
>>> mmcv.list_from_file('a.txt', max_num=2)
['a', 'b']
>>> mmcv.list_from_file('a.txt', prefix='/mnt/')
['/mnt/a', '/mnt/b', '/mnt/c', '/mnt/d', '/mnt/e']
```

For example `b.txt` is a text file with 3 lines.

```
1 cat
2 dog cow
3 panda
```

Then use `dict_from_file` to load the dict from `b.txt`.

```python
>>> mmcv.dict_from_file('b.txt')
{'1': 'cat', '2': ['dog', 'cow'], '3': 'panda'}
>>> mmcv.dict_from_file('b.txt', key_type=int)
{1: 'cat', 2: ['dog', 'cow'], 3: 'panda'}
```

#### Load from other backends

Use `list_from_file` to load the list from `s3://bucket-name/a.txt`.

```python
>>> mmcv.list_from_file('s3://bucket-name/a.txt')
['a', 'b', 'c', 'd', 'e']
>>> mmcv.list_from_file('s3://bucket-name/a.txt', offset=2)
['c', 'd', 'e']
>>> mmcv.list_from_file('s3://bucket-name/a.txt', max_num=2)
['a', 'b']
>>> mmcv.list_from_file('s3://bucket-name/a.txt', prefix='/mnt/')
['/mnt/a', '/mnt/b', '/mnt/c', '/mnt/d', '/mnt/e']
```

Use `dict_from_file` to load the dict from `s3://bucket-name/b.txt`.

```python
>>> mmcv.dict_from_file('s3://bucket-name/b.txt')
{'1': 'cat', '2': ['dog', 'cow'], '3': 'panda'}
>>> mmcv.dict_from_file('s3://bucket-name/b.txt', key_type=int)
{1: 'cat', 2: ['dog', 'cow'], 3: 'panda'}
```

### Load and dump checkpoints

#### Load checkpoints from disk or save to disk

We can read the checkpoints from disk or save to disk in the following way.

```python
import torch

filepath1 = '/path/of/your/checkpoint1.pth'
filepath2 = '/path/of/your/checkpoint2.pth'
# read from filepath1
checkpoint = torch.load(filepath1)
# save to filepath2
torch.save(checkpoint, filepath2)
```

MMCV provides many backends. `HardDiskBackend` is one of them and we can use it to read or save checkpoints.

```python
import io
from mmcv.fileio.file_client import HardDiskBackend

disk_backend = HardDiskBackend()
with io.BytesIO(disk_backend.get(filepath1)) as buffer:
    checkpoint = torch.load(buffer)
with io.BytesIO() as buffer:
    torch.save(checkpoint, buffer)
    disk_backend.put(buffer.getvalue(), filepath2)
```

If we want to implement an interface which automatically selects the corresponding
backend based on the file path, we can use the `FileClient`.
For example, we want to implement two methods for reading checkpoints as well as saving checkpoints,
which need to support different types of file paths, either disk paths, network paths or other paths.

```python
from mmcv.fileio.file_client import FileClient

def load_checkpoint(path):
    file_client = FileClient.infer_client(uri=path)
    with io.BytesIO(file_client.get(path)) as buffer:
        checkpoint = torch.load(buffer)
    return checkpoint

def save_checkpoint(checkpoint, path):
    with io.BytesIO() as buffer:
        torch.save(checkpoint, buffer)
        file_client.put(buffer.getvalue(), path)

file_client = FileClient.infer_client(uri=filepath1)
checkpoint = load_checkpoint(filepath1)
save_checkpoint(checkpoint, filepath2)
```

#### Load checkpoints from the Internet

```{note}
Currently, it only supports reading checkpoints from the Internet, and does not support saving checkpoints to the Internet.
```

```python
import io
import torch
from mmcv.fileio.file_client import HTTPBackend, FileClient

filepath = 'http://path/of/your/checkpoint.pth'
checkpoint = torch.utils.model_zoo.load_url(filepath)

http_backend = HTTPBackend()
with io.BytesIO(http_backend.get(filepath)) as buffer:
    checkpoint = torch.load(buffer)

file_client = FileClient.infer_client(uri=filepath)
with io.BytesIO(file_client.get(filepath)) as buffer:
    checkpoint = torch.load(buffer)
```


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/en/understand_mmcv/ops.md
================================================
## CUDA ops

We implement common CUDA ops used in detection, segmentation, etc.

- ActiveRotatedFilter
- AssignScoreWithK
- BallQuery
- BBoxOverlaps
- CARAFE
- CrissCrossAttention
- ContextBlock
- ConvexIoU
- CornerPool
- Deformable Convolution v1/v2
- Deformable RoIPool
- DynamicScatter
- GatherPoints
- FurthestPointSample
- FurthestPointSampleWithDist
- GeneralizedAttention
- GroupPoints
- KNN
- MaskedConv
- MinAreaPolygon
- NMS
- PointsInPolygons
- PSAMask
- RiRoIAlignRotated
- RotatedFeatureAlign
- RoIPointPool3d
- RoIPool
- RoIAlign
- RoIAwarePool3d
- SimpleRoIAlign
- SigmoidFocalLoss
- SoftmaxFocalLoss
- SoftNMS
- Synchronized BatchNorm
- Voxelization
- ThreeInterpolate
- ThreeNN
- Weight standardization
- Correlation


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/en/understand_mmcv/registry.md
================================================
## Registry

MMCV implements [registry](https://github.com/open-mmlab/mmcv/blob/master/mmcv/utils/registry.py) to manage different modules that share similar functionalities, e.g., backbones, heads, and necks, in detectors.
Most projects in OpenMMLab use registry to manage modules of datasets and models, such as [MMDetection](https://github.com/open-mmlab/mmdetection), [MMDetection3D](https://github.com/open-mmlab/mmdetection3d), [MMClassification](https://github.com/open-mmlab/mmclassification), [MMEditing](https://github.com/open-mmlab/mmediting), etc.

### What is registry

In MMCV, registry can be regarded as a mapping that maps a string to a class.
These classes contained by a single registry usually have similar APIs but implement different algorithms or support different datasets.
With the registry, users can find and instantiate the class through its corresponding string, and use the instantiated module as they want.
One typical example is the config systems in most OpenMMLab projects, which use the registry to create hooks, runners, models, and datasets, through configs.
The API reference could be found [here](https://mmcv.readthedocs.io/en/latest/api.html?highlight=registry#mmcv.utils.Registry).

To manage your modules in the codebase by `Registry`, there are three steps as below.

1. Create a build method (optional, in most cases you can just use the default one).
2. Create a registry.
3. Use this registry to manage the modules.

`build_func` argument of `Registry` is to customize how to instantiate the class instance, the default one is `build_from_cfg` implemented [here](https://mmcv.readthedocs.io/en/latest/api.html?highlight=registry#mmcv.utils.build_from_cfg).

### A Simple Example

Here we show a simple example of using registry to manage modules in a package.
You can find more practical examples in OpenMMLab projects.

Assuming we want to implement a series of Dataset Converters for converting different formats of data to the expected data format.
We create a directory as a package named `converters`.
In the package, we first create a file to implement builders, named `converters/builder.py`, as below

```python
from mmcv.utils import Registry
# create a registry for converters
CONVERTERS = Registry('converter')
```

Then we can implement different converters in the package. For example, implement `Converter1` in `converters/converter1.py`

```python

from .builder import CONVERTERS

# use the registry to manage the module
@CONVERTERS.register_module()
class Converter1(object):
    def __init__(self, a, b):
        self.a = a
        self.b = b
```
The key step to use registry for managing the modules is to register the implemented module into the registry `CONVERTERS` through
`@CONVERTERS.register_module()` when you are creating the module. In this way, a mapping between a string and the class is built and maintained by `CONVERTERS` as below

```python
'Converter1' -> <class 'Converter1'>
```
```{note}
The registry mechanism will be triggered only when the file where the module is located is imported.
So you need to import that file somewhere. More details can be found at https://github.com/open-mmlab/mmdetection/issues/5974.
```

If the module is successfully registered, you can use this converter through configs as

```python
converter_cfg = dict(type='Converter1', a=a_value, b=b_value)
converter = CONVERTERS.build(converter_cfg)
```

### Customize Build Function

Suppose we would like to customize how `converters` are built, we could implement a customized `build_func` and pass it into the registry.

```python
from mmcv.utils import Registry

# create a build function
def build_converter(cfg, registry, *args, **kwargs):
    cfg_ = cfg.copy()
    converter_type = cfg_.pop('type')
    if converter_type not in registry:
        raise KeyError(f'Unrecognized converter type {converter_type}')
    else:
        converter_cls = registry.get(converter_type)

    converter = converter_cls(*args, **kwargs, **cfg_)
    return converter

# create a registry for converters and pass ``build_converter`` function
CONVERTERS = Registry('converter', build_func=build_converter)
```

```{note}
In this example, we demonstrate how to use the `build_func` argument to customize the way to build a class instance.
The functionality is similar to the default `build_from_cfg`. In most cases, default one would be sufficient.
`build_model_from_cfg` is also implemented to build PyTorch modules in `nn.Sequential`; you may use it directly instead of implementing it yourself.
```

### Hierarchy Registry

You could also build modules from more than one OpenMMLab framework, e.g. you could use all backbones in [MMClassification](https://github.com/open-mmlab/mmclassification) for object detectors in [MMDetection](https://github.com/open-mmlab/mmdetection); you may also combine an object detection model in [MMDetection](https://github.com/open-mmlab/mmdetection) and a semantic segmentation model in [MMSegmentation](https://github.com/open-mmlab/mmsegmentation).

All `MODELS` registries of downstream codebases are children registries of MMCV's `MODELS` registry.
Basically, there are two ways to build a module from child or sibling registries.

1. Build from children registries.

   For example:

   In MMDetection we define:

   ```python
   from mmcv.utils import Registry
   from mmcv.cnn import MODELS as MMCV_MODELS
   MODELS = Registry('model', parent=MMCV_MODELS)

   @MODELS.register_module()
   class NetA(nn.Module):
       def forward(self, x):
           return x
   ```

   In MMClassification we define:

   ```python
   from mmcv.utils import Registry
   from mmcv.cnn import MODELS as MMCV_MODELS
   MODELS = Registry('model', parent=MMCV_MODELS)

   @MODELS.register_module()
   class NetB(nn.Module):
       def forward(self, x):
           return x + 1
   ```

   We could build two nets in either MMDetection or MMClassification by:

   ```python
   from mmdet.models import MODELS
   net_a = MODELS.build(cfg=dict(type='NetA'))
   net_b = MODELS.build(cfg=dict(type='mmcls.NetB'))
   ```

   or

   ```python
   from mmcls.models import MODELS
   net_a = MODELS.build(cfg=dict(type='mmdet.NetA'))
   net_b = MODELS.build(cfg=dict(type='NetB'))
   ```

2. Build from parent registry.

   The shared `MODELS` registry in MMCV is the parent registry for all downstream codebases (root registry):

   ```python
   from mmcv.cnn import MODELS as MMCV_MODELS
   net_a = MMCV_MODELS.build(cfg=dict(type='mmdet.NetA'))
   net_b = MMCV_MODELS.build(cfg=dict(type='mmcls.NetB'))
   ```


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/en/understand_mmcv/runner.md
================================================
## Runner

The runner class is designed to manage the training. It eases the training process with less code demanded from users while staying flexible and configurable. The main features are as listed:

- Support `EpochBasedRunner` and `IterBasedRunner` for different scenarios. Implementing customized runners is also allowed to meet customized needs.
- Support customized workflow to allow switching between different modes while training. Currently, supported modes are train and val.
- Enable extensibility through various hooks, including hooks defined in MMCV and customized ones.

### EpochBasedRunner

As its name indicates, workflow in `EpochBasedRunner` should be set based on epochs. For example, [('train', 2), ('val', 1)] means running 2 epochs for training and 1 epoch for validation, iteratively. And each epoch may contain multiple iterations. Currently, MMDetection uses `EpochBasedRunner` by default.

Let's take a look at its core logic:

```python
# the condition to stop training
while curr_epoch < max_epochs:
    # traverse the workflow.
    # e.g. workflow = [('train', 2), ('val', 1)]
    for i, flow in enumerate(workflow):
        # mode(e.g. train) determines which function to run
        mode, epochs = flow
        # epoch_runner will be either self.train() or self.val()
        epoch_runner = getattr(self, mode)
        # execute the corresponding function
        for _ in range(epochs):
            epoch_runner(data_loaders[i], **kwargs)
```

Currently, we support 2 modes: train and val. Let's take a train function for example and have a look at its core logic:

```python
# Currently, epoch_runner could be either train or val
def train(self, data_loader, **kwargs):
    # traverse the dataset and get batch data for 1 epoch
    for i, data_batch in enumerate(data_loader):
        # it will execute all before_train_iter function in the hooks registered. You may want to watch out for the order.
        self.call_hook('before_train_iter')
        # set train_mode as False in val function
        self.run_iter(data_batch, train_mode=True, **kwargs)
        self.call_hook('after_train_iter')
    self.call_hook('after_train_epoch')
```

### IterBasedRunner

Different from `EpochBasedRunner`, workflow in `IterBasedRunner` should be set based on iterations. For example, [('train', 2), ('val', 1)] means running 2 iters for training and 1 iter for validation, iteratively. Currently, MMSegmentation uses `IterBasedRunner` by default.

Let's take a look at its core logic:

```python
# Although we set workflow by iters here, we might also need info on the epochs in some use cases. That can be provided by IterLoader.
iter_loaders = [IterLoader(x) for x in data_loaders]
# the condition to stop training
while curr_iter < max_iters:
    # traverse the workflow.
    # e.g. workflow = [('train', 2), ('val', 1)]
    for i, flow in enumerate(workflow):
        # mode(e.g. train) determines which function to run
        mode, iters = flow
        # iter_runner will be either self.train() or self.val()
        iter_runner = getattr(self, mode)
        # execute the corresponding function
        for _ in range(iters):
            iter_runner(iter_loaders[i], **kwargs)
```

Currently, we support 2 modes: train and val. Let's take a val function for example and have a look at its core logic:

```python
# Currently, iter_runner could be either train or val
def val(self, data_loader, **kwargs):
    # get batch data for 1 iter
    data_batch = next(data_loader)
    # it will execute all before_val_iter function in the hooks registered. You may want to watch out for the order.
    self.call_hook('before_val_iter')
    outputs = self.model.val_step(data_batch, self.optimizer, **kwargs)
    self.outputs = outputs
    self.call_hook('after_val_iter')
```

Other than the basic functionalities explained above, `EpochBasedRunner` and `IterBasedRunner` provide methods such as `resume`, `save_checkpoint` and `register_hook`. In case you are not familiar with the term Hook mentioned earlier, we will also provide a tutorial about it (coming soon...). Essentially, a hook is a functionality to alter or augment the code behaviors through a predefined API. It allows users to have their own code called under certain circumstances. It makes code extensible in a non-intrusive manner.

### A Simple Example

We will walk you through the usage of runner with a classification task. The following code only contains essential steps for demonstration purposes. The following steps are necessary for any training tasks.

**(1) Initialize dataloader, model, optimizer, etc.**

```python
# initialize model
model=...
# initialize optimizer, typically, we set: cfg.optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001)
optimizer = build_optimizer(model, cfg.optimizer)
# initialize the dataloader corresponding to the workflow(train/val)
data_loaders = [
        build_dataloader(
            ds,
            cfg.data.samples_per_gpu,
            cfg.data.workers_per_gpu,
            ...) for ds in dataset
    ]
```

**(2) Initialize runner**

```python
runner = build_runner(
    # cfg.runner is typically set as:
    # runner = dict(type='EpochBasedRunner', max_epochs=200)
    cfg.runner,
    default_args=dict(
        model=model,
        batch_processor=None,
        optimizer=optimizer,
        logger=logger))
```

**(3) Register training hooks and customized hooks.**

```python
# register default hooks necessary for training
runner.register_training_hooks(
    # configs of learning rate, it is typically set as:
    # lr_config = dict(policy='step', step=[100, 150])
    cfg.lr_config,
    # configuration of optimizer, e.g. grad_clip
    optimizer_config,
    # configuration of saving checkpoints, it is typically set as:
    # checkpoint_config = dict(interval=1), saving checkpoints every epoch
    cfg.checkpoint_config,
    # configuration of logs
    cfg.log_config,
    ...)

# register customized hooks
# say we want to enable ema, then we could set custom_hooks=[dict(type='EMAHook')]
if cfg.get('custom_hooks', None):
    custom_hooks = cfg.custom_hooks
    for hook_cfg in cfg.custom_hooks:
        hook_cfg = hook_cfg.copy()
        priority = hook_cfg.pop('priority', 'NORMAL')
        hook = build_from_cfg(hook_cfg, HOOKS)
        runner.register_hook(hook, priority=priority)
```

Then, we can use `resume` or `load_checkpoint` to load existing weights.

**(4) Start training**

```python
# workflow is typically set as: workflow = [('train', 1)]
# here the training begins.
runner.run(data_loaders, cfg.workflow)
```

Let's take `EpochBasedRunner` for example and go a little bit into details about setting workflow:

- Say we only want to put train in the workflow, then we can set: workflow = [('train', 1)]. The runner will only execute train iteratively in this case.
- Say we want to put both train and val in the workflow, then we can set: workflow = [('train', 3), ('val',1)]. The runner will first execute train for 3 epochs and then switch to val mode and execute val for 1 epoch. The workflow will be repeated until the current epoch hits the max_epochs.
- Workflow is highly flexible. Therefore, you can set workflow = [('val', 1), ('train',1)] if you would like the runner to validate first and train after.

The code we demonstrated above is already in `train.py` in MM repositories. Simply modify the corresponding keys in the configuration files and the script will execute the expected workflow automatically.


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/en/understand_mmcv/utils.md
================================================
## Utils

### ProgressBar

If you want to apply a method to a list of items and track the progress, `track_progress`
is a good choice. It will display a progress bar to tell the progress and ETA.

```python
import mmcv

def func(item):
    # do something
    pass

tasks = [item_1, item_2, ..., item_n]

mmcv.track_progress(func, tasks)
```

The output is like the following.

![progress](../_static/progress.*)

There is another method `track_parallel_progress`, which wraps multiprocessing and
progress visualization.

```python
mmcv.track_parallel_progress(func, tasks, 8)  # 8 workers
```

![progress](../_static/parallel_progress.*)

If you want to iterate or enumerate a list of items and track the progress, `track_iter_progress`
is a good choice. It will display a progress bar to tell the progress and ETA.

```python
import mmcv

tasks = [item_1, item_2, ..., item_n]

for task in mmcv.track_iter_progress(tasks):
    # do something like print
    print(task)

for i, task in enumerate(mmcv.track_iter_progress(tasks)):
    # do something like print
    print(i)
    print(task)
```

### Timer

It is convenient to compute the runtime of a code block with `Timer`.

```python
import time

with mmcv.Timer():
    # simulate some code block
    time.sleep(1)
```

or try with `since_start()` and `since_last_check()`. The former
returns the runtime since the timer started and the latter returns the time
elapsed since the last check.

```python
timer = mmcv.Timer()
# code block 1 here
print(timer.since_start())
# code block 2 here
print(timer.since_last_check())
print(timer.since_start())
```


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/en/understand_mmcv/visualization.md
================================================
## Visualization

`mmcv` can show images and annotations (currently supported types include bounding boxes).

```python
# show an image file
mmcv.imshow('a.jpg')

# show a loaded image
img = np.random.rand(100, 100, 3)
mmcv.imshow(img)

# show image with bounding boxes
img = np.random.rand(100, 100, 3)
bboxes = np.array([[0, 0, 50, 50], [20, 20, 60, 60]])
mmcv.imshow_bboxes(img, bboxes)
```

`mmcv` can also visualize special images such as optical flows.

```python
flow = mmcv.flowread('test.flo')
mmcv.flowshow(flow)
```


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/zh_cn/Makefile
================================================
# Minimal makefile for Sphinx documentation
#
# Every target is forwarded to sphinx-build's "make mode" (-M); this file only
# supplies the source/build directories and any extra options.

# You can set these variables from the command line.
SPHINXOPTS    =
SPHINXBUILD   = sphinx-build
SOURCEDIR     = .
BUILDDIR      = _build

# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

# Phony targets are exempt from implicit-rule search, so the catch-all "%"
# rule below is not applied to "help" or to the Makefile itself.
.PHONY: help Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
# $@ expands to the requested target name (e.g. "html").
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/zh_cn/_static/css/readthedocs.css
================================================
/* Header logo: shows mmcv-logo.png as a background image scaled to 85x40 px,
   with the element itself sized to the same 85x40 px box. */
.header-logo {
    background-image: url("../image/mmcv-logo.png");
    background-size: 85px 40px;
    height: 40px;
    width: 85px;
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/zh_cn/api.rst
================================================
fileio
-------
.. automodule:: mmcv.fileio
    :members:

image
------
.. automodule:: mmcv.image
    :members:

video
------
.. automodule:: mmcv.video
    :members:

arraymisc
---------
.. automodule:: mmcv.arraymisc
    :members:

visualization
--------------
.. automodule:: mmcv.visualization
    :members:

utils
-----
.. automodule:: mmcv.utils
    :members:

cnn
----
.. automodule:: mmcv.cnn
    :members:

runner
------
.. automodule:: mmcv.runner
    :members:

engine
------
.. automodule:: mmcv.engine
    :members:

ops
------
.. automodule:: mmcv.ops
    :members:


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/zh_cn/community/contributing.md
================================================
## 贡献代码

欢迎任何类型的贡献，包括但不限于

- 修改拼写错误或代码错误
- 添加文档或将文档翻译成其他语言
- 添加新功能和新组件

### 工作流
| 详细工作流见 [拉取请求](pr.md)
1. 复刻并拉取最新的 OpenMMLab 算法库
2. 创建新的分支（不建议使用主分支提拉取请求）
3. 提交你的修改
4. 创建拉取请求

```{note}
如果你计划添加新功能并且该功能包含比较大的改动，建议先开 issue 讨论
```
### 代码风格

#### Python

[PEP8](https://www.python.org/dev/peps/pep-0008/) 作为 OpenMMLab 算法库首选的代码规范，我们使用以下工具检查和格式化代码

- [flake8](http://flake8.pycqa.org/en/latest/): Python 官方发布的代码规范检查工具，是多个检查工具的封装
- [yapf](https://github.com/google/yapf): Google 发布的代码规范检查工具
- [isort](https://github.com/timothycrosley/isort): 自动调整模块导入顺序的工具
- [markdownlint](https://github.com/markdownlint/markdownlint): 检查 markdown 文件的工具
- [docformatter](https://github.com/myint/docformatter): 格式化 docstring 的工具

yapf 和 isort 的配置可以在 [setup.cfg](./setup.cfg) 找到

通过配置 [pre-commit hook](https://pre-commit.com/) ，我们可以在提交代码时自动检查和格式化 `flake8`、`yapf`、`isort`、`trailing whitespaces`、`markdown files`，
修复 `end-of-files`、`double-quoted-strings`、`python-encoding-pragma`、`mixed-line-ending`，调整 `requirements.txt` 的包顺序。
pre-commit 钩子的配置可以在 [.pre-commit-config](./.pre-commit-config.yaml) 找到。

在克隆算法库后，你需要安装并初始化 pre-commit 钩子

```shell
pip install -U pre-commit
```

切换算法库根目录

```shell
pre-commit install
```

如果安装 markdownlint 遇到了问题，可以尝试使用以下的步骤安装 ruby

```shell
# install rvm
curl -L https://get.rvm.io | bash -s -- --autolibs=read-fail
[[ -s "$HOME/.rvm/scripts/rvm" ]] && source "$HOME/.rvm/scripts/rvm"
rvm autolibs disable

# install ruby
rvm install 2.7.1
```

或者参考 [这个代码库](https://github.com/innerlee/setup) 和 [`zzruby.sh`](https://github.com/innerlee/setup/blob/master/zzruby.sh)。

至此，每一次 commit 修改都会触发 pre-commit 检查代码格式。

>提交拉取请求前，请确保你的代码符合 yapf 的格式

#### C++ and CUDA

C++ 和 CUDA 的代码规范遵从 [Google C++ Style Guide](https://google.github.io/styleguide/cppguide.html)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/zh_cn/community/pr.md
================================================
## 拉取请求

### 什么是拉取请求？

`拉取请求` (Pull Request), [GitHub 官方文档](https://docs.github.com/en/github/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/about-pull-requests)定义如下。

```
拉取请求是一种通知机制。你修改了他人的代码，将你的修改通知原来作者，希望他合并你的修改。
```

### 基本的工作流：

1. 获取最新的代码库
2. 从主分支创建最新的分支进行开发
3. 提交修改
4. 推送你的修改并创建一个 `拉取请求`
5. 讨论、审核代码
6. 将开发分支合并到主分支

### 具体步骤

#### 1. 获取最新的代码库

+ 当你第一次提 PR 时

  复刻 OpenMMLab 原代码库，点击 GitHub 页面右上角的 **Fork** 按钮即可
    ![avatar](../../en/_static/community/1.png)

  克隆复刻的代码库到本地

  ```bash
  git clone git@github.com:XXX/mmcv.git
  ```

  添加原代码库为上游代码库

  ```bash
  git remote add upstream git@github.com:open-mmlab/mmcv
  ```

+ 从第二个 PR 起

  检出本地代码库的主分支，然后从最新的原代码库的主分支拉取更新

  ```bash
  git checkout master
  git pull upstream master
  ```

#### 2. 从主分支创建一个新的开发分支

```bash
git checkout -b branchname
```

```{tip}
为了保证提交历史清晰可读，我们强烈推荐您先检出主分支 (master)，再创建新的分支。
```
#### 3. 提交你的修改

```bash
# coding
git add [files]
git commit -m 'messages'
```

#### 4. 推送你的修改到复刻的代码库，并创建一个`拉取请求`

+ 推送当前分支到远端复刻的代码库

    ```bash
    git push origin branchname
    ```

+ 创建一个`拉取请求`
![avatar](../../en/_static/community/2.png)

+ 修改`拉取请求`信息模板，描述修改原因和修改内容。还可以在 PR 描述中，手动关联到相关的`议题` (issue),（更多细节，请参考[官方文档](https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue)）。

#### 5. 讨论并评审你的代码

+ 创建`拉取请求`时，可以关联给相关人员进行评审
![avatar](../../en/_static/community/3.png)

+ 根据评审人员的意见修改代码，并推送修改

#### 6. `拉取请求`合并之后删除该分支

```bash
git branch -d branchname # delete local branch
git push origin --delete branchname # delete remote branch
```

### PR 规范

1. 使用 [pre-commit hook](https://pre-commit.com)，尽量减少代码风格相关问题

2. 一个 PR 对应一个短期分支

3. 粒度要细，一个PR只做一件事情，避免超大的PR

    + Bad：实现 Faster R-CNN
    + Acceptable：给 Faster R-CNN 添加一个 box head
    + Good：给 box head 增加一个参数来支持自定义的 conv 层数

4. 每次 Commit 时需要提供清晰且有意义的 commit 信息

5. 提供清晰且有意义的`拉取请求`描述

    + 标题写明白任务名称，一般格式:[Prefix] Short description of the pull request (Suffix)
    + prefix: 新增功能 [Feature], 修 bug [Fix], 文档相关 [Docs], 开发中 [WIP] (暂时不会被review)
    + 描述里介绍`拉取请求`的主要修改内容，结果，以及对其他部分的影响, 参考`拉取请求`模板
    + 关联相关的`议题` (issue) 和其他`拉取请求`


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/zh_cn/compatibility.md
================================================
### v1.3.18

部分自定义算子对于不同的设备有不同实现，为此添加的大量宏命令与类型检查使得代码变得难以维护。例如：

```c++
  if (input.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
    CHECK_CUDA_INPUT(input);
    CHECK_CUDA_INPUT(rois);
    CHECK_CUDA_INPUT(output);
    CHECK_CUDA_INPUT(argmax_y);
    CHECK_CUDA_INPUT(argmax_x);

    roi_align_forward_cuda(input, rois, output, argmax_y, argmax_x,
                           aligned_height, aligned_width, spatial_scale,
                           sampling_ratio, pool_mode, aligned);
#else
    AT_ERROR("RoIAlign is not compiled with GPU support");
#endif
  } else {
    CHECK_CPU_INPUT(input);
    CHECK_CPU_INPUT(rois);
    CHECK_CPU_INPUT(output);
    CHECK_CPU_INPUT(argmax_y);
    CHECK_CPU_INPUT(argmax_x);
    roi_align_forward_cpu(input, rois, output, argmax_y, argmax_x,
                          aligned_height, aligned_width, spatial_scale,
                          sampling_ratio, pool_mode, aligned);
  }
```

为此我们设计了注册与分发的机制以更好的管理这些算子实现。

```c++

void ROIAlignForwardCUDAKernelLauncher(Tensor input, Tensor rois, Tensor output,
                                       Tensor argmax_y, Tensor argmax_x,
                                       int aligned_height, int aligned_width,
                                       float spatial_scale, int sampling_ratio,
                                       int pool_mode, bool aligned);

void roi_align_forward_cuda(Tensor input, Tensor rois, Tensor output,
                            Tensor argmax_y, Tensor argmax_x,
                            int aligned_height, int aligned_width,
                            float spatial_scale, int sampling_ratio,
                            int pool_mode, bool aligned) {
  ROIAlignForwardCUDAKernelLauncher(
      input, rois, output, argmax_y, argmax_x, aligned_height, aligned_width,
      spatial_scale, sampling_ratio, pool_mode, aligned);
}

// 注册算子的cuda实现
void roi_align_forward_impl(Tensor input, Tensor rois, Tensor output,
                            Tensor argmax_y, Tensor argmax_x,
                            int aligned_height, int aligned_width,
                            float spatial_scale, int sampling_ratio,
                            int pool_mode, bool aligned);
REGISTER_DEVICE_IMPL(roi_align_forward_impl, CUDA, roi_align_forward_cuda);

// roi_align.cpp
// 使用dispatcher根据参数中的Tensor device类型对实现进行分发
void roi_align_forward_impl(Tensor input, Tensor rois, Tensor output,
                            Tensor argmax_y, Tensor argmax_x,
                            int aligned_height, int aligned_width,
                            float spatial_scale, int sampling_ratio,
                            int pool_mode, bool aligned) {
  DISPATCH_DEVICE_IMPL(roi_align_forward_impl, input, rois, output, argmax_y,
                       argmax_x, aligned_height, aligned_width, spatial_scale,
                       sampling_ratio, pool_mode, aligned);
}

```

### v1.3.11

为了灵活地支持更多的后端和硬件，例如 `NVIDIA GPUs` 、`AMD GPUs`，我们重构了 `mmcv/ops/csrc` 目录。注意，这次重构不会影响 API 的使用。更多相关信息，请参考 [PR1206](https://github.com/open-mmlab/mmcv/pull/1206)。

原始的目录结构如下所示

```
.
├── common_cuda_helper.hpp
├── ops_cuda_kernel.cuh
├── pytorch_cpp_helper.hpp
├── pytorch_cuda_helper.hpp
├── parrots_cpp_helper.hpp
├── parrots_cuda_helper.hpp
├── parrots_cudawarpfunction.cuh
├── onnxruntime
│   ├── onnxruntime_register.h
│   ├── onnxruntime_session_options_config_keys.h
│   ├── ort_mmcv_utils.h
│   ├── ...
│   ├── onnx_ops.h
│   └── cpu
│       ├── onnxruntime_register.cpp
│       ├── ...
│       └── onnx_ops_impl.cpp
├── parrots
│   ├── ...
│   ├── ops.cpp
│   ├── ops_cuda.cu
│   ├── ops_parrots.cpp
│   └── ops_pytorch.h
├── pytorch
│   ├── ...
│   ├── ops.cpp
│   ├── ops_cuda.cu
│   ├── pybind.cpp
└── tensorrt
    ├── trt_cuda_helper.cuh
    ├── trt_plugin_helper.hpp
    ├── trt_plugin.hpp
    ├── trt_serialize.hpp
    ├── ...
    ├── trt_ops.hpp
    └── plugins
        ├── trt_cuda_helper.cu
        ├── trt_plugin.cpp
        ├── ...
        ├── trt_ops.cpp
        └── trt_ops_kernel.cu
```

重构之后，它的结构如下所示

```
.
├── common
│   ├── box_iou_rotated_utils.hpp
│   ├── parrots_cpp_helper.hpp
│   ├── parrots_cuda_helper.hpp
│   ├── pytorch_cpp_helper.hpp
│   ├── pytorch_cuda_helper.hpp
│   └── cuda
│       ├── common_cuda_helper.hpp
│       ├── parrots_cudawarpfunction.cuh
│       ├── ...
│       └── ops_cuda_kernel.cuh
├── onnxruntime
│   ├── onnxruntime_register.h
│   ├── onnxruntime_session_options_config_keys.h
│   ├── ort_mmcv_utils.h
│   ├── ...
│   ├── onnx_ops.h
│   └── cpu
│       ├── onnxruntime_register.cpp
│       ├── ...
│       └── onnx_ops_impl.cpp
├── parrots
│   ├── ...
│   ├── ops.cpp
│   ├── ops_parrots.cpp
│   └── ops_pytorch.h
├── pytorch
│   ├── info.cpp
│   ├── pybind.cpp
│   ├── ...
│   ├── ops.cpp
│   └── cuda
│       ├── ...
│       └── ops_cuda.cu
└── tensorrt
    ├── trt_cuda_helper.cuh
    ├── trt_plugin_helper.hpp
    ├── trt_plugin.hpp
    ├── trt_serialize.hpp
    ├── ...
    ├── trt_ops.hpp
    └── plugins
        ├── trt_cuda_helper.cu
        ├── trt_plugin.cpp
        ├── ...
        ├── trt_ops.cpp
        └── trt_ops_kernel.cu
```


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/zh_cn/conf.py
================================================
#
# Configuration file for the Sphinx documentation builder.
#
# This file does only contain a selection of the most common options. For a
# full list see the documentation:
# http://www.sphinx-doc.org/en/master/config

# -- Path setup --------------------------------------------------------------

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import os
import sys

import pytorch_sphinx_theme
from sphinx.builders.html import StandaloneHTMLBuilder

# Put the directory two levels up first on sys.path. The relative path
# '../..' is resolved against the current working directory.
sys.path.insert(0, os.path.abspath('../..'))

# Execute mmcv/version.py inside this module's namespace to pick up
# __version__ without importing the mmcv package.
version_file = '../../mmcv/version.py'
with open(version_file, 'r') as f:
    exec(compile(f.read(), version_file, 'exec'))
# At module level locals() is the namespace the exec above wrote into.
# Assumes version.py defines __version__; otherwise this raises KeyError.
__version__ = locals()['__version__']

# -- Project information -----------------------------------------------------

project = 'mmcv'
copyright = '2018-2021, OpenMMLab'
author = 'MMCV Authors'

# The short X.Y version
# NOTE: the full __version__ string is used here; it is not truncated to X.Y.
version = __version__
# The full version, including alpha/beta/rc tags
release = __version__

# -- General configuration ---------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
#
# needs_sphinx = '1.0'

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.

extensions = [
    'sphinx.ext.autodoc',
    'sphinx.ext.napoleon',
    'sphinx.ext.viewcode',
    'sphinx.ext.autosectionlabel',
    'sphinx_markdown_tables',
    'myst_parser',
    'sphinx_copybutton',
]  # yapf: disable

# Modules that autodoc mocks instead of importing (Sphinx option
# `autodoc_mock_imports`).
autodoc_mock_imports = ['mmcv._ext', 'mmcv.utils.ext_loader', 'torchvision']
# Prefix each automatically generated section label with its document name
# (Sphinx option `autosectionlabel_prefix_document`).
autosectionlabel_prefix_document = True

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
#
# Here a mapping from file suffix to source type is used instead.
source_suffix = {
    '.rst': 'restructuredtext',
    '.md': 'markdown',
}

# The master toctree document.
master_doc = 'index'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
language = 'zh_CN'

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'

# -- Options for HTML output -------------------------------------------------

# The theme to use for HTML and HTML Help pages.  See the documentation for
# a list of builtin themes.
#
# html_theme = 'sphinx_rtd_theme'
html_theme = 'pytorch_sphinx_theme'
# The theme directory is reported by the imported pytorch_sphinx_theme package.
html_theme_path = [pytorch_sphinx_theme.get_html_theme_path()]

# Theme options are theme-specific and customize the look and feel of a theme
# further.  For a list of options available for each theme, see the
# documentation.
#
html_theme_options = {
    'menu': [
        {
            'name': 'GitHub',
            'url': 'https://github.com/open-mmlab/mmcv'
        },
    ],
    # Specify the language of shared menu
    'menu_lang': 'cn',
}

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
# Extra stylesheet to load; per the Sphinx `html_css_files` option the path is
# relative to html_static_path.
html_css_files = ['css/readthedocs.css']

# Custom sidebar templates, must be a dictionary that maps document names
# to template names.
#
# The default sidebars (for documents that don't match any pattern) are
# defined by theme itself.  Builtin themes are using these templates by
# default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
# 'searchbox.html']``.
#
# html_sidebars = {}

# -- Options for HTMLHelp output ---------------------------------------------

# Output file base name for HTML help builder.
htmlhelp_basename = 'mmcvdoc'

# -- Options for LaTeX output ------------------------------------------------

# Every entry below is commented out, so this dict is currently empty.
latex_elements = {
    # The paper size ('letterpaper' or 'a4paper').
    #
    # 'papersize': 'letterpaper',

    # The font size ('10pt', '11pt' or '12pt').
    #
    # 'pointsize': '10pt',

    # Additional stuff for the LaTeX preamble.
    #
    # 'preamble': '',

    # Latex figure (float) alignment
    #
    # 'figure_align': 'htbp',
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
#  author, documentclass [howto, manual, or own class]).
latex_documents = [
    (master_doc, 'mmcv.tex', 'mmcv Documentation', 'MMCV Contributors',
     'manual'),
]

# -- Options for manual page output ------------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [(master_doc, 'mmcv', 'mmcv Documentation', [author], 1)]

# -- Options for Texinfo output ----------------------------------------------

# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
#  dir menu entry, description, category)
texinfo_documents = [
    (master_doc, 'mmcv', 'mmcv Documentation', author, 'mmcv',
     'One line description of project.', 'Miscellaneous'),
]

# -- Options for Epub output -------------------------------------------------

# Bibliographic Dublin Core info.
epub_title = project

# The unique identifier of the text. This can be a ISBN number
# or the project homepage.
#
# epub_identifier = ''

# A unique identification for the text.
#
# epub_uid = ''

# A list of files that should not be packed into the epub file.
epub_exclude_files = ['search.html']

# set priority when building html
# This overrides the class attribute on the HTML builder; the list is given
# in order svg, gif, png, jpeg.
StandaloneHTMLBuilder.supported_image_types = [
    'image/svg+xml', 'image/gif', 'image/png', 'image/jpeg'
]
# -- Extension configuration -------------------------------------------------
# Ignore >>> when copying code
# The pattern is a regular expression that matches both the '>>> ' prompt and
# the '... ' continuation prompt.
copybutton_prompt_text = r'>>> |\.\.\. '
copybutton_prompt_is_regexp = True


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/zh_cn/deployment/onnx.md
================================================
## MMCV中ONNX模块简介 (实验性)

### register_extra_symbolics

在将PyTorch模型导出成ONNX时，需要注册额外的符号函数

#### 范例

```python
import mmcv
from mmcv.onnx import register_extra_symbolics

opset_version = 11
register_extra_symbolics(opset_version)
```

#### 常见问题

- 无


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/zh_cn/deployment/onnxruntime_custom_ops.md
================================================
## ONNX Runtime自定义算子

<!-- TOC -->

- [ONNX Runtime自定义算子](#onnx-runtime自定义算子)
  - [SoftNMS](#softnms)
    - [描述](#描述)
    - [模型参数](#模型参数)
    - [输入](#输入)
    - [输出](#输出)
    - [类型约束](#类型约束)
  - [RoIAlign](#roialign)
    - [描述](#描述-1)
    - [模型参数](#模型参数-1)
    - [输入](#输入-1)
    - [输出](#输出-1)
    - [类型约束](#类型约束-1)
  - [NMS](#nms)
    - [描述](#描述-2)
    - [模型参数](#模型参数-2)
    - [输入](#输入-2)
    - [输出](#输出-2)
    - [类型约束](#类型约束-2)
  - [grid_sampler](#grid_sampler)
    - [描述](#描述-3)
    - [模型参数](#模型参数-3)
    - [输入](#输入-3)
    - [输出](#输出-3)
    - [类型约束](#类型约束-3)
  - [CornerPool](#cornerpool)
    - [描述](#描述-4)
    - [模型参数](#模型参数-4)
    - [输入](#输入-4)
    - [输出](#输出-4)
    - [类型约束](#类型约束-4)
  - [cummax](#cummax)
    - [描述](#描述-5)
    - [模型参数](#模型参数-5)
    - [输入](#输入-5)
    - [输出](#输出-5)
    - [类型约束](#类型约束-5)
  - [cummin](#cummin)
    - [描述](#描述-6)
    - [模型参数](#模型参数-6)
    - [输入](#输入-6)
    - [输出](#输出-6)
    - [类型约束](#类型约束-6)
  - [MMCVModulatedDeformConv2d](#mmcvmodulateddeformconv2d)
    - [描述](#描述-7)
    - [模型参数](#模型参数-7)
    - [输入](#输入-7)
    - [输出](#输出-7)
    - [类型约束](#类型约束-7)

<!-- TOC -->

### SoftNMS

#### 描述

根据`scores`计算`boxes`的soft NMS。 请阅读[Soft-NMS -- Improving Object Detection With One Line of Code](https://arxiv.org/abs/1704.04503)了解细节。

#### 模型参数

| 类型    | 参数名          | 描述                                                    |
| ------- | --------------- | ------------------------------------------------------- |
| `float` | `iou_threshold` | 用来判断候选框重合度的阈值，取值范围[0, 1]。默认值为0   |
| `float` | `sigma`         | 高斯方法的超参数                                        |
| `float` | `min_score`     | NMS的score阈值                                          |
| `int`   | `method`        | NMS的计算方式, (0: `naive`, 1: `linear`, 2: `gaussian`) |
| `int`   | `offset`        | 用来计算候选框的宽高(x2 - x1 + offset)。可选值0或1      |

#### 输入

<dl>
<dt><tt>boxes</tt>: T</dt>
<dd>输入候选框。形状为(N, 4)的二维张量，N为候选框数量。</dd>
<dt><tt>scores</tt>: T</dt>
<dd>输入得分。形状为(N, )的一维张量。</dd>
</dl>

#### 输出

<dl>
<dt><tt>dets</tt>: T</dt>
<dd>输出的检测框与得分。形状为(num_valid_boxes, 5)的二维张量，内容为[[x1, y1, x2, y2, score], ...]。num_valid_boxes是合法的检测框数量。</dd>
<dt><tt>indices</tt>: tensor(int64)</dt>
<dd>输出序号。形状为(num_valid_boxes, )的一维张量。</dd>
</dl>

#### 类型约束

- T:tensor(float32)

### RoIAlign

#### 描述

在特征图上计算RoIAlign，通常在双阶段目标检测模型的bbox_head中使用

#### 模型参数

| 类型    | 参数名           | 描述                                                    |
| ------- | ---------------- | ------------------------------------------------------- |
| `int`   | `output_height`  | roi特征的输出高度                                       |
| `int`   | `output_width`   | roi特征的输出宽度                                       |
| `float` | `spatial_scale`  | 输入检测框的缩放系数                                    |
| `int`   | `sampling_ratio` | 输出的采样率。`0`表示使用密集采样                       |
| `str`   | `mode`           | 池化方式。 `avg`或`max`                                 |
| `int`   | `aligned`        | 如果`aligned=1`，则像素会进行-0.5的偏移以达到更好的对齐 |

#### 输入

<dl>
<dt><tt>input</tt>: T</dt>
<dd>输入特征图；形状为(N, C, H, W)的四维张量，其中N为batch大小，C为输入通道数，H和W为输入特征图的高和宽。</dd>
<dt><tt>rois</tt>: T</dt>
<dd>需要进行池化的感兴趣区域；形状为(num_rois, 5)的二维张量，内容为[[batch_index, x1, y1, x2, y2], ...]。rois的坐标为输入特征图的坐标系。</dd>
</dl>

#### 输出

<dl>
<dt><tt>feat</tt>: T</dt>
<dd>池化的输出；形状为(num_rois, C, output_height, output_width)的四维张量。每个输出特征feat[i]都与输入感兴趣区域rois[i]一一对应。</dd>
</dl>

#### 类型约束

- T:tensor(float32)

### NMS

#### 描述

根据IoU阈值对候选框进行非极大值抑制。

#### 模型参数

| 类型    | 参数名          | 描述                                                  |
| ------- | --------------- | ----------------------------------------------------- |
| `float` | `iou_threshold` | 用来判断候选框重合度的阈值，取值范围[0, 1]。默认值为0 |
| `int`   | `offset`        | 用来计算候选框的宽高(x2 - x1 + offset)。可选值0或1    |

#### 输入

<dl>
<dt><tt>boxes</tt>: T</dt>
<dd>输入候选框。形状为(N, 4)的二维张量，N为候选框数量。</dd>
<dt><tt>scores</tt>: T</dt>
<dd>输入得分。形状为(N, )的一维张量。</dd>
</dl>

#### 输出

<dl>
<dt><tt>indices</tt>: tensor(int32, Linear)</dt>
<dd>被选中的候选框索引。形状为(num_valid_boxes, )的一维张量，num_valid_boxes表示被选上的候选框数量。</dd>
</dl>

#### 类型约束

- T:tensor(float32)

### grid_sampler

#### 描述

根据`grid`的像素位置对`input`进行网格采样。

#### 模型参数

| 类型  | 参数名               | 描述                                                                                                                                                 |
| ----- | -------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------- |
| `int` | `interpolation_mode` | 计算输出使用的插值模式。(0: `bilinear` , 1: `nearest`)                                                                                               |
| `int` | `padding_mode`       | 边缘填充模式。(0: `zeros`, 1: `border`, 2: `reflection`)                                                                                             |
| `int` | `align_corners`      | 如果`align_corners=1`，则极值(`-1`和`1`)会被当做输入边缘像素的中心点。如果`align_corners=0`，则它们会被看做是边缘像素的边缘点,减小分辨率对采样的影响 |

#### 输入

<dl>
<dt><tt>input</tt>: T</dt>
<dd>输入特征；形状为(N, C, inH, inW)的四维张量，其中N为batch大小，C为输入通道数，inH和inW为输入特征图的高和宽。</dd>
<dt><tt>grid</tt>: T</dt>
<dd>输入网格；形状为(N, outH, outW, 2)的四维张量，outH和outW为输出的高和宽。 </dd>
</dl>

#### 输出

<dl>
<dt><tt>output</tt>: T</dt>
<dd>输出特征；形状为(N, C, outH, outW)的四维张量。</dd>
</dl>

#### 类型约束

- T:tensor(float32, Linear)

### CornerPool

#### 描述

对`input`计算CornerPool。请阅读[CornerNet -- Detecting Objects as Paired Keypoints](https://arxiv.org/abs/1808.01244)了解更多细节。

#### 模型参数

| 类型  | 参数名 | 描述                                                     |
| ----- | ------ | -------------------------------------------------------- |
| `int` | `mode` | 池化模式。(0: `top`, 1: `bottom`, 2: `left`, 3: `right`) |

#### 输入

<dl>
<dt><tt>input</tt>: T</dt>
<dd>输入特征；形状为(N, C, H, W)的四维张量，其中N为batch大小，C为输入通道数，H和W为输入特征图的高和宽。</dd>
</dl>

#### 输出

<dl>
<dt><tt>output</tt>: T</dt>
<dd>输出特征；形状为(N, C, H, W)的四维张量。</dd>
</dl>

#### 类型约束

- T:tensor(float32)

### cummax

#### 描述

返回一个元组(`values`, `indices`)，其中`values`为`input`第`dim`维的累计最大值，`indices`为第`dim`维最大值位置。请阅读[torch.cummax](https://pytorch.org/docs/stable/generated/torch.cummax.html)了解更多细节。

#### 模型参数

| 类型  | 参数名 | 描述               |
| ----- | ------ | ------------------ |
| `int` | `dim`  | 进行累计计算的维度 |

#### 输入

<dl>
<dt><tt>input</tt>: T</dt>
<dd>输入张量；可以是任意形状；也支持空Tensor</dd>
</dl>

#### 输出

<dl>
<dt><tt>output</tt>: T</dt>
<dd>`input`第`dim`维的累计最大值，形状与`input`相同。类型和`input`一致</dd>
<dt><tt>indices</tt>: tensor(int64)</dt>
<dd>第`dim`维最大值位置，形状与`input`相同。</dd>
</dl>

#### 类型约束

- T:tensor(float32)

### cummin

#### 描述

返回一个元组(`values`, `indices`)，其中`values`为`input`第`dim`维的累计最小值，`indices`为第`dim`维最小值位置。请阅读[torch.cummin](https://pytorch.org/docs/stable/generated/torch.cummin.html)了解更多细节。

#### 模型参数

| 类型  | 参数名 | 描述               |
| ----- | ------ | ------------------ |
| `int` | `dim`  | 进行累计计算的维度 |

#### 输入

<dl>
<dt><tt>input</tt>: T</dt>
<dd>输入张量；可以是任意形状；也支持空Tensor</dd>
</dl>

#### 输出

<dl>
<dt><tt>output</tt>: T</dt>
<dd>`input`第`dim`维的累计最小值，形状与`input`相同。类型和`input`一致</dd>
<dt><tt>indices</tt>: tensor(int64)</dt>
<dd>第`dim`维最小值位置，形状与`input`相同。</dd>
</dl>

#### 类型约束

- T:tensor(float32)

### MMCVModulatedDeformConv2d

#### 描述

在输入特征上计算Modulated Deformable Convolution，请阅读[Deformable ConvNets v2: More Deformable, Better Results](https://arxiv.org/abs/1811.11168?from=timeline)了解更多细节。

#### 模型参数

| 类型           | 参数名              | 描述                                                          |
| -------------- | ------------------- | ------------------------------------------------------------- |
| `list of ints` | `stride`            | 卷积的步长 (sH, sW)                                           |
| `list of ints` | `padding`           | 输入特征填充大小 (padH, padW)                                 |
| `list of ints` | `dilation`          | 卷积核各元素间隔 (dH, dW)                                     |
| `int`          | `deformable_groups` | 可变偏移量的分组，通常置为1即可                               |
| `int`          | `groups`            | 卷积分组数，`input_channel`会根据这个值被分为数个分组进行计算 |

#### 输入

<dl>
<dt><tt>inputs[0]</tt>: T</dt>
<dd>输入特征；形状为(N, C, inH, inW)的四维张量，其中N为batch大小，C为输入通道数，inH和inW为输入特征图的高和宽。</dd>
<dt><tt>inputs[1]</tt>: T</dt>
<dd>输入偏移量；形状为(N, deformable_group* 2* kH* kW, outH, outW)的四维张量，kH和kW为卷积核的高和宽，outH和outW为输出特征图的高和宽。</dd>
<dt><tt>inputs[2]</tt>: T</dt>
<dd>输入掩码；形状为(N, deformable_group* kH* kW, outH, outW)的四维张量。</dd>
<dt><tt>inputs[3]</tt>: T</dt>
<dd>输入权重；形状为(output_channel, input_channel, kH, kW)的四维张量。</dd>
<dt><tt>inputs[4]</tt>: T, optional</dt>
<dd>输入偏置；形状为(output_channel)的一维张量。</dd>
</dl>

#### 输出

<dl>
<dt><tt>outputs[0]</tt>: T</dt>
<dd>输出特征；形状为(N, output_channel, outH, outW)的四维张量。</dd>
</dl>

#### 类型约束

- T:tensor(float32, Linear)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/zh_cn/deployment/onnxruntime_op.md
================================================
## MMCV中的ONNX Runtime自定义算子

### ONNX Runtime介绍

**ONNX Runtime**是一个跨平台的推理与训练加速器，适配许多常用的机器学习/深度神经网络框架。请访问[github](https://github.com/microsoft/onnxruntime)了解更多信息。

### ONNX介绍

**ONNX**是**Open Neural Network Exchange**的缩写，是许多机器学习/深度神经网络框架使用的*中间表示(IR)*。请访问[github](https://github.com/onnx/onnx)了解更多信息。

### 为什么要在MMCV中添加ONNX自定义算子？

- 为了验证ONNX模型在ONNX Runtime下的推理的正确性。
- 为了方便使用了`mmcv.ops`自定义算子的模型的部署工作。

### MMCV已支持的算子

|                                       算子                                       |  CPU  |  GPU  | MMCV版本 |
| :------------------------------------------------------------------------------: | :---: | :---: | :------: |
|                   [SoftNMS](onnxruntime_custom_ops.md#softnms)                   |   Y   |   N   |  1.2.3   |
|                  [RoIAlign](onnxruntime_custom_ops.md#roialign)                  |   Y   |   N   |  1.2.5   |
|                       [NMS](onnxruntime_custom_ops.md#nms)                       |   Y   |   N   |  1.2.7   |
|              [grid_sampler](onnxruntime_custom_ops.md#grid_sampler)              |   Y   |   N   |  1.3.1   |
|                [CornerPool](onnxruntime_custom_ops.md#cornerpool)                |   Y   |   N   |  1.3.4   |
|                    [cummax](onnxruntime_custom_ops.md#cummax)                    |   Y   |   N   |  1.3.4   |
|                    [cummin](onnxruntime_custom_ops.md#cummin)                    |   Y   |   N   |  1.3.4   |
| [MMCVModulatedDeformConv2d](onnxruntime_custom_ops.md#mmcvmodulateddeformconv2d) |   Y   |   N   |  1.3.12  |

### 如何编译ONNX Runtime自定义算子？

*请注意我们仅在**onnxruntime>=1.8.1**的Linux x86-64 cpu平台上进行过测试*

#### 准备工作

- 克隆代码仓库

```bash
git clone https://github.com/open-mmlab/mmcv.git
```

- 从ONNX Runtime下载`onnxruntime-linux`：[releases](https://github.com/microsoft/onnxruntime/releases/tag/v1.8.1)，解压缩，根据路径创建变量`ONNXRUNTIME_DIR`并把路径下的lib目录添加到`LD_LIBRARY_PATH`，步骤如下：

```bash
wget https://github.com/microsoft/onnxruntime/releases/download/v1.8.1/onnxruntime-linux-x64-1.8.1.tgz

tar -zxvf onnxruntime-linux-x64-1.8.1.tgz
cd onnxruntime-linux-x64-1.8.1
export ONNXRUNTIME_DIR=$(pwd)
export LD_LIBRARY_PATH=$ONNXRUNTIME_DIR/lib:$LD_LIBRARY_PATH
```

#### Linux系统下编译

```bash
cd mmcv ## to MMCV root directory
MMCV_WITH_OPS=1 MMCV_WITH_ORT=1 python setup.py develop
```

### 如何在python下使用ONNX Runtime对导出的ONNX模型做推理

使用`pip`安装ONNX Runtime

```bash
pip install onnxruntime==1.8.1
```

推理范例

```python
import os

import numpy as np
import onnxruntime as ort

from mmcv.ops import get_onnxruntime_op_path

ort_custom_op_path = get_onnxruntime_op_path()
assert os.path.exists(ort_custom_op_path)
session_options = ort.SessionOptions()
session_options.register_custom_ops_library(ort_custom_op_path)
## exported ONNX model with custom operators
onnx_file = 'sample.onnx'
input_data = np.random.randn(1, 3, 224, 224).astype(np.float32)
sess = ort.InferenceSession(onnx_file, session_options)
onnx_results = sess.run(None, {'input' : input_data})
```

### 如何为MMCV添加ONNX Runtime的自定义算子

#### 开发前提醒

- 该算子尚未被ONNX Runtime支持，即不在ONNX Runtime的[已实现算子列表](https://github.com/microsoft/onnxruntime/blob/master/docs/OperatorKernels.md)中。
- 确保该自定义算子可以被ONNX导出。

#### 添加方法

以`soft_nms`为例：

1. 在ONNX Runtime头文件目录`mmcv/ops/csrc/onnxruntime/`下添加头文件`soft_nms.h`
2. 在ONNX Runtime源码目录`mmcv/ops/csrc/onnxruntime/cpu/`下添加算子实现`soft_nms.cpp`
3. 在[onnxruntime_register.cpp](../../mmcv/ops/csrc/onnxruntime/cpu/onnxruntime_register.cpp)中注册实现的算子`soft_nms`

    ```c++
    #include "soft_nms.h"

    SoftNmsOp c_SoftNmsOp;

    if (auto status = ortApi->CustomOpDomain_Add(domain, &c_SoftNmsOp)) {
    return status;
    }
    ```

4. 在`tests/test_ops/test_onnx.py`添加单元测试，
   可以参考[here](../../tests/test_ops/test_onnx.py)。

**最后，欢迎为MMCV添加ONNX Runtime自定义算子** :nerd_face:

### 已知问题

- "RuntimeError: tuple appears in op that does not forward tuples, unsupported kind: `prim::PythonOp`."
   1. 请注意`cummax`和`cummin`算子是在torch >= 1.5.0被添加的。但它们需要在torch version >= 1.7.0才能正确导出。否则会在导出时发生上面的错误。
   2. 解决方法：升级PyTorch到1.7.0以上版本

### 引用

- [How to export Pytorch model with custom op to ONNX and run it in ONNX Runtime](https://github.com/onnx/tutorials/blob/master/PyTorchCustomOperator/README.md)
- [How to add a custom operator/kernel in ONNX Runtime](https://github.com/microsoft/onnxruntime/blob/master/docs/AddingCustomOp.md)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/zh_cn/deployment/tensorrt_custom_ops.md
================================================
## TensorRT自定义算子

<!-- TOC -->

- [TensorRT自定义算子](#tensorrt自定义算子)
  - [MMCVRoIAlign](#mmcvroialign)
    - [描述](#描述)
    - [模型参数](#模型参数)
    - [输入](#输入)
    - [输出](#输出)
    - [类型约束](#类型约束)
  - [ScatterND](#scatternd)
    - [描述](#描述-1)
    - [模型参数](#模型参数-1)
    - [输入](#输入-1)
    - [输出](#输出-1)
    - [类型约束](#类型约束-1)
  - [NonMaxSuppression](#nonmaxsuppression)
    - [描述](#描述-2)
    - [模型参数](#模型参数-2)
    - [输入](#输入-2)
    - [输出](#输出-2)
    - [类型约束](#类型约束-2)
  - [MMCVDeformConv2d](#mmcvdeformconv2d)
    - [描述](#描述-3)
    - [模型参数](#模型参数-3)
    - [输入](#输入-3)
    - [输出](#输出-3)
    - [类型约束](#类型约束-3)
  - [grid_sampler](#grid_sampler)
    - [描述](#描述-4)
    - [模型参数](#模型参数-4)
    - [输入](#输入-4)
    - [输出](#输出-4)
    - [类型约束](#类型约束-4)
  - [cummax](#cummax)
    - [描述](#描述-5)
    - [模型参数](#模型参数-5)
    - [输入](#输入-5)
    - [输出](#输出-5)
    - [类型约束](#类型约束-5)
  - [cummin](#cummin)
    - [描述](#描述-6)
    - [模型参数](#模型参数-6)
    - [输入](#输入-6)
    - [输出](#输出-6)
    - [类型约束](#类型约束-6)
  - [MMCVInstanceNormalization](#mmcvinstancenormalization)
    - [描述](#描述-7)
    - [模型参数](#模型参数-7)
    - [输入](#输入-7)
    - [输出](#输出-7)
    - [类型约束](#类型约束-7)
  - [MMCVModulatedDeformConv2d](#mmcvmodulateddeformconv2d)
    - [描述](#描述-8)
    - [模型参数](#模型参数-8)
    - [输入](#输入-8)
    - [输出](#输出-8)
    - [类型约束](#类型约束-8)

<!-- TOC -->

### MMCVRoIAlign

#### 描述

在特征图上计算RoIAlign，在多数双阶段目标检测模型的bbox_head中使用

#### 模型参数

| 类型    | 参数名           | 描述                                                    |
| ------- | ---------------- | ------------------------------------------------------- |
| `int`   | `output_height`  | roi特征的输出高度                                       |
| `int`   | `output_width`   | roi特征的输出宽度                                       |
| `float` | `spatial_scale`  | 输入检测框的缩放系数                                    |
| `int`   | `sampling_ratio` | 输出的采样率。`0`表示使用密集采样                       |
| `str`   | `mode`           | 池化方式。 `avg`或`max`                                 |
| `int`   | `aligned`        | 如果`aligned=1`，则像素会进行-0.5的偏移以达到更好的对齐 |

#### 输入

<dl>
<dt><tt>inputs[0]</tt>: T</dt>
<dd>输入特征图；形状为(N, C, H, W)的四维张量，其中N为batch大小，C为输入通道数，H和W为输入特征图的高和宽。</dd>
<dt><tt>inputs[1]</tt>: T</dt>
<dd>需要进行池化的感兴趣区域；形状为(num_rois, 5)的二维张量，内容为[[batch_index, x1, y1, x2, y2], ...]。rois的坐标为输入特征图的坐标系。</dd>
</dl>

#### 输出

<dl>
<dt><tt>outputs[0]</tt>: T</dt>
<dd>池化的输出；形状为(num_rois, C, output_height, output_width)的四维张量。每个输出特征feat[i]都与输入感兴趣区域rois[i]一一对应。</dd>
</dl>

#### 类型约束

- T:tensor(float32, Linear)

### ScatterND

#### 描述

ScatterND接收三个输入，分别为秩为r >= 1的`data`，秩为q >= 1的`indices`以及秩为 q + r - indices.shape[-1] -1 的`updates`。输出的计算方式为：首先创建一个`data`的拷贝，然后根据`indices`的值使用`updates`对拷贝的`data`进行更新。注意`indices`中不应该存在相同的条目，也就是说对同一个位置进行一次以上的更新是不允许的。

输出的计算方式可以参考如下代码：

```python
  output = np.copy(data)
  update_indices = indices.shape[:-1]
  for idx in np.ndindex(update_indices):
      output[indices[idx]] = updates[idx]
```

#### 模型参数

无

#### 输入

<dl>
<dt><tt>inputs[0]</tt>: T</dt>
<dd>秩为r >= 1的输入`data`</dd>

<dt><tt>inputs[1]</tt>: tensor(int32, Linear)</dt>
<dd>秩为q >= 1的输入`indices`</dd>

<dt><tt>inputs[2]</tt>: T</dt>
<dd>秩为 q + r - indices.shape[-1] -1 的输入`updates`</dd>
</dl>

#### 输出

<dl>
<dt><tt>outputs[0]</tt>: T</dt>
<dd>秩为r >= 1的输出张量</dd>
</dl>

#### 类型约束

- T:tensor(float32, Linear), tensor(int32, Linear)

### NonMaxSuppression

#### 描述

根据IoU阈值对候选框进行非极大值抑制。

#### 模型参数

| 类型    | 参数名                       | 描述                                                                                     |
| ------- | ---------------------------- | ---------------------------------------------------------------------------------------- |
| `int`   | `center_point_box`           | 0 - 候选框的格式为[y1, x1, y2, x2]， 1-候选框的格式为[x_center, y_center, width, height] |
| `int`   | `max_output_boxes_per_class` | 每一类最大的输出检测框个数。默认为0，输出检测框个数等于输入候选框数                      |
| `float` | `iou_threshold`              | 用来判断候选框重合度的阈值，取值范围[0, 1]。默认值为0                                    |
| `float` | `score_threshold`            | 用来判断候选框是否合法的阈值                                                             |
| `int`   | `offset`                     | 检测框长宽计算方式为(x2 - x1 + offset)，可选值0或1                                       |

#### 输入

<dl>
<dt><tt>inputs[0]</tt>: T</dt>
<dd>输入候选框。形状为(num_batches, spatial_dimension, 4)的三维张量</dd>
<dt><tt>inputs[1]</tt>: T</dt>
<dd>输入得分。形状为(num_batches, num_classes, spatial_dimension)的三维张量</dd>
</dl>

#### 输出

<dl>
<dt><tt>outputs[0]</tt>: tensor(int32, Linear)</dt>
<dd>被选中的候选框索引。形状为(num_selected_indices, 3)的二维张量。每一行内容为[batch_index, class_index, box_index]。</dd>
<dd>其中 num_selected_indices=num_batches* num_classes* min(max_output_boxes_per_class, spatial_dimension)。</dd>
<dd>所有未被选中的候选框索引都会被填充为-1</dd>
</dl>

#### 类型约束

- T:tensor(float32, Linear)

### MMCVDeformConv2d

#### 描述

在输入特征上计算Deformable Convolution，请阅读[Deformable Convolutional Network](https://arxiv.org/abs/1703.06211)了解更多细节。

#### 模型参数

| 类型           | 参数名             | 描述                                                                                          |
| -------------- | ------------------ | --------------------------------------------------------------------------------------------- |
| `list of ints` | `stride`           | 卷积的步长 (sH, sW)                                                                           |
| `list of ints` | `padding`          | 输入特征填充大小 (padH, padW)                                                                 |
| `list of ints` | `dilation`         | 卷积核各元素间隔 (dH, dW)                                                                     |
| `int`          | `deformable_group` | 可变偏移量的分组                                                                              |
| `int`          | `group`            | 卷积分组数，`input_channel`会根据这个值被分为数个分组进行计算                                 |
| `int`          | `im2col_step`      | 可变卷积使用im2col计算卷积。输入与偏移量会以im2col_step为步长分块计算，减少临时空间的使用量。 |

#### 输入

<dl>
<dt><tt>inputs[0]</tt>: T</dt>
<dd>输入特征；形状为(N, C, inH, inW)的四维张量，其中N为batch大小，C为输入通道数，inH和inW为输入特征图的高和宽</dd>
<dt><tt>inputs[1]</tt>: T</dt>
<dd>输入偏移量；形状为(N, deformable_group* 2* kH* kW, outH, outW)的四维张量，kH和kW为卷积核的高和宽，outH和outW为输出特征图的高和宽</dd>
<dt><tt>inputs[2]</tt>: T</dt>
<dd>输入权重；形状为(output_channel, input_channel, kH, kW)的四维张量</dd>
</dl>

#### 输出

<dl>
<dt><tt>outputs[0]</tt>: T</dt>
<dd>输出特征；形状为(N, output_channel, outH, outW)的四维张量</dd>
</dl>

#### 类型约束

- T:tensor(float32, Linear)

### grid_sampler

#### 描述

根据`grid`的像素位置对`input`进行网格采样。

#### 模型参数

| 类型  | 参数名               | 描述                                                                                                                                                 |
| ----- | -------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------- |
| `int` | `interpolation_mode` | 计算输出使用的插值模式。(0: `bilinear` , 1: `nearest`)                                                                                               |
| `int` | `padding_mode`       | 边缘填充模式。(0: `zeros`, 1: `border`, 2: `reflection`)                                                                                             |
| `int` | `align_corners`      | 如果`align_corners=1`，则极值(`-1`和`1`)会被当做输入边缘像素的中心点。如果`align_corners=0`，则它们会被看做是边缘像素的边缘点,减小分辨率对采样的影响 |

#### 输入

<dl>
<dt><tt>inputs[0]</tt>: T</dt>
<dd>输入特征；形状为(N, C, inH, inW)的四维张量，其中N为batch大小，C为输入通道数，inH和inW为输入特征图的高和宽</dd>
<dt><tt>inputs[1]</tt>: T</dt>
<dd>输入网格；形状为(N, outH, outW, 2)的四维张量，outH和outW为输出的高和宽 </dd>
</dl>

#### 输出

<dl>
<dt><tt>outputs[0]</tt>: T</dt>
<dd>输出特征；形状为(N, C, outH, outW)的四维张量</dd>
</dl>

#### 类型约束

- T:tensor(float32, Linear)

### cummax

#### 描述

返回一个元组(`values`, `indices`)，其中`values`为`input`第`dim`维的累计最大值，`indices`为第`dim`维最大值位置。请阅读[torch.cummax](https://pytorch.org/docs/stable/generated/torch.cummax.html)了解更多细节。

#### 模型参数

| 类型  | 参数名 | 描述               |
| ----- | ------ | ------------------ |
| `int` | `dim`  | 进行累计计算的维度 |

#### 输入

<dl>
<dt><tt>inputs[0]</tt>: T</dt>
<dd>输入张量；可以是任意形状</dd>
</dl>

#### 输出

<dl>
<dt><tt>outputs[0]</tt>: T</dt>
<dd>`input`第`dim`维的累计最大值，形状与`input`相同。类型和`input`一致</dd>
<dt><tt>outputs[1]</tt>: tensor(int32, Linear)</dt>
<dd>第`dim`维最大值位置，形状与`input`相同</dd>
</dl>

#### 类型约束

- T:tensor(float32, Linear)

### cummin

#### 描述

返回一个元组(`values`, `indices`)，其中`values`为`input`第`dim`维的累计最小值，`indices`为第`dim`维最小值位置。请阅读[torch.cummin](https://pytorch.org/docs/stable/generated/torch.cummin.html)了解更多细节。

#### 模型参数

| 类型  | 参数名 | 描述               |
| ----- | ------ | ------------------ |
| `int` | `dim`  | 进行累计计算的维度 |

#### 输入

<dl>
<dt><tt>inputs[0]</tt>: T</dt>
<dd>输入张量；可以是任意形状</dd>
</dl>

#### 输出

<dl>
<dt><tt>outputs[0]</tt>: T</dt>
<dd>`input`第`dim`维的累计最小值，形状与`input`相同。类型和`input`一致</dd>
<dt><tt>outputs[1]</tt>: tensor(int32, Linear)</dt>
<dd>第`dim`维最小值位置，形状与`input`相同</dd>
</dl>

#### 类型约束

- T:tensor(float32, Linear)

### MMCVInstanceNormalization

#### 描述

对特征计算instance normalization，请阅读[Instance Normalization: The Missing Ingredient for Fast Stylization](https://arxiv.org/abs/1607.08022)了解更多详细信息。

#### 模型参数

| 类型    | 参数名    | 描述                         |
| ------- | --------- | ---------------------------- |
| `float` | `epsilon` | 用来避免除0错误。默认为1e-05 |

#### 输入

<dl>
<dt><tt>inputs[0]</tt>: T</dt>
<dd>输入特征。形状为(N, C, H, W)的四维张量，其中N为batch大小，C为输入通道数，H和W为输入特征图的高和宽</dd>
<dt><tt>inputs[1]</tt>: T</dt>
<dd>输入缩放系数。形状为(C, )的一维张量</dd>
<dt><tt>inputs[2]</tt>: T</dt>
<dd>输入偏移量。形状为(C, )的一维张量</dd>
</dl>

#### 输出

<dl>
<dt><tt>outputs[0]</tt>: T</dt>
<dd>输出特征。形状为(N, C, H, W)的四维张量</dd>
</dl>

#### 类型约束

- T:tensor(float32, Linear)

### MMCVModulatedDeformConv2d

#### 描述

在输入特征上计算Modulated Deformable Convolution，请阅读[Deformable ConvNets v2: More Deformable, Better Results](https://arxiv.org/abs/1811.11168?from=timeline)了解更多细节。

#### 模型参数

| 类型           | 参数名              | 描述                                                          |
| -------------- | ------------------- | ------------------------------------------------------------- |
| `list of ints` | `stride`            | 卷积的步长 (sH, sW)                                           |
| `list of ints` | `padding`           | 输入特征填充大小 (padH, padW)                                 |
| `list of ints` | `dilation`          | 卷积核各元素间隔 (dH, dW)                                     |
| `int`          | `deformable_groups` | 可变偏移量的分组，通常置为1即可                               |
| `int`          | `groups`            | 卷积分组数，`input_channel`会根据这个值被分为数个分组进行计算 |

#### 输入

<dl>
<dt><tt>inputs[0]</tt>: T</dt>
<dd>输入特征；形状为(N, C, inH, inW)的四维张量，其中N为batch大小，C为输入通道数，inH和inW为输入特征图的高和宽</dd>
<dt><tt>inputs[1]</tt>: T</dt>
<dd>输入偏移量；形状为(N, deformable_group* 2* kH* kW, outH, outW)的四维张量，kH和kW为卷积核的高和宽，outH和outW为输出特征图的高和宽</dd>
<dt><tt>inputs[2]</tt>: T</dt>
<dd>输入掩码；形状为(N, deformable_group* kH* kW, outH, outW)的四维张量</dd>
<dt><tt>inputs[3]</tt>: T</dt>
<dd>输入权重；形状为(output_channel, input_channel, kH, kW)的四维张量</dd>
<dt><tt>inputs[4]</tt>: T, optional</dt>
<dd>输入偏置；形状为(output_channel)的一维张量</dd>
</dl>

#### 输出

<dl>
<dt><tt>outputs[0]</tt>: T</dt>
<dd>输出特征；形状为(N, output_channel, outH, outW)的四维张量</dd>
</dl>

#### 类型约束

- T:tensor(float32, Linear)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/zh_cn/deployment/tensorrt_plugin.md
================================================
## MMCV中的TensorRT自定义算子 (实验性)

<!-- TOC -->

- [MMCV中的TensorRT自定义算子 (实验性)](#mmcv中的tensorrt自定义算子-实验性)
  - [介绍](#介绍)
  - [MMCV中的TensorRT插件列表](#mmcv中的tensorrt插件列表)
  - [如何编译MMCV中的TensorRT插件](#如何编译mmcv中的tensorrt插件)
    - [准备](#准备)
    - [在Linux上编译](#在linux上编译)
  - [创建TensorRT推理引擎并在python下进行推理](#创建tensorrt推理引擎并在python下进行推理)
  - [如何在MMCV中添加新的TensorRT自定义算子](#如何在mmcv中添加新的tensorrt自定义算子)
    - [主要流程](#主要流程)
    - [注意](#注意)
  - [已知问题](#已知问题)
  - [引用](#引用)

<!-- TOC -->

### 介绍

**NVIDIA TensorRT**是一个为深度学习模型高性能推理准备的软件开发工具(SDK)。它包括深度学习推理优化器和运行时，可为深度学习推理应用提供低延迟和高吞吐量。请访问[developer's website](https://developer.nvidia.com/tensorrt)了解更多信息。
为了简化TensorRT部署带有MMCV自定义算子的模型的流程，MMCV中添加了一系列TensorRT插件。

### MMCV中的TensorRT插件列表

|         ONNX算子          |                                  TensorRT插件                                   | MMCV版本 |
| :-----------------------: | :-----------------------------------------------------------------------------: | :------: |
|       MMCVRoiAlign        |              [MMCVRoiAlign](./tensorrt_custom_ops.md#mmcvroialign)              |  1.2.6   |
|         ScatterND         |                 [ScatterND](./tensorrt_custom_ops.md#scatternd)                 |  1.2.6   |
|     NonMaxSuppression     |         [NonMaxSuppression](./tensorrt_custom_ops.md#nonmaxsuppression)         |  1.3.0   |
|     MMCVDeformConv2d      |          [MMCVDeformConv2d](./tensorrt_custom_ops.md#mmcvdeformconv2d)          |  1.3.0   |
|       grid_sampler        |              [grid_sampler](./tensorrt_custom_ops.md#grid_sampler)              |  1.3.1   |
|          cummax           |                    [cummax](./tensorrt_custom_ops.md#cummax)                    |  1.3.5   |
|          cummin           |                    [cummin](./tensorrt_custom_ops.md#cummin)                    |  1.3.5   |
| MMCVInstanceNormalization | [MMCVInstanceNormalization](./tensorrt_custom_ops.md#mmcvinstancenormalization) |  1.3.5   |
| MMCVModulatedDeformConv2d | [MMCVModulatedDeformConv2d](./tensorrt_custom_ops.md#mmcvmodulateddeformconv2d) |  master  |

注意

- 以上所有算子均在 TensorRT-7.2.1.6.Ubuntu-16.04.x86_64-gnu.cuda-10.2.cudnn8.0 环境下开发。

### 如何编译MMCV中的TensorRT插件

#### 准备

- 克隆代码仓库

```bash
git clone https://github.com/open-mmlab/mmcv.git
```

- 安装TensorRT

从 [NVIDIA Developer Zone](https://developer.nvidia.com/nvidia-tensorrt-download) 下载合适的TensorRT版本。

比如，对安装了cuda-10.2的x86-64的Ubuntu 16.04，下载文件为`TensorRT-7.2.1.6.Ubuntu-16.04.x86_64-gnu.cuda-10.2.cudnn8.0.tar.gz`.

然后使用下面方式安装并配置环境

```bash
cd ~/Downloads
tar -xvzf TensorRT-7.2.1.6.Ubuntu-16.04.x86_64-gnu.cuda-10.2.cudnn8.0.tar.gz
export TENSORRT_DIR=`pwd`/TensorRT-7.2.1.6
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$TENSORRT_DIR/lib
```

安装python依赖: tensorrt, graphsurgeon, onnx-graphsurgeon

```bash
pip install $TENSORRT_DIR/python/tensorrt-7.2.1.6-cp37-none-linux_x86_64.whl
pip install $TENSORRT_DIR/onnx_graphsurgeon/onnx_graphsurgeon-0.2.6-py2.py3-none-any.whl
pip install $TENSORRT_DIR/graphsurgeon/graphsurgeon-0.4.5-py2.py3-none-any.whl
```

想了解更多通过tar包安装TensorRT，请访问[Nvidia's website](https://docs.nvidia.com/deeplearning/tensorrt/archives/tensorrt-721/install-guide/index.html#installing-tar).

#### 在Linux上编译

```bash
cd mmcv ## to MMCV root directory
MMCV_WITH_OPS=1 MMCV_WITH_TRT=1 pip install -e .
```

### 创建TensorRT推理引擎并在python下进行推理

范例如下：

```python
import torch
import onnx

from mmcv.tensorrt import (TRTWrapper, onnx2trt, save_trt_engine,
                                   is_tensorrt_plugin_loaded)

assert is_tensorrt_plugin_loaded(), 'Requires to complie TensorRT plugins in mmcv'

onnx_file = 'sample.onnx'
trt_file = 'sample.trt'
onnx_model = onnx.load(onnx_file)

## Model input
inputs = torch.rand(1, 3, 224, 224).cuda()
## Model input shape info
opt_shape_dict = {
    'input': [list(inputs.shape),
              list(inputs.shape),
              list(inputs.shape)]
}

## Create TensorRT engine
max_workspace_size = 1 << 30
trt_engine = onnx2trt(
    onnx_model,
    opt_shape_dict,
    max_workspace_size=max_workspace_size)

## Save TensorRT engine
save_trt_engine(trt_engine, trt_file)

## Run inference with TensorRT
trt_model = TRTWrapper(trt_file, ['input'], ['output'])

with torch.no_grad():
    trt_outputs = trt_model({'input': inputs})
    output = trt_outputs['output']

```

### 如何在MMCV中添加新的TensorRT自定义算子

#### 主要流程

下面是主要的步骤：

1. 添加c++头文件
2. 添加c++源文件
3. 添加cuda kernel文件
4. 在`trt_plugin.cpp`中注册插件
5. 在`tests/test_ops/test_tensorrt.py`中添加单元测试

**以RoIAlign算子插件`roi_align`举例。**

1. 在TensorRT包含目录`mmcv/ops/csrc/tensorrt/`中添加头文件`trt_roi_align.hpp`
2. 在TensorRT源码目录`mmcv/ops/csrc/tensorrt/plugins/`中添加源文件`trt_roi_align.cpp`
3. 在TensorRT源码目录`mmcv/ops/csrc/tensorrt/plugins/`中添加cuda kernel文件`trt_roi_align_kernel.cu`
4. 在[trt_plugin.cpp](https://github.com/open-mmlab/mmcv/blob/master/mmcv/ops/csrc/tensorrt/plugins/trt_plugin.cpp)中注册`roi_align`插件

    ```c++
    #include "trt_plugin.hpp"

    #include "trt_roi_align.hpp"

    REGISTER_TENSORRT_PLUGIN(RoIAlignPluginDynamicCreator);

    extern "C" {
    bool initLibMMCVInferPlugins() { return true; }
    }  // extern "C"
    ```

5. 在`tests/test_ops/test_tensorrt.py`中添加单元测试

#### 注意

- 部分MMCV中的自定义算子存在对应的cuda实现，在进行TensorRT插件开发的时候可以参考。

### 已知问题

- 无

### 引用

- [Developer guide of Nvidia TensorRT](https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html)
- [TensorRT Open Source Software](https://github.com/NVIDIA/TensorRT)
- [onnx-tensorrt](https://github.com/onnx/onnx-tensorrt)
- [TensorRT python API](https://docs.nvidia.com/deeplearning/tensorrt/api/python_api/index.html)
- [TensorRT c++ plugin API](https://docs.nvidia.com/deeplearning/tensorrt/api/c_api/classnvinfer1_1_1_i_plugin.html)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/zh_cn/faq.md
================================================
## 常见问题

在这里我们列出了用户经常遇到的问题以及对应的解决方法。如果您遇到了其他常见的问题，并且知道可以帮到大家的解决办法，
欢迎随时丰富这个列表。

### 安装问题

- KeyError: "xxx: 'yyy is not in the zzz registry'"

    只有模块所在的文件被导入时，注册机制才会被触发，所以您需要在某处导入该文件，更多详情请查看 https://github.com/open-mmlab/mmdetection/issues/5974。

- "No module named 'mmcv.ops'"; "No module named 'mmcv._ext'"

    1. 使用 `pip uninstall mmcv` 卸载您环境中的 mmcv
    2. 参考 [installation instruction](https://mmcv.readthedocs.io/en/latest/get_started/installation.html) 或者 [Build MMCV from source](https://mmcv.readthedocs.io/en/latest/get_started/build.html) 安装 mmcv-full

- "invalid device function" 或者 "no kernel image is available for execution"

    1. 检查 GPU 的 CUDA 计算能力
    2. 运行  `python mmdet/utils/collect_env.py` 来检查 PyTorch、torchvision 和 MMCV 是否是针对正确的 GPU 架构构建的，您可能需要去设置 `TORCH_CUDA_ARCH_LIST` 来重新安装 MMCV。兼容性问题可能会出现在使用旧版的 GPUs，如：colab 上的 Tesla K80 (3.7)
    3. 检查运行环境是否和 mmcv/mmdet 编译时的环境相同。例如，您可能使用 CUDA 10.0 编译 mmcv，但在 CUDA 9.0 的环境中运行它

- "undefined symbol" 或者 "cannot open xxx.so"

    1. 如果符号和 CUDA/C++ 相关（例如：libcudart.so 或者 GLIBCXX），请检查 CUDA/GCC 运行时的版本是否和编译 mmcv 的一致
    2. 如果符号和 PyTorch 相关（例如：符号包含 caffe、aten 和 TH），请检查 PyTorch 运行时的版本是否和编译 mmcv 的一致
    3. 运行 `python mmdet/utils/collect_env.py` 以检查 PyTorch、torchvision 和 MMCV 构建和运行的环境是否相同

- "RuntimeError: CUDA error: invalid configuration argument"

    这个错误可能是由于您的 GPU 性能不佳造成的。尝试降低[THREADS_PER_BLOCK](https://github.com/open-mmlab/mmcv/blob/cac22f8cf5a904477e3b5461b1cc36856c2793da/mmcv/ops/csrc/common_cuda_helper.hpp#L10)
    的值并重新编译 mmcv。

- "RuntimeError: nms is not compiled with GPU support"

    这个错误是由于您的 CUDA 环境没有正确安装。
    您可以尝试重新安装您的 CUDA 环境，然后删除 mmcv/build 文件夹并重新编译 mmcv。

- "Segmentation fault"

    1. 检查 GCC 的版本，通常是因为 PyTorch 版本与 GCC 版本不匹配 （例如 GCC < 4.9 )，我们推荐用户使用 GCC 5.4，我们也不推荐使用 GCC 5.5， 因为有反馈 GCC 5.5 会导致 "segmentation fault" 并且切换到 GCC 5.4 就可以解决问题
    2. 检查是否正确安装 CUDA 版本的 PyTorch。输入以下命令并检查是否返回 True
        ```shell
        python -c 'import torch; print(torch.cuda.is_available())'
        ```
    3. 如果 `torch` 安装成功，那么检查 MMCV 是否安装成功。输入以下命令，如果没有报错说明 mmcv-full 安装成功。
        ```shell
        python -c 'import mmcv; import mmcv.ops'
        ```
    4. 如果 MMCV 与 PyTorch 都安装成功了，则可以使用 `ipdb` 设置断点或者使用 `print` 函数，分析是哪一部分的代码导致了 `segmentation fault`

- "libtorch_cuda_cu.so: cannot open shared object file"

    `mmcv-full` 依赖 `libtorch_cuda_cu.so` 文件，但程序运行时没能找到该文件。我们可以检查该文件是否存在于 `~/miniconda3/envs/{environment-name}/lib/python3.7/site-packages/torch/lib` 目录下，也可以尝试重装 PyTorch。

- "fatal error C1189: #error:  -- unsupported Microsoft Visual Studio version!"

  如果您在 Windows 上编译 mmcv-full 并且 CUDA 的版本是 9.2，您很可能会遇到这个问题 `"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.2\include\crt/host_config.h(133): fatal error C1189: #error:  -- unsupported Microsoft Visual Studio version! Only the versions 2012, 2013, 2015 and 2017 are supported!"`，您可以尝试使用低版本的 Microsoft Visual Studio，例如 vs2017。

- "error: member "torch::jit::detail::ModulePolicy::all_slots" may not be initialized"

  如果您在 Windows 上编译 mmcv-full 并且 PyTorch 的版本是 1.5.0，您很可能会遇到这个问题 `- torch/csrc/jit/api/module.h(474): error: member "torch::jit::detail::ModulePolicy::all_slots" may not be initialized`。解决这个问题的方法是将 `torch/csrc/jit/api/module.h` 文件中所有 `static constexpr bool all_slots = false;` 替换为 `static bool all_slots = false;`。更多细节可以查看 https://github.com/pytorch/pytorch/issues/39394。

- "error: a member with an in-class initializer must be const"

  如果您在 Windows 上编译 mmcv-full 并且 PyTorch 的版本是 1.6.0，您很可能会遇到这个问题 `"- torch/include\torch/csrc/jit/api/module.h(483): error: a member with an in-class initializer must be const"`. 解决这个问题的方法是将 `torch/include\torch/csrc/jit/api/module.h` 文件中的所有 `CONSTEXPR_EXCEPT_WIN_CUDA ` 替换为 `const`。更多细节可以查看 https://github.com/open-mmlab/mmcv/issues/575。

- "error: member "torch::jit::ProfileOptionalOp::Kind" may not be initialized"

  如果您在 Windows 上编译 mmcv-full 并且 PyTorch 的版本是 1.7.0，您很可能会遇到这个问题 `torch/include\torch/csrc/jit/ir/ir.h(1347): error: member "torch::jit::ProfileOptionalOp::Kind" may not be initialized`. 解决这个问题的方法是修改 PyTorch 中的几个文件：

  - 删除 `torch/include\torch/csrc/jit/ir/ir.h` 文件中的 `static constexpr Symbol Kind = ::c10::prim::profile;` 和 `static constexpr Symbol Kind = ::c10::prim::profile_optional;`
  - 将 `torch\include\pybind11\cast.h` 文件中的 `explicit operator type&() { return *(this->value); }` 替换为 `explicit operator type&() { return *((type*)this->value); }`
  - 将 `torch/include\torch/csrc/jit/api/module.h` 文件中的 所有 `CONSTEXPR_EXCEPT_WIN_CUDA` 替换为 `const`

  更多细节可以查看 https://github.com/pytorch/pytorch/pull/45956。

- MMCV 和 MMDetection 的兼容性问题；"ConvWS is already registered in conv layer"

  请参考 [installation instruction](https://mmdetection.readthedocs.io/en/latest/get_started.html#installation) 为您的 MMDetection 版本安装正确版本的 MMCV。

### 使用问题

- "RuntimeError: Expected to have finished reduction in the prior iteration before starting a new one"

    1. 这个错误是因为有些参数没有参与 loss 的计算，可能是代码中存在多个分支，导致有些分支没有参与 loss 的计算。更多细节见 https://github.com/pytorch/pytorch/issues/55582。
    2. 你可以设置 DDP 中的 `find_unused_parameters` 为 `True`，或者手动查找哪些参数没有用到。

- "RuntimeError: Trying to backward through the graph a second time"

    不能同时设置 `GradientCumulativeOptimizerHook` 和 `OptimizerHook`，这会导致 `loss.backward()` 被调用两次，于是程序抛出 `RuntimeError`。我们只需设置其中的一个。更多细节见 https://github.com/open-mmlab/mmcv/issues/1379。


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/zh_cn/get_started/build.md
================================================
## 从源码编译 MMCV

### 在 Linux 或者 macOS 上编译 MMCV

克隆算法库

```bash
git clone https://github.com/open-mmlab/mmcv.git
cd mmcv
```

建议安装 `ninja` 以加快编译速度

```bash
pip install -r requirements/optional.txt
```

你可以安装 lite 版本

```bash
pip install -e .
```

也可以安装 full 版本

```bash
MMCV_WITH_OPS=1 pip install -e .
```

如果是在 macOS 上编译，则需要在安装命令前添加一些环境变量

```bash
CC=clang CXX=clang++ CFLAGS='-stdlib=libc++'
```

例如

```bash
CC=clang CXX=clang++ CFLAGS='-stdlib=libc++' MMCV_WITH_OPS=1 pip install -e .
```

```{note}
如果你打算使用 `opencv-python-headless` 而不是 `opencv-python`，例如在一个很小的容器环境或者没有图形用户界面的服务器中，你可以先安装 `opencv-python-headless`，这样在安装 mmcv 依赖的过程中会跳过 `opencv-python`
```
### 在 Windows 上编译 MMCV

在 Windows 上编译 MMCV 比 Linux 复杂，本节将一步步介绍如何在 Windows 上编译 MMCV。

#### 依赖项

请首先安装以下的依赖项：

- [Git](https://git-scm.com/download/win)：安装期间，请选择 **add git to Path**
- [Visual Studio Community 2019](https://visualstudio.microsoft.com)：用于编译 C++ 和 CUDA 代码
- [Miniconda](https://docs.conda.io/en/latest/miniconda.html)：包管理工具
- [CUDA 10.2](https://developer.nvidia.com/cuda-10.2-download-archive)：如果只需要 CPU 版本可以不安装 CUDA，安装CUDA时，可根据需要进行自定义安装。如果已经安装新版本的显卡驱动，建议取消驱动程序的安装

```{note}
您需要知道如何在 Windows 上设置环境变量，尤其是 "PATH" 的设置，以下安装过程都会用到。
```

#### 设置 Python 环境

1. 从 Windows 菜单启动 Anaconda 命令行

```{note}
如 Miniconda 安装程序建议，不要使用原始的 `cmd.exe` 或是 `powershell.exe`。命令行有两个版本，一个基于 PowerShell，一个基于传统的 `cmd.exe`。请注意以下说明都是使用的基于 PowerShell
```

2. 创建一个新的 Conda 环境

    ```shell
    conda create --name mmcv python=3.7  # 经测试，3.6, 3.7, 3.8 也能通过
    conda activate mmcv  # 确保做任何操作前先激活环境
    ```

3. 安装 PyTorch 时，可以根据需要安装支持 CUDA 或不支持 CUDA 的版本

    ```shell
    # CUDA version
    conda install pytorch torchvision cudatoolkit=10.2 -c pytorch
    # CPU version
    conda install pytorch torchvision cpuonly -c pytorch
    ```

4. 准备 MMCV 源代码

    ```shell
    git clone https://github.com/open-mmlab/mmcv.git
    cd mmcv
    ```

5. 安装所需 Python 依赖包

    ```shell
    pip3 install -r requirements/runtime.txt
    ```

6. 建议安装 `ninja` 以加快编译速度

    ```bash
    pip install -r requirements/optional.txt
    ```

#### 编译与安装 MMCV

MMCV 有三种安装的模式：

1. Lite 版本（不包含算子）

    这种方式下，没有算子被编译，这种模式的 mmcv 是原生的 python 包

2. Full 版本（只包含 CPU 算子）

    编译 CPU 算子，但只有 x86 将会被编译，并且编译版本只能在 CPU only 情况下运行

3. Full 版本（既包含 CPU 算子，又包含 CUDA 算子）

    同时编译 CPU 和 CUDA 算子，`ops` 模块的 x86 与 CUDA 的代码都可以被编译。同时编译的版本可以在 CUDA 上调用 GPU

##### 通用步骤

1. 设置 MSVC 编译器

    设置环境变量。添加 `C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Tools\MSVC\14.27.29110\bin\Hostx86\x64` 到 `PATH`，则 `cl.exe` 可以在命令行中运行，如下所示。

    ```none
    (base) PS C:\Users\xxx> cl
    Microsoft (R) C/C++ Optimizing  Compiler Version 19.27.29111 for x64
    Copyright (C) Microsoft Corporation.   All rights reserved.

    usage: cl [ option... ] filename... [ / link linkoption... ]
    ```

    为了兼容性，我们使用 x86-hosted 以及 x64-targeted 版本，即路径中的 `Hostx86\x64` 。

    因为 PyTorch 将解析 `cl.exe` 的输出以检查其版本，只有 utf-8 将会被识别，你可能需要将系统语言更改为英语。控制面板 -> 地区-> 管理-> 非 Unicode 来进行语言转换。

##### 安装方式一：Lite version（不包含算子）

在完成上述的公共步骤后，从菜单打开 Anaconda 命令框，输入以下命令

```shell
# 激活环境
conda activate mmcv
# 切换到 mmcv 根目录
cd mmcv
# 安装
python setup.py develop
# 检查是否安装成功
pip list
```

##### 安装方式二：Full version（只编译 CPU 算子）

1. 完成上述的公共步骤

2. 设置环境变量

    ```shell
    $env:MMCV_WITH_OPS = 1
    $env:MAX_JOBS = 8  # 根据你可用CPU以及内存量进行设置
    ```

3. 编译安装

    ```shell
    conda activate mmcv  # 激活环境
    cd mmcv  # 改变路径
    python setup.py build_ext  # 如果成功, cl 将被启动用于编译算子
    python setup.py develop  # 安装
    pip list  # 检查是否安装成功
    ```

##### 安装方式三：Full version（既编译 CPU 算子又编译 CUDA 算子）

1. 完成上述的公共步骤

2. 设置环境变量

    ```shell
    $env:MMCV_WITH_OPS = 1
    $env:MAX_JOBS = 8  # 根据你可用CPU以及内存量进行设置
    ```

3. 检查 `CUDA_PATH` 或者 `CUDA_HOME` 环境变量是否已经存在于 `envs` 之中

    ```none
    (base) PS C:\Users\WRH> ls env:

    Name                           Value
    ----                           -----
    CUDA_PATH                      C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2
    CUDA_PATH_V10_1                C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.1
    CUDA_PATH_V10_2                C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2
    ```

    如果没有，你可以按照下面的步骤设置

    ```shell
    $env:CUDA_HOME = "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2"
    # 或者
    $env:CUDA_HOME = $env:CUDA_PATH_V10_2  # CUDA_PATH_V10_2 已经在环境变量中
    ```

4. 设置 CUDA 的目标架构

    ```shell
    $env:TORCH_CUDA_ARCH_LIST="6.1" # 支持 GTX 1080
    # 或者用所有支持的版本，但可能会变得很慢
    $env:TORCH_CUDA_ARCH_LIST="3.5 3.7 5.0 5.2 6.0 6.1 7.0 7.5"
    ```

```{note}
我们可以在[这里](https://developer.nvidia.com/cuda-gpus)查看 GPU 的计算能力
```

5. 编译安装

    ```shell
    $env:MMCV_WITH_OPS = 1
    $env:MAX_JOBS = 8 # 根据你可用CPU以及内存量进行设置
    conda activate mmcv # 激活环境
    cd mmcv  # 改变路径
    python setup.py build_ext  # 如果成功, cl 将被启动用于编译算子
    python setup.py develop # 安装
    pip list # 检查是否安装成功
    ```

```{note}
如果你的 PyTorch 版本是 1.6.0，你可能会遇到一些这个 [issue](https://github.com/pytorch/pytorch/issues/42467) 提到的错误，则可以参考这个 [pull request](https://github.com/pytorch/pytorch/pull/43380/files) 修改 本地环境的 PyTorch 源代码
```

如果编译安装 mmcv 的过程中遇到了问题，你也许可以在 [Frequently Asked Question](../faq.html) 找到解决方法


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/zh_cn/get_started/installation.md
================================================
## 安装 MMCV

MMCV 有两个版本：

- **mmcv-full**: 完整版，包含所有的特性以及丰富的开箱即用的 CUDA 算子。注意完整版本可能需要更长时间来编译。
- **mmcv**: 精简版，不包含 CUDA 算子但包含其余所有特性和功能，类似 MMCV 1.0 之前的版本。如果你不需要使用 CUDA 算子的话，精简版可以作为一个考虑选项。

```{warning}
请不要在同一个环境中安装两个版本，否则可能会遇到类似 `ModuleNotFound` 的错误。在安装一个版本之前，需要先卸载另一个。`如果CUDA可用，强烈推荐安装mmcv-full`。
```

a. 安装完整版

在安装 mmcv-full 之前，请确保 PyTorch 已经成功安装在环境中，可以参考 PyTorch 官方[文档](https://pytorch.org/)。

我们提供了不同 PyTorch 和 CUDA 版本的 mmcv-full 预编译包，可以大大简化用户安装编译过程。强烈推荐通过预编译包来安装。另外，安装完成后可以运行 [check_installation.py](https://github.com/open-mmlab/mmcv/blob/master/.dev_scripts/check_installation.py) 脚本检查 mmcv-full 是否安装成功。

i. 安装最新版本

如下是安装最新版 ``mmcv-full`` 的命令

```shell
pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/{cu_version}/{torch_version}/index.html
```

请将链接中的 ``{cu_version}`` 和 ``{torch_version}`` 根据自身需求替换成实际的版本号，例如想安装和 ``CUDA 11.1``、``PyTorch 1.9.0`` 兼容的最新版 ``mmcv-full``，使用如下替换过的命令

```shell
pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.9.0/index.html
```

```{note}
PyTorch 在 1.x.0 和 1.x.1 之间通常是兼容的，故 mmcv-full 只提供 1.x.0 的编译包。如果你
的 PyTorch 版本是 1.x.1，你可以放心地安装在 1.x.0 版本编译的 mmcv-full。例如，如果你的
PyTorch 版本是 1.8.1、CUDA 版本是 11.1，你可以使用以下命令安装 mmcv-full。

`pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.8.0/index.html`
```

如果想知道更多 CUDA 和 PyTorch 版本的命令，可以参考下面的表格，将命令中的 ``=={mmcv_version}`` 删去即可。

ii. 安装特定的版本

如下是安装特定版本 ``mmcv-full`` 的命令

```shell
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/{cu_version}/{torch_version}/index.html
```

首先请参考版本发布信息找到想要安装的版本号，将 ``{mmcv_version}`` 替换成该版本号，例如 ``1.3.9``。
然后将链接中的 ``{cu_version}`` 和 ``{torch_version}`` 根据自身需求替换成实际的版本号，例如想安装和 ``CUDA 11.1``、``PyTorch 1.9.0`` 兼容的 ``mmcv-full`` 1.3.9 版本，使用如下替换过的命令

```shell
pip install mmcv-full==1.3.9 -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.9.0/index.html
```

对于更多的 PyTorch 和 CUDA 版本组合，请参考下表：

<table class="docutils">
  <tbody>
    <tr>
      <th width="80"> CUDA </th>
      <th valign="bottom" align="left" style="min-width: 100px">torch 1.10</th>
      <th valign="bottom" align="left" style="min-width: 100px">torch 1.9</th>
      <th valign="bottom" align="left" style="min-width: 100px">torch 1.8</th>
      <th valign="bottom" align="left" style="min-width: 100px">torch 1.7</th>
      <th valign="bottom" align="left" style="min-width: 100px">torch 1.6</th>
      <th valign="bottom" align="left" style="min-width: 100px">torch 1.5</th>
    </tr>
    <tr>
      <td align="left">11.3</td>
      <td align="left"><details><summary> 安装 </summary><pre><code>pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu113/torch1.10.0/index.html</code></pre> </details> </td>
      <td align="left"> </td>
      <td align="left"> </td>
      <td align="left"> </td>
      <td align="left"> </td>
      <td align="left"> </td>
    </tr>
    <tr>
      <td align="left">11.1</td>
      <td align="left"><details><summary> 安装 </summary><pre><code>pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.10.0/index.html</code></pre> </details> </td>
      <td align="left"><details><summary> 安装 </summary><pre><code>pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.9.0/index.html</code></pre> </details> </td>
      <td align="left"><details><summary> 安装 </summary><pre><code>pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.8.0/index.html</code></pre> </details> </td>
      <td align="left"> </td>
      <td align="left"> </td>
      <td align="left"> </td>
    </tr>
    <tr>
      <td align="left">11.0</td>
      <td align="left"> </td>
      <td align="left"> </td>
      <td align="left"> </td>
      <td align="left"><details><summary> 安装 </summary><pre><code>pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu110/torch1.7.0/index.html</code></pre> </details> </td>
      <td align="left"> </td>
      <td align="left"> </td>
    </tr>
    <tr>
      <td align="left">10.2</td>
      <td align="left"><details><summary> 安装 </summary><pre><code>pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.10.0/index.html</code></pre> </details> </td>
      <td align="left"><details><summary> 安装 </summary><pre><code>pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.9.0/index.html</code></pre> </details> </td>
      <td align="left"><details><summary> 安装 </summary><pre><code>pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.8.0/index.html</code></pre> </details> </td>
      <td align="left"><details><summary> 安装 </summary><pre><code>pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.7.0/index.html</code></pre> </details> </td>
      <td align="left"><details><summary> 安装 </summary><pre><code>pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.6.0/index.html</code></pre> </details> </td>
      <td align="left"><details><summary> 安装 </summary><pre><code>pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.5.0/index.html</code></pre> </details> </td>
    </tr>
    <tr>
      <td align="left">10.1</td>
      <td align="left"> </td>
      <td align="left"> </td>
      <td align="left"><details><summary> 安装 </summary><pre><code> pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu101/torch1.8.0/index.html</code></pre> </details> </td>
      <td align="left"><details><summary> 安装 </summary><pre><code> pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu101/torch1.7.0/index.html</code></pre> </details> </td>
      <td align="left"><details><summary> 安装 </summary><pre><code> pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu101/torch1.6.0/index.html</code></pre> </details> </td>
      <td align="left"><details><summary> 安装 </summary><pre><code> pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu101/torch1.5.0/index.html</code></pre> </details> </td>
    </tr>
    <tr>
      <td align="left">9.2</td>
      <td align="left"> </td>
      <td align="left"> </td>
      <td align="left"> </td>
      <td align="left"><details><summary> 安装 </summary><pre><code> pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu92/torch1.7.0/index.html</code></pre> </details> </td>
      <td align="left"><details><summary> 安装 </summary><pre><code> pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu92/torch1.6.0/index.html</code></pre> </details> </td>
      <td align="left"><details><summary> 安装 </summary><pre><code> pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu92/torch1.5.0/index.html</code></pre> </details> </td>
    </tr>
    <tr>
      <td align="left">cpu</td>
      <td align="left"><details><summary> 安装 </summary><pre><code> pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.10.0/index.html</code></pre> </details> </td>
      <td align="left"><details><summary> 安装 </summary><pre><code> pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.9.0/index.html</code></pre> </details> </td>
       <td align="left"><details><summary> 安装 </summary><pre><code> pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.8.0/index.html</code></pre> </details> </td>
      <td align="left"><details><summary> 安装 </summary><pre><code> pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.7.0/index.html</code></pre> </details> </td>
      <td align="left"><details><summary> 安装 </summary><pre><code> pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.6.0/index.html</code></pre> </details> </td>
      <td align="left"><details><summary> 安装 </summary><pre><code> pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.5.0/index.html</code></pre> </details> </td>
    </tr>
  </tbody>
</table>

```{note}
以上提供的预编译包并不囊括所有的 mmcv-full 版本，我们可以点击对应链接查看支持的版本。例如，点击 [cu102-torch1.8.0](https://download.openmmlab.com/mmcv/dist/cu102/torch1.8.0/index.html)，可以看到 `cu102-torch1.8.0` 只提供了 1.3.0 及以上的 mmcv-full 版本。另外，从 `mmcv v1.3.17` 开始，我们不再提供 `PyTorch 1.3 & 1.4` 对应的 mmcv-full 预编译包。你可以在[这里](./previous_versions.md)找到 `PyTorch 1.3 & 1.4` 对应的预编译包。虽然我们不再提供 `PyTorch 1.3 & 1.4` 对应的预编译包，但是我们依然在 CI 中保证对它们的兼容持续到下一年。
```

除了使用预编译包之外，另一种方式是在本地进行编译，直接运行下述命令

```shell
pip install mmcv-full
```

但注意本地编译可能会耗时 10 分钟以上。

b. 安装精简版

```shell
pip install mmcv
```

c. 安装完整版并且编译 onnxruntime 的自定义算子

- 详细的指南请查看 [这里](https://mmcv.readthedocs.io/zh_CN/latest/deployment/onnxruntime_custom_ops.html)。

如果想从源码编译 MMCV，请参考[该文档](https://mmcv.readthedocs.io/zh_CN/latest/get_started/build.html)。


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/zh_cn/get_started/introduction.md
================================================
## 介绍 MMCV

MMCV 是一个面向计算机视觉的基础库，它支持了很多开源项目，例如：

- [MIM](https://github.com/open-mmlab/mim): OpenMMLab 项目、算法、模型的统一入口
- [MMClassification](https://github.com/open-mmlab/mmclassification): OpenMMLab 图像分类工具箱与测试基准
- [MMDetection](https://github.com/open-mmlab/mmdetection): OpenMMLab 检测工具箱与测试基准
- [MMDetection3D](https://github.com/open-mmlab/mmdetection3d): OpenMMLab 新一代通用3D目标检测平台
- [MMSegmentation](https://github.com/open-mmlab/mmsegmentation): OpenMMLab 语义分割工具箱与测试基准
- [MMAction2](https://github.com/open-mmlab/mmaction2): OpenMMLab 新一代视频理解工具箱与测试基准
- [MMTracking](https://github.com/open-mmlab/mmtracking): OpenMMLab 一体化视频目标感知平台
- [MMPose](https://github.com/open-mmlab/mmpose): OpenMMLab 姿态估计工具箱与测试基准
- [MMEditing](https://github.com/open-mmlab/mmediting): OpenMMLab 图像视频编辑工具箱
- [MMOCR](https://github.com/open-mmlab/mmocr): OpenMMLab 全流程文字检测识别理解工具包
- [MMGeneration](https://github.com/open-mmlab/mmgeneration): OpenMMLab 新一代生成模型工具箱
- [MMFlow](https://github.com/open-mmlab/mmflow): OpenMMLab 光流估计工具箱与测试基准
- [MMFewShot](https://github.com/open-mmlab/mmfewshot): OpenMMLab 少样本学习工具箱与测试基准
- [MMHuman3D](https://github.com/open-mmlab/mmhuman3d): OpenMMLab 人体参数化模型工具箱与测试基准
- [MMSelfSup](https://github.com/open-mmlab/mmselfsup): OpenMMLab 自监督学习工具箱与测试基准
- [MMRazor](https://github.com/open-mmlab/mmrazor): OpenMMLab 模型压缩工具箱与测试基准
- [MMDeploy](https://github.com/open-mmlab/mmdeploy): OpenMMLab 模型部署框架

MMCV 提供了如下众多功能：

- 通用的 IO 接口
- 图像和视频处理
- 图像和标注结果可视化
- 常用小工具（进度条，计时器等）
- 基于 PyTorch 的通用训练框架
- 多种 CNN 网络结构
- 高质量实现的常见 CUDA 算子

如想了解更多特性和使用，请参考[文档](https://mmcv.readthedocs.io/zh_CN/latest)。

```{note}
MMCV 需要 Python 3.6 以上版本。
```


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/zh_cn/get_started/previous_versions.md
================================================

## 其他版本的 PyTorch

我们不再提供在较低的 `PyTorch` 版本下编译的 `mmcv-full` 包，但为了您的方便，您可以在下面找到它们。

### PyTorch 1.4

| 1.0.0 <= mmcv_version <= 1.2.1

#### CUDA 10.1

```bash
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu101/torch1.4.0/index.html
```

#### CUDA 9.2

```bash
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu92/torch1.4.0/index.html
```

#### CPU

```bash
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.4.0/index.html
```

### PyTorch v1.3

| 1.0.0 <= mmcv_version <= 1.3.16

#### CUDA 10.1

```bash
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu101/torch1.3.0/index.html
```

#### CUDA 9.2

```bash
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu92/torch1.3.0/index.html
```

#### CPU

```bash
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.3.0/index.html
```


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/zh_cn/index.rst
================================================
欢迎来到 MMCV 的中文文档！
=============================

您可以在页面左下角切换中英文文档。

.. toctree::
   :maxdepth: 2
   :caption: 介绍与安装

   get_started/introduction.md
   get_started/installation.md
   get_started/build.md

.. toctree::
   :maxdepth: 2
   :caption: 深入理解 MMCV

   understand_mmcv/config.md
   understand_mmcv/registry.md
   understand_mmcv/runner.md
   understand_mmcv/io.md
   understand_mmcv/data_process.md
   understand_mmcv/visualization.md
   understand_mmcv/cnn.md
   understand_mmcv/ops.md
   understand_mmcv/utils.md

.. toctree::
   :maxdepth: 2
   :caption: 部署

   deployment/onnx.md
   deployment/onnxruntime_op.md
   deployment/onnxruntime_custom_ops.md
   deployment/tensorrt_plugin.md
   deployment/tensorrt_custom_ops.md

.. toctree::
   :maxdepth: 2
   :caption: 兼容性

   compatibility.md

.. toctree::
   :maxdepth: 2
   :caption: 常见问题

   faq.md

.. toctree::
   :maxdepth: 2
   :caption: 社区

   community/contributing.md
   community/pr.md

.. toctree::
   :maxdepth: 2
   :caption: API 文档

   api.rst


Indices and tables
==================

* :ref:`genindex`
* :ref:`search`


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/zh_cn/make.bat
================================================
@ECHO OFF

REM Switch to the directory that holds this script so the relative
REM SOURCEDIR and BUILDDIR values below resolve against it.
REM The matching popd is at the :end label.
pushd %~dp0

REM Command file for Sphinx documentation

REM Use "sphinx-build" unless the caller already set SPHINXBUILD.
if "%SPHINXBUILD%" == "" (
	set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=.
set BUILDDIR=_build

REM Without a target argument, jump to the help output instead of building.
if "%1" == "" goto help

REM Probe run with all output discarded; only the resulting errorlevel is used.
REM An errorlevel of 9009 or above is reported as sphinx-build not being found.
REM NOTE(review): the exit inside the block below leaves the script without
REM reaching popd at :end - confirm whether that is intended.
%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
	echo.
	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
	echo.installed, then set the SPHINXBUILD environment variable to point
	echo.to the full path of the 'sphinx-build' executable. Alternatively you
	echo.may add the Sphinx directory to PATH.
	echo.
	echo.If you don't have Sphinx installed, grab it from
	echo.http://sphinx-doc.org/
	exit /b 1
)

REM Hand the requested target, i.e. the first argument, to sphinx-build -M
REM together with the source dir, build dir and any extra SPHINXOPTS.
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
goto end

REM Reached when no target was given: print the targets sphinx-build supports.
:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%

REM Common exit path: restore the directory saved by pushd.
:end
popd


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/zh_cn/understand_mmcv/cnn.md
================================================
## 卷积神经网络

我们为卷积神经网络提供了一些构建模块，包括层构建、模块组件和权重初始化。

### 网络层的构建

在运行实验时，我们可能需要尝试同属一种类型但不同配置的层，但又不希望每次都修改代码。于是我们提供一些层构建方法，可以从字典构建层，字典可以在配置文件中配置，也可以通过命令行参数指定。

#### 用法

一个简单的例子：

```python
cfg = dict(type='Conv3d')
layer = build_conv_layer(cfg, in_channels=3, out_channels=8, kernel_size=3)
```

- `build_conv_layer`: 支持的类型包括 Conv1d、Conv2d、Conv3d、Conv（Conv是Conv2d的别名）
- `build_norm_layer`: 支持的类型包括 BN1d、BN2d、BN3d、BN（BN是BN2d的别名）、SyncBN、GN、LN、IN1d、IN2d、IN3d、IN（IN是IN2d的别名）
- `build_activation_layer`：支持的类型包括 ReLU、LeakyReLU、PReLU、RReLU、ReLU6、ELU、Sigmoid、Tanh、GELU
- `build_upsample_layer`: 支持的类型包括 nearest、bilinear、deconv、pixel_shuffle
- `build_padding_layer`: 支持的类型包括 zero、reflect、replicate

#### 拓展

我们还允许自定义层和算子来扩展构建方法。

1. 编写和注册自己的模块：

    ```python
    from mmcv.cnn import UPSAMPLE_LAYERS

    @UPSAMPLE_LAYERS.register_module()
    class MyUpsample:

        def __init__(self, scale_factor):
            pass

        def forward(self, x):
            pass
    ```

2. 在某处导入 `MyUpsample` （例如 `__init__.py` ）然后使用它：

    ```python
    cfg = dict(type='MyUpsample', scale_factor=2)
    layer = build_upsample_layer(cfg)
    ```

### 模块组件

我们还提供了常用的模块组件，以方便网络构建。
卷积组件 `ConvModule` 由 convolution、normalization以及activation layers 组成，更多细节请参考 [ConvModule api](api.html#mmcv.cnn.ConvModule)。

```python
# conv + bn + relu
conv = ConvModule(3, 8, 2, norm_cfg=dict(type='BN'))
# conv + gn + relu
conv = ConvModule(3, 8, 2, norm_cfg=dict(type='GN', num_groups=2))
# conv + relu
conv = ConvModule(3, 8, 2)
# conv
conv = ConvModule(3, 8, 2, act_cfg=None)
# conv + leaky relu
conv = ConvModule(3, 8, 3, padding=1, act_cfg=dict(type='LeakyReLU'))
# bn + conv + relu
conv = ConvModule(
    3, 8, 2, norm_cfg=dict(type='BN'), order=('norm', 'conv', 'act'))
```

### Weight initialization

> 实现细节可以在 [mmcv/cnn/utils/weight_init.py](../../mmcv/cnn/utils/weight_init.py)中找到

在训练过程中，适当的初始化策略有利于加快训练速度或者获得更高的性能。 在MMCV中，我们提供了一些常用的方法来初始化模块，比如 `nn.Conv2d` 模块。当然，我们也提供了一些高级API，可用于初始化包含一个或多个模块的模型。

#### Initialization functions

以函数的方式初始化 `nn.Module` ，例如 `nn.Conv2d` 、 `nn.Linear` 等。

我们提供以下初始化方法，

- constant_init

  使用给定常量值初始化模型参数

    ```python
    >>> import torch.nn as nn
    >>> from mmcv.cnn import constant_init
    >>> conv1 = nn.Conv2d(3, 3, 1)
    >>> # constant_init(module, val, bias=0)
    >>> constant_init(conv1, 1, 0)
    >>> conv1.weight
    ```

- xavier_init

   按照 [Understanding the difficulty of training deep feedforward neural networks - Glorot, X. & Bengio, Y. (2010)](http://proceedings.mlr.press/v9/glorot10a/glorot10a.pdf) 描述的方法初始化模型参数

    ```python
    >>> import torch.nn as nn
    >>> from mmcv.cnn import xavier_init
    >>> conv1 = nn.Conv2d(3, 3, 1)
    >>> # xavier_init(module, gain=1, bias=0, distribution='normal')
    >>> xavier_init(conv1, distribution='normal')
    ```

- normal_init

  使用正态分布（高斯分布）初始化模型参数

    ```python
    >>> import torch.nn as nn
    >>> from mmcv.cnn import normal_init
    >>> conv1 = nn.Conv2d(3, 3, 1)
    >>> # normal_init(module, mean=0, std=1, bias=0)
    >>> normal_init(conv1, std=0.01, bias=0)
    ```

- uniform_init

  使用均匀分布初始化模型参数

    ```python
    >>> import torch.nn as nn
    >>> from mmcv.cnn import uniform_init
    >>> conv1 = nn.Conv2d(3, 3, 1)
    >>> # uniform_init(module, a=0, b=1, bias=0)
    >>> uniform_init(conv1, a=0, b=1)
    ```

- kaiming_init

   按照 [Delving deep into rectifiers: Surpassing human-level performance on ImageNet classification - He, K. et al. (2015)](https://www.cv-foundation.org/openaccess/content_iccv_2015/papers/He_Delving_Deep_into_ICCV_2015_paper.pdf) 描述的方法来初始化模型参数。

    ```python
    >>> import torch.nn as nn
    >>> from mmcv.cnn import kaiming_init
    >>> conv1 = nn.Conv2d(3, 3, 1)
    >>> # kaiming_init(module, a=0, mode='fan_out', nonlinearity='relu', bias=0, distribution='normal')
    >>> kaiming_init(conv1)
    ```

- caffe2_xavier_init

  caffe2中实现的 `xavier initialization`，对应于 PyTorch中的 `kaiming_uniform_`

    ```python
    >>> import torch.nn as nn
    >>> from mmcv.cnn import caffe2_xavier_init
    >>> conv1 = nn.Conv2d(3, 3, 1)
    >>> # caffe2_xavier_init(module, bias=0)
    >>> caffe2_xavier_init(conv1)
    ```

- bias_init_with_prob

  根据给定的概率计算 `conv/fc` 偏置（bias）的初始值, 这在 [Focal Loss for Dense Object Detection](https://arxiv.org/pdf/1708.02002.pdf) 提出。

    ```python
    >>> from mmcv.cnn import bias_init_with_prob
    >>> # bias_init_with_prob is proposed in Focal Loss
    >>> bias = bias_init_with_prob(0.01)
    >>> bias
    -4.59511985013459
    ```

#### Initializers and configs

在初始化方法的基础上，我们定义了相应的初始化类，并将它们注册到 `INITIALIZERS` 中，这样我们就可以使用 `config` 配置来初始化模型了。

我们提供以下初始化类：

- ConstantInit
- XavierInit
- NormalInit
- UniformInit
- KaimingInit
- Caffe2XavierInit
- PretrainedInit

接下来详细介绍 `initialize` 的使用方法

1. 通过关键字 `layer` 来初始化模型

    如果我们只定义了关键字 `layer` ，那么只初始化 `layer` 中包含的层。

    注意: 关键字 `layer` 支持的模块是带有 weights 和 bias 属性的 PyTorch 模块，所以不支持 `MultiheadAttention layer`

- 定义关键字 `layer` 列表并使用相同配置初始化模块

  ```python
  import torch.nn as nn
  from mmcv.cnn import initialize

  class FooNet(nn.Module):
      def __init__(self):
          super().__init__()
          self.feat = nn.Conv1d(3, 1, 3)
          self.reg = nn.Conv2d(3, 3, 3)
          self.cls = nn.Linear(1, 2)

  model = FooNet()
  init_cfg = dict(type='Constant', layer=['Conv1d', 'Conv2d', 'Linear'], val=1)
  # 使用相同的配置初始化整个模块
  initialize(model, init_cfg)
  # model.feat.weight
  # Parameter containing:
  # tensor([[[1., 1., 1.],
  #          [1., 1., 1.],
  #          [1., 1., 1.]]], requires_grad=True)
  ```

- 定义关键字 `layer` 用于初始化不同配置的层

  ```python
  import torch.nn as nn
  from mmcv.cnn.utils import initialize

  class FooNet(nn.Module):
      def __init__(self):
          super().__init__()
          self.feat = nn.Conv1d(3, 1, 3)
          self.reg = nn.Conv2d(3, 3, 3)
          self.cls = nn.Linear(1,2)

  model = FooNet()
  init_cfg = [dict(type='Constant', layer='Conv1d', val=1),
              dict(type='Constant', layer='Conv2d', val=2),
              dict(type='Constant', layer='Linear', val=3)]
  # nn.Conv1d 使用 dict(type='Constant', val=1) 初始化
  # nn.Conv2d 使用 dict(type='Constant', val=2) 初始化
  # nn.Linear 使用 dict(type='Constant', val=3) 初始化
  initialize(model, init_cfg)
  # model.reg.weight
  # Parameter containing:
  # tensor([[[[2., 2., 2.],
  #           [2., 2., 2.],
  #           [2., 2., 2.]],
  #          ...,
  #          [[2., 2., 2.],
  #           [2., 2., 2.],
  #           [2., 2., 2.]]]], requires_grad=True)
  ```

2. 定义关键字`override`初始化模型

- 当用属性名初始化某个特定部分时, 我们可以使用关键字 `override`, 关键字 `override` 对应的Value会替代init_cfg中相应的值

    ```python
    import torch.nn as nn
    from mmcv.cnn import initialize

    class FooNet(nn.Module):
        def __init__(self):
            super().__init__()
            self.feat = nn.Conv1d(3, 1, 3)
            self.reg = nn.Conv2d(3, 3, 3)
            self.cls = nn.Sequential(nn.Conv1d(3, 1, 3), nn.Linear(1,2))

    # 如果我们想将模型的权重初始化为 1，将偏差初始化为 2
    # 但希望 `reg` 中的权重为 3，偏差为 4，则我们可以使用关键字override

    model = FooNet()
    init_cfg = dict(type='Constant', layer=['Conv1d','Conv2d'], val=1, bias=2,
                    override=dict(type='Constant', name='reg', val=3, bias=4))
    #  使用 dict(type='Constant', val=1, bias=2)来初始化 self.feat and self.cls
    # 使用dict(type='Constant', val=3, bias=4)来初始化‘reg’模块。
    initialize(model, init_cfg)
    # model.reg.weight
    # Parameter containing:
    # tensor([[[[3., 3., 3.],
    #           [3., 3., 3.],
    #           [3., 3., 3.]],
    #           ...,
    #           [[3., 3., 3.],
    #            [3., 3., 3.],
    #            [3., 3., 3.]]]], requires_grad=True)
    ```

- 如果 init_cfg 中的关键字`layer`为None，则只初始化在关键字override中的子模块，并且省略override中的 type 和其他参数

    ```python
    model = FooNet()
    init_cfg = dict(type='Constant', val=1, bias=2, override=dict(name='reg'))
    # self.feat 和 self.cls 使用 PyTorch 默认的初始化
    # 将使用 dict(type='Constant', val=1, bias=2) 初始化名为 'reg' 的模块
    initialize(model, init_cfg)
    # model.reg.weight
    # Parameter containing:
    # tensor([[[[1., 1., 1.],
    #           [1., 1., 1.],
    #           [1., 1., 1.]],
    #           ...,
    #           [[1., 1., 1.],
    #            [1., 1., 1.],
    #            [1., 1., 1.]]]], requires_grad=True)
    ```

- 如果我们没有定义关键字`layer`或`override` , 将不会初始化任何东西

- 关键字`override`的无效用法

   ```python
   # 没有重写任何子模块
   init_cfg = dict(type='Constant', layer=['Conv1d','Conv2d'],
                   val=1, bias=2,
                   override=dict(type='Constant', val=3, bias=4))

   # 没有指定type，即便有其他参数，也是无效的。
   init_cfg = dict(type='Constant', layer=['Conv1d','Conv2d'],
                   val=1, bias=2,
                   override=dict(name='reg', val=3, bias=4))
   ```

3. 用预训练模型初始化

    ```python
    import torch.nn as nn
    import torchvision.models as models
    from mmcv.cnn import initialize

    # 使用预训练模型来初始化
    model = models.resnet50()
    # model.conv1.weight
    # Parameter containing:
    # tensor([[[[-6.7435e-03, -2.3531e-02, -9.0143e-03,  ..., -2.1245e-03,
    #            -1.8077e-03,  3.0338e-03],
    #           [-1.2603e-02, -2.7831e-02,  2.3187e-02,  ..., -1.5793e-02,
    #             1.1655e-02,  4.5889e-03],
    #           [-3.7916e-02,  1.2014e-02,  1.3815e-02,  ..., -4.2651e-03,
    #             1.7314e-02, -9.9998e-03],
    #           ...,

    init_cfg = dict(type='Pretrained',
                    checkpoint='torchvision://resnet50')
    initialize(model, init_cfg)
    # model.conv1.weight
    # Parameter containing:
    # tensor([[[[ 1.3335e-02,  1.4664e-02, -1.5351e-02,  ..., -4.0896e-02,
    #            -4.3034e-02, -7.0755e-02],
    #           [ 4.1205e-03,  5.8477e-03,  1.4948e-02,  ...,  2.2060e-03,
    #            -2.0912e-02, -3.8517e-02],
    #           [ 2.2331e-02,  2.3595e-02,  1.6120e-02,  ...,  1.0281e-01,
    #             6.2641e-02,  5.1977e-02],
    #           ...,

    # 使用关键字'prefix'用预训练模型的特定部分来初始化子模块权重
    model = models.resnet50()
    url = 'http://download.openmmlab.com/mmdetection/v2.0/retinanet/'\
          'retinanet_r50_fpn_1x_coco/'\
          'retinanet_r50_fpn_1x_coco_20200130-c2398f9e.pth'
    init_cfg = dict(type='Pretrained',
                    checkpoint=url, prefix='backbone.')
    initialize(model, init_cfg)
    ```

4. 初始化继承自BaseModule、Sequential、ModuleList、ModuleDict的模型

    `BaseModule` 继承自 `torch.nn.Module`, 它们之间唯一的不同是 `BaseModule` 实现了 `init_weights`

    `Sequential` 继承自 `BaseModule` 和 `torch.nn.Sequential`

    `ModuleList` 继承自 `BaseModule` 和 `torch.nn.ModuleList`

    `ModuleDict` 继承自 `BaseModule` 和 `torch.nn.ModuleDict`

    `````python
    import torch.nn as nn
    from mmcv.runner import BaseModule, Sequential, ModuleList, ModuleDict

    class FooConv1d(BaseModule):

        def __init__(self, init_cfg=None):
            super().__init__(init_cfg)
            self.conv1d = nn.Conv1d(4, 1, 4)

        def forward(self, x):
            return self.conv1d(x)

    class FooConv2d(BaseModule):

        def __init__(self, init_cfg=None):
            super().__init__(init_cfg)
            self.conv2d = nn.Conv2d(3, 1, 3)

        def forward(self, x):
            return self.conv2d(x)

    # BaseModule
    init_cfg = dict(type='Constant', layer='Conv1d', val=0., bias=1.)
    model = FooConv1d(init_cfg)
    model.init_weights()
    # model.conv1d.weight
    # Parameter containing:
    # tensor([[[0., 0., 0., 0.],
    #        [0., 0., 0., 0.],
    #        [0., 0., 0., 0.],
    #        [0., 0., 0., 0.]]], requires_grad=True)

    # Sequential
    init_cfg1 = dict(type='Constant', layer='Conv1d', val=0., bias=1.)
    init_cfg2 = dict(type='Constant', layer='Conv2d', val=2., bias=3.)
    model1 = FooConv1d(init_cfg1)
    model2 = FooConv2d(init_cfg2)
    seq_model = Sequential(model1, model2)
    seq_model.init_weights()
    # seq_model[0].conv1d.weight
    # Parameter containing:
    # tensor([[[0., 0., 0., 0.],
    #         [0., 0., 0., 0.],
    #         [0., 0., 0., 0.],
    #         [0., 0., 0., 0.]]], requires_grad=True)
    # seq_model[1].conv2d.weight
    # Parameter containing:
    # tensor([[[[2., 2., 2.],
    #           [2., 2., 2.],
    #           [2., 2., 2.]],
    #         ...,
    #          [[2., 2., 2.],
    #           [2., 2., 2.],
    #           [2., 2., 2.]]]], requires_grad=True)

    # inner init_cfg has higher priority
    model1 = FooConv1d(init_cfg1)
    model2 = FooConv2d(init_cfg2)
    init_cfg = dict(type='Constant', layer=['Conv1d', 'Conv2d'], val=4., bias=5.)
    seq_model = Sequential(model1, model2, init_cfg=init_cfg)
    seq_model.init_weights()
    # seq_model[0].conv1d.weight
    # Parameter containing:
    # tensor([[[0., 0., 0., 0.],
    #         [0., 0., 0., 0.],
    #         [0., 0., 0., 0.],
    #         [0., 0., 0., 0.]]], requires_grad=True)
    # seq_model[1].conv2d.weight
    # Parameter containing:
    # tensor([[[[2., 2., 2.],
    #           [2., 2., 2.],
    #           [2., 2., 2.]],
    #         ...,
    #          [[2., 2., 2.],
    #           [2., 2., 2.],
    #           [2., 2., 2.]]]], requires_grad=True)

    # ModuleList
    model1 = FooConv1d(init_cfg1)
    model2 = FooConv2d(init_cfg2)
    modellist = ModuleList([model1, model2])
    modellist.init_weights()
    # modellist[0].conv1d.weight
    # Parameter containing:
    # tensor([[[0., 0., 0., 0.],
    #         [0., 0., 0., 0.],
    #         [0., 0., 0., 0.],
    #         [0., 0., 0., 0.]]], requires_grad=True)
    # modellist[1].conv2d.weight
    # Parameter containing:
    # tensor([[[[2., 2., 2.],
    #           [2., 2., 2.],
    #           [2., 2., 2.]],
    #         ...,
    #          [[2., 2., 2.],
    #           [2., 2., 2.],
    #           [2., 2., 2.]]]], requires_grad=True)

    # inner init_cfg has higher priority
    model1 = FooConv1d(init_cfg1)
    model2 = FooConv2d(init_cfg2)
    init_cfg = dict(type='Constant', layer=['Conv1d', 'Conv2d'], val=4., bias=5.)
    modellist = ModuleList([model1, model2], init_cfg=init_cfg)
    modellist.init_weights()
    # modellist[0].conv1d.weight
    # Parameter containing:
    # tensor([[[0., 0., 0., 0.],
    #         [0., 0., 0., 0.],
    #         [0., 0., 0., 0.],
    #         [0., 0., 0., 0.]]], requires_grad=True)
    # modellist[1].conv2d.weight
    # Parameter containing:
    # tensor([[[[2., 2., 2.],
    #           [2., 2., 2.],
    #           [2., 2., 2.]],
    #         ...,
    #          [[2., 2., 2.],
    #           [2., 2., 2.],
    #           [2., 2., 2.]]]], requires_grad=True)

    # ModuleDict
    model1 = FooConv1d(init_cfg1)
    model2 = FooConv2d(init_cfg2)
    modeldict = ModuleDict(dict(model1=model1, model2=model2))
    modeldict.init_weights()
    # modeldict['model1'].conv1d.weight
    # Parameter containing:
    # tensor([[[0., 0., 0., 0.],
    #         [0., 0., 0., 0.],
    #         [0., 0., 0., 0.],
    #         [0., 0., 0., 0.]]], requires_grad=True)
    # modeldict['model2'].conv2d.weight
    # Parameter containing:
    # tensor([[[[2., 2., 2.],
    #           [2., 2., 2.],
    #           [2., 2., 2.]],
    #         ...,
    #          [[2., 2., 2.],
    #           [2., 2., 2.],
    #           [2., 2., 2.]]]], requires_grad=True)

    # inner init_cfg has higher priority
    model1 = FooConv1d(init_cfg1)
    model2 = FooConv2d(init_cfg2)
    init_cfg = dict(type='Constant', layer=['Conv1d', 'Conv2d'], val=4., bias=5.)
    modeldict = ModuleDict(dict(model1=model1, model2=model2), init_cfg=init_cfg)
    modeldict.init_weights()
    # modeldict['model1'].conv1d.weight
    # Parameter containing:
    # tensor([[[0., 0., 0., 0.],
    #         [0., 0., 0., 0.],
    #         [0., 0., 0., 0.],
    #         [0., 0., 0., 0.]]], requires_grad=True)
    # modeldict['model2'].conv2d.weight
    # Parameter containing:
    # tensor([[[[2., 2., 2.],
    #           [2., 2., 2.],
    #           [2., 2., 2.]],
    #         ...,
    #          [[2., 2., 2.],
    #           [2., 2., 2.],
    #           [2., 2., 2.]]]], requires_grad=True)
    `````

### Model Zoo

除了`torchvision`的预训练模型，我们还提供以下 CNN 的预训练模型：

- VGG Caffe
- ResNet Caffe
- ResNeXt
- ResNet with Group Normalization
- ResNet with Group Normalization and Weight Standardization
- HRNetV2
- Res2Net
- RegNet

#### Model URLs in JSON

MMCV中的Model Zoo Link 由 JSON 文件管理。 json 文件由模型名称及其url或path的键值对组成,一个json文件可能类似于:

```json
{
    "model_a": "https://example.com/models/model_a_9e5bac.pth",
    "model_b": "pretrain/model_b_ab3ef2c.pth"
}
```

可以在[此处](https://github.com/open-mmlab/mmcv/blob/master/mmcv/model_zoo/open_mmlab.json)找到托管在 OpenMMLab AWS 上的预训练模型的默认链接。

你可以通过将 `open-mmlab.json` 放在 `MMCV_HOME`下来覆盖默认链接，如果在环境中找不到`MMCV_HOME`，则默认使用 `~/.cache/mmcv`。当然你也可以使用命令 `export MMCV_HOME=/your/path`来设置自己的路径。

外部的 json 文件将被合并到默认文件中；如果相同的键同时出现在外部 `json` 和默认 `json` 中，则使用外部 `json` 中的值。

#### Load Checkpoint

`mmcv.load_checkpoint()`的参数`filename`支持以下类型：

- filepath: `checkpoint`路径
- `http://xxx` 和 `https://xxx`: 下载checkpoint的链接，文件名中必须包含`SHA256`后缀
- `torchvision://xxx`: `torchvision.models`中的模型链接，更多细节参考 [torchvision](https://pytorch.org/docs/stable/torchvision/models.html)
- `open-mmlab://xxx`: 默认和其他 json 文件中提供的模型链接或文件路径


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/zh_cn/understand_mmcv/config.md
================================================
## 配置

`Config` 类用于操作配置文件，它支持从多种文件格式中加载配置，包括 **python**, **json** 和 **yaml**。
它提供了类似字典对象的接口来获取和设置值。

以配置文件 `test.py` 为例

```python
a = 1
b = dict(b1=[0, 1, 2], b2=None)
c = (1, 2)
d = 'string'
```

加载与使用配置文件

```python
>>> cfg = Config.fromfile('test.py')
>>> print(cfg)
>>> dict(a=1,
...      b=dict(b1=[0, 1, 2], b2=None),
...      c=(1, 2),
...      d='string')
```

对于所有格式的配置文件，都支持一些预定义变量。它会将 `{{ var }}` 替换为实际值。

目前支持以下四个预定义变量：

`{{ fileDirname }}` - 当前打开文件的目录名，例如 /home/your-username/your-project/folder

`{{ fileBasename }}` - 当前打开文件的文件名，例如 file.ext

`{{ fileBasenameNoExtension }}` - 当前打开文件不包含扩展名的文件名，例如 file

`{{ fileExtname }}` - 当前打开文件的扩展名，例如 .ext

这些变量名引用自 [VS Code](https://code.visualstudio.com/docs/editor/variables-reference)。

这里是一个带有预定义变量的配置文件的例子。

`config_a.py`
```python
a = 1
b = './work_dir/{{ fileBasenameNoExtension }}'
c = '{{ fileExtname }}'
```

```python
>>> cfg = Config.fromfile('./config_a.py')
>>> print(cfg)
>>> dict(a=1,
...      b='./work_dir/config_a',
...      c='.py')
```

对于所有格式的配置文件, 都支持继承。为了重用其他配置文件的字段，
需要指定 `_base_='./config_a.py'` 或者一个包含配置文件的列表 `_base_=['./config_a.py', './config_b.py']`。

这里有 4 个配置继承关系的例子。

`config_a.py` 作为基类配置文件

```python
a = 1
b = dict(b1=[0, 1, 2], b2=None)
```
### 不含重复键值对从基类配置文件继承

`config_b.py`

```python
_base_ = './config_a.py'
c = (1, 2)
d = 'string'
```

```python
>>> cfg = Config.fromfile('./config_b.py')
>>> print(cfg)
>>> dict(a=1,
...      b=dict(b1=[0, 1, 2], b2=None),
...      c=(1, 2),
...      d='string')
```
在`config_b.py`里的新字段与在`config_a.py`里的旧字段拼接

### 含重复键值对从基类配置文件继承

`config_c.py`

```python
_base_ = './config_a.py'
b = dict(b2=1)
c = (1, 2)
```

```python
>>> cfg = Config.fromfile('./config_c.py')
>>> print(cfg)
>>> dict(a=1,
...      b=dict(b1=[0, 1, 2], b2=1),
...      c=(1, 2))
```

在基类配置文件：`config_a` 里的 `b.b2=None`被配置文件：`config_c.py`里的 `b.b2=1`替代。

### 从具有忽略字段的配置文件继承

`config_d.py`

```python
_base_ = './config_a.py'
b = dict(_delete_=True, b2=None, b3=0.1)
c = (1, 2)
```

```python
>>> cfg = Config.fromfile('./config_d.py')
>>> print(cfg)
>>> dict(a=1,
...      b=dict(b2=None, b3=0.1),
...      c=(1, 2))
```

您还可以设置 `_delete_=True` 忽略基类配置文件中的某些字段。`b` 中所有的旧键 `b1, b2` 将会被新键 `b2, b3` 所取代。

### 从多个基类配置文件继承（基类配置文件不应包含相同的键）

`config_e.py`

```python
c = (1, 2)
d = 'string'
```

`config_f.py`

```python
_base_ = ['./config_a.py', './config_e.py']
```

```python
>>> cfg = Config.fromfile('./config_f.py')
>>> print(cfg)
>>> dict(a=1,
...      b=dict(b1=[0, 1, 2], b2=None),
...      c=(1, 2),
...      d='string')
```

### 从基类引用变量

您可以使用以下语法引用在基类中定义的变量。

`base.py`

```python
item1 = 'a'
item2 = dict(item3 = 'b')
```

`config_g.py`

```python
_base_ = ['./base.py']
item = dict(a = {{ _base_.item1 }}, b = {{ _base_.item2.item3 }})
```

```python
>>> cfg = Config.fromfile('./config_g.py')
>>> print(cfg.pretty_text)
item1 = 'a'
item2 = dict(item3='b')
item = dict(a='a', b='b')
```


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/zh_cn/understand_mmcv/data_process.md
================================================
## 数据处理

### 图像

图像模块提供了一些图像预处理的函数，该模块依赖 `opencv` 。

#### 读取/保存/显示

使用 `imread` 和 `imwrite` 函数可以读取和保存图像。

```python
import mmcv

img = mmcv.imread('test.jpg')
img = mmcv.imread('test.jpg', flag='grayscale')
img_ = mmcv.imread(img)  # 相当于什么也没做
mmcv.imwrite(img, 'out.jpg')
```

从二进制中读取图像

```python
with open('test.jpg', 'rb') as f:
    data = f.read()
img = mmcv.imfrombytes(data)
```

显示图像文件或已读取的图像

```python
mmcv.imshow('tests/data/color.jpg')

for i in range(10):
    img = np.random.randint(256, size=(100, 100, 3), dtype=np.uint8)
    mmcv.imshow(img, win_name='test image', wait_time=200)
```

#### 色彩空间转换

支持的转换函数：

- bgr2gray
- gray2bgr
- bgr2rgb
- rgb2bgr
- bgr2hsv
- hsv2bgr

```python
img = mmcv.imread('tests/data/color.jpg')
img1 = mmcv.bgr2rgb(img)
img2 = mmcv.rgb2gray(img1)
img3 = mmcv.bgr2hsv(img)
```

#### 缩放

有三种缩放图像的方法。所有以 `imresize_*` 开头的函数都有一个 `return_scale` 参数，如果
该参数为 `False` ，函数的返回值只有调整之后的图像，否则是一个元组 `(resized_img, scale)` 。

```python
# 缩放图像至给定的尺寸
mmcv.imresize(img, (1000, 600), return_scale=True)

# 缩放图像至与给定的图像同样的尺寸
mmcv.imresize_like(img, dst_img, return_scale=False)

# 以一定的比例缩放图像
mmcv.imrescale(img, 0.5)

# 缩放图像至最长的边不大于1000、最短的边不大于800并且没有改变图像的长宽比
mmcv.imrescale(img, (1000, 800))
```

#### 旋转

我们可以使用 `imrotate` 旋转图像一定的角度。旋转的中心需要指定，默认值是原始图像的中心。有
两种旋转的模式，一种保持图像的尺寸不变，因此旋转后原始图像中的某些部分会被裁剪，另一种是扩大
图像的尺寸进而保留完整的原始图像。

```python
img = mmcv.imread('tests/data/color.jpg')

# 顺时针旋转图像30度
img_ = mmcv.imrotate(img, 30)

# 逆时针旋转图像90度
img_ = mmcv.imrotate(img, -90)

# 顺时针旋转图像30度并且缩放图像为原始图像的1.5倍
img_ = mmcv.imrotate(img, 30, scale=1.5)

# 以坐标(100, 100)为中心顺时针旋转图像30度
img_ = mmcv.imrotate(img, 30, center=(100, 100))

# 顺时针旋转图像30度并扩大图像的尺寸
img_ = mmcv.imrotate(img, 30, auto_bound=True)
```

#### 翻转

我们可以使用 `imflip` 翻转图像。

```python
img = mmcv.imread('tests/data/color.jpg')

# 水平翻转图像
mmcv.imflip(img)

# 垂直翻转图像
mmcv.imflip(img, direction='vertical')
```

#### 裁剪

`imcrop` 可以裁剪图像的一个或多个区域，每个区域用左上角和右下角坐标表示，形如(x1, y1, x2, y2)

```python
import mmcv
import numpy as np

img = mmcv.imread('tests/data/color.jpg')

# 裁剪区域 (10, 10, 100, 120)
bboxes = np.array([10, 10, 100, 120])
patch = mmcv.imcrop(img, bboxes)

# 裁剪两个区域，分别是 (10, 10, 100, 120) 和 (0, 0, 50, 50)
bboxes = np.array([[10, 10, 100, 120], [0, 0, 50, 50]])
patches = mmcv.imcrop(img, bboxes)

# 裁剪两个区域并且缩放区域1.2倍
patches = mmcv.imcrop(img, bboxes, scale_ratio=1.2)
```

#### 填充

`impad` 和 `impad_to_multiple` 可以用给定的值将图像填充至给定的尺寸。

```python
img = mmcv.imread('tests/data/color.jpg')

# 用给定值将图像填充至 (1000, 1200)
img_ = mmcv.impad(img, shape=(1000, 1200), pad_val=0)

# 用给定值分别填充图像的3个通道至 (1000, 1200)
img_ = mmcv.impad(img, shape=(1000, 1200), pad_val=[100, 50, 200])

# 用给定值填充图像的左、右、上、下四条边
img_ = mmcv.impad(img, padding=(10, 20, 30, 40), pad_val=0)

# 用3个值分别填充图像的左、右、上、下四条边的3个通道
img_ = mmcv.impad(img, padding=(10, 20, 30, 40), pad_val=[100, 50, 200])

# 将图像的四条边填充至能够被给定值整除
img_ = mmcv.impad_to_multiple(img, 32)
```

### 视频

视频模块提供了以下的功能：

- 一个 `VideoReader` 类，具有友好的 API 接口可以读取和转换视频
- 一些编辑视频的方法，包括 `cut` ， `concat` ， `resize`
- 光流的读取/保存/变换

#### VideoReader

`VideoReader` 类提供了和序列一样的接口去获取视频帧。该类会缓存所有被访问过的帧。

```python
video = mmcv.VideoReader('test.mp4')

# 获取基本的信息
print(len(video))
print(video.width, video.height, video.resolution, video.fps)

# 遍历所有的帧
for frame in video:
    print(frame.shape)

# 读取下一帧
img = video.read()

# 使用索引获取帧
img = video[100]

# 获取指定范围的帧
img = video[5:10]
```

将视频切成帧并保存至给定目录或者从给定目录中生成视频。

```python
# 将视频切成帧并保存至目录
video = mmcv.VideoReader('test.mp4')
video.cvt2frames('out_dir')

# 从给定目录中生成视频
mmcv.frames2video('out_dir', 'test.avi')
```

#### 编辑函数

有几个用于编辑视频的函数，这些函数是对 `ffmpeg` 的封装。

```python
# 裁剪视频
mmcv.cut_video('test.mp4', 'clip1.mp4', start=3, end=10, vcodec='h264')

# 将多个视频拼接成一个视频
mmcv.concat_video(['clip1.mp4', 'clip2.mp4'], 'joined.mp4', log_level='quiet')

# 将视频缩放至给定的尺寸
mmcv.resize_video('test.mp4', 'resized1.mp4', (360, 240))

# 将视频缩放至给定的倍率
mmcv.resize_video('test.mp4', 'resized2.mp4', ratio=2)
```

#### 光流

`mmcv` 提供了以下用于操作光流的函数：

- 读取/保存
- 可视化
- 流变换

我们提供了两种将光流dump到文件的方法，分别是非压缩和压缩的方法。非压缩的方法直接将浮点数值的光流
保存至二进制文件，虽然光流无损但文件会比较大。而压缩的方法先量化光流至 0-255 整型数值再保存为
jpeg图像。光流的x维度和y维度会被拼接到图像中。

1. 读取/保存

```python
flow = np.random.rand(800, 600, 2).astype(np.float32)
# 保存光流到flo文件 (~3.7M)
mmcv.flowwrite(flow, 'uncompressed.flo')
# 保存光流为jpeg图像 (~230K)，图像的尺寸为 (800, 1200)
mmcv.flowwrite(flow, 'compressed.jpg', quantize=True, concat_axis=1)

# 读取光流文件，以下两种方式读取的光流尺寸均为 (800, 600, 2)
flow = mmcv.flowread('uncompressed.flo')
flow = mmcv.flowread('compressed.jpg', quantize=True, concat_axis=1)
```

2. 可视化

使用 `mmcv.flowshow()` 可视化光流

```python
mmcv.flowshow(flow)
```

![progress](../../en/_static/flow_visualization.png)

3. 流变换

```python
img1 = mmcv.imread('img1.jpg')
flow = mmcv.flowread('flow.flo')
warpped_img2 = mmcv.flow_warp(img1, flow)
```

img1（左）和 img2（右）

![raw images](../../en/_static/flow_raw_images.png)

光流 (img2 -> img1)

![optical flow](../../en/_static/flow_img2toimg1.png)

变换后的图像和真实图像的差异

![warpped image](../../en/_static/flow_warp_diff.png)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/zh_cn/understand_mmcv/io.md
================================================
## 文件输入输出

文件输入输出模块提供了两个通用的 API 接口用于读取和保存不同格式的文件。

```{note}
在 v1.3.16 及之后的版本中，IO 模块支持从不同后端读取数据并支持将数据保存至不同后端。更多细节请访问 PR [#1330](https://github.com/open-mmlab/mmcv/pull/1330)。
```

### 读取和保存数据

`mmcv` 提供了一个通用的 api 用于读取和保存数据，目前支持的格式有 json、yaml 和 pickle。

#### 从硬盘读取数据或者将数据保存至硬盘

```python
import mmcv

# 从文件中读取数据
data = mmcv.load('test.json')
data = mmcv.load('test.yaml')
data = mmcv.load('test.pkl')
# 从文件对象中读取数据
with open('test.json', 'r') as f:
    data = mmcv.load(f, file_format='json')

# 将数据序列化为字符串
json_str = mmcv.dump(data, file_format='json')

# 将数据保存至文件 (根据文件名后缀反推文件类型)
mmcv.dump(data, 'out.pkl')

# 将数据保存至文件对象
with open('test.yaml', 'w') as f:
    data = mmcv.dump(data, f, file_format='yaml')
```

#### 从其他后端加载或者保存至其他后端

```python
import mmcv

# 从 s3 文件读取数据
data = mmcv.load('s3://bucket-name/test.json')
data = mmcv.load('s3://bucket-name/test.yaml')
data = mmcv.load('s3://bucket-name/test.pkl')

# 将数据保存至 s3 文件 (根据文件名后缀反推文件类型)
mmcv.dump(data, 's3://bucket-name/out.pkl')
```

我们提供了易于拓展的方式以支持更多的文件格式。我们只需要创建一个继承自 `BaseFileHandler` 的
文件句柄类并将其注册到 `mmcv` 中即可。句柄类至少需要重写三个方法。

```python
import mmcv

# 支持为文件句柄类注册多个文件格式
# @mmcv.register_handler(['txt', 'log'])
@mmcv.register_handler('txt')
class TxtHandler1(mmcv.BaseFileHandler):

    def load_from_fileobj(self, file):
        return file.read()

    def dump_to_fileobj(self, obj, file):
        file.write(str(obj))

    def dump_to_str(self, obj, **kwargs):
        return str(obj)
```

以 `PickleHandler` 为例

```python
import pickle

class PickleHandler(mmcv.BaseFileHandler):

    def load_from_fileobj(self, file, **kwargs):
        return pickle.load(file, **kwargs)

    def load_from_path(self, filepath, **kwargs):
        return super(PickleHandler, self).load_from_path(
            filepath, mode='rb', **kwargs)

    def dump_to_str(self, obj, **kwargs):
        kwargs.setdefault('protocol', 2)
        return pickle.dumps(obj, **kwargs)

    def dump_to_fileobj(self, obj, file, **kwargs):
        kwargs.setdefault('protocol', 2)
        pickle.dump(obj, file, **kwargs)

    def dump_to_path(self, obj, filepath, **kwargs):
        super(PickleHandler, self).dump_to_path(
            obj, filepath, mode='wb', **kwargs)
```

### 读取文件并返回列表或字典

例如， `a.txt` 是文本文件，一共有5行内容。

```
a
b
c
d
e
```
#### 从硬盘读取

使用 `list_from_file` 读取 `a.txt`

```python
>>> mmcv.list_from_file('a.txt')
['a', 'b', 'c', 'd', 'e']
>>> mmcv.list_from_file('a.txt', offset=2)
['c', 'd', 'e']
>>> mmcv.list_from_file('a.txt', max_num=2)
['a', 'b']
>>> mmcv.list_from_file('a.txt', prefix='/mnt/')
['/mnt/a', '/mnt/b', '/mnt/c', '/mnt/d', '/mnt/e']
```

同样， `b.txt` 也是文本文件，一共有3行内容

```
1 cat
2 dog cow
3 panda
```

使用 `dict_from_file` 读取 `b.txt`

```python
>>> mmcv.dict_from_file('b.txt')
{'1': 'cat', '2': ['dog', 'cow'], '3': 'panda'}
>>> mmcv.dict_from_file('b.txt', key_type=int)
{1: 'cat', 2: ['dog', 'cow'], 3: 'panda'}
```

#### 从其他后端读取

使用 `list_from_file` 读取 `s3://bucket-name/a.txt`

```python
>>> mmcv.list_from_file('s3://bucket-name/a.txt')
['a', 'b', 'c', 'd', 'e']
>>> mmcv.list_from_file('s3://bucket-name/a.txt', offset=2)
['c', 'd', 'e']
>>> mmcv.list_from_file('s3://bucket-name/a.txt', max_num=2)
['a', 'b']
>>> mmcv.list_from_file('s3://bucket-name/a.txt', prefix='/mnt/')
['/mnt/a', '/mnt/b', '/mnt/c', '/mnt/d', '/mnt/e']
```

使用 `dict_from_file` 读取 `b.txt`

```python
>>> mmcv.dict_from_file('s3://bucket-name/b.txt')
{'1': 'cat', '2': ['dog', 'cow'], '3': 'panda'}
>>> mmcv.dict_from_file('s3://bucket-name/b.txt', key_type=int)
{1: 'cat', 2: ['dog', 'cow'], 3: 'panda'}
```

### 读取和保存权重文件

#### 从硬盘读取权重文件或者将权重文件保存至硬盘

我们可以通过下面的方式从磁盘读取权重文件或者将权重文件保存至磁盘

```python
import torch

filepath1 = '/path/of/your/checkpoint1.pth'
filepath2 = '/path/of/your/checkpoint2.pth'
# 从 filepath1 读取权重文件
checkpoint = torch.load(filepath1)
# 将权重文件保存至 filepath2
torch.save(checkpoint, filepath2)
```

MMCV 提供了很多后端，`HardDiskBackend` 是其中一个，我们可以通过它来读取或者保存权重文件。

```python
import io
from mmcv.fileio.file_client import HardDiskBackend

disk_backend = HardDiskBackend()
with io.BytesIO(disk_backend.get(filepath1)) as buffer:
    checkpoint = torch.load(buffer)
with io.BytesIO() as buffer:
    torch.save(checkpoint, buffer)
    disk_backend.put(buffer.getvalue(), filepath2)
```

如果我们想在接口中实现根据文件路径自动选择对应的后端，我们可以使用 `FileClient`。
例如，我们想实现两个方法，分别是读取权重以及保存权重，它们需支持不同类型的文件路径，可以是磁盘路径，也可以是网络路径或者其他路径。

```python
from mmcv.fileio.file_client import FileClient

def load_checkpoint(path):
    file_client = FileClient.infer_client(uri=path)
    with io.BytesIO(file_client.get(path)) as buffer:
        checkpoint = torch.load(buffer)
    return checkpoint

def save_checkpoint(checkpoint, path):
    with io.BytesIO() as buffer:
        torch.save(checkpoint, buffer)
        file_client.put(buffer.getvalue(), path)

file_client = FileClient.infer_client(uri=filepath1)
checkpoint = load_checkpoint(filepath1)
save_checkpoint(checkpoint, filepath2)
```

#### 从网络远端读取权重文件

```{note}
目前只支持从网络远端读取权重文件，暂不支持将权重文件写入网络远端
```

```python
import io
import torch
from mmcv.fileio.file_client import HTTPBackend, FileClient

filepath = 'http://path/of/your/checkpoint.pth'
checkpoint = torch.utils.model_zoo.load_url(filepath)

http_backend = HTTPBackend()
with io.BytesIO(http_backend.get(filepath)) as buffer:
    checkpoint = torch.load(buffer)

file_client = FileClient.infer_client(uri=filepath)
with io.BytesIO(file_client.get(filepath)) as buffer:
    checkpoint = torch.load(buffer)
```


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/zh_cn/understand_mmcv/ops.md
================================================
## CUDA 算子

MMCV 提供了检测、分割等任务中常用的 CUDA 算子

- ActiveRotatedFilter
- AssignScoreWithK
- BallQuery
- BBoxOverlaps
- CARAFE
- CrissCrossAttention
- ContextBlock
- ConvexIoU
- CornerPool
- Deformable Convolution v1/v2
- Deformable RoIPool
- DynamicScatter
- GatherPoints
- FurthestPointSample
- FurthestPointSampleWithDist
- GeneralizedAttention
- KNN
- MaskedConv
- MinAreaPolygon
- NMS
- PointsInPolygons
- PSAMask
- RotatedFeatureAlign
- RoIPointPool3d
- RoIPool
- RiRoIAlignRotated
- RoIAlign
- RoIAwarePool3d
- SimpleRoIAlign
- SigmoidFocalLoss
- SoftmaxFocalLoss
- SoftNMS
- Synchronized BatchNorm
- Voxelization
- ThreeInterpolate
- ThreeNN
- Weight standardization
- Correlation


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/zh_cn/understand_mmcv/registry.md
================================================
## 注册器
MMCV 使用 [注册器](https://github.com/open-mmlab/mmcv/blob/master/mmcv/utils/registry.py) 来管理具有相似功能的不同模块, 例如, 检测器中的主干网络、头部、和模型颈部。
在 OpenMMLab 家族中的绝大部分开源项目使用注册器去管理数据集和模型的模块，例如 [MMDetection](https://github.com/open-mmlab/mmdetection), [MMDetection3D](https://github.com/open-mmlab/mmdetection3d), [MMClassification](https://github.com/open-mmlab/mmclassification), [MMEditing](https://github.com/open-mmlab/mmediting) 等。

### 什么是注册器
在MMCV中，注册器可以看作类到字符串的映射。
一个注册器中的类通常有相似的接口，但是可以实现不同的算法或支持不同的数据集。
借助注册器，用户可以通过使用相应的字符串查找并实例化该类，并根据他们的需要实例化对应模块。
一个典型的案例是，OpenMMLab　中的大部分开源项目的配置系统，这些系统通过配置文件来使用注册器创建钩子、执行器、模型和数据集。
可以在[这里](https://mmcv.readthedocs.io/en/latest/api.html?highlight=registry#mmcv.utils.Registry)找到注册器接口使用文档。

使用 `registry`（注册器）管理代码库中的模型，需要以下三个步骤。

1. 创建一个构建方法（可选，在大多数情况下您可以只使用默认方法）
2. 创建注册器
3. 使用此注册器来管理模块

`Registry`（注册器）的参数 `build_func`（构建函数） 用来自定义如何实例化类的实例，默认使用 [这里](https://mmcv.readthedocs.io/en/latest/api.html?highlight=registry#mmcv.utils.build_from_cfg)实现的`build_from_cfg`。

### 一个简单的例子

这里是一个使用注册器管理包中模块的简单示例。您可以在 OpenMMLab 开源项目中找到更多实例。

假设我们要实现一系列数据集转换器（Dataset Converter），用于将不同格式的数据转换为标准数据格式。我们先创建一个名为converters的目录作为包，在包中我们创建一个文件来实现构建器（builder），命名为converters/builder.py，如下

```python
from mmcv.utils import Registry
# 创建转换器（converter）的注册器（registry）
CONVERTERS = Registry('converter')
```

然后我们在包中可以实现不同的转换器（converter）。例如，在 `converters/converter1.py` 中实现 `Converter1`。

```python
from .builder import CONVERTERS

# 使用注册器管理模块
@CONVERTERS.register_module()
class Converter1(object):
    def __init__(self, a, b):
        self.a = a
        self.b = b
```
使用注册器管理模块的关键步骤是，将实现的模块注册到注册表 `CONVERTERS` 中。通过 `@CONVERTERS.register_module()` 装饰所实现的模块，字符串和类之间的映射就可以由 `CONVERTERS` 构建和维护，如下所示：

通过这种方式，就可以通过 `CONVERTERS` 建立字符串与类之间的映射，如下所示：

```python
'Converter1' -> <class 'Converter1'>
```
```{note}
只有模块所在的文件被导入时，注册机制才会被触发，所以您需要在某处导入该文件。更多详情请查看 https://github.com/open-mmlab/mmdetection/issues/5974。
```
如果模块被成功注册了，你可以通过配置文件使用这个转换器（converter），如下所示：

```python
converter_cfg = dict(type='Converter1', a=a_value, b=b_value)
converter = CONVERTERS.build(converter_cfg)
```

### 自定义构建函数

假设我们想自定义 `converters` 的构建流程，我们可以实现一个自定义的 `build_func` （构建函数）并将其传递到注册器中。

```python
from mmcv.utils import Registry

# 创建一个构建函数
def build_converter(cfg, registry, *args, **kwargs):
    cfg_ = cfg.copy()
    converter_type = cfg_.pop('type')
    if converter_type not in registry:
        raise KeyError(f'Unrecognized converter type {converter_type}')
    else:
        converter_cls = registry.get(converter_type)

    converter = converter_cls(*args, **kwargs, **cfg_)
    return converter

# 创建一个用于转换器（converters）的注册器，并传递（registry）``build_converter`` 函数
CONVERTERS = Registry('converter', build_func=build_converter)
```

```{note}
注：在这个例子中，我们演示了如何使用参数：`build_func` 自定义构建类的实例的方法。
该功能类似于默认的`build_from_cfg`。在大多数情况下，默认就足够了。
```

`build_model_from_cfg`也实现了在`nn.Sequential`中构建PyTorch模块，你可以直接使用它们。

### 注册器层结构

你也可以从多个 OpenMMLab 开源框架中构建模块，例如，你可以把所有 [MMClassification](https://github.com/open-mmlab/mmclassification) 中的主干网络（backbone）用到 [MMDetection](https://github.com/open-mmlab/mmdetection) 的目标检测中，你也可以融合 [MMDetection](https://github.com/open-mmlab/mmdetection) 中的目标检测模型 和 [MMSegmentation](https://github.com/open-mmlab/mmsegmentation) 语义分割模型。

下游代码库中所有 `MODELS` 注册器都是MMCV `MODELS` 注册器的子注册器。基本上，使用以下两种方法从子注册器或相邻兄弟注册器构建模块。

1. 从子注册器中构建

   例如：

   我们在 MMDetection 中定义：

   ```python
   from mmcv.utils import Registry
   from mmcv.cnn import MODELS as MMCV_MODELS
   MODELS = Registry('model', parent=MMCV_MODELS)

   @MODELS.register_module()
   class NetA(nn.Module):
       def forward(self, x):
           return x
   ```

   我们在 MMClassification 中定义：

   ```python
   from mmcv.utils import Registry
   from mmcv.cnn import MODELS as MMCV_MODELS
   MODELS = Registry('model', parent=MMCV_MODELS)

   @MODELS.register_module()
   class NetB(nn.Module):
       def forward(self, x):
           return x + 1
   ```

   我们可以通过以下代码在 MMDetection 或 MMClassification 中构建两个网络：

   ```python
   from mmdet.models import MODELS
   net_a = MODELS.build(cfg=dict(type='NetA'))
   net_b = MODELS.build(cfg=dict(type='mmcls.NetB'))
   ```

   或

   ```python
   from mmcls.models import MODELS
   net_a = MODELS.build(cfg=dict(type='mmdet.NetA'))
   net_b = MODELS.build(cfg=dict(type='NetB'))
   ```

2. 从父注册器中构建

   MMCV中的共享`MODELS`注册器是所有下游代码库的父注册器（根注册器）：

   ```python
   from mmcv.cnn import MODELS as MMCV_MODELS
   net_a = MMCV_MODELS.build(cfg=dict(type='mmdet.NetA'))
   net_b = MMCV_MODELS.build(cfg=dict(type='mmcls.NetB'))
   ```


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/zh_cn/understand_mmcv/runner.md
================================================
## 执行器

执行器模块负责模型训练过程调度，主要目的是让用户使用更少的代码以及灵活可配置方式开启训练。其具备如下核心特性:

- 支持以 `EpochBasedRunner` 和 `IterBasedRunner` 为单位的迭代模式以满足不同场景
- 支持定制工作流以满足训练过程中各状态自由切换，目前支持训练和验证两个工作流。工作流可以简单理解为一个完整的训练和验证迭代过程。
- 配合各类默认和自定义 Hook，对外提供了灵活扩展能力

### EpochBasedRunner

顾名思义，`EpochBasedRunner` 是指以 epoch 为周期的工作流，例如设置 workflow = [('train', 2), ('val', 1)] 表示循环迭代地训练 2 个 epoch，然后验证 1 个 epoch。MMDetection 目标检测框架默认采用的是 `EpochBasedRunner`。

其抽象逻辑如下所示：

```python
# 训练终止条件
while curr_epoch < max_epochs:
    # 遍历用户设置的工作流，例如 workflow = [('train', 2)，('val', 1)]
    for i, flow in enumerate(workflow):
        # mode 是工作流函数，例如 train, epochs 是迭代次数
        mode, epochs = flow
        # 要么调用 self.train()，要么调用 self.val()
        epoch_runner = getattr(self, mode)
        # 运行对应工作流函数
        for _ in range(epochs):
            epoch_runner(data_loaders[i], **kwargs)
```
目前支持训练和验证两个工作流，以训练函数为例，其抽象逻辑是：

```python
# epoch_runner 目前可以是 train 或者 val
def train(self, data_loader, **kwargs):
    # 遍历 dataset，共返回一个 epoch 的 batch 数据
    for i, data_batch in enumerate(data_loader):
        self.call_hook('before_train_iter')
        # 验证时候 train_mode=False
        self.run_iter(data_batch, train_mode=True, **kwargs)
        self.call_hook('after_train_iter')
    self.call_hook('after_train_epoch')
```

### IterBasedRunner
不同于 `EpochBasedRunner`，`IterBasedRunner` 是指以 iter 为周期的工作流，例如设置 workflow = [('train', 2)， ('val', 1)] 表示循环迭代的训练 2 个 iter，然后验证 1 个 iter，MMSegmentation 语义分割框架默认采用的是  `IterBasedRunner`。

其抽象逻辑如下所示：

```python
# 虽然是 iter 单位，但是某些场合需要 epoch 信息，由 IterLoader 提供
iter_loaders = [IterLoader(x) for x in data_loaders]
# 训练终止条件
while curr_iter < max_iters:
    # 遍历用户设置的工作流，例如 workflow = [('train', 2)， ('val', 1)]
    for i, flow in enumerate(workflow):
        # mode 是工作流函数，例如 train, iters 是迭代次数
        mode, iters = flow
        # 要么调用 self.train()，要么调用 self.val()
        iter_runner = getattr(self, mode)
        # 运行对应工作流函数
        for _ in range(iters):
            iter_runner(iter_loaders[i], **kwargs)
```
目前支持训练和验证两个工作流，以验证函数为例，其抽象逻辑是：

```python
# iter_runner 目前可以是 train 或者 val
def val(self, data_loader, **kwargs):
    # 获取 batch 数据，用于一次迭代
    data_batch = next(data_loader)
    self.call_hook('before_val_iter')
    outputs = self.model.val_step(data_batch, self.optimizer, **kwargs)
    self.outputs = outputs
    self.call_hook('after_val_iter')
```

除了上述基础功能外，`EpochBasedRunner` 和 `IterBasedRunner` 还提供了 resume 、 save_checkpoint 和注册 hook 功能。

### 一个简单例子
以最常用的分类任务为例详细说明 `runner` 的使用方法。 开启任何一个训练任务，都需要包括如下步骤：

**(1) dataloader、model 和优化器等类初始化**

```python
# 模型类初始化
model=...
# 优化器类初始化，典型值 cfg.optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001)
optimizer = build_optimizer(model, cfg.optimizer)
# 工作流对应的 dataloader 初始化
data_loaders = [
        build_dataloader(
            ds,
            cfg.data.samples_per_gpu,
            cfg.data.workers_per_gpu,
            ...) for ds in dataset
    ]
```

**(2) runner 类初始化**

```python
runner = build_runner(
    # cfg.runner 典型配置为
    # runner = dict(type='EpochBasedRunner', max_epochs=200)
    cfg.runner,
    default_args=dict(
        model=model,
        batch_processor=None,
        optimizer=optimizer,
        logger=logger))
```

**(3) 注册默认训练所必须的 hook，和用户自定义 hook**

```python
# 注册定制必需的 hook
runner.register_training_hooks(
    # lr相关配置，典型为
    # lr_config = dict(policy='step', step=[100, 150])
    cfg.lr_config,
    # 优化相关配置，例如 grad_clip 等
    optimizer_config,
    # 权重保存相关配置，典型为
    # checkpoint_config = dict(interval=1)，每个单位都保存权重
    cfg.checkpoint_config,
    # 日志相关配置
    cfg.log_config,
    ...)

# 注册用户自定义 hook
# 例如想使用 ema 功能，则可以设置 custom_hooks=[dict(type='EMAHook')]
if cfg.get('custom_hooks', None):
    custom_hooks = cfg.custom_hooks
    for hook_cfg in cfg.custom_hooks:
        hook_cfg = hook_cfg.copy()
        priority = hook_cfg.pop('priority', 'NORMAL')
        hook = build_from_cfg(hook_cfg, HOOKS)
        runner.register_hook(hook, priority=priority)
```

然后可以进行 resume 或者 load_checkpoint 对权重进行加载。

**(4) 开启训练流**

```python
# workflow 典型为 workflow = [('train', 1)]
# 此时就真正开启了训练
runner.run(data_loaders, cfg.workflow)
```

关于 workflow 设置，以 `EpochBasedRunner` 为例，详情如下：

- 假设只想运行训练工作流，则可以设置 workflow = [('train', 1)]，表示只进行迭代训练
- 假设想运行训练和验证工作流，则可以设置 workflow = [('train',  3), ('val', 1)]，表示先训练 3 个 epoch ，然后切换到 val 工作流，运行 1 个 epoch，然后循环，直到训练 epoch 次数达到指定值
- 工作流设置还可以自由定制，例如你可以先验证再训练 workflow = [('val', 1), ('train', 1)]

上述代码都已经封装到了各个代码库的 train.py 中，用户只需要设置相应的配置即可，上述流程会自动运行。


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/zh_cn/understand_mmcv/utils.md
================================================
## 辅助函数

### 进度条

如果你想跟踪函数批处理任务的进度，可以使用 `track_progress` 。它能以进度条的形式展示任务的完成情况以及剩余任务所需的时间（内部实现为for循环）。

```python
import mmcv

def func(item):
    # 执行相关操作
    pass

tasks = [item_1, item_2, ..., item_n]

mmcv.track_progress(func, tasks)
```

效果如下
![progress](../../en/_static/progress.*)

如果你想可视化多进程任务的进度，你可以使用 `track_parallel_progress` 。

```python
mmcv.track_parallel_progress(func, tasks, 8)  # 8 workers
```

![progress](../../en/_static/parallel_progress.*)

如果你想要迭代或枚举数据列表并可视化进度,你可以使用 `track_iter_progress` 。

```python
import mmcv

tasks = [item_1, item_2, ..., item_n]

for task in mmcv.track_iter_progress(tasks):
    # do something like print
    print(task)

for i, task in enumerate(mmcv.track_iter_progress(tasks)):
    # do something like print
    print(i)
    print(task)
```

### 计时器

mmcv提供的 `Timer` 可以很方便地计算代码块的执行时间。

```python
import time

with mmcv.Timer():
    # simulate some code block
    time.sleep(1)
```

你也可以使用 `since_start()` 和 `since_last_check()` 。前者返回计时器启动后的运行时长，后者返回最近一次查看计时器后的运行时长。


```python
timer = mmcv.Timer()
# code block 1 here
print(timer.since_start())
# code block 2 here
print(timer.since_last_check())
print(timer.since_start())
```


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/docs/zh_cn/understand_mmcv/visualization.md
================================================
## 可视化

`mmcv` 可以展示图像以及标注（目前只支持标注框）

```python
# 展示图像文件
mmcv.imshow('a.jpg')

# 展示已加载的图像
img = np.random.rand(100, 100, 3)
mmcv.imshow(img)

# 展示带有标注框的图像
img = np.random.rand(100, 100, 3)
bboxes = np.array([[0, 0, 50, 50], [20, 20, 60, 60]])
mmcv.imshow_bboxes(img, bboxes)
```

`mmcv` 也可以展示特殊的图像，例如光流

```python
flow = mmcv.flowread('test.flo')
mmcv.flowshow(flow)
```


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/examples/train.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision.datasets import CIFAR10

from mmcv.parallel import MMDataParallel
from mmcv.runner import EpochBasedRunner
from mmcv.utils import get_logger


class Model(nn.Module):
    """Small convolutional classifier used by the runner example.

    Two 5x5 convolutions (each followed by ReLU and a shared 2x2 max-pool)
    feed three fully connected layers that emit 10 logits. The flatten size
    ``16 * 5 * 5`` corresponds to 32x32 inputs, such as the CIFAR10 images
    loaded by this script.
    """

    def __init__(self):
        super().__init__()
        # Layers are created in the same order as they are used in forward().
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)
        self.loss_fn = nn.CrossEntropyLoss()

    def forward(self, x):
        """Return the raw class logits for a batch of images ``x``."""
        # Convolutional stage: conv -> ReLU -> max-pool, twice.
        for conv in (self.conv1, self.conv2):
            x = self.pool(F.relu(conv(x)))
        # Flatten every sample into a 400-element feature vector.
        x = x.view(-1, 16 * 5 * 5)
        # Hidden fully connected layers with ReLU activations.
        for hidden in (self.fc1, self.fc2):
            x = F.relu(hidden(x))
        # The last layer has no activation; it outputs logits.
        return self.fc3(x)

    def train_step(self, data, optimizer):
        """Compute the loss for one batch.

        Args:
            data (tuple): ``(images, labels)`` pair for the batch.
            optimizer: Accepted as part of the call signature; it is not
                used inside this method.

        Returns:
            dict: ``{'loss': loss}`` where ``loss`` is the cross-entropy
            between the predictions and ``labels``.
        """
        inputs, targets = data
        # Calling the module dispatches through __call__ to forward().
        logits = self(inputs)
        return dict(loss=self.loss_fn(logits, targets))


if __name__ == '__main__':
    # Build the network. When CUDA is available, move it to the GPU and wrap
    # it so that only gpu:0 is used for training.
    # Solved issue https://github.com/open-mmlab/mmcv/issues/1470
    model = Model()
    if torch.cuda.is_available():
        model = MMDataParallel(model.cuda(), device_ids=[0])

    # Dataset and dataloader: CIFAR10 training split, converted to tensors
    # and normalized per channel with mean 0.5 and std 0.5.
    channel_mean = (0.5, 0.5, 0.5)
    channel_std = (0.5, 0.5, 0.5)
    preprocessing = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(channel_mean, channel_std),
    ])
    train_dataset = CIFAR10(
        root='data', train=True, download=True, transform=preprocessing)
    train_loader = DataLoader(
        train_dataset, batch_size=128, shuffle=True, num_workers=2)

    sgd = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
    mmcv_logger = get_logger('mmcv')

    # The runner is a scheduler that manages the training loop.
    runner = EpochBasedRunner(
        model,
        optimizer=sgd,
        work_dir='./work_dir',
        logger=mmcv_logger,
        max_epochs=4)

    # Register hooks on the runner; they are invoked automatically.
    runner.register_training_hooks(
        # learning rate scheduler config
        lr_config={'policy': 'step', 'step': [2, 3]},
        # configuration of optimizer
        optimizer_config={'grad_clip': None},
        # configuration of saving checkpoints periodically
        checkpoint_config={'interval': 1},
        # save log periodically; multiple hooks can be used simultaneously
        log_config={'interval': 100, 'hooks': [{'type': 'TextLoggerHook'}]})

    # One dataloader paired with a single-phase 'train' workflow.
    runner.run([train_loader], [('train', 1)])


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/__init__.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
# flake8: noqa
from .arraymisc import *
from .fileio import *
from .image import *
from .utils import *
from .version import *
from .video import *
from .visualization import *

# The following modules are not imported to this level, so mmcv may be used
# without PyTorch.
# - runner
# - parallel
# - ops


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/arraymisc/__init__.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
from .quantization import dequantize, quantize

__all__ = ['quantize', 'dequantize']


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/arraymisc/quantization.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import numpy as np


def quantize(arr, min_val, max_val, levels, dtype=np.int64):
    """Quantize an array of (-inf, inf) to [0, levels-1].

    Values are first clipped to ``[min_val, max_val]`` and then mapped onto
    ``levels`` equally sized bins; ``max_val`` itself falls into the last bin.

    Args:
        arr (ndarray): Input array.
        min_val (scalar): Minimum value to be clipped.
        max_val (scalar): Maximum value to be clipped.
        levels (int): Quantization levels. Must be an integer (Python ``int``
            or NumPy integer scalar) greater than 1.
        dtype (np.type): The type of the quantized array.

    Returns:
        ndarray: Quantized array with values in ``[0, levels - 1]``.

    Raises:
        ValueError: If ``levels`` is not an integer greater than 1, or if
            ``min_val`` is not smaller than ``max_val``.
    """
    if not (isinstance(levels, (int, np.integer)) and levels > 1):
        raise ValueError(
            f'levels must be an integer greater than 1, but got {levels}')
    if min_val >= max_val:
        raise ValueError(
            f'min_val ({min_val}) must be smaller than max_val ({max_val})')

    # Normalise NumPy integer scalars to a Python int so that the result
    # dtype is decided by ``dtype`` alone and not by scalar type promotion.
    levels = int(levels)

    arr = np.clip(arr, min_val, max_val) - min_val
    # ``arr == max_val`` would land in bin ``levels``; cap it at the last bin.
    quantized_arr = np.minimum(
        np.floor(levels * arr / (max_val - min_val)).astype(dtype), levels - 1)

    return quantized_arr


def dequantize(arr, min_val, max_val, levels, dtype=np.float64):
    """Dequantize an array.

    Each quantization index is mapped back to the centre of its bin inside
    ``[min_val, max_val]``.

    Args:
        arr (ndarray): Input array of quantization indices.
        min_val (scalar): Lower bound of the dequantized value range.
        max_val (scalar): Upper bound of the dequantized value range.
        levels (int): Quantization levels. Must be an integer (Python ``int``
            or NumPy integer scalar) greater than 1.
        dtype (np.type): The type of the dequantized array.

    Returns:
        ndarray: Dequantized array.

    Raises:
        ValueError: If ``levels`` is not an integer greater than 1, or if
            ``min_val`` is not smaller than ``max_val``.
    """
    if not (isinstance(levels, (int, np.integer)) and levels > 1):
        raise ValueError(
            f'levels must be an integer greater than 1, but got {levels}')
    if min_val >= max_val:
        raise ValueError(
            f'min_val ({min_val}) must be smaller than max_val ({max_val})')

    # Normalise NumPy integer scalars to a Python int so that the result
    # dtype is decided by ``dtype`` alone and not by scalar type promotion.
    levels = int(levels)

    # The +0.5 offset selects the bin centre rather than its lower edge.
    dequantized_arr = (arr + 0.5).astype(dtype) * (max_val -
                                                   min_val) / levels + min_val

    return dequantized_arr


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/cnn/__init__.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
from .alexnet import AlexNet
# yapf: disable
from .bricks import (ACTIVATION_LAYERS, CONV_LAYERS, NORM_LAYERS,
                     PADDING_LAYERS, PLUGIN_LAYERS, UPSAMPLE_LAYERS,
                     ContextBlock, Conv2d, Conv3d, ConvAWS2d, ConvModule,
                     ConvTranspose2d, ConvTranspose3d, ConvWS2d,
                     DepthwiseSeparableConvModule, GeneralizedAttention,
                     HSigmoid, HSwish, Linear, MaxPool2d, MaxPool3d,
                     NonLocal1d, NonLocal2d, NonLocal3d, Scale, Swish,
                     build_activation_layer, build_conv_layer,
                     build_norm_layer, build_padding_layer, build_plugin_layer,
                     build_upsample_layer, conv_ws_2d, is_norm)
from .builder import MODELS, build_model_from_cfg
# yapf: enable
from .resnet import ResNet, make_res_layer
from .utils import (INITIALIZERS, Caffe2XavierInit, ConstantInit, KaimingInit,
                    NormalInit, PretrainedInit, TruncNormalInit, UniformInit,
                    XavierInit, bias_init_with_prob, caffe2_xavier_init,
                    constant_init, fuse_conv_bn, get_model_complexity_info,
                    initialize, kaiming_init, normal_init, trunc_normal_init,
                    uniform_init, xavier_init)
from .vgg import VGG, make_vgg_layer

__all__ = [
    'AlexNet', 'VGG', 'make_vgg_layer', 'ResNet', 'make_res_layer',
    'constant_init', 'xavier_init', 'normal_init', 'trunc_normal_init',
    'uniform_init', 'kaiming_init', 'caffe2_xavier_init',
    'bias_init_with_prob', 'ConvModule', 'build_activation_layer',
    'build_conv_layer', 'build_norm_layer', 'build_padding_layer',
    'build_upsample_layer', 'build_plugin_layer', 'is_norm', 'NonLocal1d',
    'NonLocal2d', 'NonLocal3d', 'ContextBlock', 'HSigmoid', 'Swish', 'HSwish',
    'GeneralizedAttention', 'ACTIVATION_LAYERS', 'CONV_LAYERS', 'NORM_LAYERS',
    'PADDING_LAYERS', 'UPSAMPLE_LAYERS', 'PLUGIN_LAYERS', 'Scale',
    'get_model_complexity_info', 'conv_ws_2d', 'ConvAWS2d', 'ConvWS2d',
    'fuse_conv_bn', 'DepthwiseSeparableConvModule', 'Linear', 'Conv2d',
    'ConvTranspose2d', 'MaxPool2d', 'ConvTranspose3d', 'MaxPool3d', 'Conv3d',
    'initialize', 'INITIALIZERS', 'ConstantInit', 'XavierInit', 'NormalInit',
    'TruncNormalInit', 'UniformInit', 'KaimingInit', 'PretrainedInit',
    'Caffe2XavierInit', 'MODELS', 'build_model_from_cfg'
]


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/cnn/alexnet.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import logging

import torch.nn as nn


class AlexNet(nn.Module):
    """AlexNet backbone.

    The convolutional feature extractor is always created. The fully
    connected classifier head is only created when ``num_classes`` is
    positive; otherwise ``forward`` returns the output of the feature
    extractor unchanged.

    Args:
        num_classes (int): number of classes for classification.
    """

    def __init__(self, num_classes=-1):
        super().__init__()
        self.num_classes = num_classes

        feature_layers = [
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        ]
        self.features = nn.Sequential(*feature_layers)

        if self.num_classes > 0:
            head_layers = [
                nn.Dropout(),
                nn.Linear(256 * 6 * 6, 4096),
                nn.ReLU(inplace=True),
                nn.Dropout(),
                nn.Linear(4096, 4096),
                nn.ReLU(inplace=True),
                nn.Linear(4096, num_classes),
            ]
            self.classifier = nn.Sequential(*head_layers)

    def init_weights(self, pretrained=None):
        """Load weights from a checkpoint, or keep the default initialization.

        Args:
            pretrained (str | None): Checkpoint location passed to
                ``load_checkpoint``. ``None`` keeps the default initializer.

        Raises:
            TypeError: If ``pretrained`` is neither a str nor None.
        """
        if pretrained is None:
            # use default initializer
            return
        if not isinstance(pretrained, str):
            raise TypeError('pretrained must be a str or None')

        root_logger = logging.getLogger()
        from ..runner import load_checkpoint
        load_checkpoint(self, pretrained, strict=False, logger=root_logger)

    def forward(self, x):
        """Run the feature extractor and, when present, the classifier."""
        feats = self.features(x)
        if self.num_classes > 0:
            # the classifier expects the flattened 256 x 6 x 6 feature map
            flat = feats.view(feats.size(0), 256 * 6 * 6)
            return self.classifier(flat)
        return feats


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/cnn/bricks/__init__.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
from .activation import build_activation_layer
from .context_block import ContextBlock
from .conv import build_conv_layer
from .conv2d_adaptive_padding import Conv2dAdaptivePadding
from .conv_module import ConvModule
from .conv_ws import ConvAWS2d, ConvWS2d, conv_ws_2d
from .depthwise_separable_conv_module import DepthwiseSeparableConvModule
from .drop import Dropout, DropPath
from .generalized_attention import GeneralizedAttention
from .hsigmoid import HSigmoid
from .hswish import HSwish
from .non_local import NonLocal1d, NonLocal2d, NonLocal3d
from .norm import build_norm_layer, is_norm
from .padding import build_padding_layer
from .plugin import build_plugin_layer
from .registry import (ACTIVATION_LAYERS, CONV_LAYERS, NORM_LAYERS,
                       PADDING_LAYERS, PLUGIN_LAYERS, UPSAMPLE_LAYERS)
from .scale import Scale
from .swish import Swish
from .upsample import build_upsample_layer
from .wrappers import (Conv2d, Conv3d, ConvTranspose2d, ConvTranspose3d,
                       Linear, MaxPool2d, MaxPool3d)

__all__ = [
    'ConvModule', 'build_activation_layer', 'build_conv_layer',
    'build_norm_layer', 'build_padding_layer', 'build_upsample_layer',
    'build_plugin_layer', 'is_norm', 'HSigmoid', 'HSwish', 'NonLocal1d',
    'NonLocal2d', 'NonLocal3d', 'ContextBlock', 'GeneralizedAttention',
    'ACTIVATION_LAYERS', 'CONV_LAYERS', 'NORM_LAYERS', 'PADDING_LAYERS',
    'UPSAMPLE_LAYERS', 'PLUGIN_LAYERS', 'Scale', 'ConvAWS2d', 'ConvWS2d',
    'conv_ws_2d', 'DepthwiseSeparableConvModule', 'Swish', 'Linear',
    'Conv2dAdaptivePadding', 'Conv2d', 'ConvTranspose2d', 'MaxPool2d',
    'ConvTranspose3d', 'MaxPool3d', 'Conv3d', 'Dropout', 'DropPath'
]


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/cnn/bricks/activation.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import torch
import torch.nn as nn
import torch.nn.functional as F

from mmcv.utils import TORCH_VERSION, build_from_cfg, digit_version
from .registry import ACTIVATION_LAYERS

# Register the stock PyTorch activation classes listed below with the
# ACTIVATION_LAYERS registry.
for module in [
        nn.ReLU, nn.LeakyReLU, nn.PReLU, nn.RReLU, nn.ReLU6, nn.ELU,
        nn.Sigmoid, nn.Tanh
]:
    ACTIVATION_LAYERS.register_module(module=module)


@ACTIVATION_LAYERS.register_module(name='Clip')
@ACTIVATION_LAYERS.register_module()
class Clamp(nn.Module):
    """Clamp activation layer.

    Limits every value of the feature map to the closed interval
    :math:`[min, max]` by delegating to ``torch.clamp()``. The class is
    registered twice: once without an explicit name and once under the
    name ``'Clip'``.

    Args:
        min (Number | optional): Lower-bound of the range to be clamped to.
            Default to -1.
        max (Number | optional): Upper-bound of the range to be clamped to.
            Default to 1.
    """

    def __init__(self, min=-1., max=1.):
        super().__init__()
        self.min, self.max = min, max

    def forward(self, x):
        """Forward function.

        Args:
            x (torch.Tensor): The input tensor.

        Returns:
            torch.Tensor: Clamped tensor.
        """
        clamped = torch.clamp(x, min=self.min, max=self.max)
        return clamped


class GELU(nn.Module):
    r"""Gaussian Error Linear Unit activation, computed with ``F.gelu``.

    .. math::
        \text{GELU}(x) = x * \Phi(x)
    where :math:`\Phi(x)` is the Cumulative Distribution Function for
    Gaussian Distribution.

    Shape:
        - Input: :math:`(N, *)` where `*` means, any number of additional
          dimensions
        - Output: :math:`(N, *)`, same shape as the input

    .. image:: scripts/activation_images/GELU.png

    Examples::

        >>> m = nn.GELU()
        >>> input = torch.randn(2)
        >>> output = m(input)
    """

    def forward(self, input):
        """Apply GELU element-wise to ``input`` and return the result."""
        activated = F.gelu(input)
        return activated


# Register a GELU activation: the local ``GELU`` class defined above is used
# for parrots or for PyTorch versions older than 1.4, otherwise PyTorch's own
# ``nn.GELU`` is registered.
if (TORCH_VERSION == 'parrots'
        or digit_version(TORCH_VERSION) < digit_version('1.4')):
    ACTIVATION_LAYERS.register_module(module=GELU)
else:
    ACTIVATION_LAYERS.register_module(module=nn.GELU)


def build_activation_layer(cfg):
    """Build activation layer.

    The config is handed to ``build_from_cfg`` together with the
    ``ACTIVATION_LAYERS`` registry.

    Args:
        cfg (dict): The activation layer config, which should contain:

            - type (str): Layer type.
            - layer args: Args needed to instantiate an activation layer.

    Returns:
        nn.Module: Created activation layer.
    """
    activation = build_from_cfg(cfg, ACTIVATION_LAYERS)
    return activation


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/cnn/bricks/context_block.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import torch
from torch import nn

from ..utils import constant_init, kaiming_init
from .registry import PLUGIN_LAYERS


def last_zero_init(m):
    """Constant-initialize ``m`` with value 0.

    When ``m`` is an ``nn.Sequential`` only its last child is initialized;
    any other module is initialized directly.
    """
    target = m[-1] if isinstance(m, nn.Sequential) else m
    constant_init(target, val=0)


@PLUGIN_LAYERS.register_module()
class ContextBlock(nn.Module):
    """ContextBlock module in GCNet.

    See 'GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond'
    (https://arxiv.org/abs/1904.11492) for details.

    A global context vector is pooled from the input feature map and passed
    through a bottleneck transform; the result is fused back into the input
    by channel-wise addition and/or multiplication.

    Args:
        in_channels (int): Channels of the input feature map.
        ratio (float): Ratio of channels of transform bottleneck. The
            bottleneck width is ``int(in_channels * ratio)``.
        pooling_type (str): Pooling method for context modeling.
            Options are 'att' and 'avg', stand for attention pooling and
            average pooling respectively. Default: 'att'.
        fusion_types (Sequence[str]): Fusion method for feature fusion,
            Options are 'channel_add', 'channel_mul', stand for channelwise
            addition and multiplication respectively. Default: ('channel_add',)
    """

    _abbr_ = 'context_block'

    def __init__(self,
                 in_channels,
                 ratio,
                 pooling_type='att',
                 fusion_types=('channel_add', )):
        super().__init__()
        assert pooling_type in ['avg', 'att']
        assert isinstance(fusion_types, (list, tuple))
        valid_fusion_types = ['channel_add', 'channel_mul']
        assert all([f in valid_fusion_types for f in fusion_types])
        assert len(fusion_types) > 0, 'at least one fusion should be used'

        self.in_channels = in_channels
        self.ratio = ratio
        self.planes = int(in_channels * ratio)
        self.pooling_type = pooling_type
        self.fusion_types = fusion_types

        if pooling_type == 'att':
            self.conv_mask = nn.Conv2d(in_channels, 1, kernel_size=1)
            self.softmax = nn.Softmax(dim=2)
        else:
            self.avg_pool = nn.AdaptiveAvgPool2d(1)

        def bottleneck():
            # 1x1 conv -> LayerNorm -> ReLU -> 1x1 conv back to in_channels
            return nn.Sequential(
                nn.Conv2d(self.in_channels, self.planes, kernel_size=1),
                nn.LayerNorm([self.planes, 1, 1]),
                nn.ReLU(inplace=True),  # yapf: disable
                nn.Conv2d(self.planes, self.in_channels, kernel_size=1))

        # Unused fusion branches are kept as ``None`` attributes.
        self.channel_add_conv = (
            bottleneck() if 'channel_add' in fusion_types else None)
        self.channel_mul_conv = (
            bottleneck() if 'channel_mul' in fusion_types else None)
        self.reset_parameters()

    def reset_parameters(self):
        """Initialize the attention mask conv and zero the fusion outputs."""
        if self.pooling_type == 'att':
            kaiming_init(self.conv_mask, mode='fan_in')
            self.conv_mask.inited = True

        for branch in (self.channel_add_conv, self.channel_mul_conv):
            if branch is not None:
                last_zero_init(branch)

    def spatial_pool(self, x):
        """Pool ``x`` of shape [N, C, H, W] into a context of [N, C, 1, 1]."""
        batch, channel, height, width = x.size()
        if self.pooling_type != 'att':
            # [N, C, 1, 1]
            return self.avg_pool(x)

        num_positions = height * width
        # [N, C, H * W] -> [N, 1, C, H * W]
        feats = x.view(batch, channel, num_positions).unsqueeze(1)
        # [N, 1, H, W] -> [N, 1, H * W]
        attention = self.conv_mask(x).view(batch, 1, num_positions)
        # softmax over the positions, then [N, 1, H * W, 1]
        attention = self.softmax(attention).unsqueeze(-1)
        # [N, 1, C, 1]
        context = torch.matmul(feats, attention)
        # [N, C, 1, 1]
        return context.view(batch, channel, 1, 1)

    def forward(self, x):
        """Fuse the pooled global context back into ``x``."""
        # [N, C, 1, 1]
        context = self.spatial_pool(x)

        out = x
        if self.channel_mul_conv is not None:
            # [N, C, 1, 1] gate in (0, 1), broadcast over H and W
            out = out * torch.sigmoid(self.channel_mul_conv(context))
        if self.channel_add_conv is not None:
            # [N, C, 1, 1] offset, broadcast over H and W
            out = out + self.channel_add_conv(context)

        return out


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/cnn/bricks/conv.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
from torch import nn

from .registry import CONV_LAYERS

# Register the PyTorch convolution classes with the CONV_LAYERS registry
# under explicit string names.
CONV_LAYERS.register_module('Conv1d', module=nn.Conv1d)
CONV_LAYERS.register_module('Conv2d', module=nn.Conv2d)
CONV_LAYERS.register_module('Conv3d', module=nn.Conv3d)
# 'Conv' is registered as a second name for the 2D convolution.
CONV_LAYERS.register_module('Conv', module=nn.Conv2d)


def build_conv_layer(cfg, *args, **kwargs):
    """Build convolution layer.

    Args:
        cfg (None or dict): The conv layer config, which should contain:
            - type (str): Layer type.
            - layer args: Args needed to instantiate an conv layer.
            ``None`` is treated as ``dict(type='Conv2d')``. The dict is
            copied, so the caller's config is not modified.
        args (argument list): Arguments passed to the `__init__`
            method of the corresponding conv layer.
        kwargs (keyword arguments): Keyword arguments passed to the `__init__`
            method of the corresponding conv layer.

    Returns:
        nn.Module: Created conv layer.

    Raises:
        TypeError: If ``cfg`` is neither None nor a dict.
        KeyError: If ``cfg`` has no "type" key, or the type is not registered
            in ``CONV_LAYERS``.
    """
    if cfg is None:
        cfg_ = dict(type='Conv2d')
    else:
        if not isinstance(cfg, dict):
            raise TypeError('cfg must be a dict')
        if 'type' not in cfg:
            raise KeyError('the cfg dict must contain the key "type"')
        # work on a copy so that popping "type" leaves the caller's dict intact
        cfg_ = cfg.copy()

    layer_type = cfg_.pop('type')
    if layer_type not in CONV_LAYERS:
        raise KeyError(f'Unrecognized conv type {layer_type}')
    conv_layer = CONV_LAYERS.get(layer_type)

    # remaining cfg entries are forwarded as extra keyword arguments
    layer = conv_layer(*args, **kwargs, **cfg_)

    return layer


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/cnn/bricks/conv2d_adaptive_padding.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import math

from torch import nn
from torch.nn import functional as F

from .registry import CONV_LAYERS


@CONV_LAYERS.register_module()
class Conv2dAdaptivePadding(nn.Conv2d):
    """Implementation of 2D convolution in tensorflow with `padding` as "same",
    which applies padding to input (if needed) so that input image gets fully
    covered by filter and stride you specified. For stride 1, this will ensure
    that output image size is same as input. For stride of 2, output dimensions
    will be half, for example.

    The amount of padding is computed from the input size on every forward
    call; the ``padding`` constructor argument is accepted but not used, and
    the underlying ``nn.Conv2d`` is always created with a padding of 0.

    Args:
        in_channels (int): Number of channels in the input image
        out_channels (int): Number of channels produced by the convolution
        kernel_size (int or tuple): Size of the convolving kernel
        stride (int or tuple, optional): Stride of the convolution. Default: 1
        padding (int or tuple, optional): Zero-padding added to both sides of
            the input. Default: 0
        dilation (int or tuple, optional): Spacing between kernel elements.
            Default: 1
        groups (int, optional): Number of blocked connections from input
            channels to output channels. Default: 1
        bias (bool, optional): If ``True``, adds a learnable bias to the
            output. Default: ``True``
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 bias=True):
        # the fifth positional argument (padding) is fixed to 0 on purpose
        super().__init__(in_channels, out_channels, kernel_size, stride, 0,
                         dilation, groups, bias)

    def forward(self, x):
        img_h, img_w = x.size()[-2:]
        kernel_h, kernel_w = self.weight.size()[-2:]
        stride_h, stride_w = self.stride
        dilation_h, dilation_w = self.dilation

        # target output size: ceil(input / stride)
        output_h = math.ceil(img_h / stride_h)
        output_w = math.ceil(img_w / stride_w)

        # total padding needed per axis so the kernel covers the whole input
        needed_h = ((output_h - 1) * stride_h +
                    (kernel_h - 1) * dilation_h + 1 - img_h)
        needed_w = ((output_w - 1) * stride_w +
                    (kernel_w - 1) * dilation_w + 1 - img_w)
        pad_h = max(needed_h, 0)
        pad_w = max(needed_w, 0)

        if pad_h > 0 or pad_w > 0:
            # an odd amount puts the extra pixel on the right / bottom side
            left, top = pad_w // 2, pad_h // 2
            x = F.pad(x, [left, pad_w - left, top, pad_h - top])
        return F.conv2d(x, self.weight, self.bias, self.stride, self.padding,
                        self.dilation, self.groups)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/cnn/bricks/conv_module.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import warnings

import torch.nn as nn

from mmcv.utils import _BatchNorm, _InstanceNorm
from ..utils import constant_init, kaiming_init
from .activation import build_activation_layer
from .conv import build_conv_layer
from .norm import build_norm_layer
from .padding import build_padding_layer
from .registry import PLUGIN_LAYERS


@PLUGIN_LAYERS.register_module()
class ConvModule(nn.Module):
    """A conv block that bundles conv/norm/activation layers.

    This block simplifies the usage of convolution layers, which are commonly
    used with a norm layer (e.g., BatchNorm) and activation layer (e.g., ReLU).
    It is based upon three build methods: `build_conv_layer()`,
    `build_norm_layer()` and `build_activation_layer()`.

    Besides, we add some additional features in this module.
    1. Automatically set `bias` of the conv layer.
    2. Spectral norm is supported.
    3. More padding modes are supported. Before PyTorch 1.5, nn.Conv2d only
    supports zero and circular padding, and we add "reflect" padding mode.

    Args:
        in_channels (int): Number of channels in the input feature map.
            Same as that in ``nn._ConvNd``.
        out_channels (int): Number of channels produced by the convolution.
            Same as that in ``nn._ConvNd``.
        kernel_size (int | tuple[int]): Size of the convolving kernel.
            Same as that in ``nn._ConvNd``.
        stride (int | tuple[int]): Stride of the convolution.
            Same as that in ``nn._ConvNd``.
        padding (int | tuple[int]): Zero-padding added to both sides of
            the input. Same as that in ``nn._ConvNd``.
        dilation (int | tuple[int]): Spacing between kernel elements.
            Same as that in ``nn._ConvNd``.
        groups (int): Number of blocked connections from input channels to
            output channels. Same as that in ``nn._ConvNd``.
        bias (bool | str): If specified as `auto`, it will be decided by the
            norm_cfg. Bias will be set as True if `norm_cfg` is None, otherwise
            False. Default: "auto".
        conv_cfg (dict): Config dict for convolution layer. Default: None,
            which means using conv2d.
        norm_cfg (dict): Config dict for normalization layer. Default: None.
        act_cfg (dict): Config dict for activation layer.
            Default: dict(type='ReLU').
        inplace (bool): Whether to use inplace mode for activation. It is
            not forwarded to activation types that take no ``inplace``
            argument ('Tanh', 'PReLU', 'Sigmoid', 'HSigmoid', 'Swish',
            'GELU', 'Clamp', 'Clip'). Default: True.
        with_spectral_norm (bool): Whether use spectral norm in conv module.
            Default: False.
        padding_mode (str): If the `padding_mode` has not been supported by
            current `Conv2d` in PyTorch, we will use our own padding layer
            instead. Currently, we support ['zeros', 'circular'] with official
            implementation and ['reflect'] with our own implementation.
            Default: 'zeros'.
        order (tuple[str]): The order of conv/norm/activation layers. It is a
            sequence of "conv", "norm" and "act". Common examples are
            ("conv", "norm", "act") and ("act", "conv", "norm").
            Default: ('conv', 'norm', 'act').
    """

    _abbr_ = 'conv_block'

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 bias='auto',
                 conv_cfg=None,
                 norm_cfg=None,
                 act_cfg=dict(type='ReLU'),
                 inplace=True,
                 with_spectral_norm=False,
                 padding_mode='zeros',
                 order=('conv', 'norm', 'act')):
        super(ConvModule, self).__init__()
        assert conv_cfg is None or isinstance(conv_cfg, dict)
        assert norm_cfg is None or isinstance(norm_cfg, dict)
        assert act_cfg is None or isinstance(act_cfg, dict)
        official_padding_mode = ['zeros', 'circular']
        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg
        self.act_cfg = act_cfg
        self.inplace = inplace
        self.with_spectral_norm = with_spectral_norm
        self.with_explicit_padding = padding_mode not in official_padding_mode
        self.order = order
        assert isinstance(self.order, tuple) and len(self.order) == 3
        assert set(order) == set(['conv', 'norm', 'act'])

        self.with_norm = norm_cfg is not None
        self.with_activation = act_cfg is not None
        # if the conv layer is before a norm layer, bias is unnecessary.
        if bias == 'auto':
            bias = not self.with_norm
        self.with_bias = bias

        if self.with_explicit_padding:
            pad_cfg = dict(type=padding_mode)
            self.padding_layer = build_padding_layer(pad_cfg, padding)

        # reset padding to 0 for conv module
        conv_padding = 0 if self.with_explicit_padding else padding
        # build convolution layer
        self.conv = build_conv_layer(
            conv_cfg,
            in_channels,
            out_channels,
            kernel_size,
            stride=stride,
            padding=conv_padding,
            dilation=dilation,
            groups=groups,
            bias=bias)
        # export the attributes of self.conv to a higher level for convenience
        self.in_channels = self.conv.in_channels
        self.out_channels = self.conv.out_channels
        self.kernel_size = self.conv.kernel_size
        self.stride = self.conv.stride
        # keep the requested padding, even when an explicit padding layer
        # is used and the conv itself was built with padding 0
        self.padding = padding
        self.dilation = self.conv.dilation
        self.transposed = self.conv.transposed
        self.output_padding = self.conv.output_padding
        self.groups = self.conv.groups

        if self.with_spectral_norm:
            self.conv = nn.utils.spectral_norm(self.conv)

        # build normalization layers
        if self.with_norm:
            # norm layer is after conv layer
            if order.index('norm') > order.index('conv'):
                norm_channels = out_channels
            else:
                norm_channels = in_channels
            self.norm_name, norm = build_norm_layer(norm_cfg, norm_channels)
            self.add_module(self.norm_name, norm)
            if self.with_bias:
                if isinstance(norm, (_BatchNorm, _InstanceNorm)):
                    warnings.warn(
                        'Unnecessary conv bias before batch/instance norm')
        else:
            self.norm_name = None

        # build activation layer
        if self.with_activation:
            # copy so that the caller's (or the shared default) dict is
            # never modified by ``setdefault`` below
            act_cfg_ = act_cfg.copy()
            # These activation layers have no 'inplace' argument; passing it
            # would make their constructors raise a TypeError.
            if act_cfg_['type'] not in [
                    'Tanh', 'PReLU', 'Sigmoid', 'HSigmoid', 'Swish', 'GELU',
                    'Clamp', 'Clip'
            ]:
                act_cfg_.setdefault('inplace', inplace)
            self.activate = build_activation_layer(act_cfg_)

        # Use msra init by default
        self.init_weights()

    @property
    def norm(self):
        """nn.Module | None: The norm layer, or None when no norm is used."""
        if self.norm_name:
            return getattr(self, self.norm_name)
        else:
            return None

    def init_weights(self):
        """Initialize the conv weights and the norm layer parameters."""
        # 1. It is mainly for customized conv layers with their own
        #    initialization manners by calling their own ``init_weights()``,
        #    and we do not want ConvModule to override the initialization.
        # 2. For customized conv layers without their own initialization
        #    manners (that is, they don't have their own ``init_weights()``)
        #    and PyTorch's conv layers, they will be initialized by
        #    this method with default ``kaiming_init``.
        # Note: For PyTorch's conv layers, they will be overwritten by our
        #    initialization implementation using default ``kaiming_init``.
        if not hasattr(self.conv, 'init_weights'):
            if self.with_activation and self.act_cfg['type'] == 'LeakyReLU':
                nonlinearity = 'leaky_relu'
                a = self.act_cfg.get('negative_slope', 0.01)
            else:
                nonlinearity = 'relu'
                a = 0
            kaiming_init(self.conv, a=a, nonlinearity=nonlinearity)
        if self.with_norm:
            constant_init(self.norm, 1, bias=0)

    def forward(self, x, activate=True, norm=True):
        """Apply the layers in ``self.order``.

        Args:
            x (torch.Tensor): Input tensor.
            activate (bool): Set to False to skip the activation layer.
            norm (bool): Set to False to skip the norm layer.

        Returns:
            torch.Tensor: Output of the block.
        """
        for layer in self.order:
            if layer == 'conv':
                if self.with_explicit_padding:
                    x = self.padding_layer(x)
                x = self.conv(x)
            elif layer == 'norm' and norm and self.with_norm:
                x = self.norm(x)
            elif layer == 'act' and activate and self.with_activation:
                x = self.activate(x)
        return x


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/cnn/bricks/conv_ws.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import torch
import torch.nn as nn
import torch.nn.functional as F

from .registry import CONV_LAYERS


def conv_ws_2d(input,
               weight,
               bias=None,
               stride=1,
               padding=0,
               dilation=1,
               groups=1,
               eps=1e-5):
    """2D convolution with weight standardization.

    Every filter (a slice along dim 0 of ``weight``) is shifted to zero mean
    and divided by ``std + eps`` before the regular ``F.conv2d`` is applied.

    Args:
        input (torch.Tensor): Input passed to ``F.conv2d``.
        weight (torch.Tensor): 4D convolution weight.
        bias (torch.Tensor | None): Optional bias. Default: None.
        stride, padding, dilation, groups: Forwarded to ``F.conv2d``.
        eps (float): Value added to the standard deviation. Default: 1e-5.

    Returns:
        torch.Tensor: Result of the convolution.
    """
    num_filters = weight.size(0)
    stat_shape = (num_filters, 1, 1, 1)
    flat = weight.view(num_filters, -1)
    # per-filter statistics, reshaped to broadcast against the 4D weight
    mu = flat.mean(dim=1, keepdim=True).view(*stat_shape)
    sigma = flat.std(dim=1, keepdim=True).view(*stat_shape)
    standardized = (weight - mu) / (sigma + eps)
    return F.conv2d(input, standardized, bias, stride, padding, dilation,
                    groups)


@CONV_LAYERS.register_module('ConvWS')
class ConvWS2d(nn.Conv2d):
    """``nn.Conv2d`` whose forward pass runs through ``conv_ws_2d``.

    Registered in ``CONV_LAYERS`` under the name 'ConvWS'. All arguments
    except ``eps`` are forwarded to ``nn.Conv2d``.

    Args:
        eps (float): Passed to ``conv_ws_2d`` as its ``eps`` argument on
            every forward call. Default: 1e-5.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 bias=True,
                 eps=1e-5):
        super().__init__(
            in_channels,
            out_channels,
            kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            bias=bias)
        self.eps = eps

    def forward(self, x):
        return conv_ws_2d(
            x,
            self.weight,
            bias=self.bias,
            stride=self.stride,
            padding=self.padding,
            dilation=self.dilation,
            groups=self.groups,
            eps=self.eps)


@CONV_LAYERS.register_module(name='ConvAWS')
class ConvAWS2d(nn.Conv2d):
    """AWS (Adaptive Weight Standardization)

    A variant of Weight Standardization
    (https://arxiv.org/pdf/1903.10520.pdf) that is used in DetectoRS
    (https://arxiv.org/pdf/2006.02334.pdf) to avoid NaN.

    The kernel is standardized per output filter and then rescaled and
    shifted by the ``weight_gamma`` / ``weight_beta`` buffers.

    Args:
        in_channels (int): Number of channels in the input image
        out_channels (int): Number of channels produced by the convolution
        kernel_size (int or tuple): Size of the conv kernel
        stride (int or tuple, optional): Stride of the convolution. Default: 1
        padding (int or tuple, optional): Zero-padding added to both sides of
            the input. Default: 0
        dilation (int or tuple, optional): Spacing between kernel elements.
            Default: 1
        groups (int, optional): Number of blocked connections from input
            channels to output channels. Default: 1
        bias (bool, optional): If set True, adds a learnable bias to the
            output. Default: True
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 bias=True):
        conv_kwargs = dict(
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            bias=bias)
        super().__init__(in_channels, out_channels, kernel_size,
                         **conv_kwargs)
        # One scale and one shift per output filter, kept as buffers.
        stat_shape = (self.out_channels, 1, 1, 1)
        self.register_buffer('weight_gamma', torch.ones(*stat_shape))
        self.register_buffer('weight_beta', torch.zeros(*stat_shape))

    def _get_weight(self, weight):
        """Standardize ``weight`` per filter, then apply gamma and beta."""
        num_filters = weight.size(0)
        per_filter = weight.view(num_filters, -1)
        filter_mean = per_filter.mean(dim=1).view(-1, 1, 1, 1)
        filter_var = per_filter.var(dim=1)
        filter_std = torch.sqrt(filter_var + 1e-5).view(-1, 1, 1, 1)
        standardized = (weight - filter_mean) / filter_std
        return self.weight_gamma * standardized + self.weight_beta

    def forward(self, x):
        """Convolve ``x`` with the adaptively standardized kernel."""
        aws_weight = self._get_weight(self.weight)
        return F.conv2d(x, aws_weight, self.bias, self.stride, self.padding,
                        self.dilation, self.groups)

    def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict,
                              missing_keys, unexpected_keys, error_msgs):
        """Override default load function.

        ``weight_gamma`` is filled with -1 before delegating to the parent
        loader so that it can be told afterwards whether the checkpoint
        overwrote it. If its mean is positive after loading, the parent's
        missing keys are reported as-is and the function returns. Otherwise
        the per-filter mean and std of the loaded weights are copied into
        ``weight_beta`` and ``weight_gamma``, and those two keys are not
        reported as missing.
        """
        # Sentinel: a positive mean afterwards means gamma was loaded.
        self.weight_gamma.data.fill_(-1)
        own_missing = []
        super()._load_from_state_dict(state_dict, prefix, local_metadata,
                                      strict, own_missing, unexpected_keys,
                                      error_msgs)
        if self.weight_gamma.data.mean() > 0:
            missing_keys.extend(own_missing)
            return

        # Recover gamma/beta from the statistics of the loaded weights.
        loaded = self.weight.data
        per_filter = loaded.view(loaded.size(0), -1)
        filter_mean = per_filter.mean(dim=1).view(-1, 1, 1, 1)
        filter_std = torch.sqrt(per_filter.var(dim=1) + 1e-5).view(
            -1, 1, 1, 1)
        self.weight_beta.data.copy_(filter_mean)
        self.weight_gamma.data.copy_(filter_std)

        # Everything except the two recovered buffers is still missing.
        recovered_suffixes = ('weight_gamma', 'weight_beta')
        missing_keys.extend(
            key for key in own_missing
            if not key.endswith(recovered_suffixes))


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/cnn/bricks/depthwise_separable_conv_module.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import torch.nn as nn

from .conv_module import ConvModule


class DepthwiseSeparableConvModule(nn.Module):
    """Depthwise separable convolution module.

    See https://arxiv.org/pdf/1704.04861.pdf for details.

    Stands in for a single ConvModule by chaining two of them: a depthwise
    ConvModule (``groups=in_channels``, ``in_channels`` outputs) followed by
    a pointwise ConvModule (kernel size 1, ``out_channels`` outputs). Each
    of the two blocks gets its own norm/activation config, so the depthwise
    block also carries norm/activation layers when ``norm_cfg`` and
    ``act_cfg`` are specified.

    Args:
        in_channels (int): Number of channels in the input feature map.
            Same as that in ``nn._ConvNd``.
        out_channels (int): Number of channels produced by the convolution.
            Same as that in ``nn._ConvNd``.
        kernel_size (int | tuple[int]): Size of the convolving kernel.
            Same as that in ``nn._ConvNd``.
        stride (int | tuple[int]): Stride of the convolution.
            Same as that in ``nn._ConvNd``. Default: 1.
        padding (int | tuple[int]): Zero-padding added to both sides of
            the input. Same as that in ``nn._ConvNd``. Default: 0.
        dilation (int | tuple[int]): Spacing between kernel elements.
            Same as that in ``nn._ConvNd``. Default: 1.
        norm_cfg (dict): Default norm config for both depthwise ConvModule and
            pointwise ConvModule. Default: None.
        act_cfg (dict): Default activation config for both depthwise ConvModule
            and pointwise ConvModule. Default: dict(type='ReLU').
        dw_norm_cfg (dict): Norm config of depthwise ConvModule. If it is
            'default', it will be the same as `norm_cfg`. Default: 'default'.
        dw_act_cfg (dict): Activation config of depthwise ConvModule. If it is
            'default', it will be the same as `act_cfg`. Default: 'default'.
        pw_norm_cfg (dict): Norm config of pointwise ConvModule. If it is
            'default', it will be the same as `norm_cfg`. Default: 'default'.
        pw_act_cfg (dict): Activation config of pointwise ConvModule. If it is
            'default', it will be the same as `act_cfg`. Default: 'default'.
        kwargs (optional): Other shared arguments for depthwise and pointwise
            ConvModule. ``groups`` must not be among them. See ConvModule for
            ref.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 norm_cfg=None,
                 act_cfg=dict(type='ReLU'),
                 dw_norm_cfg='default',
                 dw_act_cfg='default',
                 pw_norm_cfg='default',
                 pw_act_cfg='default',
                 **kwargs):
        super().__init__()
        assert 'groups' not in kwargs, 'groups should not be specified'

        def _resolve(specific_cfg, shared_cfg):
            # The literal 'default' means "inherit the shared config".
            return specific_cfg if specific_cfg != 'default' else shared_cfg

        # Depthwise stage: one group per input channel, channel count kept.
        self.depthwise_conv = ConvModule(
            in_channels,
            in_channels,
            kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=in_channels,
            norm_cfg=_resolve(dw_norm_cfg, norm_cfg),
            act_cfg=_resolve(dw_act_cfg, act_cfg),
            **kwargs)

        # Pointwise stage: 1x1 convolution mapping to out_channels.
        self.pointwise_conv = ConvModule(
            in_channels,
            out_channels,
            1,
            norm_cfg=_resolve(pw_norm_cfg, norm_cfg),
            act_cfg=_resolve(pw_act_cfg, act_cfg),
            **kwargs)

    def forward(self, x):
        """Apply the depthwise block and then the pointwise block to ``x``."""
        return self.pointwise_conv(self.depthwise_conv(x))


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/cnn/bricks/drop.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import torch
import torch.nn as nn

from mmcv import build_from_cfg
from .registry import DROPOUT_LAYERS


def drop_path(x, drop_prob=0., training=False):
    """Drop paths (Stochastic Depth) per sample (when applied in main path of
    residual blocks).

    We follow the implementation
    https://github.com/rwightman/pytorch-image-models/blob/a2727c1bf78ba0d7b5727f5f95e37fb7f8866b1f/timm/models/layers/drop.py  # noqa: E501

    Args:
        x (Tensor): Input whose first dimension indexes samples; one keep/drop
            decision is drawn per sample and broadcast over the other dims.
        drop_prob (float): Probability of zeroing a sample. Default: 0.
        training (bool): When False the input is returned untouched.
            Default: False.

    Returns:
        Tensor: ``x`` itself when ``drop_prob == 0`` or ``training`` is False.
        Otherwise kept samples are scaled by ``1 / (1 - drop_prob)`` and
        dropped samples are zero. With ``drop_prob >= 1`` every sample is
        dropped and the result is all zeros.
    """
    if drop_prob == 0. or not training:
        return x
    keep_prob = 1 - drop_prob
    if keep_prob <= 0:
        # Nothing can survive. Dividing by keep_prob here would turn the
        # output into inf * 0 = NaN, so return zeros; multiplying by zero
        # keeps the result attached to the graph of x.
        return x * 0.
    # handle tensors with different dimensions, not just 4D tensors.
    shape = (x.shape[0], ) + (1, ) * (x.ndim - 1)
    random_tensor = keep_prob + torch.rand(
        shape, dtype=x.dtype, device=x.device)
    # floor() maps [keep_prob, 1) to 0 (drop) and [1, 1 + keep_prob) to 1.
    keep_mask = random_tensor.floor()
    return x.div(keep_prob) * keep_mask


@DROPOUT_LAYERS.register_module()
class DropPath(nn.Module):
    """Module form of ``drop_path``: per-sample Stochastic Depth, intended
    for the main path of residual blocks.

    We follow the implementation
    https://github.com/rwightman/pytorch-image-models/blob/a2727c1bf78ba0d7b5727f5f95e37fb7f8866b1f/timm/models/layers/drop.py  # noqa: E501

    Args:
        drop_prob (float): Probability of the path to be zeroed. Default: 0.1
    """

    def __init__(self, drop_prob=0.1):
        super().__init__()
        self.drop_prob = drop_prob

    def forward(self, x):
        """Call ``drop_path`` with the stored probability and the module's
        current ``training`` flag."""
        return drop_path(
            x, drop_prob=self.drop_prob, training=self.training)


@DROPOUT_LAYERS.register_module()
class Dropout(nn.Dropout):
    """Thin subclass of ``torch.nn.Dropout`` that exposes the ``p`` argument
    under the name ``drop_prob``, matching the argument name used by
    ``DropPath``.

    Args:
        drop_prob (float): Probability of the elements to be
            zeroed. Default: 0.5.
        inplace (bool):  Do the operation inplace or not. Default: False.
    """

    def __init__(self, drop_prob=0.5, inplace=False):
        # ``drop_prob`` fills the parent's first parameter, ``p``.
        super(Dropout, self).__init__(drop_prob, inplace)


def build_dropout(cfg, default_args=None):
    """Builder for drop out layers.

    Delegates to ``build_from_cfg`` with the ``DROPOUT_LAYERS`` registry and
    returns whatever that call produces.

    Args:
        cfg: Config forwarded to ``build_from_cfg``.
        default_args: Forwarded to ``build_from_cfg``. Default: None.
    """
    dropout_layer = build_from_cfg(cfg, DROPOUT_LAYERS, default_args)
    return dropout_layer


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/cnn/bricks/generalized_attention.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import math

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

from ..utils import kaiming_init
from .registry import PLUGIN_LAYERS


@PLUGIN_LAYERS.register_module()
class GeneralizedAttention(nn.Module):
    """GeneralizedAttention module.

    See 'An Empirical Study of Spatial Attention Mechanisms in Deep Networks'
    (https://arxiv.org/abs/1904.05873) for details.

    Args:
        in_channels (int): Channels of the input feature map.
        spatial_range (int): The spatial range. -1 indicates no spatial range
            constraint. Default: -1.
        num_heads (int): The head number of empirical_attention module.
            Default: 9.
        position_embedding_dim (int): The position embedding dimension.
            A non-positive value falls back to ``in_channels``. Default: -1.
        position_magnitude (int): A multiplier acting on coord difference.
            Default: 1.
        kv_stride (int): The feature stride acting on key/value feature map.
            Default: 2.
        q_stride (int): The feature stride acting on query feature map.
            Default: 1.
        attention_type (str): A binary indicator string for indicating which
            items in generalized empirical_attention module are used.
            Default: '1111'.

            - '1000' indicates 'query and key content' (appr - appr) item,
            - '0100' indicates 'query content and relative position'
              (appr - position) item,
            - '0010' indicates 'key content only' (bias - appr) item,
            - '0001' indicates 'relative position only' (bias - position) item.

    Raises:
        ValueError: If ``spatial_range >= 0`` and ``in_channels`` is neither
            256 nor 512, the only two values for which a local-constraint
            map size is defined.
    """

    _abbr_ = 'gen_attention_block'

    def __init__(self,
                 in_channels,
                 spatial_range=-1,
                 num_heads=9,
                 position_embedding_dim=-1,
                 position_magnitude=1,
                 kv_stride=2,
                 q_stride=1,
                 attention_type='1111'):

        super(GeneralizedAttention, self).__init__()

        # a non-positive embedding dim means "use in_channels"
        self.position_embedding_dim = (
            position_embedding_dim
            if position_embedding_dim > 0 else in_channels)

        self.position_magnitude = position_magnitude
        self.num_heads = num_heads
        self.in_channels = in_channels
        # hard range means local range for non-local operation
        self.spatial_range = spatial_range
        self.kv_stride = kv_stride
        self.q_stride = q_stride
        # one boolean switch per character of the indicator string
        self.attention_type = [bool(int(_)) for _ in attention_type]
        self.qk_embed_dim = in_channels // num_heads
        out_c = self.qk_embed_dim * num_heads

        # the query projection is only needed by the two query-based terms
        if self.attention_type[0] or self.attention_type[1]:
            self.query_conv = nn.Conv2d(
                in_channels=in_channels,
                out_channels=out_c,
                kernel_size=1,
                bias=False)
            self.query_conv.kaiming_init = True

        # the key projection is only needed by the two key-content terms
        if self.attention_type[0] or self.attention_type[2]:
            self.key_conv = nn.Conv2d(
                in_channels=in_channels,
                out_channels=out_c,
                kernel_size=1,
                bias=False)
            self.key_conv.kaiming_init = True

        self.v_dim = in_channels // num_heads
        self.value_conv = nn.Conv2d(
            in_channels=in_channels,
            out_channels=self.v_dim * num_heads,
            kernel_size=1,
            bias=False)
        self.value_conv.kaiming_init = True

        # linear maps applied to the x / y relative-position embeddings
        if self.attention_type[1] or self.attention_type[3]:
            self.appr_geom_fc_x = nn.Linear(
                self.position_embedding_dim // 2, out_c, bias=False)
            self.appr_geom_fc_x.kaiming_init = True

            self.appr_geom_fc_y = nn.Linear(
                self.position_embedding_dim // 2, out_c, bias=False)
            self.appr_geom_fc_y.kaiming_init = True

        # learnable biases, initialised uniformly in (-stdv, stdv]
        if self.attention_type[2]:
            stdv = 1.0 / math.sqrt(self.qk_embed_dim * 2)
            appr_bias_value = -2 * stdv * torch.rand(out_c) + stdv
            self.appr_bias = nn.Parameter(appr_bias_value)

        if self.attention_type[3]:
            stdv = 1.0 / math.sqrt(self.qk_embed_dim * 2)
            geom_bias_value = -2 * stdv * torch.rand(out_c) + stdv
            self.geom_bias = nn.Parameter(geom_bias_value)

        self.proj_conv = nn.Conv2d(
            in_channels=self.v_dim * num_heads,
            out_channels=in_channels,
            kernel_size=1,
            bias=True)
        self.proj_conv.kaiming_init = True
        # residual scale, starts at zero so the block is initially identity
        self.gamma = nn.Parameter(torch.zeros(1))

        if self.spatial_range >= 0:
            # only works when non local is after 3*3 conv
            if in_channels == 256:
                max_len = 84
            elif in_channels == 512:
                max_len = 42
            else:
                # Without this branch max_len would be unbound and the next
                # statement would fail with an opaque UnboundLocalError.
                raise ValueError(
                    'spatial_range >= 0 is only supported for in_channels '
                    f'of 256 or 512, but got in_channels={in_channels}')

            max_len_kv = int((max_len - 1.0) / self.kv_stride + 1)
            # 1 marks a (query, key) pair that is masked out; the window
            # around each query position is cleared to 0 below.
            local_constraint_map = np.ones(
                (max_len, max_len, max_len_kv, max_len_kv), dtype=int)
            for iy in range(max_len):
                y_lo = max((iy - self.spatial_range) // self.kv_stride, 0)
                y_hi = min(
                    (iy + self.spatial_range + 1) // self.kv_stride + 1,
                    max_len_kv)
                for ix in range(max_len):
                    x_lo = max((ix - self.spatial_range) // self.kv_stride, 0)
                    x_hi = min(
                        (ix + self.spatial_range + 1) // self.kv_stride + 1,
                        max_len_kv)
                    local_constraint_map[iy, ix, y_lo:y_hi, x_lo:x_hi] = 0

            # kept as uint8 so the stored parameter dtype is unchanged
            self.local_constraint_map = nn.Parameter(
                torch.from_numpy(local_constraint_map).byte(),
                requires_grad=False)

        # kernel_size=1 average pooling is plain strided subsampling
        if self.q_stride > 1:
            self.q_downsample = nn.AvgPool2d(
                kernel_size=1, stride=self.q_stride)
        else:
            self.q_downsample = None

        if self.kv_stride > 1:
            self.kv_downsample = nn.AvgPool2d(
                kernel_size=1, stride=self.kv_stride)
        else:
            self.kv_downsample = None

        self.init_weights()

    def get_position_embedding(self,
                               h,
                               w,
                               h_kv,
                               w_kv,
                               q_stride,
                               kv_stride,
                               device,
                               dtype,
                               feat_dim,
                               wave_length=1000):
        """Build sinusoidal embeddings of query/key coordinate differences.

        Args:
            h (int): Height of the query map.
            w (int): Width of the query map.
            h_kv (int): Height of the key/value map.
            w_kv (int): Width of the key/value map.
            q_stride (int): Multiplier applied to query indices.
            kv_stride (int): Multiplier applied to key/value indices.
            device: Device of the returned tensors.
            dtype: Dtype of the returned tensors.
            feat_dim (int): Embedding size; ``feat_dim / 4`` frequencies are
                used, each contributing a sin and a cos channel per axis.
            wave_length (int): Base of the frequency progression.
                Default: 1000.

        Returns:
            tuple[Tensor, Tensor]: ``(embedding_x, embedding_y)`` whose
            leading dims are ``(w, w_kv)`` and ``(h, h_kv)`` respectively,
            with the sin and cos channels concatenated on the last dim.
        """
        # the default type of Tensor is float32, leading to type mismatch
        # in fp16 mode. Cast it to support fp16 mode.
        h_idxs = torch.linspace(0, h - 1, h).to(device=device, dtype=dtype)
        h_idxs = h_idxs.view((h, 1)) * q_stride

        w_idxs = torch.linspace(0, w - 1, w).to(device=device, dtype=dtype)
        w_idxs = w_idxs.view((w, 1)) * q_stride

        h_kv_idxs = torch.linspace(0, h_kv - 1, h_kv).to(
            device=device, dtype=dtype)
        h_kv_idxs = h_kv_idxs.view((h_kv, 1)) * kv_stride

        w_kv_idxs = torch.linspace(0, w_kv - 1, w_kv).to(
            device=device, dtype=dtype)
        w_kv_idxs = w_kv_idxs.view((w_kv, 1)) * kv_stride

        # (h, h_kv, 1)
        h_diff = h_idxs.unsqueeze(1) - h_kv_idxs.unsqueeze(0)
        h_diff *= self.position_magnitude

        # (w, w_kv, 1)
        w_diff = w_idxs.unsqueeze(1) - w_kv_idxs.unsqueeze(0)
        w_diff *= self.position_magnitude

        feat_range = torch.arange(0, feat_dim / 4).to(
            device=device, dtype=dtype)

        # wave_length ** (4 * i / feat_dim) for each frequency index i
        dim_mat = torch.Tensor([wave_length]).to(device=device, dtype=dtype)
        dim_mat = dim_mat**((4. / feat_dim) * feat_range)
        dim_mat = dim_mat.view((1, 1, -1))

        embedding_x = torch.cat(
            ((w_diff / dim_mat).sin(), (w_diff / dim_mat).cos()), dim=2)

        embedding_y = torch.cat(
            ((h_diff / dim_mat).sin(), (h_diff / dim_mat).cos()), dim=2)

        return embedding_x, embedding_y

    def forward(self, x_input):
        """Apply the attention block and add the result to ``x_input``.

        Args:
            x_input (Tensor): 4D input; its shape is unpacked as
                ``(n, _, h, w)`` after the optional query downsampling.

        Returns:
            Tensor: ``gamma * attention_output + x_input``.
        """
        num_heads = self.num_heads

        # use empirical_attention
        if self.q_downsample is not None:
            x_q = self.q_downsample(x_input)
        else:
            x_q = x_input
        n, _, h, w = x_q.shape

        if self.kv_downsample is not None:
            x_kv = self.kv_downsample(x_input)
        else:
            x_kv = x_input
        _, _, h_kv, w_kv = x_kv.shape

        if self.attention_type[0] or self.attention_type[1]:
            # (n, num_heads, h*w, qk_embed_dim) after the permute
            proj_query = self.query_conv(x_q).view(
                (n, num_heads, self.qk_embed_dim, h * w))
            proj_query = proj_query.permute(0, 1, 3, 2)

        if self.attention_type[0] or self.attention_type[2]:
            # (n, num_heads, qk_embed_dim, h_kv*w_kv)
            proj_key = self.key_conv(x_kv).view(
                (n, num_heads, self.qk_embed_dim, h_kv * w_kv))

        if self.attention_type[1] or self.attention_type[3]:
            position_embed_x, position_embed_y = self.get_position_embedding(
                h, w, h_kv, w_kv, self.q_stride, self.kv_stride,
                x_input.device, x_input.dtype, self.position_embedding_dim)
            # (n, num_heads, w, w_kv, dim)
            position_feat_x = self.appr_geom_fc_x(position_embed_x).\
                view(1, w, w_kv, num_heads, self.qk_embed_dim).\
                permute(0, 3, 1, 2, 4).\
                repeat(n, 1, 1, 1, 1)

            # (n, num_heads, h, h_kv, dim)
            position_feat_y = self.appr_geom_fc_y(position_embed_y).\
                view(1, h, h_kv, num_heads, self.qk_embed_dim).\
                permute(0, 3, 1, 2, 4).\
                repeat(n, 1, 1, 1, 1)

            position_feat_x /= math.sqrt(2)
            position_feat_y /= math.sqrt(2)

        # accelerate for saliency only
        if (np.sum(self.attention_type) == 1) and self.attention_type[2]:
            appr_bias = self.appr_bias.\
                view(1, num_heads, 1, self.qk_embed_dim).\
                repeat(n, 1, 1, 1)

            energy = torch.matmul(appr_bias, proj_key).\
                view(n, num_heads, 1, h_kv * w_kv)

            # the energy does not depend on the query position, so a single
            # query location is computed and broadcast by the residual add
            h = 1
            w = 1
        else:
            # energy is laid out as (n, num_heads, h, w, h_kv, w_kv) and
            # flattened to (n, num_heads, h*w, h_kv*w_kv) at the end
            if not self.attention_type[0]:
                energy = torch.zeros(
                    n,
                    num_heads,
                    h,
                    w,
                    h_kv,
                    w_kv,
                    dtype=x_input.dtype,
                    device=x_input.device)

            # attention_type[0]: appr - appr
            # attention_type[1]: appr - position
            # attention_type[2]: bias - appr
            # attention_type[3]: bias - position
            if self.attention_type[0] or self.attention_type[2]:
                if self.attention_type[0] and self.attention_type[2]:
                    appr_bias = self.appr_bias.\
                        view(1, num_heads, 1, self.qk_embed_dim)
                    energy = torch.matmul(proj_query + appr_bias, proj_key).\
                        view(n, num_heads, h, w, h_kv, w_kv)

                elif self.attention_type[0]:
                    energy = torch.matmul(proj_query, proj_key).\
                        view(n, num_heads, h, w, h_kv, w_kv)

                elif self.attention_type[2]:
                    appr_bias = self.appr_bias.\
                        view(1, num_heads, 1, self.qk_embed_dim).\
                        repeat(n, 1, 1, 1)

                    energy += torch.matmul(appr_bias, proj_key).\
                        view(n, num_heads, 1, 1, h_kv, w_kv)

            if self.attention_type[1] or self.attention_type[3]:
                if self.attention_type[1] and self.attention_type[3]:
                    geom_bias = self.geom_bias.\
                        view(1, num_heads, 1, self.qk_embed_dim)

                    proj_query_reshape = (proj_query + geom_bias).\
                        view(n, num_heads, h, w, self.qk_embed_dim)

                    energy_x = torch.matmul(
                        proj_query_reshape.permute(0, 1, 3, 2, 4),
                        position_feat_x.permute(0, 1, 2, 4, 3))
                    energy_x = energy_x.\
                        permute(0, 1, 3, 2, 4).unsqueeze(4)

                    energy_y = torch.matmul(
                        proj_query_reshape,
                        position_feat_y.permute(0, 1, 2, 4, 3))
                    energy_y = energy_y.unsqueeze(5)

                    energy += energy_x + energy_y

                elif self.attention_type[1]:
                    proj_query_reshape = proj_query.\
                        view(n, num_heads, h, w, self.qk_embed_dim)
                    proj_query_reshape = proj_query_reshape.\
                        permute(0, 1, 3, 2, 4)
                    position_feat_x_reshape = position_feat_x.\
                        permute(0, 1, 2, 4, 3)
                    position_feat_y_reshape = position_feat_y.\
                        permute(0, 1, 2, 4, 3)

                    energy_x = torch.matmul(proj_query_reshape,
                                            position_feat_x_reshape)
                    energy_x = energy_x.permute(0, 1, 3, 2, 4).unsqueeze(4)

                    energy_y = torch.matmul(proj_query_reshape,
                                            position_feat_y_reshape)
                    energy_y = energy_y.unsqueeze(5)

                    energy += energy_x + energy_y

                elif self.attention_type[3]:
                    geom_bias = self.geom_bias.\
                        view(1, num_heads, self.qk_embed_dim, 1).\
                        repeat(n, 1, 1, 1)

                    position_feat_x_reshape = position_feat_x.\
                        view(n, num_heads, w*w_kv, self.qk_embed_dim)

                    position_feat_y_reshape = position_feat_y.\
                        view(n, num_heads, h * h_kv, self.qk_embed_dim)

                    energy_x = torch.matmul(position_feat_x_reshape, geom_bias)
                    energy_x = energy_x.view(n, num_heads, 1, w, 1, w_kv)

                    energy_y = torch.matmul(position_feat_y_reshape, geom_bias)
                    energy_y = energy_y.view(n, num_heads, h, 1, h_kv, 1)

                    energy += energy_x + energy_y

            energy = energy.view(n, num_heads, h * w, h_kv * w_kv)

        if self.spatial_range >= 0:
            # The map is stored as uint8; masked_fill_ expects a boolean
            # mask (uint8 masks are deprecated in PyTorch), so cast here.
            cur_local_constraint_map = \
                self.local_constraint_map[:h, :w, :h_kv, :w_kv].\
                contiguous().\
                view(1, 1, h*w, h_kv*w_kv).\
                bool()

            energy = energy.masked_fill_(cur_local_constraint_map,
                                         float('-inf'))

        # normalise over the key/value positions
        attention = F.softmax(energy, 3)

        proj_value = self.value_conv(x_kv)
        proj_value_reshape = proj_value.\
            view((n, num_heads, self.v_dim, h_kv * w_kv)).\
            permute(0, 1, 3, 2)

        out = torch.matmul(attention, proj_value_reshape).\
            permute(0, 1, 3, 2).\
            contiguous().\
            view(n, self.v_dim * self.num_heads, h, w)

        out = self.proj_conv(out)

        # output is downsampled, upsample back to input size
        if self.q_downsample is not None:
            out = F.interpolate(
                out,
                size=x_input.shape[2:],
                mode='bilinear',
                align_corners=False)

        out = self.gamma * out + x_input
        return out

    def init_weights(self):
        """Run ``kaiming_init`` on every submodule flagged with a truthy
        ``kaiming_init`` attribute in ``__init__``."""
        for m in self.modules():
            if hasattr(m, 'kaiming_init') and m.kaiming_init:
                kaiming_init(
                    m,
                    mode='fan_in',
                    nonlinearity='leaky_relu',
                    bias=0,
                    distribution='uniform',
                    a=1)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/cnn/bricks/hsigmoid.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import warnings

import torch.nn as nn

from .registry import ACTIVATION_LAYERS


@ACTIVATION_LAYERS.register_module()
class HSigmoid(nn.Module):
    """Hard Sigmoid Module. Apply the hard sigmoid function:
    Hsigmoid(x) = min(max((x + bias) / divisor, min_value), max_value)
    Default: Hsigmoid(x) = min(max((x + 3) / 6, 0), 1)

    Note:
        In MMCV v1.4.4, we modified the default value of args to align with
        PyTorch official. A warning describing the change is emitted every
        time the module is constructed.

    Args:
        bias (float): Bias of the input feature map. Default: 3.0.
        divisor (float): Divisor of the input feature map; must be non-zero.
            Default: 6.0.
        min_value (float): Lower bound value. Default: 0.0.
        max_value (float): Upper bound value. Default: 1.0.

    Returns:
        Tensor: The output tensor.
    """

    def __init__(self, bias=3.0, divisor=6.0, min_value=0.0, max_value=1.0):
        super().__init__()
        default_change_notice = (
            'In MMCV v1.4.4, we modified the default value of args to align '
            'with PyTorch official. Previous Implementation: '
            'Hsigmoid(x) = min(max((x + 1) / 2, 0), 1). '
            'Current Implementation: '
            'Hsigmoid(x) = min(max((x + 3) / 6, 0), 1).')
        warnings.warn(default_change_notice)
        self.bias = bias
        self.divisor = divisor
        assert self.divisor != 0
        self.min_value = min_value
        self.max_value = max_value

    def forward(self, x):
        """Shift, scale and clamp ``x``; the input itself is not modified
        because the in-place clamp acts on the freshly computed tensor."""
        shifted = x + self.bias
        scaled = shifted / self.divisor
        return scaled.clamp_(self.min_value, self.max_value)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/cnn/bricks/hswish.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import torch.nn as nn

from .registry import ACTIVATION_LAYERS


@ACTIVATION_LAYERS.register_module()
class HSwish(nn.Module):
    """Hard Swish Module.

    This module applies the hard swish function:

    .. math::
        Hswish(x) = x * ReLU6(x + 3) / 6

    Args:
        inplace (bool): Forwarded to the internal ``nn.ReLU6``, which is
            applied to the temporary ``x + 3``. Default: False.

    Returns:
        Tensor: The output tensor.
    """

    def __init__(self, inplace=False):
        super().__init__()
        self.act = nn.ReLU6(inplace)

    def forward(self, x):
        """Return ``x * ReLU6(x + 3) / 6``."""
        gate = self.act(x + 3)
        return x * gate / 6


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/cnn/bricks/non_local.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
from abc import ABCMeta

import torch
import torch.nn as nn

from ..utils import constant_init, normal_init
from .conv_module import ConvModule
from .registry import PLUGIN_LAYERS


class _NonLocalNd(nn.Module, metaclass=ABCMeta):
    """Basic Non-local module.

    This module is proposed in
    "Non-local Neural Networks"
    Paper reference: https://arxiv.org/abs/1711.07971
    Code reference: https://github.com/AlexHex7/Non-local_pytorch

    Args:
        in_channels (int): Channels of the input feature map.
        reduction (int): Channel reduction ratio. Default: 2.
        use_scale (bool): Whether to scale pairwise_weight by
            `1/sqrt(inter_channels)` when the mode is `embedded_gaussian`.
            Default: True.
        conv_cfg (None | dict): The config dict for convolution layers.
            If not specified, it will use `nn.Conv2d` for convolution layers.
            Default: None.
        norm_cfg (None | dict): The config dict for normalization layers.
            Default: None. (This parameter is only applicable to conv_out.)
        mode (str): Options are `gaussian`, `concatenation`,
            `embedded_gaussian` and `dot_product`. Default: embedded_gaussian.
        **kwargs: Forwarded to :meth:`init_weights` (``std``, ``zeros_init``).

    Raises:
        ValueError: If ``mode`` is not one of the supported options.
    """

    def __init__(self,
                 in_channels,
                 reduction=2,
                 use_scale=True,
                 conv_cfg=None,
                 norm_cfg=None,
                 mode='embedded_gaussian',
                 **kwargs):
        super(_NonLocalNd, self).__init__()
        self.in_channels = in_channels
        self.reduction = reduction
        self.use_scale = use_scale
        # Never let the reduced channel count drop to zero.
        self.inter_channels = max(in_channels // reduction, 1)
        self.mode = mode

        if mode not in [
                'gaussian', 'embedded_gaussian', 'dot_product', 'concatenation'
        ]:
            raise ValueError("Mode should be in 'gaussian', 'concatenation', "
                             f"'embedded_gaussian' or 'dot_product', but got "
                             f'{mode} instead.')

        # g, theta, phi are defaulted as `nn.ConvNd`.
        # Here we use ConvModule for potential usage.
        self.g = ConvModule(
            self.in_channels,
            self.inter_channels,
            kernel_size=1,
            conv_cfg=conv_cfg,
            act_cfg=None)
        self.conv_out = ConvModule(
            self.inter_channels,
            self.in_channels,
            kernel_size=1,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=None)

        # `gaussian` mode compares the raw input with itself, so it needs no
        # theta/phi embeddings.
        if self.mode != 'gaussian':
            self.theta = ConvModule(
                self.in_channels,
                self.inter_channels,
                kernel_size=1,
                conv_cfg=conv_cfg,
                act_cfg=None)
            self.phi = ConvModule(
                self.in_channels,
                self.inter_channels,
                kernel_size=1,
                conv_cfg=conv_cfg,
                act_cfg=None)

        if self.mode == 'concatenation':
            self.concat_project = ConvModule(
                self.inter_channels * 2,
                1,
                kernel_size=1,
                stride=1,
                padding=0,
                bias=False,
                act_cfg=dict(type='ReLU'))

        self.init_weights(**kwargs)

    def init_weights(self, std=0.01, zeros_init=True):
        """Initialize the embedding convs and the output projection.

        Args:
            std (float): Standard deviation for the normal initialization.
                Default: 0.01.
            zeros_init (bool): If True, zero-initialize the last layer of
                ``conv_out`` (its norm layer when ``norm_cfg`` is set, its
                conv otherwise); if False, use normal initialization there.
                Default: True.
        """

        def conv_of(module):
            # Subclasses replace `g`/`phi` with
            # `nn.Sequential(conv_module, max_pool)` when `sub_sample=True`.
            # Unwrap it so this method can still be called after
            # construction instead of failing on a missing `.conv`.
            if isinstance(module, nn.Sequential):
                module = module[0]
            return module.conv

        if self.mode != 'gaussian':
            for m in [self.g, self.theta, self.phi]:
                normal_init(conv_of(m), std=std)
        else:
            normal_init(conv_of(self.g), std=std)
        if zeros_init:
            if self.conv_out.norm_cfg is None:
                constant_init(self.conv_out.conv, 0)
            else:
                constant_init(self.conv_out.norm, 0)
        else:
            if self.conv_out.norm_cfg is None:
                normal_init(self.conv_out.conv, std=std)
            else:
                normal_init(self.conv_out.norm, std=std)

    def gaussian(self, theta_x, phi_x):
        """Softmax-normalized dot-product affinity of the raw features."""
        # NonLocal1d pairwise_weight: [N, H, H]
        # NonLocal2d pairwise_weight: [N, HxW, HxW]
        # NonLocal3d pairwise_weight: [N, TxHxW, TxHxW]
        pairwise_weight = torch.matmul(theta_x, phi_x)
        pairwise_weight = pairwise_weight.softmax(dim=-1)
        return pairwise_weight

    def embedded_gaussian(self, theta_x, phi_x):
        """Softmax-normalized affinity of the embedded features."""
        # NonLocal1d pairwise_weight: [N, H, H]
        # NonLocal2d pairwise_weight: [N, HxW, HxW]
        # NonLocal3d pairwise_weight: [N, TxHxW, TxHxW]
        pairwise_weight = torch.matmul(theta_x, phi_x)
        if self.use_scale:
            # theta_x.shape[-1] is `self.inter_channels`
            pairwise_weight /= theta_x.shape[-1]**0.5
        pairwise_weight = pairwise_weight.softmax(dim=-1)
        return pairwise_weight

    def dot_product(self, theta_x, phi_x):
        """Dot-product affinity divided by the size of its last dim."""
        # NonLocal1d pairwise_weight: [N, H, H]
        # NonLocal2d pairwise_weight: [N, HxW, HxW]
        # NonLocal3d pairwise_weight: [N, TxHxW, TxHxW]
        pairwise_weight = torch.matmul(theta_x, phi_x)
        pairwise_weight /= pairwise_weight.shape[-1]
        return pairwise_weight

    def concatenation(self, theta_x, phi_x):
        """Affinity from a 1x1 projection of concatenated feature pairs."""
        # NonLocal1d pairwise_weight: [N, H, H]
        # NonLocal2d pairwise_weight: [N, HxW, HxW]
        # NonLocal3d pairwise_weight: [N, TxHxW, TxHxW]
        h = theta_x.size(2)
        w = phi_x.size(3)
        # Tile both so every (query, key) position pair is materialized.
        theta_x = theta_x.repeat(1, 1, 1, w)
        phi_x = phi_x.repeat(1, 1, h, 1)

        concat_feature = torch.cat([theta_x, phi_x], dim=1)
        pairwise_weight = self.concat_project(concat_feature)
        n, _, h, w = pairwise_weight.size()
        pairwise_weight = pairwise_weight.view(n, h, w)
        pairwise_weight /= pairwise_weight.shape[-1]

        return pairwise_weight

    def forward(self, x):
        """Apply the non-local operation and add the result to ``x``.

        Args:
            x (Tensor): Input of shape [N, C, ...] (see the shape comments
                below for the 1d/2d/3d variants).

        Returns:
            Tensor: ``x + conv_out(y)``, where ``y`` is the
            affinity-weighted aggregation of ``g(x)``.
        """
        # Assume `reduction = 1`, then `inter_channels = C`
        # or `inter_channels = C` when `mode="gaussian"`

        # NonLocal1d x: [N, C, H]
        # NonLocal2d x: [N, C, H, W]
        # NonLocal3d x: [N, C, T, H, W]
        n = x.size(0)

        # NonLocal1d g_x: [N, H, C]
        # NonLocal2d g_x: [N, HxW, C]
        # NonLocal3d g_x: [N, TxHxW, C]
        g_x = self.g(x).view(n, self.inter_channels, -1)
        g_x = g_x.permute(0, 2, 1)

        # NonLocal1d theta_x: [N, H, C], phi_x: [N, C, H]
        # NonLocal2d theta_x: [N, HxW, C], phi_x: [N, C, HxW]
        # NonLocal3d theta_x: [N, TxHxW, C], phi_x: [N, C, TxHxW]
        if self.mode == 'gaussian':
            theta_x = x.view(n, self.in_channels, -1)
            theta_x = theta_x.permute(0, 2, 1)
            # `sub_sample` is only assigned by the subclasses; treat a
            # missing attribute as "no sub-sampling" so the base class does
            # not fail with AttributeError here.
            if getattr(self, 'sub_sample', False):
                phi_x = self.phi(x).view(n, self.in_channels, -1)
            else:
                phi_x = x.view(n, self.in_channels, -1)
        elif self.mode == 'concatenation':
            theta_x = self.theta(x).view(n, self.inter_channels, -1, 1)
            phi_x = self.phi(x).view(n, self.inter_channels, 1, -1)
        else:
            theta_x = self.theta(x).view(n, self.inter_channels, -1)
            theta_x = theta_x.permute(0, 2, 1)
            phi_x = self.phi(x).view(n, self.inter_channels, -1)

        # The pairwise function is the method named after the mode.
        pairwise_func = getattr(self, self.mode)
        # NonLocal1d pairwise_weight: [N, H, H]
        # NonLocal2d pairwise_weight: [N, HxW, HxW]
        # NonLocal3d pairwise_weight: [N, TxHxW, TxHxW]
        pairwise_weight = pairwise_func(theta_x, phi_x)

        # NonLocal1d y: [N, H, C]
        # NonLocal2d y: [N, HxW, C]
        # NonLocal3d y: [N, TxHxW, C]
        y = torch.matmul(pairwise_weight, g_x)
        # NonLocal1d y: [N, C, H]
        # NonLocal2d y: [N, C, H, W]
        # NonLocal3d y: [N, C, T, H, W]
        y = y.permute(0, 2, 1).contiguous().reshape(n, self.inter_channels,
                                                    *x.size()[2:])

        output = x + self.conv_out(y)

        return output


class NonLocal1d(_NonLocalNd):
    """1D Non-local module.

    Args:
        in_channels (int): Same as `NonLocalND`.
        sub_sample (bool): When True, an ``nn.MaxPool1d(kernel_size=2)`` is
            appended after ``g`` and ``phi`` (in `gaussian` mode the pooling
            layer itself becomes ``phi``). Default: False.
        conv_cfg (None | dict): Same as `NonLocalND`.
            Default: dict(type='Conv1d').
    """

    def __init__(self,
                 in_channels,
                 sub_sample=False,
                 conv_cfg=dict(type='Conv1d'),
                 **kwargs):
        super(NonLocal1d, self).__init__(
            in_channels, conv_cfg=conv_cfg, **kwargs)

        self.sub_sample = sub_sample
        if not sub_sample:
            return

        # One pooling layer instance is shared by the `g` and `phi` paths.
        pool = nn.MaxPool1d(kernel_size=2)
        self.g = nn.Sequential(self.g, pool)
        if self.mode == 'gaussian':
            self.phi = pool
        else:
            self.phi = nn.Sequential(self.phi, pool)


@PLUGIN_LAYERS.register_module()
class NonLocal2d(_NonLocalNd):
    """2D Non-local module.

    Args:
        in_channels (int): Same as `NonLocalND`.
        sub_sample (bool): When True, an ``nn.MaxPool2d(kernel_size=(2, 2))``
            is appended after ``g`` and ``phi`` (in `gaussian` mode the
            pooling layer itself becomes ``phi``). Default: False.
        conv_cfg (None | dict): Same as `NonLocalND`.
            Default: dict(type='Conv2d').
    """

    # Abbreviation exposed as the class attribute `_abbr_`.
    _abbr_ = 'nonlocal_block'

    def __init__(self,
                 in_channels,
                 sub_sample=False,
                 conv_cfg=dict(type='Conv2d'),
                 **kwargs):
        super(NonLocal2d, self).__init__(
            in_channels, conv_cfg=conv_cfg, **kwargs)

        self.sub_sample = sub_sample
        if not sub_sample:
            return

        # One pooling layer instance is shared by the `g` and `phi` paths.
        pool = nn.MaxPool2d(kernel_size=(2, 2))
        self.g = nn.Sequential(self.g, pool)
        if self.mode == 'gaussian':
            self.phi = pool
        else:
            self.phi = nn.Sequential(self.phi, pool)


class NonLocal3d(_NonLocalNd):
    """3D Non-local module.

    Args:
        in_channels (int): Same as `NonLocalND`.
        sub_sample (bool): When True, an
            ``nn.MaxPool3d(kernel_size=(1, 2, 2))`` is appended after ``g``
            and ``phi`` (in `gaussian` mode the pooling layer itself becomes
            ``phi``). Default: False.
        conv_cfg (None | dict): Same as `NonLocalND`.
            Default: dict(type='Conv3d').
    """

    def __init__(self,
                 in_channels,
                 sub_sample=False,
                 conv_cfg=dict(type='Conv3d'),
                 **kwargs):
        super(NonLocal3d, self).__init__(
            in_channels, conv_cfg=conv_cfg, **kwargs)

        self.sub_sample = sub_sample
        if not sub_sample:
            return

        # Kernel (1, 2, 2): the first pooled dimension is left untouched.
        pool = nn.MaxPool3d(kernel_size=(1, 2, 2))
        self.g = nn.Sequential(self.g, pool)
        if self.mode == 'gaussian':
            self.phi = pool
        else:
            self.phi = nn.Sequential(self.phi, pool)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/cnn/bricks/norm.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import inspect

import torch.nn as nn

from mmcv.utils import is_tuple_of
from mmcv.utils.parrots_wrapper import SyncBatchNorm, _BatchNorm, _InstanceNorm
from .registry import NORM_LAYERS

# Register the normalization layers under the short names used as the
# ``type`` key of a norm config.
# 'BN' and 'BN2d' both map to nn.BatchNorm2d.
NORM_LAYERS.register_module('BN', module=nn.BatchNorm2d)
NORM_LAYERS.register_module('BN1d', module=nn.BatchNorm1d)
NORM_LAYERS.register_module('BN2d', module=nn.BatchNorm2d)
NORM_LAYERS.register_module('BN3d', module=nn.BatchNorm3d)
NORM_LAYERS.register_module('SyncBN', module=SyncBatchNorm)
NORM_LAYERS.register_module('GN', module=nn.GroupNorm)
NORM_LAYERS.register_module('LN', module=nn.LayerNorm)
# 'IN' and 'IN2d' both map to nn.InstanceNorm2d.
NORM_LAYERS.register_module('IN', module=nn.InstanceNorm2d)
NORM_LAYERS.register_module('IN1d', module=nn.InstanceNorm1d)
NORM_LAYERS.register_module('IN2d', module=nn.InstanceNorm2d)
NORM_LAYERS.register_module('IN3d', module=nn.InstanceNorm3d)


def infer_abbr(class_type):
    """Infer abbreviation from the class name.

    The abbreviation is used to name norm layers built by
    `build_norm_layer()`, e.g. ``bn1`` or ``gn``. Rules are tried in order:

    Rule 1: If the class has the attribute "_abbr_", return it.
    Rule 2: If the class is a subclass of _InstanceNorm, _BatchNorm,
    GroupNorm or LayerNorm, return "in", "bn", "gn" or "ln" respectively
    (instance norm is checked first).
    Rule 3: If the lower-cased class name contains "batch", "group", "layer"
    or "instance", return "bn", "gn", "ln" or "in" respectively.
    Rule 4: Otherwise, the abbreviation falls back to "norm_layer".

    Args:
        class_type (type): The norm layer type.

    Returns:
        str: The inferred abbreviation.

    Raises:
        TypeError: If ``class_type`` is not a class.
    """
    if not inspect.isclass(class_type):
        raise TypeError(
            f'class_type must be a type, but got {type(class_type)}')
    if hasattr(class_type, '_abbr_'):
        return class_type._abbr_

    # Order matters: IN is a subclass of BN, so it must be tested first.
    by_base_class = (
        (_InstanceNorm, 'in'),
        (_BatchNorm, 'bn'),
        (nn.GroupNorm, 'gn'),
        (nn.LayerNorm, 'ln'),
    )
    for base, abbr in by_base_class:
        if issubclass(class_type, base):
            return abbr

    # Fall back to keywords in the class name, then to a generic name.
    lowered_name = class_type.__name__.lower()
    by_keyword = (
        ('batch', 'bn'),
        ('group', 'gn'),
        ('layer', 'ln'),
        ('instance', 'in'),
    )
    for keyword, abbr in by_keyword:
        if keyword in lowered_name:
            return abbr
    return 'norm_layer'


def build_norm_layer(cfg, num_features, postfix=''):
    """Build normalization layer.

    Args:
        cfg (dict): The norm layer config, which should contain:

            - type (str): Layer type.
            - layer args: Args needed to instantiate a norm layer.
            - requires_grad (bool, optional): Value assigned to
              ``requires_grad`` of every parameter of the layer.
              Default: True.
        num_features (int): Number of input channels.
        postfix (int | str): The postfix to be appended into norm abbreviation
            to create named layer.

    Returns:
        tuple[str, nn.Module]: The first element is the layer name consisting
        of abbreviation and postfix, e.g., bn1, gn. The second element is the
        created norm layer.

    Raises:
        TypeError: If ``cfg`` is not a dict.
        KeyError: If ``cfg`` has no "type" key or the type is not registered.
    """
    if not isinstance(cfg, dict):
        raise TypeError('cfg must be a dict')
    if 'type' not in cfg:
        raise KeyError('the cfg dict must contain the key "type"')

    # Work on a copy so the caller's config is left untouched.
    layer_args = cfg.copy()
    layer_type = layer_args.pop('type')
    if layer_type not in NORM_LAYERS:
        raise KeyError(f'Unrecognized norm type {layer_type}')
    norm_cls = NORM_LAYERS.get(layer_type)

    abbr = infer_abbr(norm_cls)
    assert isinstance(postfix, (int, str))
    name = abbr + str(postfix)

    trainable = layer_args.pop('requires_grad', True)
    layer_args.setdefault('eps', 1e-5)

    if layer_type == 'GN':
        # The channel count is passed to GroupNorm as `num_channels`, and
        # `num_groups` must come from the config.
        assert 'num_groups' in layer_args
        layer = norm_cls(num_channels=num_features, **layer_args)
    else:
        layer = norm_cls(num_features, **layer_args)
        if layer_type == 'SyncBN' and hasattr(layer, '_specify_ddp_gpu_num'):
            layer._specify_ddp_gpu_num(1)

    for param in layer.parameters():
        param.requires_grad = trainable

    return name, layer


def is_norm(layer, exclude=None):
    """Check if a layer is a normalization layer.

    Args:
        layer (nn.Module): The layer to be checked.
        exclude (type | tuple[type]): Types to be excluded. Instances of
            these types are reported as not being norm layers.

    Returns:
        bool: Whether the layer is a norm layer.

    Raises:
        TypeError: If ``exclude`` is neither None, a type, nor a tuple of
            types.
    """
    if exclude is not None:
        # Normalize a single type to a one-element tuple.
        exclude = exclude if isinstance(exclude, tuple) else (exclude, )
        if not is_tuple_of(exclude, type):
            raise TypeError(
                f'"exclude" must be either None or type or a tuple of types, '
                f'but got {type(exclude)}: {exclude}')
        if exclude and isinstance(layer, exclude):
            return False

    return isinstance(
        layer, (_BatchNorm, _InstanceNorm, nn.GroupNorm, nn.LayerNorm))


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/cnn/bricks/padding.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import torch.nn as nn

from .registry import PADDING_LAYERS

# Register the 2D padding layers under the short names used as the ``type``
# key of a padding config.
PADDING_LAYERS.register_module('zero', module=nn.ZeroPad2d)
PADDING_LAYERS.register_module('reflect', module=nn.ReflectionPad2d)
PADDING_LAYERS.register_module('replicate', module=nn.ReplicationPad2d)


def build_padding_layer(cfg, *args, **kwargs):
    """Build padding layer.

    Args:
        cfg (dict): The padding layer config, which should contain:
            - type (str): Layer type.
            - layer args: Args needed to instantiate a padding layer.
        *args: Positional arguments forwarded to the padding layer.
        **kwargs: Keyword arguments forwarded to the padding layer.

    Returns:
        nn.Module: Created padding layer.

    Raises:
        TypeError: If ``cfg`` is not a dict.
        KeyError: If ``cfg`` has no "type" key or the type is not registered.
    """
    if not isinstance(cfg, dict):
        raise TypeError('cfg must be a dict')
    if 'type' not in cfg:
        raise KeyError('the cfg dict must contain the key "type"')

    # Work on a copy so the caller's config keeps its "type" entry.
    layer_args = cfg.copy()
    padding_type = layer_args.pop('type')
    if padding_type not in PADDING_LAYERS:
        raise KeyError(f'Unrecognized padding type {padding_type}.')

    padding_cls = PADDING_LAYERS.get(padding_type)
    return padding_cls(*args, **kwargs, **layer_args)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/cnn/bricks/plugin.py
================================================
import inspect
import platform

from .registry import PLUGIN_LAYERS

if platform.system() == 'Windows':
    import regex as re
else:
    import re


def infer_abbr(class_type):
    """Infer abbreviation from the class name.

    This method will infer the abbreviation to map class types to
    abbreviations.

    Rule 1: If the class has the attribute "_abbr_", return it.
    Rule 2: Otherwise, the abbreviation falls back to snake case of class
    name, e.g. the abbreviation of ``FancyBlock`` will be ``fancy_block``.

    The snake-case conversion is modified from `inflection lib
    <https://inflection.readthedocs.io/en/latest/#inflection.underscore>`_.

    Args:
        class_type (type): The plugin layer type.

    Returns:
        str: The inferred abbreviation.

    Raises:
        TypeError: If ``class_type`` is not a class.
    """
    if not inspect.isclass(class_type):
        raise TypeError(
            f'class_type must be a type, but got {type(class_type)}')
    if hasattr(class_type, '_abbr_'):
        return class_type._abbr_

    # Split an acronym from a following capitalized word
    # ("HTTPServer" -> "HTTP_Server") ...
    snake = re.sub(r'([A-Z]+)([A-Z][a-z])', r'\1_\2', class_type.__name__)
    # ... then split at every lower-case/digit -> upper-case boundary.
    snake = re.sub(r'([a-z\d])([A-Z])', r'\1_\2', snake)
    return snake.replace('-', '_').lower()


def build_plugin_layer(cfg, postfix='', **kwargs):
    """Build plugin layer.

    Args:
        cfg (dict): cfg should contain:

            - type (str): identify plugin layer type.
            - layer args: args needed to instantiate a plugin layer.
        postfix (int, str): appended into the plugin abbreviation to
            create named layer. Default: ''.
        **kwargs: Extra keyword arguments forwarded to the plugin layer.

    Returns:
        tuple[str, nn.Module]: The first one is the concatenation of
        abbreviation and postfix. The second is the created plugin layer.

    Raises:
        TypeError: If ``cfg`` is not a dict.
        KeyError: If ``cfg`` has no "type" key or the type is not registered.
    """
    if not isinstance(cfg, dict):
        raise TypeError('cfg must be a dict')
    if 'type' not in cfg:
        raise KeyError('the cfg dict must contain the key "type"')

    # Work on a copy so the caller's config keeps its "type" entry.
    layer_args = cfg.copy()
    layer_type = layer_args.pop('type')
    if layer_type not in PLUGIN_LAYERS:
        raise KeyError(f'Unrecognized plugin type {layer_type}')
    plugin_cls = PLUGIN_LAYERS.get(layer_type)

    abbr = infer_abbr(plugin_cls)
    assert isinstance(postfix, (int, str))
    name = abbr + str(postfix)

    return name, plugin_cls(**kwargs, **layer_args)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/cnn/bricks/registry.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
from mmcv.utils import Registry

# One Registry per kind of building brick; the string argument is the name
# given to each Registry.
CONV_LAYERS = Registry('conv layer')
NORM_LAYERS = Registry('norm layer')
ACTIVATION_LAYERS = Registry('activation layer')
PADDING_LAYERS = Registry('padding layer')
UPSAMPLE_LAYERS = Registry('upsample layer')
PLUGIN_LAYERS = Registry('plugin layer')

# Registries for dropout and transformer-related components.
DROPOUT_LAYERS = Registry('drop out layers')
POSITIONAL_ENCODING = Registry('position encoding')
ATTENTION = Registry('attention')
FEEDFORWARD_NETWORK = Registry('feed-forward Network')
TRANSFORMER_LAYER = Registry('transformerLayer')
TRANSFORMER_LAYER_SEQUENCE = Registry('transformer-layers sequence')


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/cnn/bricks/scale.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import torch
import torch.nn as nn


class Scale(nn.Module):
    """A learnable scale parameter.

    This layer multiplies its input by a learnable factor. The factor is an
    ``nn.Parameter`` created from ``torch.tensor(scale, dtype=torch.float)``
    and is broadcast against the input.

    Args:
        scale (float): Initial value of scale factor. Default: 1.0
    """

    def __init__(self, scale=1.0):
        super(Scale, self).__init__()
        initial_value = torch.tensor(scale, dtype=torch.float)
        self.scale = nn.Parameter(initial_value)

    def forward(self, x):
        factor = self.scale
        return x * factor


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/cnn/bricks/swish.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import torch
import torch.nn as nn

from .registry import ACTIVATION_LAYERS


@ACTIVATION_LAYERS.register_module()
class Swish(nn.Module):
    """Swish activation.

    Computes

    .. math::
        Swish(x) = x * Sigmoid(x)

    Returns:
        Tensor: The output tensor.
    """

    def __init__(self):
        super(Swish, self).__init__()

    def forward(self, x):
        gate = torch.sigmoid(x)
        return x * gate


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/cnn/bricks/transformer.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import copy
import math
import warnings
from typing import Sequence

import torch
import torch.nn as nn
import torch.nn.functional as F

from mmcv.cnn import (Linear, build_activation_layer, build_conv_layer,
                      build_norm_layer)
from mmcv.runner.base_module import BaseModule, ModuleList, Sequential
from mmcv.utils import (ConfigDict, build_from_cfg, deprecated_api_warning,
                        to_2tuple)
from .drop import build_dropout
from .registry import (ATTENTION, FEEDFORWARD_NETWORK, POSITIONAL_ENCODING,
                       TRANSFORMER_LAYER, TRANSFORMER_LAYER_SEQUENCE)

# Avoid BC-breaking of importing MultiScaleDeformableAttention from this file.
# The import is attempted at module import time: on success the name is
# re-exported from here and an ImportWarning points at the new location; on
# ImportError a warning is issued instead of failing the import of this file.
try:
    from mmcv.ops.multi_scale_deform_attn import MultiScaleDeformableAttention  # noqa F401
    warnings.warn(
        ImportWarning(
            '``MultiScaleDeformableAttention`` has been moved to '
            '``mmcv.ops.multi_scale_deform_attn``, please change original path '  # noqa E501
            '``from mmcv.cnn.bricks.transformer import MultiScaleDeformableAttention`` '  # noqa E501
            'to ``from mmcv.ops.multi_scale_deform_attn import MultiScaleDeformableAttention`` '  # noqa E501
        ))

except ImportError:
    warnings.warn('Fail to import ``MultiScaleDeformableAttention`` from '
                  '``mmcv.ops.multi_scale_deform_attn``, '
                  'You should install ``mmcv-full`` if you need this module. ')


def build_positional_encoding(cfg, default_args=None):
    """Builder for Position Encoding.

    Forwards ``cfg`` and ``default_args`` to ``build_from_cfg`` together with
    the ``POSITIONAL_ENCODING`` registry.

    Args:
        cfg: Config passed through to ``build_from_cfg`` unchanged.
        default_args: Passed through to ``build_from_cfg`` unchanged.
            Default: None.

    Returns:
        The object returned by ``build_from_cfg``.
    """
    return build_from_cfg(cfg, POSITIONAL_ENCODING, default_args)


def build_attention(cfg, default_args=None):
    """Builder for attention.

    Forwards ``cfg`` and ``default_args`` to ``build_from_cfg`` together with
    the ``ATTENTION`` registry.

    Args:
        cfg: Config passed through to ``build_from_cfg`` unchanged.
        default_args: Passed through to ``build_from_cfg`` unchanged.
            Default: None.

    Returns:
        The object returned by ``build_from_cfg``.
    """
    return build_from_cfg(cfg, ATTENTION, default_args)


def build_feedforward_network(cfg, default_args=None):
    """Builder for feed-forward network (FFN).

    Forwards ``cfg`` and ``default_args`` to ``build_from_cfg`` together with
    the ``FEEDFORWARD_NETWORK`` registry.

    Args:
        cfg: Config passed through to ``build_from_cfg`` unchanged.
        default_args: Passed through to ``build_from_cfg`` unchanged.
            Default: None.

    Returns:
        The object returned by ``build_from_cfg``.
    """
    return build_from_cfg(cfg, FEEDFORWARD_NETWORK, default_args)


def build_transformer_layer(cfg, default_args=None):
    """Builder for transformer layer.

    Forwards ``cfg`` and ``default_args`` to ``build_from_cfg`` together with
    the ``TRANSFORMER_LAYER`` registry.

    Args:
        cfg: Config passed through to ``build_from_cfg`` unchanged.
        default_args: Passed through to ``build_from_cfg`` unchanged.
            Default: None.

    Returns:
        The object returned by ``build_from_cfg``.
    """
    return build_from_cfg(cfg, TRANSFORMER_LAYER, default_args)


def build_transformer_layer_sequence(cfg, default_args=None):
    """Builder for transformer encoder and transformer decoder.

    Forwards ``cfg`` and ``default_args`` to ``build_from_cfg`` together with
    the ``TRANSFORMER_LAYER_SEQUENCE`` registry.

    Args:
        cfg: Config passed through to ``build_from_cfg`` unchanged.
        default_args: Passed through to ``build_from_cfg`` unchanged.
            Default: None.

    Returns:
        The object returned by ``build_from_cfg``.
    """
    return build_from_cfg(cfg, TRANSFORMER_LAYER_SEQUENCE, default_args)


class AdaptivePadding(nn.Module):
    """Applies padding adaptively to the input.

    The input is padded so that it is fully covered by the filter described
    by ``kernel_size``, ``stride`` and ``dilation``. Two modes are supported:
    "same" splits the padding between both sides of each dimension (like the
    "SAME" padding mode in TensorFlow), while "corner" puts all the padding
    at the bottom and right.

    Args:
        kernel_size (int | tuple): Size of the kernel. Default: 1.
        stride (int | tuple): Stride of the filter. Default: 1.
        dilation (int | tuple): Spacing between kernel elements.
            Default: 1.
        padding (str): Support "same" and "corner", "corner" mode
            would pad zero to bottom right, and "same" mode would
            pad zero around input. Default: "corner".

    Example:
        >>> kernel_size = 16
        >>> stride = 16
        >>> dilation = 1
        >>> input = torch.rand(1, 1, 15, 17)
        >>> adap_pad = AdaptivePadding(
        >>>     kernel_size=kernel_size,
        >>>     stride=stride,
        >>>     dilation=dilation,
        >>>     padding="corner")
        >>> out = adap_pad(input)
        >>> assert (out.shape[2], out.shape[3]) == (16, 32)
        >>> input = torch.rand(1, 1, 16, 17)
        >>> out = adap_pad(input)
        >>> assert (out.shape[2], out.shape[3]) == (16, 32)
    """

    def __init__(self, kernel_size=1, stride=1, dilation=1, padding='corner'):
        super(AdaptivePadding, self).__init__()
        assert padding in ('same', 'corner')

        self.padding = padding
        # All geometry arguments are stored as 2-tuples.
        self.kernel_size = to_2tuple(kernel_size)
        self.stride = to_2tuple(stride)
        self.dilation = to_2tuple(dilation)

    def get_pad_shape(self, input_shape):
        """Calculate the padding size of input.

        Args:
            input_shape (:obj:`torch.Size`): arrange as (H, W).

        Returns:
            Tuple[int]: The padding size along the
            original H and W directions
        """
        input_h, input_w = input_shape
        kernel_h, kernel_w = self.kernel_size
        stride_h, stride_w = self.stride
        dilation_h, dilation_w = self.dilation[0], self.dilation[1]

        def required_pad(size, kernel, stride, dilation):
            # Number of output positions needed to cover `size`.
            num_out = math.ceil(size / stride)
            # Extent touched by the last window, minus what is already there.
            covered = (num_out - 1) * stride + (kernel - 1) * dilation + 1
            return max(covered - size, 0)

        pad_h = required_pad(input_h, kernel_h, stride_h, dilation_h)
        pad_w = required_pad(input_w, kernel_w, stride_w, dilation_w)
        return pad_h, pad_w

    def forward(self, x):
        """Add padding to `x`

        Args:
            x (Tensor): Input tensor has shape (B, C, H, W).

        Returns:
            Tensor: The tensor with adaptive padding
        """
        pad_h, pad_w = self.get_pad_shape(x.size()[-2:])
        if not (pad_h > 0 or pad_w > 0):
            return x

        # F.pad takes [left, right, top, bottom] for the last two dims.
        if self.padding == 'corner':
            return F.pad(x, [0, pad_w, 0, pad_h])
        if self.padding == 'same':
            left, top = pad_w // 2, pad_h // 2
            return F.pad(x, [left, pad_w - left, top, pad_h - top])
        return x


class PatchEmbed(BaseModule):
    """Image to Patch Embedding.

    We use a conv layer to implement PatchEmbed.

    Args:
        in_channels (int): The num of input channels. Default: 3
        embed_dims (int): The dimensions of embedding. Default: 768
        conv_type (str): The type of convolution
            to generate patch embedding. Default: "Conv2d".
        kernel_size (int): The kernel_size of embedding conv. Default: 16.
        stride (int): The slide stride of embedding conv. When None, it is
            set to ``kernel_size``. Default: 16.
        padding (int | tuple | string): The padding length of
            embedding conv. When it is a string, it means the mode
            of adaptive padding, support "same" and "corner" now.
            Default: "corner".
        dilation (int): The dilation rate of embedding conv. Default: 1.
        bias (bool): Bias of embed conv. Default: True.
        norm_cfg (dict, optional): Config dict for normalization layer.
            Default: None.
        input_size (int | tuple | None): The size of input, which will be
            used to precompute ``init_out_size``. When falsy, both
            ``init_input_size`` and ``init_out_size`` are None.
            Default: None.
        init_cfg (`mmcv.ConfigDict`, optional): The Config for initialization.
            Default: None.
    """

    def __init__(self,
                 in_channels=3,
                 embed_dims=768,
                 conv_type='Conv2d',
                 kernel_size=16,
                 stride=16,
                 padding='corner',
                 dilation=1,
                 bias=True,
                 norm_cfg=None,
                 input_size=None,
                 init_cfg=None):
        super(PatchEmbed, self).__init__(init_cfg=init_cfg)

        self.embed_dims = embed_dims
        stride = kernel_size if stride is None else stride

        kernel_size = to_2tuple(kernel_size)
        stride = to_2tuple(stride)
        dilation = to_2tuple(dilation)

        if isinstance(padding, str):
            # A string selects adaptive padding; the conv itself then pads
            # nothing.
            self.adaptive_padding = AdaptivePadding(
                kernel_size=kernel_size,
                stride=stride,
                dilation=dilation,
                padding=padding)
            padding = 0
        else:
            self.adaptive_padding = None
        padding = to_2tuple(padding)

        self.projection = build_conv_layer(
            dict(type=conv_type),
            in_channels=in_channels,
            out_channels=embed_dims,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            bias=bias)

        if norm_cfg is None:
            self.norm = None
        else:
            self.norm = build_norm_layer(norm_cfg, embed_dims)[1]

        if not input_size:
            self.init_input_size = None
            self.init_out_size = None
            return

        input_size = to_2tuple(input_size)
        # `init_out_size` would be used outside to
        # calculate the num_patches
        # e.g. when `use_abs_pos_embed` outside
        self.init_input_size = input_size
        if self.adaptive_padding:
            # Account for the padding added before the convolution.
            pad_h, pad_w = self.adaptive_padding.get_pad_shape(input_size)
            input_h, input_w = input_size
            input_size = (input_h + pad_h, input_w + pad_w)

        # Output-size formula from
        # https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html
        self.init_out_size = tuple(
            (input_size[axis] + 2 * padding[axis] - dilation[axis] *
             (kernel_size[axis] - 1) - 1) // stride[axis] + 1
            for axis in (0, 1))

    def forward(self, x):
        """
        Args:
            x (Tensor): Has shape (B, C, H, W). In most case, C is 3.

        Returns:
            tuple: Contains merged results and its spatial shape.

            - x (Tensor): Has shape (B, out_h * out_w, embed_dims)
            - out_size (tuple[int]): Spatial shape of x, arrange as
              (out_h, out_w).
        """
        if self.adaptive_padding:
            x = self.adaptive_padding(x)

        feature_map = self.projection(x)
        out_size = (feature_map.shape[2], feature_map.shape[3])
        # (B, C, H, W) -> (B, H * W, C)
        tokens = feature_map.flatten(2).transpose(1, 2)
        if self.norm is not None:
            tokens = self.norm(tokens)
        return tokens, out_size


class PatchMerging(BaseModule):
    """Merge patch feature map.

    The feature map is grouped into windows with `nn.Unfold`; every window
    is flattened into one vector, optionally normalized and then reduced by
    a linear layer (used in Swin Transformer).
    Using `nn.Unfold` is about 25% faster than the original
    implementation, but pretrained models need to be modified
    for compatibility.

    Args:
        in_channels (int): The num of input channels.
        out_channels (int): The num of output channels.
        kernel_size (int | tuple, optional): the kernel size in the unfold
            layer. Defaults to 2.
        stride (int | tuple, optional): the stride of the sliding blocks in the
            unfold layer. Default: None. (Would be set as `kernel_size`)
        padding (int | tuple | string ): The padding length of the unfold
            layer. When it is a string, it selects the mode of adaptive
            padding ("same" and "corner" are supported) and the padding of
            the unfold layer itself is disabled. Default: "corner".
        dilation (int | tuple, optional): dilation parameter in the unfold
            layer. Default: 1.
        bias (bool, optional): Whether to add bias in linear layer or not.
            Defaults: False.
        norm_cfg (dict, optional): Config dict for normalization layer.
            Default: dict(type='LN').
        init_cfg (dict, optional): The extra config for initialization.
            Default: None.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size=2,
                 stride=None,
                 padding='corner',
                 dilation=1,
                 bias=False,
                 norm_cfg=dict(type='LN'),
                 init_cfg=None):
        super().__init__(init_cfg=init_cfg)
        self.in_channels = in_channels
        self.out_channels = out_channels

        # A falsy stride (None / 0) falls back to the kernel size.
        stride = to_2tuple(stride if stride else kernel_size)
        kernel_size = to_2tuple(kernel_size)
        dilation = to_2tuple(dilation)

        use_adaptive_padding = isinstance(padding, str)
        if use_adaptive_padding:
            self.adaptive_padding = AdaptivePadding(
                kernel_size=kernel_size,
                stride=stride,
                dilation=dilation,
                padding=padding)
        else:
            self.adaptive_padding = None

        # With adaptive padding the unfold layer must not pad again.
        unfold_padding = to_2tuple(0 if use_adaptive_padding else padding)
        self.sampler = nn.Unfold(
            kernel_size=kernel_size,
            dilation=dilation,
            padding=unfold_padding,
            stride=stride)

        # Every unfolded window holds kernel_h * kernel_w * C_in values.
        sample_dim = kernel_size[0] * kernel_size[1] * in_channels

        self.norm = None if norm_cfg is None else build_norm_layer(
            norm_cfg, sample_dim)[1]

        self.reduction = nn.Linear(sample_dim, out_channels, bias=bias)

    def forward(self, x, input_size):
        """
        Args:
            x (Tensor): Has shape (B, H*W, C_in).
            input_size (tuple[int]): The spatial shape of x, arrange as (H, W).

        Returns:
            tuple: Contains merged results and its spatial shape.

            - x (Tensor): Has shape (B, Merged_H * Merged_W, C_out)
            - out_size (tuple[int]): Spatial shape of x, arrange as
              (Merged_H, Merged_W).
        """
        B, L, C = x.shape
        assert isinstance(input_size, Sequence), \
            f'Expect input_size is `Sequence` but get {input_size}'

        height, width = input_size
        assert L == height * width, 'input feature has wrong size'

        # (B, H*W, C) -> (B, C, H, W)
        feat = x.view(B, height, width, C).permute([0, 3, 1, 2])

        if self.adaptive_padding:
            feat = self.adaptive_padding(feat)
            height, width = feat.shape[-2:]

        # If kernel_size=2 and stride=2, the unfolded tensor has shape
        # (B, 4*C, H/2*W/2).
        unfolded = self.sampler(feat)

        sampler = self.sampler

        def _merged_len(size, axis):
            # Output length formula of a sliding window along one axis.
            return (size + 2 * sampler.padding[axis] -
                    sampler.dilation[axis] *
                    (sampler.kernel_size[axis] - 1) -
                    1) // sampler.stride[axis] + 1

        output_size = (_merged_len(height, 0), _merged_len(width, 1))

        tokens = unfolded.transpose(1, 2)  # B, H/2*W/2, 4*C
        if self.norm:
            tokens = self.norm(tokens)
        return self.reduction(tokens), output_size


@ATTENTION.register_module()
class MultiheadAttention(BaseModule):
    """A wrapper for ``torch.nn.MultiheadAttention``.

    This module implements MultiheadAttention with identity connection,
    and positional encoding  is also passed as input.

    Args:
        embed_dims (int): The embedding dimension.
        num_heads (int): Parallel attention heads.
        attn_drop (float): A Dropout layer on attn_output_weights.
            Default: 0.0.
        proj_drop (float): A Dropout layer after `nn.MultiheadAttention`.
            Default: 0.0.
        dropout_layer (obj:`ConfigDict`): The dropout_layer used
            when adding the shortcut.
        init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization.
            Default: None.
        batch_first (bool): When it is True,  Key, Query and Value are shape of
            (batch, n, embed_dim), otherwise (n, batch, embed_dim).
             Default to False.
        **kwargs: Extra keyword arguments forwarded to
            ``torch.nn.MultiheadAttention``. The deprecated key ``dropout``
            is consumed here and used for both `attn_drop` and the
            `drop_prob` of `dropout_layer`.
    """

    def __init__(self,
                 embed_dims,
                 num_heads,
                 attn_drop=0.,
                 proj_drop=0.,
                 dropout_layer=dict(type='Dropout', drop_prob=0.),
                 init_cfg=None,
                 batch_first=False,
                 **kwargs):
        super(MultiheadAttention, self).__init__(init_cfg)
        if 'dropout' in kwargs:
            warnings.warn(
                'The arguments `dropout` in MultiheadAttention '
                'has been deprecated, now you can separately '
                'set `attn_drop`(float), proj_drop(float), '
                'and `dropout_layer`(dict) ', DeprecationWarning)
            attn_drop = kwargs['dropout']
            # Work on a private copy: `dropout_layer` may be the shared
            # default dict of this signature (or a dict owned by the caller),
            # and writing into it would leak `drop_prob` into every module
            # built afterwards.
            dropout_layer = copy.deepcopy(dropout_layer)
            dropout_layer['drop_prob'] = kwargs.pop('dropout')

        self.embed_dims = embed_dims
        self.num_heads = num_heads
        self.batch_first = batch_first

        self.attn = nn.MultiheadAttention(embed_dims, num_heads, attn_drop,
                                          **kwargs)

        self.proj_drop = nn.Dropout(proj_drop)
        self.dropout_layer = build_dropout(
            dropout_layer) if dropout_layer else nn.Identity()

    @deprecated_api_warning({'residual': 'identity'},
                            cls_name='MultiheadAttention')
    def forward(self,
                query,
                key=None,
                value=None,
                identity=None,
                query_pos=None,
                key_pos=None,
                attn_mask=None,
                key_padding_mask=None,
                **kwargs):
        """Forward function for `MultiheadAttention`.

        **kwargs allow passing a more general data flow when combining
        with other operations in `transformerlayer`.

        Args:
            query (Tensor): The input query with shape [num_queries, bs,
                embed_dims] if self.batch_first is False, else
                [bs, num_queries embed_dims].
            key (Tensor): The key tensor with shape [num_keys, bs,
                embed_dims] if self.batch_first is False, else
                [bs, num_keys, embed_dims] .
                If None, the ``query`` will be used. Defaults to None.
            value (Tensor): The value tensor with same shape as `key`.
                Same in `nn.MultiheadAttention.forward`. Defaults to None.
                If None, the `key` will be used.
            identity (Tensor): This tensor, with the same shape as `query`,
                will be used for the identity link.
                If None, `query` will be used. Defaults to None.
            query_pos (Tensor): The positional encoding for query, with
                the same shape as `query`. If not None, it will
                be added to `query` before forward function.
                Defaults to None.
            key_pos (Tensor): The positional encoding for `key`, with the
                same shape as `key`. Defaults to None. If not None, it will
                be added to `key` before forward function. If None, and
                `query_pos` has the same shape as `key`, then `query_pos`
                will be used for `key_pos`. Defaults to None.
            attn_mask (Tensor): ByteTensor mask with shape [num_queries,
                num_keys]. Same in `nn.MultiheadAttention.forward`.
                Defaults to None.
            key_padding_mask (Tensor): ByteTensor with shape [bs, num_keys].
                Defaults to None.

        Returns:
            Tensor: forwarded results with shape
            [num_queries, bs, embed_dims]
            if self.batch_first is False, else
            [bs, num_queries embed_dims].
        """

        if key is None:
            key = query
        if value is None:
            value = key
        if identity is None:
            # The shortcut uses the query *before* positional encoding.
            identity = query
        if key_pos is None:
            if query_pos is not None:
                # use query_pos if key_pos is not available
                if query_pos.shape == key.shape:
                    key_pos = query_pos
                else:
                    warnings.warn(f'position encoding of key is '
                                  f'missing in {self.__class__.__name__}.')
        if query_pos is not None:
            query = query + query_pos
        if key_pos is not None:
            key = key + key_pos

        # Because the dataflow('key', 'query', 'value') of
        # ``torch.nn.MultiheadAttention`` is (num_query, batch,
        # embed_dims), We should adjust the shape of dataflow from
        # batch_first (batch, num_query, embed_dims) to num_query_first
        # (num_query ,batch, embed_dims), and recover ``attn_output``
        # from num_query_first to batch_first.
        if self.batch_first:
            query = query.transpose(0, 1)
            key = key.transpose(0, 1)
            value = value.transpose(0, 1)

        # Index 0 is the attention output; the second element returned by
        # ``nn.MultiheadAttention`` is not used here.
        out = self.attn(
            query=query,
            key=key,
            value=value,
            attn_mask=attn_mask,
            key_padding_mask=key_padding_mask)[0]

        if self.batch_first:
            out = out.transpose(0, 1)

        return identity + self.dropout_layer(self.proj_drop(out))


@FEEDFORWARD_NETWORK.register_module()
class FFN(BaseModule):
    """Feed-forward network (FFN) with an optional identity connection.

    Args:
        embed_dims (int): The feature dimension. Same as
            `MultiheadAttention`. Defaults: 256.
        feedforward_channels (int): The hidden dimension of FFNs.
            Defaults: 1024.
        num_fcs (int, optional): The number of fully-connected layers in
            FFNs. Must be at least 2. Default: 2.
        act_cfg (dict, optional): The activation config for FFNs.
            Default: dict(type='ReLU', inplace=True)
        ffn_drop (float, optional): Probability of an element to be
            zeroed in FFN. Default 0.0.
        dropout_layer (obj:`ConfigDict`): The dropout_layer used
            when adding the shortcut.
        add_identity (bool, optional): Whether to add the
            identity connection. Default: `True`.
        init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization.
            Default: None.
    """

    @deprecated_api_warning(
        {
            'dropout': 'ffn_drop',
            'add_residual': 'add_identity'
        },
        cls_name='FFN')
    def __init__(self,
                 embed_dims=256,
                 feedforward_channels=1024,
                 num_fcs=2,
                 act_cfg=dict(type='ReLU', inplace=True),
                 ffn_drop=0.,
                 dropout_layer=None,
                 add_identity=True,
                 init_cfg=None,
                 **kwargs):
        super().__init__(init_cfg)
        assert num_fcs >= 2, \
            f'num_fcs should be no less than 2. got {num_fcs}.'
        self.embed_dims = embed_dims
        self.feedforward_channels = feedforward_channels
        self.num_fcs = num_fcs
        self.act_cfg = act_cfg
        self.activate = build_activation_layer(act_cfg)

        # Input width of each hidden block: the first one consumes the
        # embedding, every later one consumes the hidden width.
        hidden_inputs = [embed_dims] + [feedforward_channels] * (num_fcs - 2)
        # The single activation module is reused by every hidden block.
        blocks = [
            Sequential(
                Linear(width, feedforward_channels), self.activate,
                nn.Dropout(ffn_drop)) for width in hidden_inputs
        ]
        # Final projection back to the embedding width, followed by dropout.
        blocks.append(Linear(feedforward_channels, embed_dims))
        blocks.append(nn.Dropout(ffn_drop))
        self.layers = Sequential(*blocks)

        if dropout_layer:
            self.dropout_layer = build_dropout(dropout_layer)
        else:
            self.dropout_layer = torch.nn.Identity()
        self.add_identity = add_identity

    @deprecated_api_warning({'residual': 'identity'}, cls_name='FFN')
    def forward(self, x, identity=None):
        """Forward function for `FFN`.

        When `add_identity` is enabled, the shortcut added to the output is
        `identity`, or `x` itself if `identity` is None.
        """
        out = self.layers(x)
        if self.add_identity:
            shortcut = x if identity is None else identity
            return shortcut + self.dropout_layer(out)
        return self.dropout_layer(out)


@TRANSFORMER_LAYER.register_module()
class BaseTransformerLayer(BaseModule):
    """Base `TransformerLayer` for vision transformer.

    It can be built from `mmcv.ConfigDict` and support more flexible
    customization, for example, using any number of `FFN or LN ` and
    use different kinds of `attention` by specifying a list of `ConfigDict`
    named `attn_cfgs`. It is worth mentioning that it supports `prenorm`
    when you specifying `norm` as the first element of `operation_order`.
    More details about the `prenorm`: `On Layer Normalization in the
    Transformer Architecture <https://arxiv.org/abs/2002.04745>`_ .

    Args:
        attn_cfgs (list[`mmcv.ConfigDict`] | obj:`mmcv.ConfigDict` | None )):
            Configs for `self_attention` or `cross_attention` modules,
            The order of the configs in the list should be consistent with
            corresponding attentions in operation_order.
            If it is a dict, all of the attention modules in operation_order
            will be built with this config. Default: None.
        ffn_cfgs (list[`mmcv.ConfigDict`] | obj:`mmcv.ConfigDict` | None )):
            Configs for FFN, The order of the configs in the list should be
            consistent with corresponding ffn in operation_order.
            If it is a dict, all of the ffn modules in operation_order
            will be built with this config.
        operation_order (tuple[str]): The execution order of operation
            in transformer. Such as ('self_attn', 'norm', 'ffn', 'norm').
            Support `prenorm` when you specifying first element as `norm`.
            Default: None.
        norm_cfg (dict): Config dict for normalization layer.
            Default: dict(type='LN').
        init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization.
            Default: None.
        batch_first (bool): Key, Query and Value are shape
            of (batch, n, embed_dim)
            or (n, batch, embed_dim). Default to False.
    """

    def __init__(self,
                 attn_cfgs=None,
                 ffn_cfgs=dict(
                     type='FFN',
                     embed_dims=256,
                     feedforward_channels=1024,
                     num_fcs=2,
                     ffn_drop=0.,
                     act_cfg=dict(type='ReLU', inplace=True),
                 ),
                 operation_order=None,
                 norm_cfg=dict(type='LN'),
                 init_cfg=None,
                 batch_first=False,
                 **kwargs):

        deprecated_args = dict(
            feedforward_channels='feedforward_channels',
            ffn_dropout='ffn_drop',
            ffn_num_fcs='num_fcs')
        if any(ori_name in kwargs for ori_name in deprecated_args):
            # The deprecated arguments are written into `ffn_cfgs` below.
            # Copy it first so that neither the shared default dict of this
            # signature nor a dict owned by the caller is modified.
            ffn_cfgs = copy.deepcopy(ffn_cfgs)
        for ori_name, new_name in deprecated_args.items():
            if ori_name in kwargs:
                warnings.warn(
                    f'The arguments `{ori_name}` in BaseTransformerLayer '
                    f'has been deprecated, now you should set `{new_name}` '
                    f'and other FFN related arguments '
                    f'to a dict named `ffn_cfgs`. ', DeprecationWarning)
                ffn_cfgs[new_name] = kwargs[ori_name]

        super(BaseTransformerLayer, self).__init__(init_cfg)

        self.batch_first = batch_first

        # Every entry of `operation_order` must be one of the known
        # operation names (this is a subset check, not an equality check).
        valid_operations = ['self_attn', 'norm', 'ffn', 'cross_attn']
        assert set(operation_order) <= set(valid_operations), \
            f'The operation_order of {self.__class__.__name__} should ' \
            f'only contain operation types from {valid_operations}'

        num_attn = operation_order.count('self_attn') + operation_order.count(
            'cross_attn')
        if isinstance(attn_cfgs, dict):
            attn_cfgs = [copy.deepcopy(attn_cfgs) for _ in range(num_attn)]
        else:
            assert num_attn == len(attn_cfgs), f'The length ' \
                f'of attn_cfgs {len(attn_cfgs)} is ' \
                f'not consistent with the number of attention ' \
                f'{num_attn} in operation_order {operation_order}.'

        self.num_attn = num_attn
        self.operation_order = operation_order
        self.norm_cfg = norm_cfg
        self.pre_norm = operation_order[0] == 'norm'
        self.attentions = ModuleList()

        index = 0
        for operation_name in operation_order:
            if operation_name in ['self_attn', 'cross_attn']:
                # `batch_first` of every attention must agree with the layer.
                if 'batch_first' in attn_cfgs[index]:
                    assert self.batch_first == attn_cfgs[index]['batch_first']
                else:
                    attn_cfgs[index]['batch_first'] = self.batch_first
                attention = build_attention(attn_cfgs[index])
                # Some custom attentions used as `self_attn`
                # or `cross_attn` can have different behavior.
                attention.operation_name = operation_name
                self.attentions.append(attention)
                index += 1

        # The embedding width of the layer is taken from the first attention.
        self.embed_dims = self.attentions[0].embed_dims

        self.ffns = ModuleList()
        num_ffns = operation_order.count('ffn')
        if isinstance(ffn_cfgs, dict):
            ffn_cfgs = ConfigDict(ffn_cfgs)
        if isinstance(ffn_cfgs, dict):
            ffn_cfgs = [copy.deepcopy(ffn_cfgs) for _ in range(num_ffns)]
        assert len(ffn_cfgs) == num_ffns
        for ffn_index in range(num_ffns):
            if 'embed_dims' not in ffn_cfgs[ffn_index]:
                ffn_cfgs[ffn_index]['embed_dims'] = self.embed_dims
            else:
                assert ffn_cfgs[ffn_index]['embed_dims'] == self.embed_dims
            self.ffns.append(
                build_feedforward_network(ffn_cfgs[ffn_index],
                                          dict(type='FFN')))

        self.norms = ModuleList()
        num_norms = operation_order.count('norm')
        for _ in range(num_norms):
            self.norms.append(build_norm_layer(norm_cfg, self.embed_dims)[1])

    def forward(self,
                query,
                key=None,
                value=None,
                query_pos=None,
                key_pos=None,
                attn_masks=None,
                query_key_padding_mask=None,
                key_padding_mask=None,
                **kwargs):
        """Forward function for `TransformerDecoderLayer`.

        **kwargs contains some specific arguments of attentions.

        Args:
            query (Tensor): The input query with shape
                [num_queries, bs, embed_dims] if
                self.batch_first is False, else
                [bs, num_queries embed_dims].
            key (Tensor): The key tensor with shape [num_keys, bs,
                embed_dims] if self.batch_first is False, else
                [bs, num_keys, embed_dims] .
            value (Tensor): The value tensor with same shape as `key`.
            query_pos (Tensor): The positional encoding for `query`.
                Default: None.
            key_pos (Tensor): The positional encoding for `key`.
                Default: None.
            attn_masks (List[Tensor] | None): 2D Tensor used in
                calculation of corresponding attention. The length of
                it should equal to the number of `attention` in
                `operation_order`. A single Tensor is copied for every
                attention. Default: None.
            query_key_padding_mask (Tensor): ByteTensor for `query`, with
                shape [bs, num_queries]. Only used in `self_attn` layer.
                Defaults to None.
            key_padding_mask (Tensor): ByteTensor for `key`, with
                shape [bs, num_keys]. Only used in `cross_attn` layer.
                Default: None.

        Returns:
            Tensor: forwarded results with shape [num_queries, bs, embed_dims].
        """

        norm_index = 0
        attn_index = 0
        ffn_index = 0
        identity = query
        if attn_masks is None:
            attn_masks = [None for _ in range(self.num_attn)]
        elif isinstance(attn_masks, torch.Tensor):
            attn_masks = [
                copy.deepcopy(attn_masks) for _ in range(self.num_attn)
            ]
            warnings.warn(f'Use same attn_mask in all attentions in '
                          f'{self.__class__.__name__} ')
        else:
            assert len(attn_masks) == self.num_attn, f'The length of ' \
                        f'attn_masks {len(attn_masks)} must be equal ' \
                        f'to the number of attention in ' \
                        f'operation_order {self.num_attn}'

        for layer in self.operation_order:
            if layer == 'self_attn':
                # Self-attention: key and value are the query itself and
                # the query positional encoding is reused for the key.
                temp_key = temp_value = query
                query = self.attentions[attn_index](
                    query,
                    temp_key,
                    temp_value,
                    # In pre-norm mode the shortcut is the un-normalized
                    # tensor kept in `identity`.
                    identity if self.pre_norm else None,
                    query_pos=query_pos,
                    key_pos=query_pos,
                    attn_mask=attn_masks[attn_index],
                    key_padding_mask=query_key_padding_mask,
                    **kwargs)
                attn_index += 1
                identity = query

            elif layer == 'norm':
                query = self.norms[norm_index](query)
                norm_index += 1

            elif layer == 'cross_attn':
                query = self.attentions[attn_index](
                    query,
                    key,
                    value,
                    identity if self.pre_norm else None,
                    query_pos=query_pos,
                    key_pos=key_pos,
                    attn_mask=attn_masks[attn_index],
                    key_padding_mask=key_padding_mask,
                    **kwargs)
                attn_index += 1
                identity = query

            elif layer == 'ffn':
                query = self.ffns[ffn_index](
                    query, identity if self.pre_norm else None)
                ffn_index += 1

        return query


@TRANSFORMER_LAYER_SEQUENCE.register_module()
class TransformerLayerSequence(BaseModule):
    """Base class for TransformerEncoder and TransformerDecoder in vision
    transformer.

    Holds a stack of transformer layers and runs them one after another.
    Each layer can be configured separately, so different kinds of
    `transformer_layer` may be mixed in one `transformer_coder`.

    Args:
        transformerlayers (list[obj:`mmcv.ConfigDict`] |
            obj:`mmcv.ConfigDict`): Config of transformerlayer
            in TransformerCoder. If it is obj:`mmcv.ConfigDict`,
             it would be repeated `num_layer` times to a
             list[`mmcv.ConfigDict`]. Default: None.
        num_layers (int): The number of `TransformerLayer`. Default: None.
        init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization.
            Default: None.
    """

    def __init__(self, transformerlayers=None, num_layers=None, init_cfg=None):
        super().__init__(init_cfg)
        if not isinstance(transformerlayers, dict):
            assert isinstance(transformerlayers, list) and \
                   len(transformerlayers) == num_layers
            layer_cfgs = transformerlayers
        else:
            # One independent copy of the shared config per layer.
            layer_cfgs = [
                copy.deepcopy(transformerlayers) for _ in range(num_layers)
            ]
        self.num_layers = num_layers
        self.layers = ModuleList()
        for layer_idx in range(num_layers):
            built_layer = build_transformer_layer(layer_cfgs[layer_idx])
            self.layers.append(built_layer)

        # The first layer is used as the reference for these attributes.
        first_layer = self.layers[0]
        self.embed_dims = first_layer.embed_dims
        self.pre_norm = first_layer.pre_norm

    def forward(self,
                query,
                key,
                value,
                query_pos=None,
                key_pos=None,
                attn_masks=None,
                query_key_padding_mask=None,
                key_padding_mask=None,
                **kwargs):
        """Forward function for `TransformerCoder`.

        Every layer receives the output of the previous layer as `query`;
        all other arguments are passed unchanged to each layer.

        Args:
            query (Tensor): Input query with shape
                `(num_queries, bs, embed_dims)`.
            key (Tensor): The key tensor with shape
                `(num_keys, bs, embed_dims)`.
            value (Tensor): The value tensor with shape
                `(num_keys, bs, embed_dims)`.
            query_pos (Tensor): The positional encoding for `query`.
                Default: None.
            key_pos (Tensor): The positional encoding for `key`.
                Default: None.
            attn_masks (List[Tensor], optional): Each element is 2D Tensor
                which is used in calculation of corresponding attention in
                operation_order. Default: None.
            query_key_padding_mask (Tensor): ByteTensor for `query`, with
                shape [bs, num_queries]. Only used in self-attention
                Default: None.
            key_padding_mask (Tensor): ByteTensor for `query`, with
                shape [bs, num_keys]. Default: None.

        Returns:
            Tensor:  results with shape [num_queries, bs, embed_dims].
        """
        shared_kwargs = dict(
            query_pos=query_pos,
            key_pos=key_pos,
            attn_masks=attn_masks,
            query_key_padding_mask=query_key_padding_mask,
            key_padding_mask=key_padding_mask)
        out = query
        for block in self.layers:
            out = block(out, key, value, **shared_kwargs, **kwargs)
        return out


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/cnn/bricks/upsample.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import torch.nn as nn
import torch.nn.functional as F

from ..utils import xavier_init
from .registry import UPSAMPLE_LAYERS

# ``nn.Upsample`` is registered under both interpolation names. The registry
# key doubles as the ``mode`` argument: ``build_upsample_layer`` below sets
# ``cfg_['mode'] = layer_type`` whenever the looked-up class is ``nn.Upsample``.
UPSAMPLE_LAYERS.register_module('nearest', module=nn.Upsample)
UPSAMPLE_LAYERS.register_module('bilinear', module=nn.Upsample)


@UPSAMPLE_LAYERS.register_module(name='pixel_shuffle')
class PixelShufflePack(nn.Module):
    """Pixel Shuffle upsample layer.

    A convolution first expands the channels to
    ``out_channels * scale_factor ** 2``; `F.pixel_shuffle()` then rearranges
    those channels into a spatially upsampled map.

    Args:
        in_channels (int): Number of input channels.
        out_channels (int): Number of output channels.
        scale_factor (int): Upsample ratio.
        upsample_kernel (int): Kernel size of the conv layer to expand the
            channels.
    """

    def __init__(self, in_channels, out_channels, scale_factor,
                 upsample_kernel):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.scale_factor = scale_factor
        self.upsample_kernel = upsample_kernel

        expanded_channels = out_channels * scale_factor * scale_factor
        conv_padding = (upsample_kernel - 1) // 2
        self.upsample_conv = nn.Conv2d(
            in_channels,
            expanded_channels,
            upsample_kernel,
            padding=conv_padding)
        self.init_weights()

    def init_weights(self):
        """Initialize the expansion conv with uniform Xavier init."""
        xavier_init(self.upsample_conv, distribution='uniform')

    def forward(self, x):
        """Expand the channels with the conv, then pixel-shuffle them."""
        expanded = self.upsample_conv(x)
        return F.pixel_shuffle(expanded, self.scale_factor)


def build_upsample_layer(cfg, *args, **kwargs):
    """Build upsample layer.

    Args:
        cfg (dict): The upsample layer config, which should contain:

            - type (str): Layer type.
            - scale_factor (int): Upsample ratio, which is not applicable to
              deconv.
            - layer args: Args needed to instantiate a upsample layer.
        args (argument list): Arguments passed to the ``__init__``
            method of the corresponding conv layer.
        kwargs (keyword arguments): Keyword arguments passed to the
            ``__init__`` method of the corresponding conv layer.

    Returns:
        nn.Module: Created upsample layer.

    Raises:
        TypeError: If ``cfg`` is not a dict.
        KeyError: If ``cfg`` has no ``type`` key, or the type is not
            registered in ``UPSAMPLE_LAYERS``.
    """
    if not isinstance(cfg, dict):
        raise TypeError(f'cfg must be a dict, but got {type(cfg)}')
    if 'type' not in cfg:
        raise KeyError(
            f'the cfg dict must contain the key "type", but got {cfg}')

    # Work on a shallow copy so the caller's config keeps its `type` key.
    layer_args = cfg.copy()
    layer_type = layer_args.pop('type')

    if layer_type not in UPSAMPLE_LAYERS:
        raise KeyError(f'Unrecognized upsample type {layer_type}')
    upsample_cls = UPSAMPLE_LAYERS.get(layer_type)

    # For `nn.Upsample` the registered name is also its interpolation mode.
    if upsample_cls is nn.Upsample:
        layer_args['mode'] = layer_type
    return upsample_cls(*args, **kwargs, **layer_args)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/cnn/bricks/wrappers.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
r"""Modified from https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/wrappers.py  # noqa: E501

Wrap some nn modules to support empty tensor input. Currently, these wrappers
are mainly used in mask heads like fcn_mask_head and maskiou_heads since mask
heads are trained on only positive RoIs.
"""
import math

import torch
import torch.nn as nn
from torch.nn.modules.utils import _pair, _triple

from .registry import CONV_LAYERS, UPSAMPLE_LAYERS

# `TORCH_VERSION` is the string 'parrots' on a parrots build; otherwise it
# is a tuple of the first two version components, since
# torch.__version__ could be 1.3.1+cu92 and only those are compared.
TORCH_VERSION = (
    torch.__version__ if torch.__version__ == 'parrots' else tuple(
        int(part) for part in torch.__version__.split('.')[:2]))


def obsolete_torch_version(torch_version, version_threshold):
    """Tell whether `torch_version` needs the empty-tensor workaround.

    Args:
        torch_version (str | tuple[int]): Either the string 'parrots' or a
            version tuple.
        version_threshold (tuple[int]): The newest version that is still
            treated as obsolete.

    Returns:
        bool: True for 'parrots' or when `torch_version` is not newer than
        `version_threshold`.
    """
    if torch_version == 'parrots':
        return True
    return torch_version <= version_threshold


class NewEmptyTensorOp(torch.autograd.Function):
    """Autograd function that returns an uninitialized tensor of a new shape.

    The forward pass allocates ``x.new_empty(new_shape)``; the backward pass
    applies the same op to the incoming gradient with the original input
    shape.
    """

    @staticmethod
    def forward(ctx, x, new_shape):
        # Keep the input shape so backward can produce a matching gradient.
        ctx.shape = x.shape
        return x.new_empty(new_shape)

    @staticmethod
    def backward(ctx, grad):
        # `new_shape` is not a tensor input, hence the trailing None.
        input_grad = NewEmptyTensorOp.apply(grad, ctx.shape)
        return input_grad, None


@CONV_LAYERS.register_module('Conv', force=True)
class Conv2d(nn.Conv2d):
    """`nn.Conv2d` that also accepts empty tensors on obsolete torch."""

    def forward(self, x):
        """Run the convolution, with a manual path for empty inputs.

        When `x` has no elements and the torch version is obsolete, an
        empty tensor with the computed output shape is returned instead of
        calling the parent implementation.
        """
        needs_workaround = x.numel() == 0 and obsolete_torch_version(
            TORCH_VERSION, (1, 4))
        if not needs_workaround:
            return super().forward(x)

        # Conv output-size formula, applied to each of the two spatial dims.
        spatial_shape = [
            (size + 2 * pad - (dil * (kernel - 1) + 1)) // step + 1
            for size, kernel, pad, step, dil in zip(
                x.shape[-2:], self.kernel_size, self.padding, self.stride,
                self.dilation)
        ]
        out_shape = [x.shape[0], self.out_channels] + spatial_shape
        empty = NewEmptyTensorOp.apply(x, out_shape)
        if not self.training:
            return empty
        # produce dummy gradient to avoid DDP warning.
        dummy = sum(param.view(-1)[0] for param in self.parameters()) * 0.0
        return empty + dummy


@CONV_LAYERS.register_module('Conv3d', force=True)
class Conv3d(nn.Conv3d):
    """``nn.Conv3d`` that also accepts empty (zero-element) inputs.

    The manual path is taken only when the input has no elements and
    ``obsolete_torch_version(TORCH_VERSION, (1, 4))`` is true; every other
    call is forwarded to ``nn.Conv3d`` unchanged.
    """

    def forward(self, x):
        needs_workaround = (
            x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 4)))
        if not needs_workaround:
            return super().forward(x)

        # Convolution output-size formula over the three trailing dims.
        spatial = [
            (size + 2 * pad - (dil * (kernel - 1) + 1)) // step + 1
            for size, kernel, pad, step, dil in zip(
                x.shape[-3:], self.kernel_size, self.padding, self.stride,
                self.dilation)
        ]
        empty = NewEmptyTensorOp.apply(
            x, [x.shape[0], self.out_channels] + spatial)
        if not self.training:
            return empty
        # produce dummy gradient to avoid DDP warning.
        dummy = sum(p.view(-1)[0] for p in self.parameters()) * 0.0
        return empty + dummy


@CONV_LAYERS.register_module()
@CONV_LAYERS.register_module('deconv')
@UPSAMPLE_LAYERS.register_module('deconv', force=True)
class ConvTranspose2d(nn.ConvTranspose2d):
    """``nn.ConvTranspose2d`` that also accepts empty (zero-element) inputs.

    The manual path is taken only when the input has no elements and
    ``obsolete_torch_version(TORCH_VERSION, (1, 4))`` is true; every other
    call is forwarded to ``nn.ConvTranspose2d`` unchanged.
    """

    def forward(self, x):
        needs_workaround = (
            x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 4)))
        if not needs_workaround:
            return super().forward(x)

        # Transposed-convolution output size for each spatial dimension.
        spatial = [
            (size - 1) * step - 2 * pad + (dil * (kernel - 1) + 1) + out_pad
            for size, kernel, pad, step, dil, out_pad in zip(
                x.shape[-2:], self.kernel_size, self.padding, self.stride,
                self.dilation, self.output_padding)
        ]
        empty = NewEmptyTensorOp.apply(
            x, [x.shape[0], self.out_channels] + spatial)
        if not self.training:
            return empty
        # produce dummy gradient to avoid DDP warning.
        dummy = sum(p.view(-1)[0] for p in self.parameters()) * 0.0
        return empty + dummy


@CONV_LAYERS.register_module()
@CONV_LAYERS.register_module('deconv3d')
@UPSAMPLE_LAYERS.register_module('deconv3d', force=True)
class ConvTranspose3d(nn.ConvTranspose3d):
    """``nn.ConvTranspose3d`` that also accepts empty (zero-element) inputs.

    The manual path is taken only when the input has no elements and
    ``obsolete_torch_version(TORCH_VERSION, (1, 4))`` is true; every other
    call is forwarded to ``nn.ConvTranspose3d`` unchanged.
    """

    def forward(self, x):
        needs_workaround = (
            x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 4)))
        if not needs_workaround:
            return super().forward(x)

        # Transposed-convolution output size over the three trailing dims.
        spatial = [
            (size - 1) * step - 2 * pad + (dil * (kernel - 1) + 1) + out_pad
            for size, kernel, pad, step, dil, out_pad in zip(
                x.shape[-3:], self.kernel_size, self.padding, self.stride,
                self.dilation, self.output_padding)
        ]
        empty = NewEmptyTensorOp.apply(
            x, [x.shape[0], self.out_channels] + spatial)
        if not self.training:
            return empty
        # produce dummy gradient to avoid DDP warning.
        dummy = sum(p.view(-1)[0] for p in self.parameters()) * 0.0
        return empty + dummy


class MaxPool2d(nn.MaxPool2d):
    """``nn.MaxPool2d`` that also accepts empty (zero-element) inputs.

    For an empty input on a torch version reported obsolete relative to
    ``(1, 9)`` the output shape is computed by hand; otherwise the call is
    forwarded to ``nn.MaxPool2d``.
    """

    def forward(self, x):
        # PyTorch 1.9 does not support empty tensor inference yet
        if x.numel() != 0 or not obsolete_torch_version(
                TORCH_VERSION, (1, 9)):
            return super().forward(x)

        # ceil_mode selects how a fractional output size is rounded.
        rounding = math.ceil if self.ceil_mode else math.floor
        hyper_params = zip(x.shape[-2:], _pair(self.kernel_size),
                           _pair(self.padding), _pair(self.stride),
                           _pair(self.dilation))
        spatial = [
            rounding((size + 2 * pad - (dil * (kernel - 1) + 1)) / step + 1)
            for size, kernel, pad, step, dil in hyper_params
        ]
        return NewEmptyTensorOp.apply(x, list(x.shape[:2]) + spatial)


class MaxPool3d(nn.MaxPool3d):
    """``nn.MaxPool3d`` that also accepts empty (zero-element) inputs.

    For an empty input on a torch version reported obsolete relative to
    ``(1, 9)`` the output shape is computed by hand; otherwise the call is
    forwarded to ``nn.MaxPool3d``.
    """

    def forward(self, x):
        # PyTorch 1.9 does not support empty tensor inference yet
        if x.numel() != 0 or not obsolete_torch_version(
                TORCH_VERSION, (1, 9)):
            return super().forward(x)

        # ceil_mode selects how a fractional output size is rounded.
        rounding = math.ceil if self.ceil_mode else math.floor
        hyper_params = zip(x.shape[-3:], _triple(self.kernel_size),
                           _triple(self.padding), _triple(self.stride),
                           _triple(self.dilation))
        spatial = [
            rounding((size + 2 * pad - (dil * (kernel - 1) + 1)) / step + 1)
            for size, kernel, pad, step, dil in hyper_params
        ]
        return NewEmptyTensorOp.apply(x, list(x.shape[:2]) + spatial)


class Linear(torch.nn.Linear):
    """``torch.nn.Linear`` that also accepts empty (zero-element) inputs.

    For an empty input on a torch version reported obsolete relative to
    ``(1, 5)`` an empty output is built by hand; otherwise the call is
    forwarded to ``torch.nn.Linear``.
    """

    def forward(self, x):
        # empty tensor forward of Linear layer is supported in Pytorch 1.6
        if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 5)):
            # A linear layer maps (*, in_features) to (*, out_features), so
            # every leading dimension must be kept, not only the first one.
            out_shape = list(x.shape[:-1]) + [self.out_features]
            empty = NewEmptyTensorOp.apply(x, out_shape)
            if self.training:
                # produce dummy gradient to avoid DDP warning.
                dummy = sum(p.view(-1)[0] for p in self.parameters()) * 0.0
                return empty + dummy
            else:
                return empty

        return super().forward(x)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/cnn/builder.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
from ..runner import Sequential
from ..utils import Registry, build_from_cfg


def build_model_from_cfg(cfg, registry, default_args=None):
    """Build a module, or a sequence of modules, from config.

    Unlike ``build_from_cfg``, a list of configs is accepted: each entry is
    built on its own and the results are wrapped in ``Sequential``.

    Args:
        cfg (dict, list[dict]): A single config dict, or a list of config
            dicts to be built and chained.
        registry (:obj:`Registry`): A registry the module belongs to.
        default_args (dict, optional): Default arguments to build the module.
            Defaults to None.

    Returns:
        nn.Module: A built nn module.
    """
    if not isinstance(cfg, list):
        return build_from_cfg(cfg, registry, default_args)

    built = (build_from_cfg(item, registry, default_args) for item in cfg)
    return Sequential(*built)


# Registry named 'model'; it is created with ``build_model_from_cfg`` as its
# build function.
MODELS = Registry('model', build_func=build_model_from_cfg)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/cnn/resnet.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import logging

import torch.nn as nn
import torch.utils.checkpoint as cp

from .utils import constant_init, kaiming_init


def conv3x3(in_planes, out_planes, stride=1, dilation=1):
    """Create a bias-free 3x3 ``nn.Conv2d`` whose padding equals its dilation.

    Args:
        in_planes (int): Number of input channels.
        out_planes (int): Number of output channels.
        stride (int): Convolution stride. Default: 1.
        dilation (int): Dilation, also used as the padding. Default: 1.

    Returns:
        nn.Conv2d: The configured convolution layer.
    """
    conv_kwargs = dict(
        kernel_size=3,
        stride=stride,
        padding=dilation,
        dilation=dilation,
        bias=False)
    return nn.Conv2d(in_planes, out_planes, **conv_kwargs)


class BasicBlock(nn.Module):
    """Residual block built from two 3x3 convolutions.

    ``style`` is only validated and ``with_cp`` must be falsy (it is
    asserted); neither changes how the block is built.
    """

    expansion = 1

    def __init__(self,
                 inplanes,
                 planes,
                 stride=1,
                 dilation=1,
                 downsample=None,
                 style='pytorch',
                 with_cp=False):
        super().__init__()
        assert style in ['pytorch', 'caffe']
        # Only the first convolution receives the stride and dilation.
        self.conv1 = conv3x3(inplanes, planes, stride, dilation)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride
        self.dilation = dilation
        assert not with_cp

    def forward(self, x):
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        # The shortcut is the input itself unless a downsample module is set.
        shortcut = x if self.downsample is None else self.downsample(x)

        out += shortcut
        return self.relu(out)


class Bottleneck(nn.Module):
    """Residual bottleneck block: 1x1, 3x3 and 1x1 convolutions."""

    expansion = 4

    def __init__(self,
                 inplanes,
                 planes,
                 stride=1,
                 dilation=1,
                 downsample=None,
                 style='pytorch',
                 with_cp=False):
        """Bottleneck block.

        With ``style='pytorch'`` the block's stride is applied by the 3x3
        convolution; with ``style='caffe'`` it is applied by the first 1x1
        convolution.
        """
        super().__init__()
        assert style in ['pytorch', 'caffe']
        conv1_stride, conv2_stride = (
            (1, stride) if style == 'pytorch' else (stride, 1))

        self.conv1 = nn.Conv2d(
            inplanes, planes, kernel_size=1, stride=conv1_stride, bias=False)
        self.conv2 = nn.Conv2d(
            planes,
            planes,
            kernel_size=3,
            stride=conv2_stride,
            padding=dilation,
            dilation=dilation,
            bias=False)

        out_planes = planes * self.expansion
        self.bn1 = nn.BatchNorm2d(planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride
        self.dilation = dilation
        self.with_cp = with_cp

    def forward(self, x):

        def _inner_forward(x):
            out = self.relu(self.bn1(self.conv1(x)))
            out = self.relu(self.bn2(self.conv2(out)))
            out = self.bn3(self.conv3(out))

            # Shortcut is the input unless a downsample module is set.
            shortcut = x if self.downsample is None else self.downsample(x)
            out += shortcut
            return out

        # Checkpointing is used only when enabled and the input tracks grads.
        use_checkpoint = self.with_cp and x.requires_grad
        if use_checkpoint:
            out = cp.checkpoint(_inner_forward, x)
        else:
            out = _inner_forward(x)

        return self.relu(out)


def make_res_layer(block,
                   inplanes,
                   planes,
                   blocks,
                   stride=1,
                   dilation=1,
                   style='pytorch',
                   with_cp=False):
    """Stack ``blocks`` residual blocks into one ``nn.Sequential`` stage.

    The first block receives ``stride`` and, when the stride is not 1 or the
    channel count changes, a 1x1 conv + BatchNorm ``downsample`` module. The
    remaining blocks use stride 1 and no downsample.

    Args:
        block (type): Block class exposing an ``expansion`` attribute.
        inplanes (int): Input channels of the stage.
        planes (int): Base channels; the stage outputs
            ``planes * block.expansion`` channels.
        blocks (int): Number of blocks in the stage.
        stride (int): Stride of the first block. Default: 1.
        dilation (int): Dilation passed to every block. Default: 1.
        style (str): Forwarded to every block. Default: 'pytorch'.
        with_cp (bool): Forwarded to every block. Default: False.

    Returns:
        nn.Sequential: The assembled stage.
    """
    out_planes = planes * block.expansion

    downsample = None
    if not (stride == 1 and inplanes == out_planes):
        downsample = nn.Sequential(
            nn.Conv2d(
                inplanes,
                out_planes,
                kernel_size=1,
                stride=stride,
                bias=False),
            nn.BatchNorm2d(out_planes),
        )

    head = block(
        inplanes,
        planes,
        stride,
        dilation,
        downsample,
        style=style,
        with_cp=with_cp)
    tail = [
        block(out_planes, planes, 1, dilation, style=style, with_cp=with_cp)
        for _ in range(1, blocks)
    ]
    return nn.Sequential(head, *tail)


class ResNet(nn.Module):
    """ResNet backbone.

    Args:
        depth (int): Depth of resnet, from {18, 34, 50, 101, 152}.
        num_stages (int): Resnet stages, normally 4.
        strides (Sequence[int]): Strides of the first block of each stage.
        dilations (Sequence[int]): Dilation of each stage.
        out_indices (Sequence[int]): Output from which stages.
        style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two
            layer is the 3x3 conv layer, otherwise the stride-two layer is
            the first 1x1 conv layer.
        frozen_stages (int): Stages to be frozen (all param fixed). -1 means
            not freezing any parameters.
        bn_eval (bool): Whether to set BN layers as eval mode, namely, freeze
            running stats (mean and var).
        bn_frozen (bool): Whether to freeze weight and bias of BN layers.
        with_cp (bool): Use checkpoint or not. Using checkpoint will save some
            memory while slowing down the training speed.
    """

    # depth -> (block class, number of blocks in each of the four stages)
    arch_settings = {
        18: (BasicBlock, (2, 2, 2, 2)),
        34: (BasicBlock, (3, 4, 6, 3)),
        50: (Bottleneck, (3, 4, 6, 3)),
        101: (Bottleneck, (3, 4, 23, 3)),
        152: (Bottleneck, (3, 8, 36, 3))
    }

    def __init__(self,
                 depth,
                 num_stages=4,
                 strides=(1, 2, 2, 2),
                 dilations=(1, 1, 1, 1),
                 out_indices=(0, 1, 2, 3),
                 style='pytorch',
                 frozen_stages=-1,
                 bn_eval=True,
                 bn_frozen=False,
                 with_cp=False):
        super(ResNet, self).__init__()
        if depth not in self.arch_settings:
            raise KeyError(f'invalid depth {depth} for resnet')
        assert num_stages >= 1 and num_stages <= 4
        block, stage_blocks = self.arch_settings[depth]
        stage_blocks = stage_blocks[:num_stages]
        assert len(strides) == len(dilations) == num_stages
        assert max(out_indices) < num_stages

        self.out_indices = out_indices
        self.style = style
        self.frozen_stages = frozen_stages
        self.bn_eval = bn_eval
        self.bn_frozen = bn_frozen
        self.with_cp = with_cp

        # Stem: 7x7 stride-2 convolution, BN, ReLU and 3x3 stride-2 max pool.
        self.inplanes = 64
        self.conv1 = nn.Conv2d(
            3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Stages are registered as ``layer1`` .. ``layerN``; only their names
        # are kept in ``res_layers``.
        self.res_layers = []
        for i, num_blocks in enumerate(stage_blocks):
            stride = strides[i]
            dilation = dilations[i]
            planes = 64 * 2**i
            res_layer = make_res_layer(
                block,
                self.inplanes,
                planes,
                num_blocks,
                stride=stride,
                dilation=dilation,
                style=self.style,
                with_cp=with_cp)
            self.inplanes = planes * block.expansion
            layer_name = f'layer{i + 1}'
            self.add_module(layer_name, res_layer)
            self.res_layers.append(layer_name)

        self.feat_dim = block.expansion * 64 * 2**(len(stage_blocks) - 1)

    def init_weights(self, pretrained=None):
        """Load a checkpoint or initialise conv / BN layers.

        Args:
            pretrained (str | None): Checkpoint passed to ``load_checkpoint``
                (non-strict). If None, ``nn.Conv2d`` modules go through
                ``kaiming_init`` and ``nn.BatchNorm2d`` modules through
                ``constant_init(m, 1)``.

        Raises:
            TypeError: If ``pretrained`` is neither a str nor None.
        """
        if isinstance(pretrained, str):
            logger = logging.getLogger()
            from ..runner import load_checkpoint
            load_checkpoint(self, pretrained, strict=False, logger=logger)
        elif pretrained is None:
            for m in self.modules():
                if isinstance(m, nn.Conv2d):
                    kaiming_init(m)
                elif isinstance(m, nn.BatchNorm2d):
                    constant_init(m, 1)
        else:
            raise TypeError('pretrained must be a str or None')

    def forward(self, x):
        """Run the stem and the stages, collecting the requested outputs.

        Returns:
            Tensor | tuple[Tensor]: The single stage output when exactly one
            stage index is in ``out_indices``, otherwise a tuple of outputs.
        """
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        outs = []
        for i, layer_name in enumerate(self.res_layers):
            res_layer = getattr(self, layer_name)
            x = res_layer(x)
            if i in self.out_indices:
                outs.append(x)
        if len(outs) == 1:
            return outs[0]
        else:
            return tuple(outs)

    def train(self, mode=True):
        """Switch train / eval mode while honouring the BN and freeze flags.

        Args:
            mode (bool): True for training mode, False for evaluation mode.

        Returns:
            ResNet: ``self``, as ``nn.Module.train`` does. ``nn.Module.eval``
            returns the result of ``train(False)``, so this is what makes
            ``model = ResNet(...).eval()`` yield the model instead of None.
        """
        super(ResNet, self).train(mode)
        if self.bn_eval:
            # Keep every BN layer in eval mode; optionally freeze its params.
            for m in self.modules():
                if isinstance(m, nn.BatchNorm2d):
                    m.eval()
                    if self.bn_frozen:
                        for params in m.parameters():
                            params.requires_grad = False
        if mode and self.frozen_stages >= 0:
            # Freeze the stem. The loop over ``bn1.parameters()`` already
            # covers its weight and bias.
            for param in self.conv1.parameters():
                param.requires_grad = False
            for param in self.bn1.parameters():
                param.requires_grad = False
            self.bn1.eval()
            for i in range(1, self.frozen_stages + 1):
                mod = getattr(self, f'layer{i}')
                mod.eval()
                for param in mod.parameters():
                    param.requires_grad = False
        return self


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/cnn/utils/__init__.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
from .flops_counter import get_model_complexity_info
from .fuse_conv_bn import fuse_conv_bn
from .sync_bn import revert_sync_batchnorm
from .weight_init import (INITIALIZERS, Caffe2XavierInit, ConstantInit,
                          KaimingInit, NormalInit, PretrainedInit,
                          TruncNormalInit, UniformInit, XavierInit,
                          bias_init_with_prob, caffe2_xavier_init,
                          constant_init, initialize, kaiming_init, normal_init,
                          trunc_normal_init, uniform_init, xavier_init)

# Names exported by ``from <this package> import *``; each entry is one of
# the names imported at the top of this file.
__all__ = [
    'get_model_complexity_info', 'bias_init_with_prob', 'caffe2_xavier_init',
    'constant_init', 'kaiming_init', 'normal_init', 'trunc_normal_init',
    'uniform_init', 'xavier_init', 'fuse_conv_bn', 'initialize',
    'INITIALIZERS', 'ConstantInit', 'XavierInit', 'NormalInit',
    'TruncNormalInit', 'UniformInit', 'KaimingInit', 'PretrainedInit',
    'Caffe2XavierInit', 'revert_sync_batchnorm'
]


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/cnn/utils/flops_counter.py
================================================
# Modified from flops-counter.pytorch by Vladislav Sovrasov
# original repo: https://github.com/sovrasov/flops-counter.pytorch

# MIT License

# Copyright (c) 2018 Vladislav Sovrasov

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import sys
import warnings
from functools import partial

import numpy as np
import torch
import torch.nn as nn

import mmcv


def get_model_complexity_info(model,
                              input_shape,
                              print_per_layer_stat=True,
                              as_strings=True,
                              input_constructor=None,
                              flush=False,
                              ost=sys.stdout):
    """Get complexity information of a model.

    This method can calculate FLOPs and parameter counts of a model with
    corresponding input shape. It can also print complexity information for
    each layer in a model.

    Supported layers are listed as below:
        - Convolutions: ``nn.Conv1d``, ``nn.Conv2d``, ``nn.Conv3d``.
        - Activations: ``nn.ReLU``, ``nn.PReLU``, ``nn.ELU``,
          ``nn.LeakyReLU``, ``nn.ReLU6``.
        - Poolings: ``nn.MaxPool1d``, ``nn.MaxPool2d``, ``nn.MaxPool3d``,
          ``nn.AvgPool1d``, ``nn.AvgPool2d``, ``nn.AvgPool3d``,
          ``nn.AdaptiveMaxPool1d``, ``nn.AdaptiveMaxPool2d``,
          ``nn.AdaptiveMaxPool3d``, ``nn.AdaptiveAvgPool1d``,
          ``nn.AdaptiveAvgPool2d``, ``nn.AdaptiveAvgPool3d``.
        - BatchNorms: ``nn.BatchNorm1d``, ``nn.BatchNorm2d``,
          ``nn.BatchNorm3d``, ``nn.GroupNorm``, ``nn.InstanceNorm1d``,
          ``InstanceNorm2d``, ``InstanceNorm3d``, ``nn.LayerNorm``.
        - Linear: ``nn.Linear``.
        - Deconvolution: ``nn.ConvTranspose2d``.
        - Upsample: ``nn.Upsample``.

    The model is switched to eval mode and the counting hooks are removed
    again before returning, including when the forward pass or the printing
    raises.

    Args:
        model (nn.Module): The model for complexity calculation.
        input_shape (tuple): Input shape used for calculation.
        print_per_layer_stat (bool): Whether to print complexity information
            for each layer in a model. Default: True.
        as_strings (bool): Output FLOPs and params counts in a string form.
            Default: True.
        input_constructor (None | callable): If specified, it is called with
            ``input_shape`` and its result is unpacked as keyword arguments
            of the model. Otherwise an uninitialised tensor of shape
            ``(1, *input_shape)`` is used to calculate FLOPs. Default: None.
        flush (bool): same as that in :func:`print`. Default: False.
        ost (stream): same as ``file`` param in :func:`print`.
            Default: sys.stdout.

    Returns:
        tuple[float | str]: If ``as_strings`` is set to True, it will return
        FLOPs and parameter counts in a string format. otherwise, it will
        return those in a float number format.
    """
    assert type(input_shape) is tuple
    assert len(input_shape) >= 1
    assert isinstance(model, nn.Module)
    flops_model = add_flops_counting_methods(model)
    flops_model.eval()
    flops_model.start_flops_count()
    try:
        if input_constructor:
            input = input_constructor(input_shape)
            _ = flops_model(**input)
        else:
            # Models without parameters (``nn.ReLU()``, ``nn.AvgPool2d``, ...)
            # have no dtype / device to copy, so the defaults are used.
            first_param = next(flops_model.parameters(), None)
            if first_param is None:
                batch = torch.ones(()).new_empty((1, *input_shape))
            else:
                batch = torch.ones(()).new_empty(
                    (1, *input_shape),
                    dtype=first_param.dtype,
                    device=first_param.device)

            _ = flops_model(batch)

        flops_count, params_count = flops_model.compute_average_flops_cost()
        if print_per_layer_stat:
            print_model_with_flops(
                flops_model, flops_count, params_count, ost=ost, flush=flush)
    finally:
        # Always detach the counting hooks, even if something above failed.
        flops_model.stop_flops_count()

    if as_strings:
        return flops_to_string(flops_count), params_to_string(params_count)

    return flops_count, params_count


def flops_to_string(flops, units='GFLOPs', precision=2):
    """Format a FLOPs count as text with a unit suffix.

    Note (from the original documentation): a multiply-add is counted as one
    FLOP.

    Args:
        flops (float): FLOPs number to be converted.
        units (str | None): Target unit: 'GFLOPs', 'MFLOPs', 'KFLOPs' or
            'FLOPs'. With None the largest unit whose integer part is
            non-zero is chosen. Any other value prints the raw number.
            Default: 'GFLOPs'.
        precision (int): Digit number after the decimal point. Default: 2.

    Returns:
        str: The converted FLOPs number with units.

    Examples:
        >>> flops_to_string(1e9)
        '1.0 GFLOPs'
        >>> flops_to_string(2e5, 'MFLOPs')
        '0.2 MFLOPs'
        >>> flops_to_string(3e-9, None)
        '3e-09 FLOPs'
    """
    # (unit name, power of ten), largest first.
    scales = (('GFLOPs', 9), ('MFLOPs', 6), ('KFLOPs', 3))

    if units is None:
        for name, exponent in scales:
            if flops // 10**exponent > 0:
                scaled = round(flops / 10.**exponent, precision)
                return str(scaled) + ' ' + name
        return str(flops) + ' FLOPs'

    for name, exponent in scales:
        if units == name:
            scaled = round(flops / 10.**exponent, precision)
            return str(scaled) + ' ' + units
    return str(flops) + ' FLOPs'


def params_to_string(num_params, units=None, precision=2):
    """Format a parameter count as text with a unit suffix.

    Args:
        num_params (float): Parameter number to be converted.
        units (str | None): Target unit: 'M' or 'K'. With None the unit is
            picked automatically, and the thousands suffix is then written
            as a lower-case 'k'. Any other value prints the raw number.
            Default: None.
        precision (int): Digit number after the decimal point. Default: 2.

    Returns:
        str: The converted parameter number with units.

    Examples:
        >>> params_to_string(1e9)
        '1000.0 M'
        >>> params_to_string(2e5)
        '200.0 k'
        >>> params_to_string(3e-9)
        '3e-09'
    """
    if units is None:
        # Automatic mode: largest unit with a non-zero integer part.
        if num_params // 10**6 > 0:
            scaled, suffix = num_params / 10**6, ' M'
        elif num_params // 10**3:
            scaled, suffix = num_params / 10**3, ' k'
        else:
            return str(num_params)
        return str(round(scaled, precision)) + suffix

    for symbol, exponent in (('M', 6), ('K', 3)):
        if units == symbol:
            scaled = round(num_params / 10.**exponent, precision)
            return str(scaled) + ' ' + units
    return str(num_params)


def print_model_with_flops(model,
                           total_flops,
                           total_params,
                           units='GFLOPs',
                           precision=3,
                           ost=sys.stdout,
                           flush=False):
    """Print a model with FLOPs for each layer.

    Every module's ``extra_repr`` is temporarily replaced so that printing
    the model shows the accumulated parameter / FLOPs figures. All temporary
    attributes are removed afterwards, also when printing fails. A zero
    ``total_flops`` or ``total_params`` is reported as a 0% share instead of
    raising ``ZeroDivisionError``.

    Args:
        model (nn.Module): The model to be printed.
        total_flops (float): Total FLOPs of the model.
        total_params (float): Total parameter counts of the model.
        units (str | None): Converted FLOPs units. Default: 'GFLOPs'.
        precision (int): Digit number after the decimal point. Default: 3.
        ost (stream): same as `file` param in :func:`print`.
            Default: sys.stdout.
        flush (bool): same as that in :func:`print`. Default: False.

    Example:
        >>> class ExampleModel(nn.Module):
        >>>     def __init__(self):
        >>>         super().__init__()
        >>>         self.conv1 = nn.Conv2d(3, 8, 3)
        >>>         self.conv2 = nn.Conv2d(8, 256, 3)
        >>>         self.conv3 = nn.Conv2d(256, 8, 3)
        >>>         self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
        >>>         self.flatten = nn.Flatten()
        >>>         self.fc = nn.Linear(8, 1)
        >>>
        >>>     def forward(self, x):
        >>>         x = self.conv1(x)
        >>>         x = self.conv2(x)
        >>>         x = self.conv3(x)
        >>>         x = self.avg_pool(x)
        >>>         x = self.flatten(x)
        >>>         x = self.fc(x)
        >>>         return x

        >>> model = ExampleModel()
        >>> x = (3, 16, 16)
        to print the complexity information state for each layer, you can use
        >>> get_model_complexity_info(model, x)
        or directly use
        >>> print_model_with_flops(model, 4579784.0, 37361)
        ExampleModel(
          0.037 M, 100.000% Params, 0.005 GFLOPs, 100.000% FLOPs,
          (conv1): Conv2d(0.0 M, 0.600% Params, 0.0 GFLOPs, 0.959% FLOPs, 3, 8, kernel_size=(3, 3), stride=(1, 1))  # noqa: E501
          (conv2): Conv2d(0.019 M, 50.020% Params, 0.003 GFLOPs, 58.760% FLOPs, 8, 256, kernel_size=(3, 3), stride=(1, 1))
          (conv3): Conv2d(0.018 M, 49.356% Params, 0.002 GFLOPs, 40.264% FLOPs, 256, 8, kernel_size=(3, 3), stride=(1, 1))
          (avg_pool): AdaptiveAvgPool2d(0.0 M, 0.000% Params, 0.0 GFLOPs, 0.017% FLOPs, output_size=(1, 1))
          (flatten): Flatten(0.0 M, 0.000% Params, 0.0 GFLOPs, 0.000% FLOPs, )
          (fc): Linear(0.0 M, 0.024% Params, 0.0 GFLOPs, 0.000% FLOPs, in_features=8, out_features=1, bias=True)
        )
    """

    def share(part, total):
        # A zero total (e.g. a model without parameters) is shown as 0%.
        return part / total if total else 0.0

    def accumulate_params(self):
        # Supported layers carry their own count; containers sum children.
        if is_supported_instance(self):
            return self.__params__
        return sum(m.accumulate_params() for m in self.children())

    def accumulate_flops(self):
        # Per-layer FLOPs are averaged over the batches seen by ``model``.
        if is_supported_instance(self):
            return self.__flops__ / model.__batch_counter__
        return sum(m.accumulate_flops() for m in self.children())

    def flops_repr(self):
        accumulated_num_params = self.accumulate_params()
        accumulated_flops_cost = self.accumulate_flops()
        return ', '.join([
            params_to_string(
                accumulated_num_params, units='M', precision=precision),
            '{:.3%} Params'.format(
                share(accumulated_num_params, total_params)),
            flops_to_string(
                accumulated_flops_cost, units=units, precision=precision),
            '{:.3%} FLOPs'.format(share(accumulated_flops_cost, total_flops)),
            self.original_extra_repr()
        ])

    def add_extra_repr(m):
        m.accumulate_flops = accumulate_flops.__get__(m)
        m.accumulate_params = accumulate_params.__get__(m)
        flops_extra_repr = flops_repr.__get__(m)
        if m.extra_repr != flops_extra_repr:
            m.original_extra_repr = m.extra_repr
            m.extra_repr = flops_extra_repr
            assert m.extra_repr != m.original_extra_repr

    def del_extra_repr(m):
        if hasattr(m, 'original_extra_repr'):
            m.extra_repr = m.original_extra_repr
            del m.original_extra_repr
        # Remove both helpers; leaving one behind keeps a closure-bound
        # method in the module's attributes.
        for helper in ('accumulate_flops', 'accumulate_params'):
            if hasattr(m, helper):
                delattr(m, helper)

    model.apply(add_extra_repr)
    try:
        print(model, file=ost, flush=flush)
    finally:
        # Restore the modules even if printing raised.
        model.apply(del_extra_repr)


def get_model_parameters_number(model):
    """Count the trainable parameters of a model.

    Args:
        model (nn.module): The model for parameter number calculation.

    Returns:
        int: Total number of elements over all parameters whose
        ``requires_grad`` is set.
    """
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total


def add_flops_counting_methods(net_main_module):
    """Attach the FLOPs-counting helpers to a module object as bound methods.

    Binding the functions to the instance gives each of them access to the
    module through ``self``. The counters are reset before returning.

    Args:
        net_main_module: Object that receives the methods.

    Returns:
        The same object, with the methods attached.
    """
    bindings = (
        ('start_flops_count', start_flops_count),
        ('stop_flops_count', stop_flops_count),
        ('reset_flops_count', reset_flops_count),
        ('compute_average_flops_cost', compute_average_flops_cost),
    )
    for attr_name, func in bindings:
        setattr(net_main_module, attr_name, func.__get__(net_main_module))

    net_main_module.reset_flops_count()

    return net_main_module


def compute_average_flops_cost(self):
    """Compute the average FLOPs cost and the parameter count.

    Becomes available as a method after ``add_flops_counting_methods()`` has
    been called on a net object.

    Returns:
        tuple: ``(flops, params)`` where ``flops`` is the FLOPs summed over
        all supported modules divided by ``self.__batch_counter__`` and
        ``params`` is the result of ``get_model_parameters_number(self)``.
    """
    batches_count = self.__batch_counter__
    flops_sum = sum(
        module.__flops__ for module in self.modules()
        if is_supported_instance(module))
    params_sum = get_model_parameters_number(self)
    return flops_sum / batches_count, params_sum


def start_flops_count(self):
    """Activate the computation of mean flops consumption per image.

    This becomes available as a method once ``add_flops_counting_methods()``
    has been called on the net object. Call it before running the network.
    It registers the batch-counter hook on ``self`` and a FLOPs forward hook
    on every supported submodule that does not have one yet.
    """
    add_batch_counter_hook_function(self)

    def _attach_flops_hook(module):
        # Only layer types listed in the modules mapping are counted.
        if not is_supported_instance(module):
            return
        # A stored handle means the hook is already installed.
        if hasattr(module, '__flops_handle__'):
            return
        hook_handle = module.register_forward_hook(
            get_modules_mapping()[type(module)])
        module.__flops_handle__ = hook_handle

    self.apply(_attach_flops_hook)


def stop_flops_count(self):
    """Stop computing the mean flops consumption per image.

    This becomes available as a method once ``add_flops_counting_methods()``
    has been called on the net object, and can be called at any time to pause
    the computation. Only the hooks are removed here; the counters themselves
    are not touched by this function.
    """
    # Remove the batch-counter hook registered on the top-level module.
    remove_batch_counter_hook_function(self)
    # Remove the per-layer FLOPs hooks from every supported submodule.
    self.apply(remove_flops_counter_hook_function)


def reset_flops_count(self):
    """Reset statistics computed so far.

    This becomes available as a method once `add_flops_counting_methods()`
    has been called on the net object.
    """
    # Zero the batch counter kept on the top-level module.
    add_batch_counter_variables_or_reset(self)
    # Create or zero the per-layer counters on every supported submodule.
    self.apply(add_flops_counter_variable_or_reset)


# ---- Internal functions
def empty_flops_counter_hook(module, input, output):
    """Forward hook that adds zero to ``module.__flops__``.

    ``input`` and ``output`` are accepted to match the forward-hook signature
    and are not used.
    """
    module.__flops__ += 0


def upsample_flops_counter_hook(module, input, output):
    """Forward hook that accumulates the FLOPs of an upsampling layer.

    One operation is counted per element of the output tensor, across the
    whole batch. Counting the whole batch keeps this hook consistent with the
    other hooks in this file (for example ``relu_flops_counter_hook`` uses
    ``output.numel()``): the accumulated value is later divided by the number
    of processed samples in ``compute_average_flops_cost``.

    The previous implementation indexed ``output[0]`` and therefore counted
    only the first sample of the batch, which under-reported the cost
    whenever the batch size was larger than one.

    Args:
        module: The layer the hook is attached to; its ``__flops__`` counter
            is incremented.
        input: Inputs of the forward call (unused).
        output (Tensor): Output of the forward call.
    """
    module.__flops__ += int(output.numel())


def relu_flops_counter_hook(module, input, output):
    """Forward hook for activations: one operation per output element.

    Args:
        module: Layer whose ``__flops__`` counter is incremented.
        input: Inputs of the forward call (unused).
        output: Output of the forward call; its ``numel()`` is the cost.
    """
    n_elements = int(output.numel())
    module.__flops__ += n_elements


def linear_flops_counter_hook(module, input, output):
    """Forward hook for fully connected layers.

    The cost is the number of elements of the first input multiplied by the
    size of the last output dimension. No extra cost is added for a bias.

    Args:
        module: Layer whose ``__flops__`` counter is incremented.
        input (tuple): Inputs of the forward call; only the first is used.
        output: Output of the forward call.
    """
    in_elements = np.prod(input[0].shape)
    # PyTorch has already validated the shapes, so only the size of the last
    # output dimension is needed here.
    out_features = output.shape[-1]
    module.__flops__ += int(in_elements * out_features)


def pool_flops_counter_hook(module, input, output):
    """Forward hook for pooling layers: one operation per input element.

    Args:
        module: Layer whose ``__flops__`` counter is incremented.
        input (tuple): Inputs of the forward call; only the first is used.
        output: Output of the forward call (unused).
    """
    first_input = input[0]
    n_elements = int(np.prod(first_input.shape))
    module.__flops__ += n_elements


def norm_flops_counter_hook(module, input, output):
    """Forward hook for normalization layers.

    The cost is one operation per element of the first input, doubled when
    the layer reports ``affine`` or ``elementwise_affine`` as truthy.

    Args:
        module: Layer whose ``__flops__`` counter is incremented.
        input (tuple): Inputs of the forward call; only the first is used.
        output: Output of the forward call (unused).
    """
    n_elements = np.prod(input[0].shape)
    has_affine = (
        getattr(module, 'affine', False)
        or getattr(module, 'elementwise_affine', False))
    factor = 2 if has_affine else 1
    module.__flops__ += int(n_elements * factor)


def deconv_flops_counter_hook(conv_module, input, output):
    """Forward hook that accumulates the FLOPs of a 2D transposed convolution.

    The convolution cost is counted per input position:
    ``kernel_h * kernel_w * in_channels * (out_channels // groups)`` for each
    of the ``batch * in_h * in_w`` positions. When the layer has a bias, one
    extra operation is counted per output element
    (``out_channels * batch * out_h * out_w``).

    Args:
        conv_module: The layer the hook is attached to; its ``__flops__``
            counter is incremented.
        input (tuple): Inputs of the forward call; only the first is used.
        output (Tensor): Output of the forward call.
    """
    # Can have multiple inputs, getting the first one
    input = input[0]

    batch_size = input.shape[0]
    input_height, input_width = input.shape[2:]

    kernel_height, kernel_width = conv_module.kernel_size
    in_channels = conv_module.in_channels
    out_channels = conv_module.out_channels
    groups = conv_module.groups

    filters_per_channel = out_channels // groups
    conv_per_position_flops = (
        kernel_height * kernel_width * in_channels * filters_per_channel)

    active_elements_count = batch_size * input_height * input_width
    overall_conv_flops = conv_per_position_flops * active_elements_count
    bias_flops = 0
    if conv_module.bias is not None:
        output_height, output_width = output.shape[2:]
        # One addition per output element: height * width, not height squared.
        bias_flops = out_channels * batch_size * output_height * output_width
    overall_flops = overall_conv_flops + bias_flops

    conv_module.__flops__ += int(overall_flops)


def conv_flops_counter_hook(conv_module, input, output):
    """Forward hook that accumulates the FLOPs of a convolution layer.

    The cost per output position is
    ``prod(kernel_size) * in_channels * (out_channels // groups)`` and there
    are ``batch * prod(output spatial dims)`` positions. A layer with a bias
    adds ``out_channels`` operations per position.

    Args:
        conv_module: Layer whose ``__flops__`` counter is incremented.
        input (tuple): Inputs of the forward call; only the first is used.
        output: Output of the forward call.
    """
    # Only the first input matters when several are passed.
    n_samples = input[0].shape[0]

    spatial_size = int(np.prod(list(output.shape[2:])))
    kernel_volume = int(np.prod(list(conv_module.kernel_size)))
    filters_per_group = conv_module.out_channels // conv_module.groups

    flops_per_position = (
        kernel_volume * conv_module.in_channels * filters_per_group)
    n_positions = n_samples * spatial_size

    total_flops = flops_per_position * n_positions
    if conv_module.bias is not None:
        total_flops = total_flops + conv_module.out_channels * n_positions

    conv_module.__flops__ += int(total_flops)


def batch_counter_hook(module, input, output):
    """Forward hook that adds the size of the current batch to the counter.

    The batch size is ``len()`` of the first positional input. When the
    forward call had no positional inputs a warning is emitted and a batch
    size of 1 is assumed.

    Args:
        module: Object whose ``__batch_counter__`` is incremented.
        input (tuple): Positional inputs of the forward call.
        output: Output of the forward call (unused).
    """
    if len(input) > 0:
        # Several inputs may be present; the first one defines the batch.
        n_samples = len(input[0])
    else:
        warnings.warn('No positional inputs found for a module, '
                      'assuming batch size is 1.')
        n_samples = 1
    module.__batch_counter__ += n_samples


def add_batch_counter_variables_or_reset(module):
    """Create the ``__batch_counter__`` attribute on ``module`` or reset it.

    Args:
        module: Object that carries the batch counter.
    """
    setattr(module, '__batch_counter__', 0)


def add_batch_counter_hook_function(module):
    """Register ``batch_counter_hook`` on ``module`` unless already present.

    The handle returned by ``register_forward_hook`` is stored on the module
    as ``__batch_counter_handle__``; the presence of that attribute is what
    marks the hook as installed.

    Args:
        module: Object on which the forward hook is registered.
    """
    if not hasattr(module, '__batch_counter_handle__'):
        module.__batch_counter_handle__ = module.register_forward_hook(
            batch_counter_hook)


def remove_batch_counter_hook_function(module):
    """Remove the batch-counter hook from ``module`` if one is installed.

    Args:
        module: Object that may carry a ``__batch_counter_handle__``.
    """
    if not hasattr(module, '__batch_counter_handle__'):
        return
    # Detach the hook first, then drop the stored handle so that the hook
    # can be registered again later.
    module.__batch_counter_handle__.remove()
    del module.__batch_counter_handle__


def add_flops_counter_variable_or_reset(module):
    """Create or reset the per-layer counters of a supported module.

    For supported layer types ``__flops__`` is set to 0 and ``__params__`` to
    the layer's trainable parameter count. A warning is emitted when either
    attribute already exists before being overwritten. Unsupported modules
    are left untouched.

    Args:
        module: The module to prepare.
    """
    if not is_supported_instance(module):
        return
    already_defined = (
        hasattr(module, '__flops__') or hasattr(module, '__params__'))
    if already_defined:
        warnings.warn('variables __flops__ or __params__ are already '
                      'defined for the module' + type(module).__name__ +
                      ' ptflops can affect your code!')
    module.__flops__ = 0
    module.__params__ = get_model_parameters_number(module)


def is_supported_instance(module):
    """Tell whether a FLOPs hook exists for the exact type of ``module``.

    The lookup uses ``type(module)``, so subclasses of a listed layer type
    are not matched unless they are listed themselves.

    Returns:
        bool: True if ``type(module)`` is a key of the modules mapping.
    """
    return type(module) in get_modules_mapping()


def remove_flops_counter_hook_function(module):
    """Remove the FLOPs hook from a supported module if one is installed.

    Args:
        module: The module to clean up.
    """
    if not is_supported_instance(module):
        return
    if not hasattr(module, '__flops_handle__'):
        return
    module.__flops_handle__.remove()
    del module.__flops_handle__


def get_modules_mapping():
    """Build the mapping from layer type to its FLOPs counting hook.

    A new dict is created on every call. Keys are inserted group by group in
    the order listed below.

    Returns:
        dict: ``{layer_type: hook_function}``.
    """
    hook_groups = (
        # convolutions
        (conv_flops_counter_hook, (
            nn.Conv1d,
            nn.Conv2d,
            mmcv.cnn.bricks.Conv2d,
            nn.Conv3d,
            mmcv.cnn.bricks.Conv3d,
        )),
        # activations
        (relu_flops_counter_hook, (
            nn.ReLU,
            nn.PReLU,
            nn.ELU,
            nn.LeakyReLU,
            nn.ReLU6,
        )),
        # poolings
        (pool_flops_counter_hook, (
            nn.MaxPool1d,
            nn.AvgPool1d,
            nn.AvgPool2d,
            nn.MaxPool2d,
            mmcv.cnn.bricks.MaxPool2d,
            nn.MaxPool3d,
            mmcv.cnn.bricks.MaxPool3d,
            nn.AvgPool3d,
            nn.AdaptiveMaxPool1d,
            nn.AdaptiveAvgPool1d,
            nn.AdaptiveMaxPool2d,
            nn.AdaptiveAvgPool2d,
            nn.AdaptiveMaxPool3d,
            nn.AdaptiveAvgPool3d,
        )),
        # normalizations
        (norm_flops_counter_hook, (
            nn.BatchNorm1d,
            nn.BatchNorm2d,
            nn.BatchNorm3d,
            nn.GroupNorm,
            nn.InstanceNorm1d,
            nn.InstanceNorm2d,
            nn.InstanceNorm3d,
            nn.LayerNorm,
        )),
        # FC
        (linear_flops_counter_hook, (
            nn.Linear,
            mmcv.cnn.bricks.Linear,
        )),
        # Upscale
        (upsample_flops_counter_hook, (
            nn.Upsample,
        )),
        # Deconvolution
        (deconv_flops_counter_hook, (
            nn.ConvTranspose2d,
            mmcv.cnn.bricks.ConvTranspose2d,
        )),
    )

    mapping = {}
    for hook, layer_types in hook_groups:
        for layer_type in layer_types:
            mapping[layer_type] = hook
    return mapping


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/cnn/utils/fuse_conv_bn.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import torch
import torch.nn as nn


def _fuse_conv_bn(conv, bn):
    """Fuse conv and bn into one module.

    Args:
        conv (nn.Module): Conv to be fused.
        bn (nn.Module): BN to be fused.

    Returns:
        nn.Module: Fused module.
    """
    conv_w = conv.weight
    conv_b = conv.bias if conv.bias is not None else torch.zeros_like(
        bn.running_mean)

    factor = bn.weight / torch.sqrt(bn.running_var + bn.eps)
    conv.weight = nn.Parameter(conv_w *
                               factor.reshape([conv.out_channels, 1, 1, 1]))
    conv.bias = nn.Parameter((conv_b - bn.running_mean) * factor + bn.bias)
    return conv


def fuse_conv_bn(module):
    """Recursively fuse conv and bn in a module.

    At inference time a batch norm layer only applies its stored per-channel
    mean and variance, so it can be folded into the conv layer that precedes
    it, which saves computation and simplifies the network.

    Children are visited in the order given by ``named_children()``. A
    ``nn.Conv2d`` child is remembered and fused with the next batch norm
    child encountered; that batch norm child is then replaced by
    ``nn.Identity``. Any other child is processed recursively.

    Args:
        module (nn.Module): Module to be fused.

    Returns:
        nn.Module: Fused module.
    """
    bn_types = (nn.modules.batchnorm._BatchNorm, nn.SyncBatchNorm)
    pending_conv = None
    pending_conv_name = None

    for child_name, child in module.named_children():
        if isinstance(child, bn_types):
            # Only a BN that follows a remembered conv is fused.
            if pending_conv is not None:
                module._modules[pending_conv_name] = _fuse_conv_bn(
                    pending_conv, child)
                # Keep the slot and put an Identity in it instead of
                # deleting the BN, so the module structure stays the same.
                module._modules[child_name] = nn.Identity()
                pending_conv = None
            continue
        if isinstance(child, nn.Conv2d):
            pending_conv, pending_conv_name = child, child_name
            continue
        fuse_conv_bn(child)
    return module


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/cnn/utils/sync_bn.py
================================================
import torch

import mmcv


class _BatchNormXd(torch.nn.modules.batchnorm._BatchNorm):
    """A general BatchNorm layer without input dimension check.

    Reproduced from @kapily's work:
    (https://github.com/pytorch/pytorch/issues/41081#issuecomment-783961547)
    The only difference between BatchNorm1d, BatchNorm2d, BatchNorm3d, etc
    is `_check_input_dim` that is designed for tensor sanity checks.
    The check has been bypassed in this class for the convenience of converting
    SyncBatchNorm.
    """

    def _check_input_dim(self, input):
        return


def revert_sync_batchnorm(module):
    """Helper function to convert all `SyncBatchNorm` (SyncBN) and
    `mmcv.ops.sync_bn.SyncBatchNorm`(MMSyncBN) layers in the model to
    `BatchNormXd` layers.

    Adapted from @kapily's work:
    (https://github.com/pytorch/pytorch/issues/41081#issuecomment-783961547)

    The MMSyncBN type is only checked when ``mmcv`` exposes an ``ops``
    attribute. Children are converted recursively and attached to the
    returned module under their original names.

    Args:
        module (nn.Module): The module containing `SyncBatchNorm` layers.

    Returns:
        module_output: The converted module with `BatchNormXd` layers.
    """
    sync_bn_types = [torch.nn.modules.batchnorm.SyncBatchNorm]
    if hasattr(mmcv, 'ops'):
        sync_bn_types.append(mmcv.ops.SyncBatchNorm)

    converted = module
    if isinstance(module, tuple(sync_bn_types)):
        converted = _BatchNormXd(module.num_features, module.eps,
                                 module.momentum, module.affine,
                                 module.track_running_stats)
        if module.affine:
            # no_grad() may not be needed here; it is kept to stay
            # consistent with `convert_sync_batchnorm()`.
            with torch.no_grad():
                converted.weight = module.weight
                converted.bias = module.bias
        # Carry over the running statistics and the train/eval flag.
        for attr in ('running_mean', 'running_var', 'num_batches_tracked',
                     'training'):
            setattr(converted, attr, getattr(module, attr))
        # qconfig exists in quantized models
        if hasattr(module, 'qconfig'):
            converted.qconfig = module.qconfig

    for child_name, child in module.named_children():
        converted.add_module(child_name, revert_sync_batchnorm(child))
    return converted


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/cnn/utils/weight_init.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import copy
import math
import warnings

import numpy as np
import torch
import torch.nn as nn
from torch import Tensor
from mmcv.utils import Registry, build_from_cfg, print_log, get_logger

# Registry of the initializer classes declared in this file. Each class is
# added through the ``@INITIALIZERS.register_module(name=...)`` decorator and
# is built from a config dict with ``build_from_cfg(cfg, INITIALIZERS)``.
INITIALIZERS = Registry('initializer')


def update_init_info(module, init_info):
    """Record ``init_info`` for every parameter whose values have changed.

    A change is detected by comparing the current mean of the parameter with
    the ``tmp_mean_value`` stored for it in ``module._params_init_info``.
    For a changed parameter both ``init_info`` and ``tmp_mean_value`` are
    updated.

    Args:
        module (obj:`nn.Module`): The module of PyTorch with a user-defined
            attribute `_params_init_info` which records the initialization
            information.
        init_info (str): The string that describes the initialization.
    """
    assert hasattr(
        module,
        '_params_init_info'), f'Can not find `_params_init_info` in {module}'

    for name, param in module.named_parameters():

        assert param in module._params_init_info, (
            f'Find a new :obj:`Parameter` '
            f'named `{name}` during executing the '
            f'`init_weights` of '
            f'`{module.__class__.__name__}`. '
            f'Please do not add or '
            f'replace parameters during executing '
            f'the `init_weights`. ')

        record = module._params_init_info[param]
        current_mean = param.data.mean()
        # A different mean means `init_weights` touched this parameter.
        if record['tmp_mean_value'] != current_mean:
            record['init_info'] = init_info
            record['tmp_mean_value'] = current_mean


def constant_init(module, val, bias=0):
    """Fill the weight of ``module`` with ``val`` and its bias with ``bias``.

    An attribute that is missing or ``None`` is skipped.
    """
    for attr_name, fill_value in (('weight', val), ('bias', bias)):
        tensor = getattr(module, attr_name, None)
        if tensor is not None:
            nn.init.constant_(tensor, fill_value)


def xavier_init(module, gain=1, bias=0, distribution='normal'):
    """Apply Xavier initialization to the weight and a constant to the bias.

    ``distribution`` must be ``'uniform'`` or ``'normal'``. An attribute that
    is missing or ``None`` is skipped.
    """
    assert distribution in ['uniform', 'normal']
    weight = getattr(module, 'weight', None)
    if weight is not None:
        init_fn = (
            nn.init.xavier_uniform_
            if distribution == 'uniform' else nn.init.xavier_normal_)
        init_fn(weight, gain=gain)
    bias_param = getattr(module, 'bias', None)
    if bias_param is not None:
        nn.init.constant_(bias_param, bias)


def normal_init(module, mean=0, std=1, bias=0):
    """Draw the weight from a normal distribution and set a constant bias.

    An attribute that is missing or ``None`` is skipped.
    """
    weight = getattr(module, 'weight', None)
    if weight is not None:
        nn.init.normal_(weight, mean, std)
    bias_param = getattr(module, 'bias', None)
    if bias_param is not None:
        nn.init.constant_(bias_param, bias)


def trunc_normal_init(module: nn.Module,
                      mean: float = 0,
                      std: float = 1,
                      a: float = -2,
                      b: float = 2,
                      bias: float = 0) -> None:
    """Initialize the weight with ``trunc_normal_`` and set a constant bias.

    ``mean``, ``std``, ``a`` and ``b`` are forwarded unchanged to
    ``trunc_normal_``. An attribute that is missing or ``None`` is skipped.
    """
    weight = getattr(module, 'weight', None)
    if weight is not None:
        trunc_normal_(weight, mean, std, a, b)  # type: ignore
    bias_param = getattr(module, 'bias', None)
    if bias_param is not None:
        nn.init.constant_(bias_param, bias)  # type: ignore


def uniform_init(module, a=0, b=1, bias=0):
    """Draw the weight from a uniform distribution and set a constant bias.

    ``a`` and ``b`` are forwarded to ``nn.init.uniform_``. An attribute that
    is missing or ``None`` is skipped.
    """
    weight = getattr(module, 'weight', None)
    if weight is not None:
        nn.init.uniform_(weight, a, b)
    bias_param = getattr(module, 'bias', None)
    if bias_param is not None:
        nn.init.constant_(bias_param, bias)


def kaiming_init(module,
                 a=0,
                 mode='fan_out',
                 nonlinearity='relu',
                 bias=0,
                 distribution='normal'):
    """Apply Kaiming initialization to the weight and a constant to the bias.

    ``distribution`` must be ``'uniform'`` or ``'normal'``; ``a``, ``mode``
    and ``nonlinearity`` are forwarded to the matching ``nn.init`` function.
    An attribute that is missing or ``None`` is skipped.
    """
    assert distribution in ['uniform', 'normal']
    weight = getattr(module, 'weight', None)
    if weight is not None:
        init_fn = (
            nn.init.kaiming_uniform_
            if distribution == 'uniform' else nn.init.kaiming_normal_)
        init_fn(weight, a=a, mode=mode, nonlinearity=nonlinearity)
    bias_param = getattr(module, 'bias', None)
    if bias_param is not None:
        nn.init.constant_(bias_param, bias)


def caffe2_xavier_init(module, bias=0):
    """Initialize ``module`` the way Caffe2's ``XavierFill`` does.

    This delegates to ``kaiming_init`` with a uniform distribution,
    ``mode='fan_in'``, ``nonlinearity='leaky_relu'`` and ``a=1``.

    Args:
        module: The module to initialize.
        bias: Constant used to fill the bias. Defaults to 0.
    """
    # `XavierFill` in Caffe2 corresponds to `kaiming_uniform_` in PyTorch
    # Acknowledgment to FAIR's internal code
    kaiming_init(
        module,
        a=1,
        mode='fan_in',
        nonlinearity='leaky_relu',
        bias=bias,
        distribution='uniform')


def bias_init_with_prob(prior_prob):
    """Compute a conv/fc bias value from a given probability value.

    Returns:
        float: ``-log((1 - prior_prob) / prior_prob)``.
    """
    ratio = (1 - prior_prob) / prior_prob
    return float(-np.log(ratio))


def _get_bases_name(m):
    return [b.__name__ for b in m.__class__.__bases__]


class BaseInit(object):
    """Common base of the initializers: validates and stores shared options.

    Args:
        bias (int | float): the value to fill the bias. Defaults to 0.
        bias_prob (float, optional): when given, the bias value is computed
            with ``bias_init_with_prob(bias_prob)`` and ``bias`` is ignored.
            Defaults to None.
        layer (str | list[str], optional): class names of the layers to
            initialize. A single string is stored as a one-element list and
            ``None`` as an empty list. Defaults to None.

    Raises:
        TypeError: if ``bias`` is not a number, ``bias_prob`` is not a float,
            or ``layer`` is neither a str nor a list.
    """

    def __init__(self, *, bias=0, bias_prob=None, layer=None):
        # Switched on by `_initialize` in override mode, where the whole
        # module is initialized regardless of `layer`.
        self.wholemodule = False
        if not isinstance(bias, (int, float)):
            raise TypeError(f'bias must be a number, but got a {type(bias)}')

        if bias_prob is not None:
            if not isinstance(bias_prob, float):
                # Adjacent literals are used instead of a backslash
                # continuation so that no indentation leaks into the message.
                raise TypeError('bias_prob type must be float, '
                                f'but got {type(bias_prob)}')

        if layer is not None:
            if not isinstance(layer, (str, list)):
                raise TypeError('layer must be a str or a list of str, '
                                f'but got a {type(layer)}')
        else:
            layer = []

        if bias_prob is not None:
            self.bias = bias_init_with_prob(bias_prob)
        else:
            self.bias = bias
        self.layer = [layer] if isinstance(layer, str) else layer

    def _get_init_info(self):
        """Return a short description of this initializer."""
        info = f'{self.__class__.__name__}, bias={self.bias}'
        return info


@INITIALIZERS.register_module(name='Constant')
class ConstantInit(BaseInit):
    """Fill module parameters with constant values.

    Args:
        val (int | float): the value to fill the weights in the module with
        bias (int | float): the value to fill the bias. Defaults to 0.
        bias_prob (float, optional): the probability for bias initialization.
            Defaults to None.
        layer (str | list[str], optional): the layer will be initialized.
            Defaults to None.
    """

    def __init__(self, val, **kwargs):
        super().__init__(**kwargs)
        self.val = val

    def __call__(self, module):

        def _fill(m):
            # Outside override mode only modules whose class name, or one of
            # its direct base class names, is listed in `layer` are touched.
            if not self.wholemodule:
                names = {m.__class__.__name__, *_get_bases_name(m)}
                if not names.intersection(self.layer):
                    return
            constant_init(m, self.val, self.bias)

        module.apply(_fill)
        if hasattr(module, '_params_init_info'):
            update_init_info(module, init_info=self._get_init_info())

    def _get_init_info(self):
        return f'{self.__class__.__name__}: val={self.val}, bias={self.bias}'


@INITIALIZERS.register_module(name='Xavier')
class XavierInit(BaseInit):
    r"""Initialize module parameters following the method of `Understanding
    the difficulty of training deep feedforward neural networks - Glorot, X.
    & Bengio, Y. (2010).
    <http://proceedings.mlr.press/v9/glorot10a/glorot10a.pdf>`_

    Args:
        gain (int | float): an optional scaling factor. Defaults to 1.
        bias (int | float): the value to fill the bias. Defaults to 0.
        bias_prob (float, optional): the probability for bias initialization.
            Defaults to None.
        distribution (str): distribution either be ``'normal'``
            or ``'uniform'``. Defaults to ``'normal'``.
        layer (str | list[str], optional): the layer will be initialized.
            Defaults to None.
    """

    def __init__(self, gain=1, distribution='normal', **kwargs):
        super().__init__(**kwargs)
        self.gain = gain
        self.distribution = distribution

    def __call__(self, module):

        def _fill(m):
            # Outside override mode only modules whose class name, or one of
            # its direct base class names, is listed in `layer` are touched.
            if not self.wholemodule:
                names = {m.__class__.__name__, *_get_bases_name(m)}
                if not names.intersection(self.layer):
                    return
            xavier_init(m, self.gain, self.bias, self.distribution)

        module.apply(_fill)
        if hasattr(module, '_params_init_info'):
            update_init_info(module, init_info=self._get_init_info())

    def _get_init_info(self):
        return (f'{self.__class__.__name__}: gain={self.gain}, '
                f'distribution={self.distribution}, bias={self.bias}')


@INITIALIZERS.register_module(name='Normal')
class NormalInit(BaseInit):
    r"""Initialize module parameters with values drawn from the normal
    distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)`.

    Args:
        mean (int | float):the mean of the normal distribution. Defaults to 0.
        std (int | float): the standard deviation of the normal distribution.
            Defaults to 1.
        bias (int | float): the value to fill the bias. Defaults to 0.
        bias_prob (float, optional): the probability for bias initialization.
            Defaults to None.
        layer (str | list[str], optional): the layer will be initialized.
            Defaults to None.

    """

    def __init__(self, mean=0, std=1, **kwargs):
        super().__init__(**kwargs)
        self.mean = mean
        self.std = std

    def __call__(self, module):

        def _fill(m):
            # Outside override mode only modules whose class name, or one of
            # its direct base class names, is listed in `layer` are touched.
            if not self.wholemodule:
                names = {m.__class__.__name__, *_get_bases_name(m)}
                if not names.intersection(self.layer):
                    return
            normal_init(m, self.mean, self.std, self.bias)

        module.apply(_fill)
        if hasattr(module, '_params_init_info'):
            update_init_info(module, init_info=self._get_init_info())

    def _get_init_info(self):
        return (f'{self.__class__.__name__}: mean={self.mean},'
                f' std={self.std}, bias={self.bias}')


@INITIALIZERS.register_module(name='TruncNormal')
class TruncNormalInit(BaseInit):
    r"""Initialize module parameters with values drawn from the normal
    distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)`, using
    :math:`[a, b]` as the cutoff interval passed to ``trunc_normal_init``.

    Args:
        mean (float): the mean of the normal distribution. Defaults to 0.
        std (float):  the standard deviation of the normal distribution.
            Defaults to 1.
        a (float): The minimum cutoff value.
        b ( float): The maximum cutoff value.
        bias (float): the value to fill the bias. Defaults to 0.
        bias_prob (float, optional): the probability for bias initialization.
            Defaults to None.
        layer (str | list[str], optional): the layer will be initialized.
            Defaults to None.

    """

    def __init__(self,
                 mean: float = 0,
                 std: float = 1,
                 a: float = -2,
                 b: float = 2,
                 **kwargs) -> None:
        super().__init__(**kwargs)
        self.mean = mean
        self.std = std
        self.a = a
        self.b = b

    def __call__(self, module: nn.Module) -> None:

        def _fill(m):
            # Outside override mode only modules whose class name, or one of
            # its direct base class names, is listed in `layer` are touched.
            if not self.wholemodule:
                names = {m.__class__.__name__, *_get_bases_name(m)}
                if not names.intersection(self.layer):
                    return
            trunc_normal_init(m, self.mean, self.std, self.a, self.b,
                              self.bias)

        module.apply(_fill)
        if hasattr(module, '_params_init_info'):
            update_init_info(module, init_info=self._get_init_info())

    def _get_init_info(self):
        return (f'{self.__class__.__name__}: a={self.a}, b={self.b},'
                f' mean={self.mean}, std={self.std}, bias={self.bias}')


@INITIALIZERS.register_module(name='Uniform')
class UniformInit(BaseInit):
    r"""Initialize module parameters with values drawn from the uniform
    distribution :math:`\mathcal{U}(a, b)`.

    Args:
        a (int | float): the lower bound of the uniform distribution.
            Defaults to 0.
        b (int | float): the upper bound of the uniform distribution.
            Defaults to 1.
        bias (int | float): the value to fill the bias. Defaults to 0.
        bias_prob (float, optional): the probability for bias initialization.
            Defaults to None.
        layer (str | list[str], optional): the layer will be initialized.
            Defaults to None.
    """

    def __init__(self, a=0, b=1, **kwargs):
        super().__init__(**kwargs)
        self.a = a
        self.b = b

    def __call__(self, module):

        def _fill(m):
            # Outside override mode only modules whose class name, or one of
            # its direct base class names, is listed in `layer` are touched.
            if not self.wholemodule:
                names = {m.__class__.__name__, *_get_bases_name(m)}
                if not names.intersection(self.layer):
                    return
            uniform_init(m, self.a, self.b, self.bias)

        module.apply(_fill)
        if hasattr(module, '_params_init_info'):
            update_init_info(module, init_info=self._get_init_info())

    def _get_init_info(self):
        return (f'{self.__class__.__name__}: a={self.a},'
                f' b={self.b}, bias={self.bias}')


@INITIALIZERS.register_module(name='Kaiming')
class KaimingInit(BaseInit):
    r"""Initialize module parameters following the method of `Delving deep
    into rectifiers: Surpassing human-level performance on ImageNet
    classification - He, K. et al. (2015).
    <https://www.cv-foundation.org/openaccess/content_iccv_2015/
    papers/He_Delving_Deep_into_ICCV_2015_paper.pdf>`_

    Args:
        a (int | float): the negative slope of the rectifier used after this
            layer (only used with ``'leaky_relu'``). Defaults to 0.
        mode (str):  either ``'fan_in'`` or ``'fan_out'``. Choosing
            ``'fan_in'`` preserves the magnitude of the variance of the weights
            in the forward pass. Choosing ``'fan_out'`` preserves the
            magnitudes in the backwards pass. Defaults to ``'fan_out'``.
        nonlinearity (str): the non-linear function (`nn.functional` name),
            recommended to use only with ``'relu'`` or ``'leaky_relu'`` .
            Defaults to 'relu'.
        bias (int | float): the value to fill the bias. Defaults to 0.
        bias_prob (float, optional): the probability for bias initialization.
            Defaults to None.
        distribution (str): distribution either be ``'normal'`` or
            ``'uniform'``. Defaults to ``'normal'``.
        layer (str | list[str], optional): the layer will be initialized.
            Defaults to None.
    """

    def __init__(self,
                 a=0,
                 mode='fan_out',
                 nonlinearity='relu',
                 distribution='normal',
                 **kwargs):
        super().__init__(**kwargs)
        self.a = a
        self.mode = mode
        self.nonlinearity = nonlinearity
        self.distribution = distribution

    def __call__(self, module):

        def _fill(m):
            # Outside override mode only modules whose class name, or one of
            # its direct base class names, is listed in `layer` are touched.
            if not self.wholemodule:
                names = {m.__class__.__name__, *_get_bases_name(m)}
                if not names.intersection(self.layer):
                    return
            kaiming_init(m, self.a, self.mode, self.nonlinearity, self.bias,
                         self.distribution)

        module.apply(_fill)
        if hasattr(module, '_params_init_info'):
            update_init_info(module, init_info=self._get_init_info())

    def _get_init_info(self):
        return (f'{self.__class__.__name__}: a={self.a}, mode={self.mode}, '
                f'nonlinearity={self.nonlinearity}, '
                f'distribution ={self.distribution}, bias={self.bias}')


@INITIALIZERS.register_module(name='Caffe2Xavier')
class Caffe2XavierInit(KaimingInit):
    """Kaiming initializer preset that mirrors Caffe2's ``XavierFill``.

    The preset fixes ``a=1``, ``mode='fan_in'``,
    ``nonlinearity='leaky_relu'`` and ``distribution='uniform'``; the
    remaining keyword arguments are forwarded to ``KaimingInit``.
    """

    # `XavierFill` in Caffe2 corresponds to `kaiming_uniform_` in PyTorch
    # Acknowledgment to FAIR's internal code
    def __init__(self, **kwargs):
        super().__init__(
            a=1,
            mode='fan_in',
            nonlinearity='leaky_relu',
            distribution='uniform',
            **kwargs)

    def __call__(self, module):
        # Behaves exactly like KaimingInit.__call__.
        super().__call__(module)


@INITIALIZERS.register_module(name='Pretrained')
class PretrainedInit(object):
    """Initialize a module by loading a pretrained checkpoint.

    Args:
        checkpoint (str): the checkpoint file of the pretrained model to
            load.
        prefix (str, optional): the prefix of a sub-module in the pretrained
            model, used to load only a part of the pretrained model. For
            example, to load only the backbone of a detector model, set
            ``prefix='backbone.'``. Defaults to None.
        map_location (str): map tensors into proper locations.
    """

    def __init__(self, checkpoint, prefix=None, map_location=None):
        self.checkpoint = checkpoint
        self.prefix = prefix
        self.map_location = map_location

    def __call__(self, module):
        # Imported here rather than at module level.
        from mmcv.runner import (_load_checkpoint_with_prefix, load_checkpoint,
                                 load_state_dict)
        logger = get_logger()
        if self.prefix is not None:
            # Load only the part of the checkpoint selected by the prefix.
            print_log(
                f'load {self.prefix} in model from: {self.checkpoint}',
                logger=logger)
            state_dict = _load_checkpoint_with_prefix(
                self.prefix, self.checkpoint, map_location=self.map_location)
            load_state_dict(module, state_dict, strict=False, logger=logger)
        else:
            print_log(f'load model from: {self.checkpoint}', logger=logger)
            load_checkpoint(
                module,
                self.checkpoint,
                map_location=self.map_location,
                strict=False,
                logger=logger)

        if hasattr(module, '_params_init_info'):
            update_init_info(module, init_info=self._get_init_info())

    def _get_init_info(self):
        return f'{self.__class__.__name__}: load from {self.checkpoint}'


def _initialize(module, cfg, wholemodule=False):
    """Build the initializer described by ``cfg`` and run it on ``module``.

    Args:
        module: The object passed to the built initializer.
        cfg (dict): Config consumed by ``build_from_cfg`` against the
            ``INITIALIZERS`` registry.
        wholemodule (bool): Stored on the initializer as ``wholemodule``.
            It is set for override mode, where the override carries no
            ``layer`` key and the initializer provides values for the whole
            module named in the override. Defaults to False.
    """
    initializer = build_from_cfg(cfg, INITIALIZERS)
    initializer.wholemodule = wholemodule
    initializer(module)


def _initialize_override(module, override, cfg):
    if not isinstance(override, (dict, list)):
        raise TypeError(f'override must be a dict or a list of dict, \
                but got {type(override)}')

    override = [override] if isinstance(override, dict) else override

    for override_ in override:

        cp_override = copy.deepcopy(override_)
        name = cp_override.pop('name', None)
        if name is None:
            raise ValueError('`override` must contain the key "name",'
                             f'but got {cp_override}')
        # if override only has name key, it means use args in init_cfg
        if not cp_override:
            cp_override.update(cfg)
        # if override has name key and other args except type key, it will
        # raise error
        elif 'type' not in cp_override.keys():
            raise ValueError(
                f'`override` need "type" key, but got {cp_override}')

        if hasattr(module, name):
            _initialize(getattr(module, name), cp_override, wholemodule=True)
        else:
            raise RuntimeError(f'module did not have attribute {name}, '
                               f'but init_cfg is {cp_override}.')


def initialize(module, init_cfg):
    r"""Initialize a module.

    Each config is deep-copied, its optional ``override`` entry is split off,
    and the remaining config is applied to ``module``. When an ``override``
    is present, the ``layer`` key is dropped from the copy and the override
    is applied to the named attributes of ``module``.

    Args:
        module (``torch.nn.Module``): the module will be initialized.
        init_cfg (dict | list[dict]): initialization configuration dict to
            define initializer. OpenMMLab has implemented 6 initializers
            including ``Constant``, ``Xavier``, ``Normal``, ``Uniform``,
            ``Kaiming``, and ``Pretrained``.

    Raises:
        TypeError: If ``init_cfg`` is neither a dict nor a list.

    Example:
        >>> module = nn.Linear(2, 3, bias=True)
        >>> init_cfg = dict(type='Constant', layer='Linear', val=1, bias=2)
        >>> initialize(module, init_cfg)

        >>> module = nn.Sequential(nn.Conv1d(3, 1, 3), nn.Linear(1, 2))
        >>> # define key ``'layer'`` for initializing layer with different
        >>> # configuration
        >>> init_cfg = [dict(type='Constant', layer='Conv1d', val=1),
        ...             dict(type='Constant', layer='Linear', val=2)]
        >>> initialize(module, init_cfg)

        >>> # define key ``'override'`` to initialize some specific part in
        >>> # module
        >>> class FooNet(nn.Module):
        ...     def __init__(self):
        ...         super().__init__()
        ...         self.feat = nn.Conv2d(3, 16, 3)
        ...         self.reg = nn.Conv2d(16, 10, 3)
        ...         self.cls = nn.Conv2d(16, 5, 3)
        >>> model = FooNet()
        >>> init_cfg = dict(type='Constant', val=1, bias=2, layer='Conv2d',
        ...     override=dict(type='Constant', name='reg', val=3, bias=4))
        >>> initialize(model, init_cfg)

        >>> model = ResNet(depth=50)
        >>> # Initialize weights with the pretrained model.
        >>> init_cfg = dict(type='Pretrained',
        ...                 checkpoint='torchvision://resnet50')
        >>> initialize(model, init_cfg)

        >>> # Initialize weights of a sub-module with the specific part of
        >>> # a pretrained model by using "prefix".
        >>> url = 'http://download.openmmlab.com/mmdetection/v2.0/retinanet/'\
        ...     'retinanet_r50_fpn_1x_coco/'\
        ...     'retinanet_r50_fpn_1x_coco_20200130-c2398f9e.pth'
        >>> init_cfg = dict(type='Pretrained',
        ...                 checkpoint=url, prefix='backbone.')
    """
    if not isinstance(init_cfg, (dict, list)):
        # Adjacent literals instead of a backslash continuation inside the
        # string, which would embed the source indentation in the message.
        raise TypeError('init_cfg must be a dict or a list of dict, '
                        f'but got {type(init_cfg)}')

    if isinstance(init_cfg, dict):
        init_cfg = [init_cfg]

    for cfg in init_cfg:
        # should deeply copy the original config because cfg may be used by
        # other modules, e.g., one init_cfg shared by multiple bottleneck
        # blocks, the expected cfg will be changed after pop and will change
        # the initialization behavior of other modules
        cp_cfg = copy.deepcopy(cfg)
        override = cp_cfg.pop('override', None)
        _initialize(module, cp_cfg)

        # Without an override every attribute of the module keeps the same
        # initialization and nothing more has to be done.
        if override is not None:
            cp_cfg.pop('layer', None)
            _initialize_override(module, override, cp_cfg)


def _no_grad_trunc_normal_(tensor: Tensor, mean: float, std: float, a: float,
                           b: float) -> Tensor:
    # Method based on
    # https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf
    # Modified from
    # https://github.com/pytorch/pytorch/blob/master/torch/nn/init.py
    def norm_cdf(x):
        # Computes standard normal cumulative distribution function
        return (1. + math.erf(x / math.sqrt(2.))) / 2.

    if (mean < a - 2 * std) or (mean > b + 2 * std):
        warnings.warn(
            'mean is more than 2 std from [a, b] in nn.init.trunc_normal_. '
            'The distribution of values may be incorrect.',
            stacklevel=2)

    with torch.no_grad():
        # Values are generated by using a truncated uniform distribution and
        # then using the inverse CDF for the normal distribution.
        # Get upper and lower cdf values
        lower = norm_cdf((a - mean) / std)
        upper = norm_cdf((b - mean) / std)

        # Uniformly fill tensor with values from [lower, upper], then translate
        # to [2lower-1, 2upper-1].
        tensor.uniform_(2 * lower - 1, 2 * upper - 1)

        # Use inverse cdf transform for normal distribution to get truncated
        # standard normal
        tensor.erfinv_()

        # Transform to proper mean, std
        tensor.mul_(std * math.sqrt(2.))
        tensor.add_(mean)

        # Clamp to ensure it's in the proper range
        tensor.clamp_(min=a, max=b)
        return tensor


def trunc_normal_(tensor: Tensor,
                  mean: float = 0.,
                  std: float = 1.,
                  a: float = -2.,
                  b: float = 2.) -> Tensor:
    r"""Fill ``tensor`` with samples of a truncated normal distribution.

    Values are effectively drawn from
    :math:`\mathcal{N}(\text{mean}, \text{std}^2)`, with values outside
    :math:`[a, b]` redrawn until they are within the bounds. The sampling
    method works best when :math:`a \leq \text{mean} \leq b`.

    Modified from
    https://github.com/pytorch/pytorch/blob/master/torch/nn/init.py

    Args:
        tensor (``torch.Tensor``): an n-dimensional `torch.Tensor`.
        mean (float): the mean of the normal distribution.
        std (float): the standard deviation of the normal distribution.
        a (float): the minimum cutoff value.
        b (float): the maximum cutoff value.

    Returns:
        ``torch.Tensor``: whatever ``_no_grad_trunc_normal_`` returns for
        the given arguments.
    """
    filled = _no_grad_trunc_normal_(tensor, mean, std, a, b)
    return filled


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/cnn/vgg.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import logging

import torch.nn as nn

from .utils import constant_init, kaiming_init, normal_init


def conv3x3(in_planes, out_planes, dilation=1):
    """Build a 3x3 ``nn.Conv2d`` whose padding equals its dilation.

    Args:
        in_planes (int): Number of input channels.
        out_planes (int): Number of output channels.
        dilation (int): Dilation of the kernel, also used as the padding.
            Defaults to 1.

    Returns:
        nn.Conv2d: The convolution layer.
    """
    conv_kwargs = dict(kernel_size=3, padding=dilation, dilation=dilation)
    return nn.Conv2d(in_planes, out_planes, **conv_kwargs)


def make_vgg_layer(inplanes,
                   planes,
                   num_blocks,
                   dilation=1,
                   with_bn=False,
                   ceil_mode=False):
    """Assemble the modules of one VGG stage.

    A stage is ``num_blocks`` repetitions of ``conv3x3`` (optionally followed
    by ``nn.BatchNorm2d``) and an in-place ``nn.ReLU``, closed by a 2x2
    max pooling with stride 2.

    Args:
        inplanes (int): Input channels of the first convolution.
        planes (int): Output channels of every convolution in the stage.
        num_blocks (int): Number of conv blocks in the stage.
        dilation (int): Dilation passed to ``conv3x3``. Defaults to 1.
        with_bn (bool): Whether to add batch norm after each convolution.
            Defaults to False.
        ceil_mode (bool): ``ceil_mode`` of the closing ``nn.MaxPool2d``.
            Defaults to False.

    Returns:
        list[nn.Module]: The stage's modules in execution order.
    """
    stage = []
    in_channels = inplanes
    for _ in range(num_blocks):
        block = [conv3x3(in_channels, planes, dilation)]
        if with_bn:
            block.append(nn.BatchNorm2d(planes))
        block.append(nn.ReLU(inplace=True))
        stage.extend(block)
        # every conv after the first one maps planes -> planes
        in_channels = planes

    pool = nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=ceil_mode)
    stage.append(pool)
    return stage


class VGG(nn.Module):
    """VGG backbone.

    Args:
        depth (int): Depth of vgg, from {11, 13, 16, 19}.
        with_bn (bool): Use BatchNorm or not.
        num_classes (int): number of classes for classification. A
            classifier head is only built when this is positive.
        num_stages (int): VGG stages, normally 5.
        dilations (Sequence[int]): Dilation of each stage.
        out_indices (Sequence[int]): Output from which stages.
        frozen_stages (int): Stages to be frozen (all param fixed). -1 means
            not freezing any parameters.
        bn_eval (bool): Whether to set BN layers as eval mode, namely, freeze
            running stats (mean and var).
        bn_frozen (bool): Whether to freeze weight and bias of BN layers.
        ceil_mode (bool): ``ceil_mode`` forwarded to ``make_vgg_layer`` for
            the max pooling that closes each stage.
        with_last_pool (bool): If False, the last module of the feature
            extractor (the final stage's max pooling) is removed.
    """

    # depth -> number of conv blocks in each of the five stages
    arch_settings = {
        11: (1, 1, 2, 2, 2),
        13: (2, 2, 2, 2, 2),
        16: (2, 2, 3, 3, 3),
        19: (2, 2, 4, 4, 4)
    }

    def __init__(self,
                 depth,
                 with_bn=False,
                 num_classes=-1,
                 num_stages=5,
                 dilations=(1, 1, 1, 1, 1),
                 out_indices=(0, 1, 2, 3, 4),
                 frozen_stages=-1,
                 bn_eval=True,
                 bn_frozen=False,
                 ceil_mode=False,
                 with_last_pool=True):
        super(VGG, self).__init__()
        if depth not in self.arch_settings:
            raise KeyError(f'invalid depth {depth} for vgg')
        assert num_stages >= 1 and num_stages <= 5
        stage_blocks = self.arch_settings[depth]
        self.stage_blocks = stage_blocks[:num_stages]
        assert len(dilations) == num_stages
        assert max(out_indices) <= num_stages

        self.num_classes = num_classes
        self.out_indices = out_indices
        self.frozen_stages = frozen_stages
        self.bn_eval = bn_eval
        self.bn_frozen = bn_frozen

        self.inplanes = 3
        start_idx = 0
        vgg_layers = []
        # [start, end) index range of every stage inside the flat Sequential
        self.range_sub_modules = []
        for i, num_blocks in enumerate(self.stage_blocks):
            # each block is conv (+ bn) + relu; one pooling closes the stage
            num_modules = num_blocks * (2 + with_bn) + 1
            end_idx = start_idx + num_modules
            dilation = dilations[i]
            planes = 64 * 2**i if i < 4 else 512
            vgg_layer = make_vgg_layer(
                self.inplanes,
                planes,
                num_blocks,
                dilation=dilation,
                with_bn=with_bn,
                ceil_mode=ceil_mode)
            vgg_layers.extend(vgg_layer)
            self.inplanes = planes
            self.range_sub_modules.append([start_idx, end_idx])
            start_idx = end_idx
        if not with_last_pool:
            # drop the trailing pooling and shrink the last stage's range
            vgg_layers.pop(-1)
            self.range_sub_modules[-1][1] -= 1
        self.module_name = 'features'
        self.add_module(self.module_name, nn.Sequential(*vgg_layers))

        if self.num_classes > 0:
            self.classifier = nn.Sequential(
                nn.Linear(512 * 7 * 7, 4096),
                nn.ReLU(True),
                nn.Dropout(),
                nn.Linear(4096, 4096),
                nn.ReLU(True),
                nn.Dropout(),
                nn.Linear(4096, num_classes),
            )

    def init_weights(self, pretrained=None):
        """Initialize the weights.

        Args:
            pretrained (str | None): Checkpoint to load with
                ``strict=False``. When None, Conv2d / BatchNorm2d / Linear
                modules are initialized with ``kaiming_init`` /
                ``constant_init(m, 1)`` / ``normal_init(m, std=0.01)``.

        Raises:
            TypeError: If ``pretrained`` is neither a str nor None.
        """
        if isinstance(pretrained, str):
            logger = logging.getLogger()
            from ..runner import load_checkpoint
            load_checkpoint(self, pretrained, strict=False, logger=logger)
        elif pretrained is None:
            for m in self.modules():
                if isinstance(m, nn.Conv2d):
                    kaiming_init(m)
                elif isinstance(m, nn.BatchNorm2d):
                    constant_init(m, 1)
                elif isinstance(m, nn.Linear):
                    normal_init(m, std=0.01)
        else:
            raise TypeError('pretrained must be a str or None')

    def forward(self, x):
        """Run the stages and collect the outputs listed in ``out_indices``.

        When a classifier exists its output is appended last. A single
        output is returned as is, several outputs as a tuple.
        """
        outs = []
        vgg_layers = getattr(self, self.module_name)
        for i in range(len(self.stage_blocks)):
            for j in range(*self.range_sub_modules[i]):
                vgg_layer = vgg_layers[j]
                x = vgg_layer(x)
            if i in self.out_indices:
                outs.append(x)
        if self.num_classes > 0:
            x = x.view(x.size(0), -1)
            x = self.classifier(x)
            outs.append(x)
        if len(outs) == 1:
            return outs[0]
        else:
            return tuple(outs)

    def train(self, mode=True):
        """Switch between training and evaluation mode.

        BN layers stay in eval mode when ``bn_eval`` is set (and have their
        parameters frozen when ``bn_frozen`` is set as well). In training
        mode the first ``frozen_stages`` stages are put in eval mode and
        their parameters stop requiring gradients.

        Args:
            mode (bool): True for training mode, False for evaluation mode.

        Returns:
            VGG: ``self``, matching ``nn.Module.train`` so that
            ``model.train()`` and ``model.eval()`` can be chained.
        """
        super(VGG, self).train(mode)
        if self.bn_eval:
            for m in self.modules():
                if isinstance(m, nn.BatchNorm2d):
                    m.eval()
                    if self.bn_frozen:
                        for params in m.parameters():
                            params.requires_grad = False
        vgg_layers = getattr(self, self.module_name)
        if mode and self.frozen_stages >= 0:
            for i in range(self.frozen_stages):
                for j in range(*self.range_sub_modules[i]):
                    mod = vgg_layers[j]
                    mod.eval()
                    for param in mod.parameters():
                        param.requires_grad = False
        # nn.Module.eval() returns the result of train(False); without this
        # return both train() and eval() would hand back None.
        return self


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/engine/__init__.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
from .test import (collect_results_cpu, collect_results_gpu, multi_gpu_test,
                   single_gpu_test)

__all__ = [
    'collect_results_cpu', 'collect_results_gpu', 'multi_gpu_test',
    'single_gpu_test'
]


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/engine/test.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
import pickle
import shutil
import tempfile
import time

import torch
import torch.distributed as dist

import mmcv
from mmcv.runner import get_dist_info


def single_gpu_test(model, data_loader):
    """Run inference over ``data_loader`` on a single gpu.

    The model is switched to eval mode, every batch is forwarded under
    ``torch.no_grad()`` with ``return_loss=False`` and a progress bar sized
    by the dataset length is advanced once per returned result.

    Args:
        model (nn.Module): Model to be tested.
        data_loader (nn.Dataloader): Pytorch data loader.

    Returns:
        list: The prediction results.
    """
    model.eval()
    progress = mmcv.ProgressBar(len(data_loader.dataset))
    predictions = []
    for batch in data_loader:
        with torch.no_grad():
            batch_out = model(return_loss=False, **batch)
        predictions.extend(batch_out)

        # Assume result has the same length of batch_size
        # refer to https://github.com/open-mmlab/mmcv/issues/985
        for _ in range(len(batch_out)):
            progress.update()
    return predictions


def multi_gpu_test(model, data_loader, tmpdir=None, gpu_collect=False):
    """Run inference with multiple gpus and gather the results.

    Two collection modes exist. With ``gpu_collect=True`` the results are
    gathered through ``collect_results_gpu``; otherwise they go through
    ``collect_results_cpu`` using ``tmpdir``.

    Args:
        model (nn.Module): Model to be tested.
        data_loader (nn.Dataloader): Pytorch data loader.
        tmpdir (str): Path of directory to save the temporary results from
            different gpus under cpu mode.
        gpu_collect (bool): Option to use either gpu or cpu to collect results.

    Returns:
        list: The prediction results.
    """
    model.eval()
    dataset = data_loader.dataset
    rank, world_size = get_dist_info()
    # only rank 0 owns a progress bar
    if rank == 0:
        prog_bar = mmcv.ProgressBar(len(dataset))
    time.sleep(2)  # This line can prevent deadlock problem in some cases.

    results = []
    for data in data_loader:
        with torch.no_grad():
            result = model(return_loss=False, **data)
        results.extend(result)

        if rank != 0:
            continue
        # Advance by the batch size summed over all ranks, but never past
        # the dataset length.
        remaining = len(dataset) - prog_bar.completed
        for _ in range(min(len(result) * world_size, remaining)):
            prog_bar.update()

    # collect results from all ranks
    if gpu_collect:
        return collect_results_gpu(results, len(dataset))
    return collect_results_cpu(results, len(dataset), tmpdir)


def collect_results_cpu(result_part, size, tmpdir=None):
    """Collect results under cpu mode.

    Every rank dumps its part into ``tmpdir``; after a barrier the rank 0
    worker loads all parts, interleaves them and returns the merged list.

    Args:
        result_part (list): Result list containing result parts
            to be collected.
        size (int): Size of the results, commonly equal to length of
            the results.
        tmpdir (str | None): temporal directory for collected results to
            store. If set to None, it will create a random temporal directory
            for it.

    Returns:
        list | None: The collected results on rank 0, None on other ranks.
    """
    rank, world_size = get_dist_info()

    if tmpdir is not None:
        mmcv.mkdir_or_exist(tmpdir)
    else:
        # No directory given: rank 0 creates one and broadcasts its name as
        # a fixed-size, whitespace-padded byte tensor.
        MAX_LEN = 512
        # 32 is whitespace
        dir_tensor = torch.full((MAX_LEN, ),
                                32,
                                dtype=torch.uint8,
                                device='cuda')
        if rank == 0:
            mmcv.mkdir_or_exist('.dist_test')
            created_dir = tempfile.mkdtemp(dir='.dist_test')
            encoded_dir = torch.tensor(
                bytearray(created_dir.encode()),
                dtype=torch.uint8,
                device='cuda')
            dir_tensor[:len(encoded_dir)] = encoded_dir
        dist.broadcast(dir_tensor, 0)
        tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip()

    # dump the part result to the dir
    mmcv.dump(result_part, osp.join(tmpdir, f'part_{rank}.pkl'))
    dist.barrier()

    # only rank 0 merges the parts
    if rank != 0:
        return None

    # load results of all parts from tmp dir
    loaded_parts = (
        mmcv.load(osp.join(tmpdir, f'part_{i}.pkl'))
        for i in range(world_size))
    # When data is severely insufficient, an empty part_result
    # on a certain gpu could makes the overall outputs empty.
    part_list = [part for part in loaded_parts if part]

    # sort the results: take one item from every part in turn
    ordered_results = [
        item for group in zip(*part_list) for item in group
    ]
    # the dataloader may pad some samples
    ordered_results = ordered_results[:size]
    # remove tmp dir
    shutil.rmtree(tmpdir)
    return ordered_results


def collect_results_gpu(result_part, size):
    """Collect results under gpu mode.

    On gpu mode, this function will encode results to gpu tensors and use gpu
    communication for results collection.

    Each rank pickles its ``result_part`` into a uint8 CUDA tensor. The
    tensor lengths are all-gathered first so that every rank can pad its
    payload to the common maximum length before the payloads themselves are
    all-gathered. Only rank 0 decodes and merges the parts.

    Args:
        result_part (list): Result list containing result parts
            to be collected.
        size (int): Size of the results, commonly equal to length of
            the results.

    Returns:
        list: The collected results. Only rank 0 reaches the ``return``
        statement; on every other rank the function falls through and
        therefore returns None.
    """
    rank, world_size = get_dist_info()
    # dump result part to tensor with pickle
    part_tensor = torch.tensor(
        bytearray(pickle.dumps(result_part)), dtype=torch.uint8, device='cuda')
    # gather all result part tensor shape
    shape_tensor = torch.tensor(part_tensor.shape, device='cuda')
    # pre-allocated receive buffers, one per rank, for the gathered shapes
    shape_list = [shape_tensor.clone() for _ in range(world_size)]
    dist.all_gather(shape_list, shape_tensor)
    # padding result part tensor to max length
    shape_max = torch.tensor(shape_list).max()
    part_send = torch.zeros(shape_max, dtype=torch.uint8, device='cuda')
    # copy the real payload to the front; the tail stays zero padding
    part_send[:shape_tensor[0]] = part_tensor
    part_recv_list = [
        part_tensor.new_zeros(shape_max) for _ in range(world_size)
    ]
    # gather all result part
    dist.all_gather(part_recv_list, part_send)

    if rank == 0:
        part_list = []
        for recv, shape in zip(part_recv_list, shape_list):
            # strip the padding using the sender's true length, then unpickle
            part_result = pickle.loads(recv[:shape[0]].cpu().numpy().tobytes())
            # When data is severely insufficient, an empty part_result
            # on a certain gpu could makes the overall outputs empty.
            if part_result:
                part_list.append(part_result)
        # sort the results
        ordered_results = []
        # take one item from every part in turn; zip stops at the shortest
        # part
        for res in zip(*part_list):
            ordered_results.extend(list(res))
        # the dataloader may pad some samples
        ordered_results = ordered_results[:size]
        return ordered_results


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/fileio/__init__.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
from .file_client import BaseStorageBackend, FileClient
from .handlers import BaseFileHandler, JsonHandler, PickleHandler, YamlHandler
from .io import dump, load, register_handler
from .parse import dict_from_file, list_from_file

__all__ = [
    'BaseStorageBackend', 'FileClient', 'load', 'dump', 'register_handler',
    'BaseFileHandler', 'JsonHandler', 'PickleHandler', 'YamlHandler',
    'list_from_file', 'dict_from_file'
]


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/fileio/file_client.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import inspect
import os
import os.path as osp
import re
import tempfile
import warnings
from abc import ABCMeta, abstractmethod
from contextlib import contextmanager
from pathlib import Path
from typing import Iterable, Iterator, Optional, Tuple, Union
from urllib.request import urlopen

import mmcv
from mmcv.utils.misc import has_method
from mmcv.utils.path import is_filepath


class BaseStorageBackend(metaclass=ABCMeta):
    """Abstract base of all storage backends.

    A concrete backend has to provide two apis: ``get()``, which reads a
    file as a byte stream, and ``get_text()``, which reads a file as text.
    """

    # Class-level flag telling whether the backend can create a symlink for
    # a file; exposed read-only through ``allow_symlink``.
    _allow_symlink = False

    @property
    def allow_symlink(self):
        """Value of the ``_allow_symlink`` flag."""
        return self._allow_symlink

    @property
    def name(self):
        """Class name of the backend instance."""
        return self.__class__.__name__

    @abstractmethod
    def get(self, filepath):
        """Read ``filepath`` as a byte stream (implemented by subclasses)."""

    @abstractmethod
    def get_text(self, filepath):
        """Read ``filepath`` as text (implemented by subclasses)."""


class CephBackend(BaseStorageBackend):
    """Ceph storage backend (for internal use).

    Args:
        path_mapping (dict|None): path mapping dict from local path to Petrel
            path. When ``path_mapping={'src': 'dst'}``, ``src`` in ``filepath``
            will be replaced by ``dst``. Default: None.

    .. warning::
        :class:`mmcv.fileio.file_client.CephBackend` will be deprecated,
        please use :class:`mmcv.fileio.file_client.PetrelBackend` instead.
    """

    def __init__(self, path_mapping=None):
        # ``ceph`` is an optional dependency, imported only on construction.
        try:
            import ceph
        except ImportError:
            raise ImportError('Please install ceph to enable CephBackend.')

        warnings.warn(
            'CephBackend will be deprecated, please use PetrelBackend instead',
            DeprecationWarning)
        self._client = ceph.S3Client()
        assert path_mapping is None or isinstance(path_mapping, dict)
        self.path_mapping = path_mapping

    def get(self, filepath):
        """Read ``filepath`` through the client and return a memoryview of
        the returned value, after applying ``path_mapping``."""
        remote_path = str(filepath)
        if self.path_mapping is not None:
            # replacements are applied one after another, in dict order
            for src, dst in self.path_mapping.items():
                remote_path = remote_path.replace(src, dst)
        return memoryview(self._client.Get(remote_path))

    def get_text(self, filepath, encoding=None):
        """Not supported by this backend."""
        raise NotImplementedError


class PetrelBackend(BaseStorageBackend):
    """Petrel storage backend (for internal use).

    PetrelBackend supports reading and writing data to multiple clusters.
    If the file path contains the cluster name, PetrelBackend will read data
    from specified cluster or write data to it. Otherwise, PetrelBackend will
    access the default cluster.

    Args:
        path_mapping (dict, optional): Path mapping dict from local path to
            Petrel path. When ``path_mapping={'src': 'dst'}``, ``src`` in
            ``filepath`` will be replaced by ``dst``. Default: None.
        enable_mc (bool, optional): Whether to enable memcached support.
            Default: True.

    Examples:
        >>> filepath1 = 's3://path/of/file'
        >>> filepath2 = 'cluster-name:s3://path/of/file'
        >>> client = PetrelBackend()
        >>> client.get(filepath1)  # get data from default cluster
        >>> client.get(filepath2)  # get data from 'cluster-name' cluster
    """

    def __init__(self,
                 path_mapping: Optional[dict] = None,
                 enable_mc: bool = True):
        try:
            from petrel_client import client
        except ImportError:
            raise ImportError('Please install petrel_client to enable '
                              'PetrelBackend.')

        self._client = client.Client(enable_mc=enable_mc)
        assert isinstance(path_mapping, dict) or path_mapping is None
        self.path_mapping = path_mapping

    def _map_path(self, filepath: Union[str, Path]) -> str:
        """Map ``filepath`` to a string path whose prefix will be replaced by
        :attr:`self.path_mapping`.

        Args:
            filepath (str): Path to be mapped.
        """
        filepath = str(filepath)
        if self.path_mapping is not None:
            for k, v in self.path_mapping.items():
                filepath = filepath.replace(k, v)
        return filepath

    def _format_path(self, filepath: str) -> str:
        """Convert a ``filepath`` to standard format of petrel oss.

        If the ``filepath`` is concatenated by ``os.path.join``, in a Windows
        environment, the ``filepath`` will be the format of
        's3://bucket_name\\image.jpg'. By invoking :meth:`_format_path`, the
        above ``filepath`` will be converted to 's3://bucket_name/image.jpg'.

        Args:
            filepath (str): Path to be formatted.
        """
        return re.sub(r'\\+', '/', filepath)

    def get(self, filepath: Union[str, Path]) -> memoryview:
        """Read data from a given ``filepath`` with 'rb' mode.

        Args:
            filepath (str or Path): Path to read data.

        Returns:
            memoryview: A memory view of expected bytes object to avoid
                copying. The memoryview object can be converted to bytes by
                ``value_buf.tobytes()``.
        """
        filepath = self._map_path(filepath)
        filepath = self._format_path(filepath)
        value = self._client.Get(filepath)
        value_buf = memoryview(value)
        return value_buf

    def get_text(self,
                 filepath: Union[str, Path],
                 encoding: str = 'utf-8') -> str:
        """Read data from a given ``filepath`` with 'r' mode.

        Args:
            filepath (str or Path): Path to read data.
            encoding (str): The encoding format used to open the ``filepath``.
                Default: 'utf-8'.

        Returns:
            str: Expected text reading from ``filepath``.
        """
        return str(self.get(filepath), encoding=encoding)

    def put(self, obj: bytes, filepath: Union[str, Path]) -> None:
        """Save data to a given ``filepath``.

        Args:
            obj (bytes): Data to be saved.
            filepath (str or Path): Path to write data.
        """
        filepath = self._map_path(filepath)
        filepath = self._format_path(filepath)
        self._client.put(filepath, obj)

    def put_text(self,
                 obj: str,
                 filepath: Union[str, Path],
                 encoding: str = 'utf-8') -> None:
        """Save data to a given ``filepath``.

        Args:
            obj (str): Data to be written.
            filepath (str or Path): Path to write data.
            encoding (str): The encoding format used to encode the ``obj``.
                Default: 'utf-8'.
        """
        self.put(bytes(obj, encoding=encoding), filepath)

    def remove(self, filepath: Union[str, Path]) -> None:
        """Remove a file.

        Args:
            filepath (str or Path): Path to be removed.

        Raises:
            NotImplementedError: If the client has no ``delete`` method.
        """
        supports_delete = has_method(self._client, 'delete')
        if not supports_delete:
            raise NotImplementedError(
                'Current version of Petrel Python SDK has not supported '
                'the `delete` method, please use a higher version or dev '
                'branch instead.')

        remote_path = self._format_path(self._map_path(filepath))
        self._client.delete(remote_path)

    def exists(self, filepath: Union[str, Path]) -> bool:
        """Check whether a file path exists.

        A path exists when the client reports it either through ``contains``
        or through ``isdir``.

        Args:
            filepath (str or Path): Path to be checked whether exists.

        Returns:
            bool: Return ``True`` if ``filepath`` exists, ``False`` otherwise.

        Raises:
            NotImplementedError: If the client lacks the ``contains`` or the
                ``isdir`` method.
        """
        if not (has_method(self._client, 'contains')
                and has_method(self._client, 'isdir')):
            # Note the trailing space after "higher": the adjacent literals
            # are concatenated verbatim.
            raise NotImplementedError(
                ('Current version of Petrel Python SDK has not supported '
                 'the `contains` and `isdir` methods, please use a higher '
                 'version or dev branch instead.'))

        filepath = self._map_path(filepath)
        filepath = self._format_path(filepath)
        return self._client.contains(filepath) or self._client.isdir(filepath)

    def isdir(self, filepath: Union[str, Path]) -> bool:
        """Check whether a file path is a directory.

        Args:
            filepath (str or Path): Path to be checked whether it is a
                directory.

        Returns:
            bool: Return ``True`` if ``filepath`` points to a directory,
            ``False`` otherwise.

        Raises:
            NotImplementedError: If the client has no ``isdir`` method.
        """
        supports_isdir = has_method(self._client, 'isdir')
        if not supports_isdir:
            raise NotImplementedError(
                'Current version of Petrel Python SDK has not supported '
                'the `isdir` method, please use a higher version or dev '
                'branch instead.')

        remote_path = self._format_path(self._map_path(filepath))
        return self._client.isdir(remote_path)

    def isfile(self, filepath: Union[str, Path]) -> bool:
        """Check whether a file path is a file.

        Args:
            filepath (str or Path): Path to be checked whether it is a file.

        Returns:
            bool: Return ``True`` if ``filepath`` points to a file, ``False``
            otherwise.

        Raises:
            NotImplementedError: If the client has no ``contains`` method.
        """
        supports_contains = has_method(self._client, 'contains')
        if not supports_contains:
            raise NotImplementedError(
                'Current version of Petrel Python SDK has not supported '
                'the `contains` method, please use a higher version or dev '
                'branch instead.')

        remote_path = self._format_path(self._map_path(filepath))
        return self._client.contains(remote_path)

    def join_path(self, filepath: Union[str, Path],
                  *filepaths: Union[str, Path]) -> str:
        """Join path components with ``'/'``.

        Every component is passed through ``self._map_path`` and then
        ``self._format_path`` before joining. A single trailing ``'/'`` on
        the first component is dropped so that the separator is not doubled.

        Args:
            filepath (str or Path): First path component.
            *filepaths (str or Path): Further components appended in order.

        Returns:
            str: The result after concatenation.
        """
        head = self._format_path(self._map_path(filepath))
        # Only one trailing slash is removed, and only from the head.
        parts = [head[:-1] if head.endswith('/') else head]
        parts.extend(
            self._format_path(self._map_path(extra)) for extra in filepaths)
        return '/'.join(parts)

    @contextmanager
    def get_local_path(self, filepath: Union[str, Path]) -> Iterable[str]:
        """Download a file from ``filepath`` and return a temporary path.

        ``get_local_path`` is decorated by
        :meth:`contextlib.contextmanager`. It can be called with a ``with``
        statement, and when exiting from the ``with`` statement, the
        temporary file will be removed.

        Args:
            filepath (str | Path): Download a file from ``filepath``.

        Examples:
            >>> client = PetrelBackend()
            >>> # After exiting from the ``with`` clause,
            >>> # the path will be removed
            >>> with client.get_local_path('s3://path/of/your/file') as path:
            ...     # do something here

        Yields:
            Iterable[str]: Only yield one temporary path.

        Raises:
            AssertionError: If ``self.isfile`` reports that ``filepath`` is
                not a file.
        """
        filepath = self._map_path(filepath)
        filepath = self._format_path(filepath)
        assert self.isfile(filepath)
        # Create the temporary file *before* entering ``try`` so that ``f``
        # is always bound when the ``finally`` clause runs; otherwise a
        # failure here would be masked by an UnboundLocalError.
        f = tempfile.NamedTemporaryFile(delete=False)
        try:
            f.write(self.get(filepath))
            f.close()
            yield f.name
        finally:
            # ``close`` is idempotent; this releases the handle when the
            # download or the write failed before the explicit close above.
            f.close()
            os.remove(f.name)

    def list_dir_or_file(self,
                         dir_path: Union[str, Path],
                         list_dir: bool = True,
                         list_file: bool = True,
                         suffix: Optional[Union[str, Tuple[str]]] = None,
                         recursive: bool = False) -> Iterator[str]:
        """Scan a directory to find the interested directories or files in
        arbitrary order.

        Note:
            Petrel has no concept of directories but it simulates the directory
            hierarchy in the filesystem through public prefixes. In addition,
            if the returned path ends with '/', it means the path is a public
            prefix which is a logical directory.

        Note:
            :meth:`list_dir_or_file` returns the path relative to ``dir_path``.
            In addition, the returned path of a directory will not contain the
            suffix '/', which is consistent with other backends.

        Args:
            dir_path (str | Path): Path of the directory.
            list_dir (bool): List the directories. Default: True.
            list_file (bool): List the path of files. Default: True.
            suffix (str or tuple[str], optional):  File suffix
                that we are interested in. Default: None.
            recursive (bool): If set to True, recursively scan the
                directory. Default: False.

        Yields:
            Iterable[str]: A relative path to ``dir_path``.

        Raises:
            NotImplementedError: If the underlying client has no ``list``
                method.
            TypeError: If ``suffix`` is given together with
                ``list_dir=True``, or if ``suffix`` is neither a string nor
                a tuple.
        """
        if not has_method(self._client, 'list'):
            raise NotImplementedError(
                'Current version of Petrel Python SDK has not supported '
                'the `list` method, please use a higher version or dev'
                ' branch instead.')

        dir_path = self._format_path(self._map_path(dir_path))

        if suffix is not None:
            if list_dir:
                raise TypeError(
                    '`list_dir` should be False when `suffix` is not None')
            if not isinstance(suffix, (str, tuple)):
                raise TypeError(
                    '`suffix` must be a string or tuple of strings')

        # Petrel's simulated directory hierarchy assumes that directory paths
        # should end with `/`
        root = dir_path if dir_path.endswith('/') else dir_path + '/'
        root_len = len(root)

        def _walk(current_dir):
            for name in self._client.list(current_dir):
                joined = self.join_path(current_dir, name)
                # A trailing '/' marks a directory entry. `self.isdir` is
                # deliberately not used here; per the original note it
                # relies on `self._client.list` itself.
                if name.endswith('/'):
                    if list_dir:
                        # relative to root, without the trailing '/'
                        yield joined[root_len:-1]
                    if recursive:
                        yield from _walk(joined)
                    continue

                rel_path = joined[root_len:]
                if (suffix is None
                        or rel_path.endswith(suffix)) and list_file:
                    yield rel_path

        return _walk(root)


class MemcachedBackend(BaseStorageBackend):
    """Storage backend that reads values through a memcached client.

    Attributes:
        server_list_cfg (str): Config file for memcached server list.
        client_cfg (str): Config file for memcached client.
        sys_path (str | None): Additional path to be appended to `sys.path`.
            Default: None.
    """

    def __init__(self, server_list_cfg, client_cfg, sys_path=None):
        """Import ``mc`` and obtain a client for the given config files.

        Args:
            server_list_cfg (str): Config file for memcached server list.
            client_cfg (str): Config file for memcached client.
            sys_path (str | None): If given, appended to ``sys.path`` before
                ``mc`` is imported. Default: None.

        Raises:
            ImportError: If the ``mc`` module cannot be imported.
        """
        if sys_path is not None:
            import sys
            sys.path.append(sys_path)
        try:
            import mc
        except ImportError:
            raise ImportError(
                'Please install memcached to enable MemcachedBackend.')

        self.server_list_cfg = server_list_cfg
        self.client_cfg = client_cfg
        get_instance = mc.MemcachedClient.GetInstance
        self._client = get_instance(self.server_list_cfg, self.client_cfg)
        # mc.pyvector serves as a pointer to a memory cache; the same
        # buffer object is reused by every ``get`` call.
        self._mc_buffer = mc.pyvector()

    def get(self, filepath):
        """Fetch the value stored under ``filepath``.

        Args:
            filepath (str | Path): Key to look up; converted with ``str``.

        Returns:
            The result of ``mc.ConvertBuffer`` applied to the internal
            buffer after the client's ``Get`` call has filled it.
        """
        key = str(filepath)
        import mc
        self._client.Get(key, self._mc_buffer)
        return mc.ConvertBuffer(self._mc_buffer)

    def get_text(self, filepath, encoding=None):
        """Text reading is not supported by this backend."""
        raise NotImplementedError


class LmdbBackend(BaseStorageBackend):
    """Storage backend that reads values from an lmdb database.

    Args:
        db_path (str): Lmdb database path.
        readonly (bool, optional): Lmdb environment parameter. If True,
            disallow any write operations. Default: True.
        lock (bool, optional): Lmdb environment parameter. If False, when
            concurrent access occurs, do not lock the database. Default: False.
        readahead (bool, optional): Lmdb environment parameter. If False,
            disable the OS filesystem readahead mechanism, which may improve
            random read performance when a database is larger than RAM.
            Default: False.
        **kwargs: Forwarded unchanged to ``lmdb.open``.

    Attributes:
        db_path (str): Lmdb database path.
    """

    def __init__(self,
                 db_path,
                 readonly=True,
                 lock=False,
                 readahead=False,
                 **kwargs):
        try:
            import lmdb
        except ImportError:
            raise ImportError('Please install lmdb to enable LmdbBackend.')

        self.db_path = str(db_path)
        # Collect the environment options first, then open in one call.
        env_options = dict(
            readonly=readonly, lock=lock, readahead=readahead, **kwargs)
        self._client = lmdb.open(self.db_path, **env_options)

    def get(self, filepath):
        """Look up the value stored under ``filepath``.

        Args:
            filepath (str | obj:`Path`): Here, filepath is the lmdb key. It
                is converted with ``str`` and encoded as ASCII.

        Returns:
            Whatever the read-only transaction's ``get`` returns for the key.
        """
        key_text = str(filepath)
        with self._client.begin(write=False) as txn:
            found = txn.get(key_text.encode('ascii'))
        return found

    def get_text(self, filepath, encoding=None):
        """Text reading is not supported by this backend."""
        raise NotImplementedError


class HardDiskBackend(BaseStorageBackend):
    """Storage backend operating directly on the local filesystem."""

    _allow_symlink = True

    def get(self, filepath: Union[str, Path]) -> bytes:
        """Return the raw content of ``filepath``, opened in 'rb' mode.

        Args:
            filepath (str or Path): Path to read data.

        Returns:
            bytes: Expected bytes object.
        """
        with open(filepath, 'rb') as stream:
            return stream.read()

    def get_text(self,
                 filepath: Union[str, Path],
                 encoding: str = 'utf-8') -> str:
        """Return the text content of ``filepath``, opened in 'r' mode.

        Args:
            filepath (str or Path): Path to read data.
            encoding (str): The encoding format used to open the ``filepath``.
                Default: 'utf-8'.

        Returns:
            str: Expected text reading from ``filepath``.
        """
        with open(filepath, 'r', encoding=encoding) as stream:
            return stream.read()

    def put(self, obj: bytes, filepath: Union[str, Path]) -> None:
        """Write bytes to ``filepath``, opened in 'wb' mode.

        Note:
            ``put`` will create a directory if the directory of ``filepath``
            does not exist.

        Args:
            obj (bytes): Data to be written.
            filepath (str or Path): Path to write data.
        """
        parent_dir = osp.dirname(filepath)
        mmcv.mkdir_or_exist(parent_dir)
        with open(filepath, 'wb') as stream:
            stream.write(obj)

    def put_text(self,
                 obj: str,
                 filepath: Union[str, Path],
                 encoding: str = 'utf-8') -> None:
        """Write text to ``filepath``, opened in 'w' mode.

        Note:
            ``put_text`` will create a directory if the directory of
            ``filepath`` does not exist.

        Args:
            obj (str): Data to be written.
            filepath (str or Path): Path to write data.
            encoding (str): The encoding format used to open the ``filepath``.
                Default: 'utf-8'.
        """
        parent_dir = osp.dirname(filepath)
        mmcv.mkdir_or_exist(parent_dir)
        with open(filepath, 'w', encoding=encoding) as stream:
            stream.write(obj)

    def remove(self, filepath: Union[str, Path]) -> None:
        """Delete the file at ``filepath`` via ``os.remove``.

        Args:
            filepath (str or Path): Path to be removed.
        """
        os.remove(filepath)

    def exists(self, filepath: Union[str, Path]) -> bool:
        """Report whether ``filepath`` exists.

        Args:
            filepath (str or Path): Path to be checked whether exists.

        Returns:
            bool: Return ``True`` if ``filepath`` exists, ``False`` otherwise.
        """
        return osp.exists(filepath)

    def isdir(self, filepath: Union[str, Path]) -> bool:
        """Report whether ``filepath`` is a directory.

        Args:
            filepath (str or Path): Path to be checked whether it is a
                directory.

        Returns:
            bool: Return ``True`` if ``filepath`` points to a directory,
            ``False`` otherwise.
        """
        return osp.isdir(filepath)

    def isfile(self, filepath: Union[str, Path]) -> bool:
        """Report whether ``filepath`` is a file.

        Args:
            filepath (str or Path): Path to be checked whether it is a file.

        Returns:
            bool: Return ``True`` if ``filepath`` points to a file, ``False``
            otherwise.
        """
        return osp.isfile(filepath)

    def join_path(self, filepath: Union[str, Path],
                  *filepaths: Union[str, Path]) -> str:
        """Join path components with ``os.path.join``.

        Args:
            filepath (str or Path): First path component.
            *filepaths (str or Path): Further components appended in order.

        Returns:
            str: The result of concatenation.
        """
        return osp.join(filepath, *filepaths)

    @contextmanager
    def get_local_path(
            self, filepath: Union[str, Path]) -> Iterable[Union[str, Path]]:
        """Yield ``filepath`` unchanged; present only for a unified API."""
        yield filepath

    def list_dir_or_file(self,
                         dir_path: Union[str, Path],
                         list_dir: bool = True,
                         list_file: bool = True,
                         suffix: Optional[Union[str, Tuple[str]]] = None,
                         recursive: bool = False) -> Iterator[str]:
        """Scan a directory to find the interested directories or files in
        arbitrary order.

        Note:
            :meth:`list_dir_or_file` returns the path relative to ``dir_path``.

        Args:
            dir_path (str | Path): Path of the directory.
            list_dir (bool): List the directories. Default: True.
            list_file (bool): List the path of files. Default: True.
            suffix (str or tuple[str], optional):  File suffix
                that we are interested in. Default: None.
            recursive (bool): If set to True, recursively scan the
                directory. Default: False.

        Yields:
            Iterable[str]: A relative path to ``dir_path``.

        Raises:
            TypeError: If ``suffix`` is given together with
                ``list_dir=True``, or if ``suffix`` is neither a string nor
                a tuple.
        """
        if suffix is not None:
            if list_dir:
                raise TypeError(
                    '`suffix` should be None when `list_dir` is True')
            if not isinstance(suffix, (str, tuple)):
                raise TypeError(
                    '`suffix` must be a string or tuple of strings')

        root = dir_path

        def _scan(current_dir):
            for entry in os.scandir(current_dir):
                # Files whose name starts with '.' are never reported.
                visible_file = (not entry.name.startswith('.')
                                and entry.is_file())
                if visible_file:
                    rel_path = osp.relpath(entry.path, root)
                    if (suffix is None
                            or rel_path.endswith(suffix)) and list_file:
                        yield rel_path
                    continue

                if not osp.isdir(entry.path):
                    continue

                if list_dir:
                    yield osp.relpath(entry.path, root)
                if recursive:
                    yield from _scan(entry.path)

        return _scan(dir_path)


class HTTPBackend(BaseStorageBackend):
    """HTTP and HTTPS storage backend."""

    def get(self, filepath):
        """Fetch ``filepath`` with ``urlopen`` and return the response body.

        Args:
            filepath (str): URL to read from.

        Returns:
            bytes: The complete response body.
        """
        # Use the response as a context manager so that it is closed even
        # when ``read`` fails.
        with urlopen(filepath) as response:
            return response.read()

    def get_text(self, filepath, encoding='utf-8'):
        """Fetch ``filepath`` with ``urlopen`` and decode the response body.

        Args:
            filepath (str): URL to read from.
            encoding (str): Encoding used to decode the body.
                Default: 'utf-8'.

        Returns:
            str: The decoded response body.
        """
        with urlopen(filepath) as response:
            value_buf = response.read()
        return value_buf.decode(encoding)

    @contextmanager
    def get_local_path(self, filepath: str) -> Iterable[str]:
        """Download a file from ``filepath``.

        ``get_local_path`` is decorated by
        :meth:`contextlib.contextmanager`. It can be called with a ``with``
        statement, and when exiting from the ``with`` statement, the
        temporary file will be removed.

        Args:
            filepath (str): Download a file from ``filepath``.

        Examples:
            >>> client = HTTPBackend()
            >>> # After exiting from the ``with`` clause,
            >>> # the path will be removed
            >>> with client.get_local_path('http://path/of/your/file') as path:
            ...     # do something here

        Yields:
            Iterable[str]: Only yield one temporary path.
        """
        # Create the temporary file *before* entering ``try`` so that ``f``
        # is always bound when the ``finally`` clause runs.
        f = tempfile.NamedTemporaryFile(delete=False)
        try:
            f.write(self.get(filepath))
            f.close()
            yield f.name
        finally:
            # ``close`` is idempotent; this releases the handle when the
            # download or the write failed before the explicit close above.
            f.close()
            os.remove(f.name)


class FileClient:
    """A general file client to access files in different backends.

    The client loads a file or text in a specified backend from its path
    and returns it as a binary or text file. There are two ways to choose a
    backend, the name of backend and the prefix of path. Although both of them
    can be used to choose a storage backend, ``backend`` has a higher priority
    that is if they are all set, the storage backend will be chosen by the
    backend argument. If they are all `None`, the disk backend will be chosen.
    Note that it can also register other backend accessors with a given name,
    prefixes, and backend class. In addition, we use the singleton pattern to
    avoid repeated object creation. If the arguments are the same, the same
    object will be returned.

    Args:
        backend (str, optional): The storage backend type. Options are "disk",
            "ceph", "memcached", "lmdb", "http" and "petrel". Default: None.
        prefix (str, optional): The prefix of the registered storage backend.
            Options are "s3", "http", "https". Default: None.

    Examples:
        >>> # only set backend
        >>> file_client = FileClient(backend='petrel')
        >>> # only set prefix
        >>> file_client = FileClient(prefix='s3')
        >>> # set both backend and prefix but use backend to choose client
        >>> file_client = FileClient(backend='petrel', prefix='s3')
        >>> # if the arguments are the same, the same object is returned
        >>> file_client1 = FileClient(backend='petrel')
        >>> file_client1 is file_client
        True

    Attributes:
        client (:obj:`BaseStorageBackend`): The backend object.
    """

    _backends = {
        'disk': HardDiskBackend,
        'ceph': CephBackend,
        'memcached': MemcachedBackend,
        'lmdb': LmdbBackend,
        'petrel': PetrelBackend,
        'http': HTTPBackend,
    }
    # This collection is used to record the overridden backends, and when a
    # backend appears in the collection, the singleton pattern is disabled for
    # that backend, because if the singleton pattern is used, then the object
    # returned will be the backend before overwriting
    _overridden_backends = set()
    _prefix_to_backends = {
        's3': PetrelBackend,
        'http': HTTPBackend,
        'https': HTTPBackend,
    }
    _overridden_prefixes = set()

    # Cache of created clients, keyed by a string built from the arguments.
    _instances = {}

    def __new__(cls, backend=None, prefix=None, **kwargs):
        """Return a cached client for the arguments or create a new one.

        Raises:
            ValueError: If ``backend`` or ``prefix`` is not registered.
        """
        if backend is None and prefix is None:
            backend = 'disk'
        if backend is not None and backend not in cls._backends:
            raise ValueError(
                f'Backend {backend} is not supported. Currently supported ones'
                f' are {list(cls._backends.keys())}')
        if prefix is not None and prefix not in cls._prefix_to_backends:
            raise ValueError(
                f'prefix {prefix} is not supported. Currently supported ones '
                f'are {list(cls._prefix_to_backends.keys())}')

        # concatenate the arguments to a unique key for determining whether
        # objects with the same arguments were created
        arg_key = f'{backend}:{prefix}'
        for key, value in kwargs.items():
            arg_key += f':{key}:{value}'

        # if a backend was overridden, it will create a new object
        if (arg_key in cls._instances
                and backend not in cls._overridden_backends
                and prefix not in cls._overridden_prefixes):
            _instance = cls._instances[arg_key]
        else:
            # create a new object and put it to _instance
            _instance = super().__new__(cls)
            # ``backend`` takes priority over ``prefix`` when both are set
            if backend is not None:
                _instance.client = cls._backends[backend](**kwargs)
            else:
                _instance.client = cls._prefix_to_backends[prefix](**kwargs)

            cls._instances[arg_key] = _instance

        return _instance

    @property
    def name(self):
        """The ``name`` attribute of the wrapped backend."""
        return self.client.name

    @property
    def allow_symlink(self):
        """The ``allow_symlink`` attribute of the wrapped backend."""
        return self.client.allow_symlink

    @staticmethod
    def parse_uri_prefix(uri: Union[str, Path]) -> Optional[str]:
        """Parse the prefix of a uri.

        Args:
            uri (str | Path): Uri to be parsed that contains the file prefix.

        Examples:
            >>> FileClient.parse_uri_prefix('s3://path/of/your/file')
            's3'

        Returns:
            str | None: Return the prefix of uri if the uri contains '://' else
            ``None``.
        """
        assert is_filepath(uri)
        uri = str(uri)
        if '://' not in uri:
            return None

        # Split on the first '://' only: the remainder may legitimately
        # contain '://' again (e.g. a URL embedded in a query string), which
        # would otherwise make the two-value unpacking raise ValueError.
        prefix, _ = uri.split('://', 1)
        # In the case of PetrelBackend, the prefix may contain the cluster
        # name like clusterName:s3; keep only the part after the last ':'
        if ':' in prefix:
            _, prefix = prefix.rsplit(':', 1)
        return prefix

    @classmethod
    def infer_client(cls,
                     file_client_args: Optional[dict] = None,
                     uri: Optional[Union[str, Path]] = None) -> 'FileClient':
        """Infer a suitable file client based on the URI and arguments.

        Args:
            file_client_args (dict, optional): Arguments to instantiate a
                FileClient. Default: None.
            uri (str | Path, optional): Uri to be parsed that contains the file
                prefix. Default: None.

        Examples:
            >>> uri = 's3://path/of/your/file'
            >>> file_client = FileClient.infer_client(uri=uri)
            >>> file_client_args = {'backend': 'petrel'}
            >>> file_client = FileClient.infer_client(file_client_args)

        Returns:
            FileClient: Instantiated FileClient object.
        """
        assert file_client_args is not None or uri is not None
        # ``file_client_args`` wins over ``uri`` when both are given
        if file_client_args is None:
            file_prefix = cls.parse_uri_prefix(uri)  # type: ignore
            return cls(prefix=file_prefix)
        else:
            return cls(**file_client_args)

    @classmethod
    def _register_backend(cls, name, backend, force=False, prefixes=None):
        """Validate and record ``backend`` under ``name`` and ``prefixes``.

        Raises:
            TypeError: If ``name`` is not a string, or ``backend`` is not a
                class deriving from ``BaseStorageBackend``.
            KeyError: If ``name`` or one of ``prefixes`` is already
                registered and ``force`` is False.
        """
        if not isinstance(name, str):
            raise TypeError('the backend name should be a string, '
                            f'but got {type(name)}')
        if not inspect.isclass(backend):
            raise TypeError(
                f'backend should be a class but got {type(backend)}')
        if not issubclass(backend, BaseStorageBackend):
            raise TypeError(
                f'backend {backend} is not a subclass of BaseStorageBackend')
        if not force and name in cls._backends:
            raise KeyError(
                f'{name} is already registered as a storage backend, '
                'add "force=True" if you want to override it')

        # remember overridden names so that cached instances are bypassed
        if name in cls._backends and force:
            cls._overridden_backends.add(name)
        cls._backends[name] = backend

        if prefixes is not None:
            if isinstance(prefixes, str):
                prefixes = [prefixes]
            else:
                assert isinstance(prefixes, (list, tuple))
            for prefix in prefixes:
                if prefix not in cls._prefix_to_backends:
                    cls._prefix_to_backends[prefix] = backend
                elif (prefix in cls._prefix_to_backends) and force:
                    cls._overridden_prefixes.add(prefix)
                    cls._prefix_to_backends[prefix] = backend
                else:
                    raise KeyError(
                        f'{prefix} is already registered as a storage backend,'
                        ' add "force=True" if you want to override it')

    @classmethod
    def register_backend(cls, name, backend=None, force=False, prefixes=None):
        """Register a backend to FileClient.

        This method can be used as a normal class method or a decorator.

        .. code-block:: python

            class NewBackend(BaseStorageBackend):

                def get(self, filepath):
                    return filepath

                def get_text(self, filepath, encoding='utf-8'):
                    return filepath

            FileClient.register_backend('new', NewBackend)

        or

        .. code-block:: python

            @FileClient.register_backend('new')
            class NewBackend(BaseStorageBackend):

                def get(self, filepath):
                    return filepath

                def get_text(self, filepath, encoding='utf-8'):
                    return filepath

        Note:
            :meth:`FileClient.get_text` always passes ``encoding`` to the
            backend, so a backend's ``get_text`` must accept it.

        Args:
            name (str): The name of the registered backend.
            backend (class, optional): The backend class to be registered,
                which must be a subclass of :class:`BaseStorageBackend`.
                When this method is used as a decorator, backend is None.
                Defaults to None.
            force (bool, optional): Whether to override the backend if the name
                has already been registered. Defaults to False.
            prefixes (str or list[str] or tuple[str], optional): The prefixes
                of the registered storage backend. Default: None.
                `New in version 1.3.15.`
        """
        if backend is not None:
            cls._register_backend(
                name, backend, force=force, prefixes=prefixes)
            return

        def _register(backend_cls):
            cls._register_backend(
                name, backend_cls, force=force, prefixes=prefixes)
            return backend_cls

        return _register

    def get(self, filepath: Union[str, Path]) -> Union[bytes, memoryview]:
        """Read data from a given ``filepath`` with 'rb' mode.

        Note:
            There are two types of return values for ``get``, one is ``bytes``
            and the other is ``memoryview``. The advantage of using memoryview
            is that you can avoid copying, and if you want to convert it to
            ``bytes``, you can use ``.tobytes()``.

        Args:
            filepath (str or Path): Path to read data.

        Returns:
            bytes | memoryview: Expected bytes object or a memory view of the
            bytes object.
        """
        return self.client.get(filepath)

    def get_text(self, filepath: Union[str, Path], encoding='utf-8') -> str:
        """Read data from a given ``filepath`` with 'r' mode.

        Args:
            filepath (str or Path): Path to read data.
            encoding (str): The encoding format used to open the ``filepath``.
                Default: 'utf-8'.

        Returns:
            str: Expected text reading from ``filepath``.
        """
        return self.client.get_text(filepath, encoding)

    def put(self, obj: bytes, filepath: Union[str, Path]) -> None:
        """Write data to a given ``filepath`` with 'wb' mode.

        Note:
            ``put`` should create a directory if the directory of ``filepath``
            does not exist.

        Args:
            obj (bytes): Data to be written.
            filepath (str or Path): Path to write data.
        """
        self.client.put(obj, filepath)

    def put_text(self,
                 obj: str,
                 filepath: Union[str, Path],
                 encoding: Optional[str] = None) -> None:
        """Write data to a given ``filepath`` with 'w' mode.

        Note:
            ``put_text`` should create a directory if the directory of
            ``filepath`` does not exist.

        Args:
            obj (str): Data to be written.
            filepath (str or Path): Path to write data.
            encoding (str, optional): The encoding format used to open the
                `filepath`. When ``None`` (the default) the argument is not
                forwarded and the backend's own default applies.
        """
        if encoding is None:
            # Keep the historical two-argument call so that backends whose
            # ``put_text`` takes no ``encoding`` keep working.
            self.client.put_text(obj, filepath)
        else:
            self.client.put_text(obj, filepath, encoding=encoding)

    def remove(self, filepath: Union[str, Path]) -> None:
        """Remove a file.

        Args:
            filepath (str, Path): Path to be removed.
        """
        self.client.remove(filepath)

    def exists(self, filepath: Union[str, Path]) -> bool:
        """Check whether a file path exists.

        Args:
            filepath (str or Path): Path to be checked whether exists.

        Returns:
            bool: Return ``True`` if ``filepath`` exists, ``False`` otherwise.
        """
        return self.client.exists(filepath)

    def isdir(self, filepath: Union[str, Path]) -> bool:
        """Check whether a file path is a directory.

        Args:
            filepath (str or Path): Path to be checked whether it is a
                directory.

        Returns:
            bool: Return ``True`` if ``filepath`` points to a directory,
            ``False`` otherwise.
        """
        return self.client.isdir(filepath)

    def isfile(self, filepath: Union[str, Path]) -> bool:
        """Check whether a file path is a file.

        Args:
            filepath (str or Path): Path to be checked whether it is a file.

        Returns:
            bool: Return ``True`` if ``filepath`` points to a file, ``False``
            otherwise.
        """
        return self.client.isfile(filepath)

    def join_path(self, filepath: Union[str, Path],
                  *filepaths: Union[str, Path]) -> str:
        """Concatenate all file paths.

        Join one or more filepath components intelligently. The return value
        is the concatenation of filepath and any members of *filepaths.

        Args:
            filepath (str or Path): Path to be concatenated.
            *filepaths (str or Path): Further components appended in order.

        Returns:
            str: The result of concatenation.
        """
        return self.client.join_path(filepath, *filepaths)

    @contextmanager
    def get_local_path(self, filepath: Union[str, Path]) -> Iterable[str]:
        """Download data from ``filepath`` and write the data to local path.

        ``get_local_path`` is decorated by
        :meth:`contextlib.contextmanager`. It can be called with a ``with``
        statement, and when exiting from the ``with`` statement, the
        temporary path will be released.

        Note:
            If the ``filepath`` is a local path, just return itself.

        .. warning::
            ``get_local_path`` is an experimental interface that may change in
            the future.

        Args:
            filepath (str or Path): Path to be read data.

        Examples:
            >>> file_client = FileClient(prefix='s3')
            >>> with file_client.get_local_path('s3://bucket/abc.jpg') as path:
            ...     # do something here

        Yields:
            Iterable[str]: Only yield one path.
        """
        with self.client.get_local_path(str(filepath)) as local_path:
            yield local_path

    def list_dir_or_file(self,
                         dir_path: Union[str, Path],
                         list_dir: bool = True,
                         list_file: bool = True,
                         suffix: Optional[Union[str, Tuple[str]]] = None,
                         recursive: bool = False) -> Iterator[str]:
        """Scan a directory to find the interested directories or files in
        arbitrary order.

        Note:
            :meth:`list_dir_or_file` returns the path relative to ``dir_path``.

        Args:
            dir_path (str | Path): Path of the directory.
            list_dir (bool): List the directories. Default: True.
            list_file (bool): List the path of files. Default: True.
            suffix (str or tuple[str], optional):  File suffix
                that we are interested in. Default: None.
            recursive (bool): If set to True, recursively scan the
                directory. Default: False.

        Yields:
            Iterable[str]: A relative path to ``dir_path``.
        """
        yield from self.client.list_dir_or_file(dir_path, list_dir, list_file,
                                                suffix, recursive)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/fileio/handlers/__init__.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
from .base import BaseFileHandler
from .json_handler import JsonHandler
from .pickle_handler import PickleHandler
from .yaml_handler import YamlHandler

# Public names of the ``handlers`` package: the abstract base class plus the
# three concrete handlers imported above.
__all__ = ['BaseFileHandler', 'JsonHandler', 'PickleHandler', 'YamlHandler']


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/fileio/handlers/base.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
from abc import ABCMeta, abstractmethod


class BaseFileHandler(metaclass=ABCMeta):
    """Abstract interface shared by all serialization handlers.

    Subclasses must implement the three abstract methods; the two
    ``*_path`` helpers are built on top of them and simply open the file
    before delegating.
    """

    # ``str_like`` tells callers whether the handler works on text (str)
    # file objects or on binary (bytes) ones. Pickle needs bytes while json
    # needs str; text handlers are fed through ``StringIO`` buffers.
    str_like = True

    @abstractmethod
    def load_from_fileobj(self, file, **kwargs):
        """Deserialize and return the object stored in the open ``file``."""

    @abstractmethod
    def dump_to_fileobj(self, obj, file, **kwargs):
        """Serialize ``obj`` into the open ``file``."""

    @abstractmethod
    def dump_to_str(self, obj, **kwargs):
        """Serialize ``obj`` and return the serialized form."""

    def load_from_path(self, filepath, mode='r', **kwargs):
        """Open ``filepath`` with ``mode`` and load the object it contains.

        Extra keyword arguments are passed to :meth:`load_from_fileobj`.
        """
        with open(filepath, mode) as stream:
            loaded = self.load_from_fileobj(stream, **kwargs)
        return loaded

    def dump_to_path(self, obj, filepath, mode='w', **kwargs):
        """Open ``filepath`` with ``mode`` and write ``obj`` into it.

        Extra keyword arguments are passed to :meth:`dump_to_fileobj`.
        """
        with open(filepath, mode) as stream:
            self.dump_to_fileobj(obj, stream, **kwargs)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/fileio/handlers/json_handler.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import json

import numpy as np

from .base import BaseFileHandler


def set_default(obj):
    """Fallback serializer for values the json encoder cannot handle.

    ``set`` and ``range`` instances as well as ``np.ndarray`` are turned into
    lists, and ``np.generic`` scalars (``np.int32``, ``np.float32``, ...) are
    turned into the matching plain python number.

    Raises:
        TypeError: If ``obj`` is none of the supported types.
    """
    # (accepted types, converter) pairs, checked in order.
    converters = (
        ((set, range), list),
        (np.ndarray, lambda array: array.tolist()),
        (np.generic, lambda scalar: scalar.item()),
    )
    for accepted, convert in converters:
        if isinstance(obj, accepted):
            return convert(obj)
    raise TypeError(f'{type(obj)} is unsupported for json dump')


class JsonHandler(BaseFileHandler):
    """Handler that (de)serializes objects with the standard ``json`` module.

    When dumping, :func:`set_default` is installed as the ``default`` hook
    unless the caller provides one, so sets, ranges and numpy values can be
    written.
    """

    def load_from_fileobj(self, file, **kwargs):
        """Parse json from a text file object.

        Args:
            file: An open text file-like object.
            **kwargs: Forwarded to :func:`json.load` (e.g. ``object_hook``).
                Accepting them keeps the signature in line with
                ``BaseFileHandler.load_from_fileobj``.

        Returns:
            The decoded python object.
        """
        return json.load(file, **kwargs)

    def dump_to_fileobj(self, obj, file, **kwargs):
        """Write ``obj`` as json into ``file``; kwargs go to ``json.dump``."""
        kwargs.setdefault('default', set_default)
        json.dump(obj, file, **kwargs)

    def dump_to_str(self, obj, **kwargs):
        """Return ``obj`` as a json string; kwargs go to ``json.dumps``."""
        kwargs.setdefault('default', set_default)
        return json.dumps(obj, **kwargs)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/fileio/handlers/pickle_handler.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import pickle

from .base import BaseFileHandler


class PickleHandler(BaseFileHandler):
    """Handler that (de)serializes objects with the ``pickle`` module.

    Pickle data is binary, hence ``str_like`` is False and the path based
    helpers open files in ``'rb'`` / ``'wb'`` mode. Dumps use protocol 2
    unless the caller passes another ``protocol``.
    """

    str_like = False

    def load_from_fileobj(self, file, **kwargs):
        """Unpickle and return the object stored in the binary ``file``."""
        # NOTE: unpickling can execute arbitrary code; only load data that
        # comes from a trusted source.
        return pickle.load(file, **kwargs)

    def load_from_path(self, filepath, **kwargs):
        """Unpickle the object stored at ``filepath`` (opened as ``'rb'``)."""
        return super().load_from_path(filepath, mode='rb', **kwargs)

    def dump_to_str(self, obj, **kwargs):
        """Return the pickled representation of ``obj``."""
        options = {'protocol': 2, **kwargs}
        return pickle.dumps(obj, **options)

    def dump_to_fileobj(self, obj, file, **kwargs):
        """Pickle ``obj`` into the binary ``file``."""
        options = {'protocol': 2, **kwargs}
        pickle.dump(obj, file, **options)

    def dump_to_path(self, obj, filepath, **kwargs):
        """Pickle ``obj`` into ``filepath`` (opened as ``'wb'``)."""
        super().dump_to_path(obj, filepath, mode='wb', **kwargs)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/fileio/handlers/yaml_handler.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import yaml

try:
    from yaml import CLoader as Loader, CDumper as Dumper
except ImportError:
    from yaml import Loader, Dumper

from .base import BaseFileHandler  # isort:skip


class YamlHandler(BaseFileHandler):
    """Handler that (de)serializes objects with PyYAML.

    The module level ``Loader`` / ``Dumper`` (the C implementations when
    they can be imported, the pure python ones otherwise) are used unless
    the caller passes its own ``Loader`` / ``Dumper`` keyword.
    """

    def load_from_fileobj(self, file, **kwargs):
        """Parse yaml from ``file``; kwargs are forwarded to ``yaml.load``."""
        # NOTE(review): the default ``Loader`` is PyYAML's full loader, not
        # ``SafeLoader``; pass ``Loader=yaml.SafeLoader`` for untrusted input.
        options = {'Loader': Loader, **kwargs}
        return yaml.load(file, **options)

    def dump_to_fileobj(self, obj, file, **kwargs):
        """Write ``obj`` as yaml into ``file``."""
        options = {'Dumper': Dumper, **kwargs}
        yaml.dump(obj, file, **options)

    def dump_to_str(self, obj, **kwargs):
        """Return ``obj`` serialized as a yaml string."""
        options = {'Dumper': Dumper, **kwargs}
        return yaml.dump(obj, **options)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/fileio/io.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
from io import BytesIO, StringIO
from pathlib import Path

from ..utils import is_list_of, is_str
from .file_client import FileClient
from .handlers import BaseFileHandler, JsonHandler, PickleHandler, YamlHandler

# Registry mapping a file format / extension (without the leading dot) to the
# handler instance used by ``load`` and ``dump``. ``_register_handler`` adds
# or overrides entries at runtime.
file_handlers = {
    'json': JsonHandler(),
    'yaml': YamlHandler(),
    'yml': YamlHandler(),
    'pickle': PickleHandler(),
    'pkl': PickleHandler()
}


def load(file, file_format=None, file_client_args=None, **kwargs):
    """Load data from json/yaml/pickle files.

    A single entry point for reading serialized data, whatever the format
    and whatever the storage backend.

    Note:
        In v1.3.16 and later, ``load`` supports loading data from serialized
        files that are stored in different backends.

    Args:
        file (str or :obj:`Path` or file-like object): Filename or a file-like
            object.
        file_format (str, optional): Format of the file. When omitted and
            ``file`` is a path, the text after the last ``.`` is used.
            Formats registered by default are "json", "yaml/yml" and
            "pickle/pkl".
        file_client_args (dict, optional): Arguments to instantiate a
            FileClient. See :class:`mmcv.fileio.FileClient` for details.
            Default: None.
        **kwargs: Forwarded to the handler's ``load_from_fileobj``.

    Examples:
        >>> load('/path/of/your/file')  # file is stored on disk
        >>> load('https://path/of/your/file')  # file is stored on Internet
        >>> load('s3://path/of/your/file')  # file is stored in petrel

    Returns:
        The content from the file.

    Raises:
        TypeError: If the format is not registered, or ``file`` is neither a
            path nor an object with a ``read`` attribute.
    """
    if isinstance(file, Path):
        file = str(file)
    if file_format is None and is_str(file):
        # Infer the format from the extension.
        file_format = file.split('.')[-1]
    if file_format not in file_handlers:
        raise TypeError(f'Unsupported format: {file_format}')
    handler = file_handlers[file_format]

    if is_str(file):
        # Fetch the whole content through the file client, then let the
        # handler parse it from an in-memory buffer of the right kind.
        client = FileClient.infer_client(file_client_args, file)
        if handler.str_like:
            buffer = StringIO(client.get_text(file))
        else:
            buffer = BytesIO(client.get(file))
        with buffer:
            return handler.load_from_fileobj(buffer, **kwargs)

    if hasattr(file, 'read'):
        return handler.load_from_fileobj(file, **kwargs)

    raise TypeError('"file" must be a filepath str or a file-object')


def dump(obj, file=None, file_format=None, file_client_args=None, **kwargs):
    """Dump data to json/yaml/pickle strings or files.

    A single entry point for serializing data, either into a string or into
    a file, with per-format options passed through ``kwargs``.

    Note:
        In v1.3.16 and later, ``dump`` supports dumping data as strings or to
        files which are saved to different backends.

    Args:
        obj (any): The python object to be dumped.
        file (str or :obj:`Path` or file-like object, optional): If not
            specified, then the object is dumped to a str, otherwise to a file
            specified by the filename or file-like object.
        file_format (str, optional): Same as :func:`load`. Required when
            ``file`` is None.
        file_client_args (dict, optional): Arguments to instantiate a
            FileClient. See :class:`mmcv.fileio.FileClient` for details.
            Default: None.
        **kwargs: Forwarded to the handler's dump method.

    Examples:
        >>> dump('hello world', '/path/of/your/file')  # disk
        >>> dump('hello world', 's3://path/of/your/file')  # ceph or petrel

    Returns:
        The value of the handler's ``dump_to_str`` when ``file`` is None,
        otherwise None.

    Raises:
        ValueError: If both ``file`` and ``file_format`` are None.
        TypeError: If the format is not registered, or ``file`` is neither a
            path nor an object with a ``write`` attribute.
    """
    if isinstance(file, Path):
        file = str(file)

    if file_format is None:
        if is_str(file):
            # Infer the format from the extension.
            file_format = file.split('.')[-1]
        elif file is None:
            raise ValueError(
                'file_format must be specified since file is None')
    if file_format not in file_handlers:
        raise TypeError(f'Unsupported format: {file_format}')
    handler = file_handlers[file_format]

    if file is None:
        return handler.dump_to_str(obj, **kwargs)

    if is_str(file):
        # Serialize into an in-memory buffer first, then hand the complete
        # payload to the file client.
        client = FileClient.infer_client(file_client_args, file)
        if handler.str_like:
            buffer, upload = StringIO(), client.put_text
        else:
            buffer, upload = BytesIO(), client.put
        with buffer:
            handler.dump_to_fileobj(obj, buffer, **kwargs)
            upload(buffer.getvalue(), file)
    elif hasattr(file, 'write'):
        handler.dump_to_fileobj(obj, file, **kwargs)
    else:
        raise TypeError('"file" must be a filename str or a file-object')


def _register_handler(handler, file_formats):
    """Bind a handler instance to one or several file extensions.

    Existing entries of ``file_handlers`` for the same extensions are
    overwritten.

    Args:
        handler (:obj:`BaseFileHandler`): Handler to be registered.
        file_formats (str or list[str]): File formats to be handled by this
            handler.

    Raises:
        TypeError: If ``handler`` is not a ``BaseFileHandler`` instance or
            ``file_formats`` is neither a str nor a list of str.
    """
    if not isinstance(handler, BaseFileHandler):
        raise TypeError(
            f'handler must be a child of BaseFileHandler, not {type(handler)}')
    # Normalize a single extension into a one-element list.
    extensions = [file_formats] if isinstance(file_formats,
                                              str) else file_formats
    if not is_list_of(extensions, str):
        raise TypeError('file_formats must be a str or a list of str')
    file_handlers.update(dict.fromkeys(extensions, handler))


def register_handler(file_formats, **kwargs):
    """Class decorator that registers a handler for ``file_formats``.

    The decorated class is instantiated with ``kwargs`` and the resulting
    instance is passed to :func:`_register_handler`; the class itself is
    returned unchanged.

    Args:
        file_formats (str or list[str]): File formats handled by the class.
        **kwargs: Arguments used to instantiate the decorated class.
    """

    def wrap(cls):
        handler_instance = cls(**kwargs)
        _register_handler(handler_instance, file_formats)
        return cls

    return wrap


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/fileio/parse.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.

from io import StringIO

from .file_client import FileClient


def list_from_file(filename,
                   prefix='',
                   offset=0,
                   max_num=0,
                   encoding='utf-8',
                   file_client_args=None):
    """Read a text file and return its lines as a list of strings.

    Note:
        In v1.3.16 and later, ``list_from_file`` supports loading a text file
        which can be stored in different backends and parsing the content as
        a list of strings.

    Args:
        filename (str): Filename.
        prefix (str): The prefix to be inserted to the beginning of each item.
        offset (int): Number of leading lines to skip.
        max_num (int): The maximum number of lines to be read,
            zeros and negatives mean no limitation.
        encoding (str): Encoding used to open the file. Default utf-8.
        file_client_args (dict, optional): Arguments to instantiate a
            FileClient. See :class:`mmcv.fileio.FileClient` for details.
            Default: None.

    Examples:
        >>> list_from_file('/path/of/your/file')  # disk
        ['hello', 'world']
        >>> list_from_file('s3://path/of/your/file')  # ceph or petrel
        ['hello', 'world']

    Returns:
        list[str]: A list of strings.
    """
    client = FileClient.infer_client(file_client_args, filename)
    content = client.get_text(filename, encoding)
    collected = []
    with StringIO(content) as stream:
        # Discard the first ``offset`` lines.
        for _ in range(offset):
            stream.readline()
        for raw_line in stream:
            # Stop once ``max_num`` items are gathered (only if positive).
            if 0 < max_num <= len(collected):
                break
            collected.append(prefix + raw_line.rstrip('\n\r'))
    return collected


def dict_from_file(filename,
                   key_type=str,
                   encoding='utf-8',
                   file_client_args=None):
    """Read a text file and build a dict from its lines.

    Every line must hold at least two whitespace separated columns. The first
    column becomes the key; a single remaining column becomes the value as a
    str, several remaining columns become the value as a list of str.

    Note:
        In v1.3.16 and later, ``dict_from_file`` supports loading a text file
        which can be stored in different backends and parsing the content as
        a dict.

    Args:
        filename(str): Filename.
        key_type(type): Type of the dict keys. str is used by default and
            type conversion will be performed if specified.
        encoding (str): Encoding used to open the file. Default utf-8.
        file_client_args (dict, optional): Arguments to instantiate a
            FileClient. See :class:`mmcv.fileio.FileClient` for details.
            Default: None.

    Examples:
        >>> dict_from_file('/path/of/your/file')  # disk
        {'key1': 'value1', 'key2': 'value2'}
        >>> dict_from_file('s3://path/of/your/file')  # ceph or petrel
        {'key1': 'value1', 'key2': 'value2'}

    Returns:
        dict: The parsed contents.
    """
    client = FileClient.infer_client(file_client_args, filename)
    content = client.get_text(filename, encoding)
    parsed = {}
    with StringIO(content) as stream:
        for raw_line in stream:
            columns = raw_line.rstrip('\n').split()
            assert len(columns) >= 2
            key = key_type(columns[0])
            if len(columns) > 2:
                value = columns[1:]
            else:
                value = columns[1]
            parsed[key] = value
    return parsed


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/image/__init__.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
from .colorspace import (bgr2gray, bgr2hls, bgr2hsv, bgr2rgb, bgr2ycbcr,
                         gray2bgr, gray2rgb, hls2bgr, hsv2bgr, imconvert,
                         rgb2bgr, rgb2gray, rgb2ycbcr, ycbcr2bgr, ycbcr2rgb)
from .geometric import (cutout, imcrop, imflip, imflip_, impad,
                        impad_to_multiple, imrescale, imresize, imresize_like,
                        imresize_to_multiple, imrotate, imshear, imtranslate,
                        rescale_size)
from .io import imfrombytes, imread, imwrite, supported_backends, use_backend
from .misc import tensor2imgs
from .photometric import (adjust_brightness, adjust_color, adjust_contrast,
                          adjust_lighting, adjust_sharpness, auto_contrast,
                          clahe, imdenormalize, imequalize, iminvert,
                          imnormalize, imnormalize_, lut_transform, posterize,
                          solarize)

# Public names of the ``image`` package; every entry is one of the functions
# imported above from the colorspace, geometric, io, misc and photometric
# submodules.
__all__ = [
    'bgr2gray', 'bgr2hls', 'bgr2hsv', 'bgr2rgb', 'gray2bgr', 'gray2rgb',
    'hls2bgr', 'hsv2bgr', 'imconvert', 'rgb2bgr', 'rgb2gray', 'imrescale',
    'imresize', 'imresize_like', 'imresize_to_multiple', 'rescale_size',
    'imcrop', 'imflip', 'imflip_', 'impad', 'impad_to_multiple', 'imrotate',
    'imfrombytes', 'imread', 'imwrite', 'supported_backends', 'use_backend',
    'imdenormalize', 'imnormalize', 'imnormalize_', 'iminvert', 'posterize',
    'solarize', 'rgb2ycbcr', 'bgr2ycbcr', 'ycbcr2rgb', 'ycbcr2bgr',
    'tensor2imgs', 'imshear', 'imtranslate', 'adjust_color', 'imequalize',
    'adjust_brightness', 'adjust_contrast', 'lut_transform', 'clahe',
    'adjust_sharpness', 'auto_contrast', 'cutout', 'adjust_lighting'
]


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/image/colorspace.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import cv2
import numpy as np


def imconvert(img, src, dst):
    """Convert an image between two colorspaces known to OpenCV.

    The conversion code is looked up on the ``cv2`` module as
    ``COLOR_<SRC>2<DST>`` (names upper-cased).

    Args:
        img (ndarray): The input image.
        src (str): The source colorspace, e.g., 'rgb', 'hsv'.
        dst (str): The destination colorspace, e.g., 'rgb', 'hsv'.

    Returns:
        ndarray: The converted image.
    """
    conversion_name = 'COLOR_{}2{}'.format(src.upper(), dst.upper())
    return cv2.cvtColor(img, getattr(cv2, conversion_name))


def bgr2gray(img, keepdim=False):
    """Turn a BGR image into a grayscale image.

    Args:
        img (ndarray): The input image.
        keepdim (bool): When True a trailing axis is appended to the result
            of the conversion; when False (default) the result is returned
            as is.

    Returns:
        ndarray: The converted grayscale image.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    return gray[..., None] if keepdim else gray


def rgb2gray(img, keepdim=False):
    """Turn a RGB image into a grayscale image.

    Args:
        img (ndarray): The input image.
        keepdim (bool): When True a trailing axis is appended to the result
            of the conversion; when False (default) the result is returned
            as is.

    Returns:
        ndarray: The converted grayscale image.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    return gray[..., None] if keepdim else gray


def gray2bgr(img):
    """Turn a grayscale image into a BGR image.

    Args:
        img (ndarray): The input image. A 2-dim array gets a trailing axis
            appended before the conversion.

    Returns:
        ndarray: The converted BGR image.
    """
    if img.ndim == 2:
        img = img[..., None]
    return cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)


def gray2rgb(img):
    """Turn a grayscale image into a RGB image.

    Args:
        img (ndarray): The input image. A 2-dim array gets a trailing axis
            appended before the conversion.

    Returns:
        ndarray: The converted RGB image.
    """
    if img.ndim == 2:
        img = img[..., None]
    return cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)


def _convert_input_type_range(img):
    """Convert the type and range of the input image.

    It converts the input image to np.float32 type and range of [0, 1].
    It is mainly used for pre-processing the input image in colorspace
    conversion functions such as rgb2ycbcr and ycbcr2rgb.

    Args:
        img (ndarray): The input image. It accepts:
            1. np.uint8 type with range [0, 255];
            2. np.float32 type with range [0, 1].

    Returns:
        (ndarray): The converted image with type of np.float32 and range of
            [0, 1].
    """
    img_type = img.dtype
    img = img.astype(np.float32)
    if img_type == np.float32:
        pass
    elif img_type == np.uint8:
        img /= 255.
    else:
        raise TypeError('The img type should be np.float32 or np.uint8, '
                        f'but got {img_type}')
    return img


def _convert_output_type_range(img, dst_type):
    """Convert the type and range of the image according to dst_type.

    It converts the image to desired type and range. If `dst_type` is np.uint8,
    images will be converted to np.uint8 type with range [0, 255]. If
    `dst_type` is np.float32, it converts the image to np.float32 type with
    range [0, 1].
    It is mainly used for post-processing images in colorspace conversion
    functions such as rgb2ycbcr and ycbcr2rgb.

    Args:
        img (ndarray): The image to be converted with np.float32 type and
            range [0, 255].
        dst_type (np.uint8 | np.float32): If dst_type is np.uint8, it
            converts the image to np.uint8 type with range [0, 255]. If
            dst_type is np.float32, it converts the image to np.float32 type
            with range [0, 1].

    Returns:
        (ndarray): The converted image with desired type and range.
    """
    if dst_type not in (np.uint8, np.float32):
        raise TypeError('The dst_type should be np.float32 or np.uint8, '
                        f'but got {dst_type}')
    if dst_type == np.uint8:
        img = img.round()
    else:
        img /= 255.
    return img.astype(dst_type)


def rgb2ycbcr(img, y_only=False):
    """Convert a RGB image to YCbCr image.

    This function produces the same results as Matlab's `rgb2ycbcr` function.
    It implements the ITU-R BT.601 conversion for standard-definition
    television. See more details in
    https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion.

    It differs from a similar function in cv2.cvtColor: `RGB <-> YCrCb`.
    In OpenCV, it implements a JPEG conversion. See more details in
    https://en.wikipedia.org/wiki/YCbCr#JPEG_conversion.

    Args:
        img (ndarray): The input image. It accepts:
            1. np.uint8 type with range [0, 255];
            2. np.float32 type with range [0, 1].
        y_only (bool): Whether to only return Y channel. Default: False.

    Returns:
        ndarray: The converted YCbCr image. The output image has the same type
        and range as input image.
    """
    src_dtype = img.dtype
    normalized = _convert_input_type_range(img)
    if y_only:
        # Weights of R, G and B in the Y channel.
        luma_weights = [65.481, 128.553, 24.966]
        converted = np.dot(normalized, luma_weights) + 16.0
    else:
        # One row per input channel (R, G, B), one column per output
        # channel (Y, Cb, Cr).
        transform = [[65.481, -37.797, 112.0], [128.553, -74.203, -93.786],
                     [24.966, 112.0, -18.214]]
        offsets = [16, 128, 128]
        converted = np.matmul(normalized, transform) + offsets
    return _convert_output_type_range(converted, src_dtype)


def bgr2ycbcr(img, y_only=False):
    """Convert a BGR image to YCbCr image.

    The bgr version of rgb2ycbcr.
    It implements the ITU-R BT.601 conversion for standard-definition
    television. See more details in
    https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion.

    It differs from a similar function in cv2.cvtColor: `BGR <-> YCrCb`.
    In OpenCV, it implements a JPEG conversion. See more details in
    https://en.wikipedia.org/wiki/YCbCr#JPEG_conversion.

    Args:
        img (ndarray): The input image. It accepts:
            1. np.uint8 type with range [0, 255];
            2. np.float32 type with range [0, 1].
        y_only (bool): Whether to only return Y channel. Default: False.

    Returns:
        ndarray: The converted YCbCr image. The output image has the same type
        and range as input image.
    """
    src_dtype = img.dtype
    normalized = _convert_input_type_range(img)
    if y_only:
        # Weights of B, G and R in the Y channel.
        luma_weights = [24.966, 128.553, 65.481]
        converted = np.dot(normalized, luma_weights) + 16.0
    else:
        # One row per input channel (B, G, R), one column per output
        # channel (Y, Cb, Cr).
        transform = [[24.966, 112.0, -18.214], [128.553, -74.203, -93.786],
                     [65.481, -37.797, 112.0]]
        offsets = [16, 128, 128]
        converted = np.matmul(normalized, transform) + offsets
    return _convert_output_type_range(converted, src_dtype)


def ycbcr2rgb(img):
    """Convert a YCbCr image to RGB image.

    This function produces the same results as Matlab's ycbcr2rgb function.
    It implements the ITU-R BT.601 conversion for standard-definition
    television. See more details in
    https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion.

    It differs from a similar function in cv2.cvtColor: `YCrCb <-> RGB`.
    In OpenCV, it implements a JPEG conversion. See more details in
    https://en.wikipedia.org/wiki/YCbCr#JPEG_conversion.

    Args:
        img (ndarray): The input image. It accepts:
            1. np.uint8 type with range [0, 255];
            2. np.float32 type with range [0, 1].

    Returns:
        ndarray: The converted RGB image. The output image has the same type
        and range as input image.
    """
    src_dtype = img.dtype
    # Work on float32 values scaled back to [0, 255].
    ycbcr = _convert_input_type_range(img) * 255
    # One row per input channel (Y, Cb, Cr), one column per output
    # channel (R, G, B).
    inverse = [[0.00456621, 0.00456621, 0.00456621],
               [0, -0.00153632, 0.00791071],
               [0.00625893, -0.00318811, 0]]
    offsets = [-222.921, 135.576, -276.836]
    converted = np.matmul(ycbcr, inverse) * 255.0 + offsets
    return _convert_output_type_range(converted, src_dtype)


def ycbcr2bgr(img):
    """Convert a YCbCr image to BGR image.

    The bgr version of ycbcr2rgb.
    It implements the ITU-R BT.601 conversion for standard-definition
    television. See more details in
    https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion.

    It differs from a similar function in cv2.cvtColor: `YCrCb <-> BGR`.
    In OpenCV, it implements a JPEG conversion. See more details in
    https://en.wikipedia.org/wiki/YCbCr#JPEG_conversion.

    Args:
        img (ndarray): The input image. It accepts:
            1. np.uint8 type with range [0, 255];
            2. np.float32 type with range [0, 1].

    Returns:
        ndarray: The converted BGR image. The output image has the same type
        and range as input image.
    """
    src_dtype = img.dtype
    # Work on float32 values scaled back to [0, 255].
    ycbcr = _convert_input_type_range(img) * 255
    # One row per input channel (Y, Cb, Cr), one column per output
    # channel (B, G, R).
    inverse = [[0.00456621, 0.00456621, 0.00456621],
               [0.00791071, -0.00153632, 0],
               [0, -0.00318811, 0.00625893]]
    offsets = [-276.836, 135.576, -222.921]
    converted = np.matmul(ycbcr, inverse) * 255.0 + offsets
    return _convert_output_type_range(converted, src_dtype)


def convert_color_factory(src, dst):
    """Build a single-argument function converting images from src to dst.

    The OpenCV conversion code ``COLOR_<SRC>2<DST>`` is resolved once, when
    the factory is called, and the returned function receives a generated
    docstring naming both colorspaces.

    Args:
        src (str): The source colorspace, e.g., 'bgr'.
        dst (str): The destination colorspace, e.g., 'rgb'.

    Returns:
        callable: A function ``convert_color(img)`` applying the conversion.
    """
    # Looked up eagerly so an unknown colorspace pair fails at creation time.
    cvt_code = getattr(cv2, f'COLOR_{src.upper()}2{dst.upper()}')

    def convert_color(img):
        return cv2.cvtColor(img, cvt_code)

    convert_color.__doc__ = f"""Convert a {src.upper()} image to {dst.upper()}
        image.

    Args:
        img (ndarray or str): The input image.

    Returns:
        ndarray: The converted {dst.upper()} image.
    """

    return convert_color


# Module-level converters generated by ``convert_color_factory``; each one
# wraps ``cv2.cvtColor`` with the matching ``cv2.COLOR_<SRC>2<DST>`` code.
bgr2rgb = convert_color_factory('bgr', 'rgb')

rgb2bgr = convert_color_factory('rgb', 'bgr')

bgr2hsv = convert_color_factory('bgr', 'hsv')

hsv2bgr = convert_color_factory('hsv', 'bgr')

bgr2hls = convert_color_factory('bgr', 'hls')

hls2bgr = convert_color_factory('hls', 'bgr')


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/image/geometric.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import numbers

import cv2
import numpy as np

from ..utils import to_2tuple
from .io import imread_backend

try:
    from PIL import Image
except ImportError:
    Image = None


def _scale_size(size, scale):
    """Rescale a size by a ratio.

    Args:
        size (tuple[int]): (w, h).
        scale (float | tuple(float)): Scaling factor.

    Returns:
        tuple[int]: scaled size.
    """
    if isinstance(scale, (float, int)):
        scale = (scale, scale)
    w, h = size
    return int(w * float(scale[0]) + 0.5), int(h * float(scale[1]) + 0.5)


# Interpolation name -> OpenCV flag, used by the 'cv2' resize backend.
cv2_interp_codes = {
    'nearest': cv2.INTER_NEAREST,
    'bilinear': cv2.INTER_LINEAR,
    'bicubic': cv2.INTER_CUBIC,
    'area': cv2.INTER_AREA,
    'lanczos': cv2.INTER_LANCZOS4
}

# Interpolation name -> Pillow filter, used by the 'pillow' resize backend.
# Only defined when the PIL import at the top of the module succeeded.
if Image is not None:
    pillow_interp_codes = {
        'nearest': Image.NEAREST,
        'bilinear': Image.BILINEAR,
        'bicubic': Image.BICUBIC,
        'box': Image.BOX,
        'lanczos': Image.LANCZOS,
        'hamming': Image.HAMMING
    }


def imresize(img,
             size,
             return_scale=False,
             interpolation='bilinear',
             out=None,
             backend=None):
    """Resize an image to the requested (w, h) size.

    Args:
        img (ndarray): The input image.
        size (tuple[int]): Target size (w, h).
        return_scale (bool): Whether to return `w_scale` and `h_scale`.
        interpolation (str): Interpolation method. It must be a key of
            ``cv2_interp_codes`` for the 'cv2' backend or of
            ``pillow_interp_codes`` for the 'pillow' backend.
        out (ndarray): The output destination. Only passed on by the 'cv2'
            backend.
        backend (str | None): The image resize backend type. Options are `cv2`,
            `pillow`, `None`. If backend is None, the global imread_backend
            specified by ``mmcv.use_backend()`` will be used. Default: None.

    Returns:
        tuple | ndarray: (`resized_img`, `w_scale`, `h_scale`) or
        `resized_img`.

    Raises:
        ValueError: If the selected backend is neither 'cv2' nor 'pillow'.
    """
    src_h, src_w = img.shape[:2]
    chosen_backend = imread_backend if backend is None else backend
    if chosen_backend not in ['cv2', 'pillow']:
        raise ValueError(
            f'backend: {chosen_backend} is not supported for resize.'
            f"Supported backends are 'cv2', 'pillow'")

    if chosen_backend == 'pillow':
        assert img.dtype == np.uint8, 'Pillow backend only support uint8 type'
        # Round-trip through a PIL image to use Pillow's resampling.
        resampling = pillow_interp_codes[interpolation]
        resized = np.array(Image.fromarray(img).resize(size, resampling))
    else:
        resized = cv2.resize(
            img, size, dst=out, interpolation=cv2_interp_codes[interpolation])

    if return_scale:
        # Ratio of the target size to the source size, per axis.
        return resized, size[0] / src_w, size[1] / src_h
    return resized


def imresize_to_multiple(img,
                         divisor,
                         size=None,
                         scale_factor=None,
                         keep_ratio=False,
                         return_scale=False,
                         interpolation='bilinear',
                         out=None,
                         backend=None):
    """Resize an image to a size or by a scale factor, with the final size
    rounded up to the nearest multiple of ``divisor``.

    Args:
        img (ndarray): The input image.
        divisor (int | tuple): Resized image size will be a multiple of
            divisor. If divisor is a tuple, divisor should be
            (w_divisor, h_divisor).
        size (None | int | tuple[int]): Target size (w, h). Default: None.
        scale_factor (None | float | tuple[float]): Multiplier for spatial
            size. Should match input size if it is a tuple and the 2D style is
            (w_scale_factor, h_scale_factor). Default: None.
        keep_ratio (bool): Whether to keep the aspect ratio when resizing the
            image. Only used together with ``size``. Default: False.
        return_scale (bool): Whether to return `w_scale` and `h_scale`.
        interpolation (str): Interpolation method, see :func:`imresize`.
        out (ndarray): The output destination.
        backend (str | None): The image resize backend type. Options are `cv2`,
            `pillow`, `None`. If backend is None, the global imread_backend
            specified by ``mmcv.use_backend()`` will be used. Default: None.

    Returns:
        tuple | ndarray: (`resized_img`, `w_scale`, `h_scale`) or
        `resized_img`.

    Raises:
        ValueError: If both or neither of ``size`` and ``scale_factor`` are
            given.
    """
    src_h, src_w = img.shape[:2]
    size_given = size is not None
    factor_given = scale_factor is not None
    if size_given and factor_given:
        raise ValueError('only one of size or scale_factor should be defined')
    if not size_given and not factor_given:
        raise ValueError('one of size or scale_factor should be defined')

    if size_given:
        target = to_2tuple(size)
        if keep_ratio:
            target = rescale_size((src_w, src_h), target, return_scale=False)
    else:
        target = _scale_size((src_w, src_h), scale_factor)

    # Round every dimension up to the next multiple of its divisor.
    divisor = to_2tuple(divisor)
    target = tuple(
        int(np.ceil(dim / div)) * div for dim, div in zip(target, divisor))

    resized_img, w_scale, h_scale = imresize(
        img,
        target,
        return_scale=True,
        interpolation=interpolation,
        out=out,
        backend=backend)
    if not return_scale:
        return resized_img
    return resized_img, w_scale, h_scale


def imresize_like(img,
                  dst_img,
                  return_scale=False,
                  interpolation='bilinear',
                  backend=None):
    """Resize an image so that it has the same height and width as another
    image.

    Args:
        img (ndarray): The input image.
        dst_img (ndarray): The target image.
        return_scale (bool): Whether to return `w_scale` and `h_scale`.
        interpolation (str): Same as :func:`imresize`.
        backend (str | None): Same as :func:`imresize`.

    Returns:
        tuple or ndarray: (`resized_img`, `w_scale`, `h_scale`) or
        `resized_img`.
    """
    target_h, target_w = dst_img.shape[:2]
    # ``imresize`` expects the size as (w, h).
    return imresize(
        img, (target_w, target_h),
        return_scale=return_scale,
        interpolation=interpolation,
        backend=backend)


def rescale_size(old_size, scale, return_scale=False):
    """Compute the size an image would have after rescaling.

    Args:
        old_size (tuple[int]): The current size (w, h) of the image.
        scale (float | tuple[int]): The scaling factor or maximum size.
            A number is used directly as the scaling factor and must be
            positive. A tuple is treated as a bounding size: its larger
            value bounds the long edge and its smaller value bounds the
            short edge, and the largest factor satisfying both is used.
        return_scale (bool): Whether to return the scaling factor besides the
            rescaled image size.

    Returns:
        tuple[int] | tuple: The new size, or ``(new_size, scale_factor)``
        when ``return_scale`` is True.

    Raises:
        ValueError: If ``scale`` is a non-positive number.
        TypeError: If ``scale`` is neither a number nor a tuple.
    """
    width, height = old_size

    if isinstance(scale, (float, int)):
        if scale <= 0:
            raise ValueError(f'Invalid scale {scale}, must be positive.')
        factor = scale
    elif isinstance(scale, tuple):
        long_bound, short_bound = max(scale), min(scale)
        # The tighter of the two edge constraints decides the factor.
        factor = min(long_bound / max(height, width),
                     short_bound / min(height, width))
    else:
        raise TypeError(
            f'Scale must be a number or tuple of int, but got {type(scale)}')

    rescaled = _scale_size((width, height), factor)
    return (rescaled, factor) if return_scale else rescaled


def imrescale(img,
              scale,
              return_scale=False,
              interpolation='bilinear',
              backend=None):
    """Resize an image while preserving its aspect ratio.

    The target size is computed by :func:`rescale_size` and the resizing
    itself is performed by :func:`imresize`.

    Args:
        img (ndarray): The input image.
        scale (float | tuple[int]): The scaling factor or maximum size.
            If it is a float number, then the image will be rescaled by this
            factor, else if it is a tuple of 2 integers, then the image will
            be rescaled as large as possible within the scale.
        return_scale (bool): Whether to return the scaling factor besides the
            rescaled image.
        interpolation (str): Same as :func:`resize`.
        backend (str | None): Same as :func:`resize`.

    Returns:
        ndarray | tuple: The rescaled image, or ``(rescaled_img,
        scale_factor)`` when ``return_scale`` is True.
    """
    height, width = img.shape[:2]
    target_size, factor = rescale_size(
        (width, height), scale, return_scale=True)
    result = imresize(
        img, target_size, interpolation=interpolation, backend=backend)
    return (result, factor) if return_scale else result


def imflip(img, direction='horizontal'):
    """Return a flipped view of an image.

    Args:
        img (ndarray): Image to be flipped.
        direction (str): The flip direction, one of "horizontal" (flip along
            axis 1), "vertical" (flip along axis 0) or "diagonal" (flip along
            both axes).

    Returns:
        ndarray: The flipped image, as produced by ``np.flip``.
    """
    assert direction in ['horizontal', 'vertical', 'diagonal']
    if direction == 'horizontal':
        flip_axis = 1
    elif direction == 'vertical':
        flip_axis = 0
    else:
        flip_axis = (0, 1)
    return np.flip(img, axis=flip_axis)


def imflip_(img, direction='horizontal'):
    """Flip an image in place using ``cv2.flip``.

    The input array is passed to OpenCV as both source and destination.

    Args:
        img (ndarray): Image to be flipped.
        direction (str): The flip direction, either "horizontal" or
            "vertical" or "diagonal".

    Returns:
        ndarray: The flipped image (inplace).
    """
    assert direction in ['horizontal', 'vertical', 'diagonal']
    # OpenCV flip codes: 1 -> horizontal, 0 -> vertical, -1 -> both axes.
    if direction == 'horizontal':
        flip_code = 1
    elif direction == 'vertical':
        flip_code = 0
    else:
        flip_code = -1
    return cv2.flip(img, flip_code, img)


def imrotate(img,
             angle,
             center=None,
             scale=1.0,
             border_value=0,
             interpolation='bilinear',
             auto_bound=False):
    """Rotate an image around a center point.

    Args:
        img (ndarray): Image to be rotated.
        angle (float): Rotation angle in degrees, positive values mean
            clockwise rotation.
        center (tuple[float], optional): Center point (w, h) of the rotation in
            the source image. If not specified, the center of the image will be
            used.
        scale (float): Isotropic scale factor.
        border_value (int): Border value.
        interpolation (str): Same as :func:`resize`.
        auto_bound (bool): Whether to adjust the image size to cover the whole
            rotated image. Cannot be combined with an explicit ``center``.

    Returns:
        ndarray: The rotated image.

    Raises:
        ValueError: If both ``center`` and ``auto_bound`` are given.
    """
    if center is not None and auto_bound:
        raise ValueError('`auto_bound` conflicts with `center`')

    height, width = img.shape[:2]
    if center is None:
        center = ((width - 1) * 0.5, (height - 1) * 0.5)
    assert isinstance(center, tuple)

    # The angle is negated before being handed to OpenCV.
    rot_mat = cv2.getRotationMatrix2D(center, -angle, scale)
    if auto_bound:
        abs_cos = np.abs(rot_mat[0, 0])
        abs_sin = np.abs(rot_mat[0, 1])
        bound_w = height * abs_sin + width * abs_cos
        bound_h = height * abs_cos + width * abs_sin
        # Shift the translation so the rotated content is centered in the
        # enlarged canvas.
        rot_mat[0, 2] += (bound_w - width) * 0.5
        rot_mat[1, 2] += (bound_h - height) * 0.5
        width = int(np.round(bound_w))
        height = int(np.round(bound_h))

    return cv2.warpAffine(
        img,
        rot_mat, (width, height),
        flags=cv2_interp_codes[interpolation],
        borderValue=border_value)


def bbox_clip(bboxes, img_shape):
    """Clamp bounding-box coordinates to the extent of an image.

    Even-indexed entries of the last axis are limited to
    ``[0, img_shape[1] - 1]`` and odd-indexed entries to
    ``[0, img_shape[0] - 1]``.

    Args:
        bboxes (ndarray): Shape (..., 4*k)
        img_shape (tuple[int]): (height, width) of the image.

    Returns:
        ndarray: Clipped bboxes.
    """
    num_coords = bboxes.shape[-1]
    assert num_coords % 4 == 0

    # Per-coordinate upper bound, laid out as (x, y, x, y, ...).
    upper = np.empty(num_coords, dtype=bboxes.dtype)
    upper[0::2] = img_shape[1] - 1
    upper[1::2] = img_shape[0] - 1

    capped = np.minimum(bboxes, upper)
    return np.maximum(capped, 0)


def bbox_scaling(bboxes, scale, clip_shape=None):
    """Scale bounding boxes about their centers.

    Box width and height are computed inclusively (``x2 - x1 + 1``) and each
    side is moved outwards by half of the size change.

    Args:
        bboxes (ndarray): Shape(..., 4).
        scale (float): Scaling factor.
        clip_shape (tuple[int], optional): If specified, bboxes that exceed the
            boundary will be clipped according to the given shape (h, w).

    Returns:
        ndarray: Scaled bboxes.
    """
    if float(scale) == 1.0:
        # Nothing to scale; still hand back a fresh array.
        result = bboxes.copy()
    else:
        box_w = bboxes[..., 2] - bboxes[..., 0] + 1
        box_h = bboxes[..., 3] - bboxes[..., 1] + 1
        half_dw = (box_w * (scale - 1)) * 0.5
        half_dh = (box_h * (scale - 1)) * 0.5
        offsets = np.stack((-half_dw, -half_dh, half_dw, half_dh), axis=-1)
        result = bboxes + offsets

    if clip_shape is None:
        return result
    return bbox_clip(result, clip_shape)


def imcrop(img, bboxes, scale=1.0, pad_fill=None):
    """Crop one or more patches out of an image.

    The boxes are first scaled with :func:`bbox_scaling`, cast to int32 and
    clipped to the image with :func:`bbox_clip`. Without ``pad_fill`` each
    patch is the clipped region; with ``pad_fill`` each patch has the size of
    the scaled (unclipped) box and the area outside the image is filled.

    Args:
        img (ndarray): Image to be cropped.
        bboxes (ndarray): Shape (k, 4) or (4, ), location of cropped bboxes.
        scale (float, optional): Scale ratio of bboxes, the default value
            1.0 means no padding.
        pad_fill (Number | list[Number]): Value to be filled for padding.
            Default: None, which means no padding.

    Returns:
        list[ndarray] | ndarray: The cropped image patches. A single patch is
        returned when ``bboxes`` is one-dimensional.
    """
    num_channels = 1 if img.ndim == 2 else img.shape[2]
    if pad_fill is not None:
        if isinstance(pad_fill, (int, float)):
            # Broadcast a scalar fill value to one entry per channel.
            pad_fill = [pad_fill] * num_channels
        assert len(pad_fill) == num_channels

    single_box = bboxes.ndim == 1
    boxes_2d = bboxes[None, ...] if single_box else bboxes
    scaled = bbox_scaling(boxes_2d, scale).astype(np.int32)
    clipped = bbox_clip(scaled, img.shape)

    patches = []
    for idx in range(clipped.shape[0]):
        x1, y1, x2, y2 = tuple(clipped[idx, :])
        if pad_fill is None:
            patches.append(img[y1:y2 + 1, x1:x2 + 1, ...])
            continue

        sx1, sy1, sx2, sy2 = tuple(scaled[idx, :])
        canvas_shape = (sy2 - sy1 + 1, sx2 - sx1 + 1)
        if num_channels != 1:
            canvas_shape = canvas_shape + (num_channels, )
        canvas = np.array(
            pad_fill, dtype=img.dtype) * np.ones(
                canvas_shape, dtype=img.dtype)
        # Where the in-image region starts inside the padded canvas.
        col_start = 0 if sx1 >= 0 else -sx1
        row_start = 0 if sy1 >= 0 else -sy1
        crop_w = x2 - x1 + 1
        crop_h = y2 - y1 + 1
        canvas[row_start:row_start + crop_h, col_start:col_start + crop_w,
               ...] = img[y1:y1 + crop_h, x1:x1 + crop_w, ...]
        patches.append(canvas)

    return patches[0] if single_box else patches


def impad(img,
          *,
          shape=None,
          padding=None,
          pad_val=0,
          padding_mode='constant'):
    """Pad the given image to a certain shape or pad on all sides with
    specified padding mode and padding value.

    Args:
        img (ndarray): Image to be padded.
        shape (tuple[int]): Expected padding shape (h, w). Padding is added
            on the right and bottom only. If the target is smaller than the
            image along an axis, no padding is applied on that axis (the
            image is never cropped). Default: None.
        padding (int or tuple[int]): Padding on each border. If a single int is
            provided this is used to pad all borders. If tuple of length 2 is
            provided this is the padding on left/right and top/bottom
            respectively. If a tuple of length 4 is provided this is the
            padding for the left, top, right and bottom borders respectively.
            Default: None. Note that `shape` and `padding` can not be both
            set.
        pad_val (Number | Sequence[Number]): Values to be filled in padding
            areas when padding_mode is 'constant'. Default: 0.
        padding_mode (str): Type of padding. Should be: constant, edge,
            reflect or symmetric. Default: constant.

            - constant: pads with a constant value, this value is specified
              with pad_val.
            - edge: pads with the last value at the edge of the image.
            - reflect: pads with reflection of image without repeating the last
              value on the edge. For example, padding [1, 2, 3, 4] with 2
              elements on both sides in reflect mode will result in
              [3, 2, 1, 2, 3, 4, 3, 2].
            - symmetric: pads with reflection of image repeating the last value
              on the edge. For example, padding [1, 2, 3, 4] with 2 elements on
              both sides in symmetric mode will result in
              [2, 1, 1, 2, 3, 4, 4, 3]

    Returns:
        ndarray: The padded image.

    Raises:
        TypeError: If ``pad_val`` is neither a tuple nor a number.
        ValueError: If ``padding`` is not a number or a 2/4-element tuple.
    """

    # Exactly one of `shape` and `padding` must be provided.
    assert (shape is not None) ^ (padding is not None)
    if shape is not None:
        # Clamp at zero: a target edge shorter than the image means "no
        # padding" on that axis instead of a negative border width being
        # handed to OpenCV.
        pad_right = max(shape[1] - img.shape[1], 0)
        pad_bottom = max(shape[0] - img.shape[0], 0)
        padding = (0, 0, pad_right, pad_bottom)

    # check pad_val
    if isinstance(pad_val, tuple):
        assert len(pad_val) == img.shape[-1]
    elif not isinstance(pad_val, numbers.Number):
        raise TypeError('pad_val must be a int or a tuple. '
                        f'But received {type(pad_val)}')

    # check padding and normalize it to (left, top, right, bottom)
    if isinstance(padding, tuple) and len(padding) in [2, 4]:
        if len(padding) == 2:
            padding = (padding[0], padding[1], padding[0], padding[1])
    elif isinstance(padding, numbers.Number):
        padding = (padding, padding, padding, padding)
    else:
        raise ValueError('Padding must be a int or a 2, or 4 element tuple.'
                         f'But received {padding}')

    # check padding mode
    assert padding_mode in ['constant', 'edge', 'reflect', 'symmetric']

    border_type = {
        'constant': cv2.BORDER_CONSTANT,
        'edge': cv2.BORDER_REPLICATE,
        'reflect': cv2.BORDER_REFLECT_101,
        'symmetric': cv2.BORDER_REFLECT
    }
    # copyMakeBorder takes the borders in (top, bottom, left, right) order.
    img = cv2.copyMakeBorder(
        img,
        padding[1],
        padding[3],
        padding[0],
        padding[2],
        border_type[padding_mode],
        value=pad_val)

    return img


def impad_to_multiple(img, divisor, pad_val=0):
    """Pad an image so that both edges become multiples of ``divisor``.

    Height and width are each rounded up to the next multiple of ``divisor``
    and the image is padded to that shape via :func:`impad`.

    Args:
        img (ndarray): Image to be padded.
        divisor (int): Padded image edges will be a multiple of divisor.
        pad_val (Number | Sequence[Number]): Same as :func:`impad`.

    Returns:
        ndarray: The padded image.
    """
    target_h = int(np.ceil(img.shape[0] / divisor)) * divisor
    target_w = int(np.ceil(img.shape[1] / divisor)) * divisor
    target_shape = (target_h, target_w)
    return impad(img, shape=target_shape, pad_val=pad_val)


def cutout(img, shape, pad_val=0):
    """Fill a randomly placed rectangle of an image with a constant value.

    The input image is left untouched; the result is a modified copy.

    NOTE(review): ``np.random.uniform`` is called with a single positional
    argument, which numpy binds to ``low`` (``high`` stays at its default) —
    confirm that this is the intended sampling range for the cut center.

    Args:
        img (ndarray): Image to be cutout.
        shape (int | tuple[int]): Expected cutout shape (h, w). If given as a
            int, the value will be used for both h and w.
        pad_val (int | float | tuple[int | float]): Values to be filled in the
            cut area. Defaults to 0.

    Returns:
        ndarray: The cutout image.

    Raises:
        TypeError: If ``pad_val`` is not an int, float or tuple.
    """

    num_channels = 1 if img.ndim == 2 else img.shape[2]

    if isinstance(shape, int):
        cut_h = cut_w = shape
    else:
        assert isinstance(shape, tuple) and len(shape) == 2, \
            f'shape must be a int or a tuple with length 2, but got type ' \
            f'{type(shape)} instead.'
        cut_h, cut_w = shape

    if isinstance(pad_val, (int, float)):
        pad_val = (pad_val, ) * num_channels
    elif isinstance(pad_val, tuple):
        assert len(pad_val) == num_channels, \
            'Expected the num of elements in tuple equals the channels' \
            'of input image. Found {} vs {}'.format(
                len(pad_val), num_channels)
    else:
        raise TypeError(f'Invalid type {type(pad_val)} for `pad_val`')

    img_h, img_w = img.shape[:2]
    # Draw the row coordinate first, then the column coordinate.
    center_y = np.random.uniform(img_h)
    center_x = np.random.uniform(img_w)

    top = int(max(0, center_y - cut_h / 2.))
    left = int(max(0, center_x - cut_w / 2.))
    bottom = min(img_h, top + cut_h)
    right = min(img_w, left + cut_w)

    fill_shape = (bottom - top, right - left)
    if img.ndim != 2:
        fill_shape = fill_shape + (num_channels, )

    fill = np.array(
        pad_val, dtype=img.dtype) * np.ones(
            fill_shape, dtype=img.dtype)
    result = img.copy()
    result[top:bottom, left:right, ...] = fill

    return result


def _get_shear_matrix(magnitude, direction='horizontal'):
    """Generate the shear matrix for transformation.

    Args:
        magnitude (int | float): The magnitude used for shear.
        direction (str): The flip direction, either "horizontal"
            or "vertical".

    Returns:
        ndarray: The shear matrix with dtype float32.
    """
    if direction == 'horizontal':
        shear_matrix = np.float32([[1, magnitude, 0], [0, 1, 0]])
    elif direction == 'vertical':
        shear_matrix = np.float32([[1, 0, 0], [magnitude, 1, 0]])
    return shear_matrix


def imshear(img,
            magnitude,
            direction='horizontal',
            border_value=0,
            interpolation='bilinear'):
    """Apply a shear transform to an image.

    The output keeps the height and width of the input.

    Args:
        img (ndarray): Image to be sheared with format (h, w)
            or (h, w, c).
        magnitude (int | float): The magnitude used for shear.
        direction (str): The shear direction, either "horizontal"
            or "vertical".
        border_value (int | tuple[int]): Value used in case of a
            constant border. An int is repeated once per channel; a tuple
            must have one entry per channel.
        interpolation (str): Same as :func:`resize`.

    Returns:
        ndarray: The sheared image.

    Raises:
        ValueError: If ``border_value`` is neither an int nor a tuple.
    """
    assert direction in ['horizontal',
                         'vertical'], f'Invalid direction: {direction}'
    img_h, img_w = img.shape[:2]
    if img.ndim == 2:
        channels = 1
    elif img.ndim == 3:
        channels = img.shape[-1]

    if isinstance(border_value, int):
        border_value = (border_value, ) * channels
    elif isinstance(border_value, tuple):
        assert len(border_value) == channels, \
            'Expected the num of elements in tuple equals the channels' \
            'of input image. Found {} vs {}'.format(
                len(border_value), channels)
    else:
        raise ValueError(
            f'Invalid type {type(border_value)} for `border_value`')

    warp_matrix = _get_shear_matrix(magnitude, direction)
    # `cv2.warpAffine` raises TypeError when `borderValue` has more than 3
    # elements (e.g. masks with many channels), so only the first 3 values
    # are forwarded.
    return cv2.warpAffine(
        img,
        warp_matrix,
        (img_w, img_h),
        borderValue=border_value[:3],
        flags=cv2_interp_codes[interpolation])


def _get_translate_matrix(offset, direction='horizontal'):
    """Generate the translate matrix.

    Args:
        offset (int | float): The offset used for translate.
        direction (str): The translate direction, either
            "horizontal" or "vertical".

    Returns:
        ndarray: The translate matrix with dtype float32.
    """
    if direction == 'horizontal':
        translate_matrix = np.float32([[1, 0, offset], [0, 1, 0]])
    elif direction == 'vertical':
        translate_matrix = np.float32([[1, 0, 0], [0, 1, offset]])
    return translate_matrix


def imtranslate(img,
                offset,
                direction='horizontal',
                border_value=0,
                interpolation='bilinear'):
    """Shift an image along one axis.

    The output keeps the height and width of the input.

    Args:
        img (ndarray): Image to be translated with format
            (h, w) or (h, w, c).
        offset (int | float): The offset used for translate.
        direction (str): The translate direction, either "horizontal"
            or "vertical".
        border_value (int | tuple[int]): Value used in case of a
            constant border. An int is repeated once per channel; a tuple
            must have one entry per channel.
        interpolation (str): Same as :func:`resize`.

    Returns:
        ndarray: The translated image.

    Raises:
        ValueError: If ``border_value`` is neither an int nor a tuple.
    """
    assert direction in ['horizontal',
                         'vertical'], f'Invalid direction: {direction}'
    img_h, img_w = img.shape[:2]
    if img.ndim == 2:
        channels = 1
    elif img.ndim == 3:
        channels = img.shape[-1]

    if isinstance(border_value, int):
        border_value = (border_value, ) * channels
    elif isinstance(border_value, tuple):
        assert len(border_value) == channels, \
            'Expected the num of elements in tuple equals the channels' \
            'of input image. Found {} vs {}'.format(
                len(border_value), channels)
    else:
        raise ValueError(
            f'Invalid type {type(border_value)} for `border_value`.')

    warp_matrix = _get_translate_matrix(offset, direction)
    # `cv2.warpAffine` raises TypeError when `borderValue` has more than 3
    # elements (e.g. masks with many channels), so only the first 3 values
    # are forwarded.
    return cv2.warpAffine(
        img,
        warp_matrix,
        (img_w, img_h),
        borderValue=border_value[:3],
        flags=cv2_interp_codes[interpolation])


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/image/io.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import io
import os.path as osp
import warnings
from pathlib import Path

import cv2
import numpy as np
from cv2 import (IMREAD_COLOR, IMREAD_GRAYSCALE, IMREAD_IGNORE_ORIENTATION,
                 IMREAD_UNCHANGED)

from mmcv.fileio import FileClient
from mmcv.utils import is_filepath, is_str

try:
    from turbojpeg import TJCS_RGB, TJPF_BGR, TJPF_GRAY, TurboJPEG
except ImportError:
    TJCS_RGB = TJPF_GRAY = TJPF_BGR = TurboJPEG = None

try:
    from PIL import Image, ImageOps
except ImportError:
    Image = None

try:
    import tifffile
except ImportError:
    tifffile = None

# Shared TurboJPEG decoder instance; stays None until `use_backend` creates
# it for the 'turbojpeg' backend.
jpeg = None
# Backend names accepted by `use_backend` and `imfrombytes`.
supported_backends = ['cv2', 'turbojpeg', 'pillow', 'tifffile']

# Maps the string flags accepted by the reading functions to the OpenCV
# imread constants used by the cv2 decoding path.
imread_flags = {
    'color': IMREAD_COLOR,
    'grayscale': IMREAD_GRAYSCALE,
    'unchanged': IMREAD_UNCHANGED,
    'color_ignore_orientation': IMREAD_IGNORE_ORIENTATION | IMREAD_COLOR,
    'grayscale_ignore_orientation':
    IMREAD_IGNORE_ORIENTATION | IMREAD_GRAYSCALE
}

# Backend used when a caller passes `backend=None`; changed by `use_backend`.
imread_backend = 'cv2'


def use_backend(backend):
    """Set the module-wide default backend for image decoding.

    The global ``imread_backend`` is updated before the availability of the
    backing library is checked. For `turbojpeg` a shared decoder instance is
    created on first use.

    Args:
        backend (str): The image decoding backend type. Options are `cv2`,
        `pillow`, `turbojpeg` (see https://github.com/lilohuang/PyTurboJPEG)
        and `tifffile`. `turbojpeg` is faster but it only supports `.jpeg`
        file format.

    Raises:
        ImportError: If the library required by the chosen backend is not
            installed.
    """
    global imread_backend, jpeg

    assert backend in supported_backends
    imread_backend = backend

    if backend == 'turbojpeg':
        if TurboJPEG is None:
            raise ImportError('`PyTurboJPEG` is not installed')
        if jpeg is None:
            jpeg = TurboJPEG()
    elif backend == 'pillow':
        if Image is None:
            raise ImportError('`Pillow` is not installed')
    elif backend == 'tifffile':
        if tifffile is None:
            raise ImportError('`tifffile` is not installed')


def _jpegflag(flag='color', channel_order='bgr'):
    channel_order = channel_order.lower()
    if channel_order not in ['rgb', 'bgr']:
        raise ValueError('channel order must be either "rgb" or "bgr"')

    if flag == 'color':
        if channel_order == 'bgr':
            return TJPF_BGR
        elif channel_order == 'rgb':
            return TJCS_RGB
    elif flag == 'grayscale':
        return TJPF_GRAY
    else:
        raise ValueError('flag must be "color" or "grayscale"')


def _pillow2array(img, flag='color', channel_order='bgr'):
    """Convert a pillow image to numpy array.

    Args:
        img (:obj:`PIL.Image.Image`): The image loaded using PIL
        flag (str): Flags specifying the color type of a loaded image,
            candidates are 'color', 'grayscale' and 'unchanged'.
            Default to 'color'.
        channel_order (str): The channel order of the output image array,
            candidates are 'bgr' and 'rgb'. Default to 'bgr'.

    Returns:
        np.ndarray: The converted numpy array
    """
    channel_order = channel_order.lower()
    if channel_order not in ['rgb', 'bgr']:
        raise ValueError('channel order must be either "rgb" or "bgr"')

    if flag == 'unchanged':
        array = np.array(img)
        if array.ndim >= 3 and array.shape[2] >= 3:  # color image
            array[:, :, :3] = array[:, :, (2, 1, 0)]  # RGB to BGR
    else:
        # Handle exif orientation tag
        if flag in ['color', 'grayscale']:
            img = ImageOps.exif_transpose(img)
        # If the image mode is not 'RGB', convert it to 'RGB' first.
        if img.mode != 'RGB':
            if img.mode != 'LA':
                # Most formats except 'LA' can be directly converted to RGB
                img = img.convert('RGB')
            else:
                # When the mode is 'LA', the default conversion will fill in
                #  the canvas with black, which sometimes shadows black objects
                #  in the foreground.
                #
                # Therefore, a random color (124, 117, 104) is used for canvas
                img_rgba = img.convert('RGBA')
                img = Image.new('RGB', img_rgba.size, (124, 117, 104))
                img.paste(img_rgba, mask=img_rgba.split()[3])  # 3 is alpha
        if flag in ['color', 'color_ignore_orientation']:
            array = np.array(img)
            if channel_order != 'rgb':
                array = array[:, :, ::-1]  # RGB to BGR
        elif flag in ['grayscale', 'grayscale_ignore_orientation']:
            img = img.convert('L')
            array = np.array(img)
        else:
            raise ValueError(
                'flag must be "color", "grayscale", "unchanged", '
                f'"color_ignore_orientation" or "grayscale_ignore_orientation"'
                f' but got {flag}')
    return array


def imread(img_or_path,
           flag='color',
           channel_order='bgr',
           backend=None,
           file_client_args=None):
    """Read an image.

    Note:
        In v1.4.1 and later, add `file_client_args` parameters.

    Args:
        img_or_path (ndarray or str or Path): Either a numpy array or str or
            pathlib.Path. If it is a numpy array (loaded image), then
            it will be returned as is. A ``Path`` is converted to ``str``
            before further handling.
        flag (str): Flags specifying the color type of a loaded image,
            candidates are `color`, `grayscale`, `unchanged`,
            `color_ignore_orientation` and `grayscale_ignore_orientation`.
            By default, `cv2` and `pillow` backend would rotate the image
            according to its EXIF info unless called with `unchanged` or
            `*_ignore_orientation` flags. `turbojpeg` and `tifffile` backend
            always ignore image's EXIF info regardless of the flag.
            The `turbojpeg` backend only supports `color` and `grayscale`.
        channel_order (str): Order of channel, candidates are `bgr` and `rgb`.
        backend (str | None): The image decoding backend type. Options are
            `cv2`, `pillow`, `turbojpeg`, `tifffile`, `None`.
            If backend is None, the global imread_backend specified by
            ``mmcv.use_backend()`` will be used. Default: None.
        file_client_args (dict | None): Arguments to instantiate a
            FileClient. See :class:`mmcv.fileio.FileClient` for details.
            Default: None.

    Returns:
        ndarray: Loaded image array.

    Raises:
        TypeError: If ``img_or_path`` is not an ndarray, str or Path.

    Examples:
        >>> import mmcv
        >>> img_path = '/path/to/img.jpg'
        >>> img = mmcv.imread(img_path)
        >>> img = mmcv.imread(img_path, flag='color', channel_order='rgb',
        ...     backend='cv2')
        >>> img = mmcv.imread(img_path, flag='color', channel_order='bgr',
        ...     backend='pillow')
        >>> s3_img_path = 's3://bucket/img.jpg'
        >>> # infer the file backend by the prefix s3
        >>> img = mmcv.imread(s3_img_path)
        >>> # manually set the file backend petrel
        >>> img = mmcv.imread(s3_img_path, file_client_args={
        ...     'backend': 'petrel'})
        >>> http_img_path = 'http://path/to/img.jpg'
        >>> img = mmcv.imread(http_img_path)
        >>> img = mmcv.imread(http_img_path, file_client_args={
        ...     'backend': 'http'})
    """

    if isinstance(img_or_path, Path):
        img_or_path = str(img_or_path)

    # Already-decoded images are passed through untouched.
    if isinstance(img_or_path, np.ndarray):
        return img_or_path

    if not is_str(img_or_path):
        raise TypeError('"img" must be a numpy array or a str or '
                        'a pathlib.Path object')

    client = FileClient.infer_client(file_client_args, img_or_path)
    raw_bytes = client.get(img_or_path)
    return imfrombytes(raw_bytes, flag, channel_order, backend)


def imfrombytes(content, flag='color', channel_order='bgr', backend=None):
    """Decode an image from raw bytes.

    Args:
        content (bytes): Image bytes got from files or other streams.
        flag (str): Same as :func:`imread`.
        channel_order (str): Same as :func:`imread`. With the `cv2` backend
            the conversion to RGB is only applied for the plain `color`
            flag. The `tifffile` backend does not use it.
        backend (str | None): The image decoding backend type. Options are
            `cv2`, `pillow`, `turbojpeg`, `tifffile`, `None`. If backend is
            None, the global imread_backend specified by ``mmcv.use_backend()``
            will be used. Default: None.

    Returns:
        ndarray: Loaded image array.

    Raises:
        ValueError: If ``backend`` is not one of the supported backends.

    Examples:
        >>> img_path = '/path/to/img.jpg'
        >>> with open(img_path, 'rb') as f:
        >>>     img_buff = f.read()
        >>> img = mmcv.imfrombytes(img_buff)
        >>> img = mmcv.imfrombytes(img_buff, flag='color', channel_order='rgb')
        >>> img = mmcv.imfrombytes(img_buff, backend='pillow')
        >>> img = mmcv.imfrombytes(img_buff, backend='cv2')
    """

    if backend is None:
        backend = imread_backend
    if backend not in supported_backends:
        raise ValueError(
            f'backend: {backend} is not supported. Supported '
            "backends are 'cv2', 'turbojpeg', 'pillow', 'tifffile'")

    if backend == 'turbojpeg':
        decoded = jpeg.decode(content, _jpegflag(flag, channel_order))
        # Drop a trailing singleton channel axis.
        if decoded.shape[-1] == 1:
            decoded = decoded[:, :, 0]
        return decoded

    if backend == 'pillow':
        with io.BytesIO(content) as stream:
            pil_img = Image.open(stream)
            decoded = _pillow2array(pil_img, flag, channel_order)
        return decoded

    if backend == 'tifffile':
        with io.BytesIO(content) as stream:
            decoded = tifffile.imread(stream)
        return decoded

    # Remaining backend: cv2.
    raw = np.frombuffer(content, np.uint8)
    if is_str(flag):
        flag = imread_flags[flag]
    decoded = cv2.imdecode(raw, flag)
    if flag == IMREAD_COLOR and channel_order == 'rgb':
        # Convert in place (the decoded array is also the destination).
        cv2.cvtColor(decoded, cv2.COLOR_BGR2RGB, decoded)
    return decoded


def imwrite(img,
            file_path,
            params=None,
            auto_mkdir=None,
            file_client_args=None):
    """Encode an image and store it through a file client.

    The encoding format is taken from the suffix of ``file_path``.

    Note:
        In v1.4.1 and later, add `file_client_args` parameters.

    Warning:
        The parameter `auto_mkdir` will be deprecated in the future and every
        file clients will make directory automatically.

    Args:
        img (ndarray): Image array to be written.
        file_path (str): Image file path.
        params (None or list): Same as opencv :func:`imwrite` interface.
        auto_mkdir (bool): If the parent folder of `file_path` does not exist,
            whether to create it automatically. It will be deprecated. Any
            value other than None only triggers a warning here.
        file_client_args (dict | None): Arguments to instantiate a
            FileClient. See :class:`mmcv.fileio.FileClient` for details.
            Default: None.

    Returns:
        bool: Successful or not.

    Examples:
        >>> # write to hard disk client
        >>> ret = mmcv.imwrite(img, '/path/to/img.jpg')
        >>> # infer the file backend by the prefix s3
        >>> ret = mmcv.imwrite(img, 's3://bucket/img.jpg')
        >>> # manually set the file backend petrel
        >>> ret = mmcv.imwrite(img, 's3://bucket/img.jpg', file_client_args={
        ...     'backend': 'petrel'})
    """
    assert is_filepath(file_path)
    file_path = str(file_path)

    if auto_mkdir is not None:
        warnings.warn(
            'The parameter `auto_mkdir` will be deprecated in the future and '
            'every file clients will make directory automatically.')

    client = FileClient.infer_client(file_client_args, file_path)
    # The suffix selects the encoder, e.g. '/path/your/img.jpg' -> '.jpg'.
    suffix = osp.splitext(file_path)[-1]
    success, encoded = cv2.imencode(suffix, img, params)
    client.put(encoded.tobytes(), file_path)
    return success


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/image/misc.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import numpy as np

import mmcv

try:
    import torch
except ImportError:
    torch = None


def tensor2imgs(tensor, mean=None, std=None, to_rgb=True):
    """Convert tensor to 3-channel images or 1-channel gray images.

    Args:
        tensor (torch.Tensor): Tensor that contains multiple images, shape (
            N, C, H, W). :math:`C` can be either 3 or 1. Tensors that
            require grad are accepted; they are detached before conversion.
        mean (tuple[float], optional): Mean of images. If None,
            (0, 0, 0) will be used for tensor with 3-channel,
            while (0, ) for tensor with 1-channel. Defaults to None.
        std (tuple[float], optional): Standard deviation of images. If None,
            (1, 1, 1) will be used for tensor with 3-channel,
            while (1, ) for tensor with 1-channel. Defaults to None.
        to_rgb (bool, optional): Whether the tensor was converted to RGB
            format in the first place. If so, convert it back to BGR.
            For the tensor with 1 channel, it must be False. Defaults to True.

    Returns:
        list[np.ndarray]: A list that contains multiple images.

    Raises:
        RuntimeError: If PyTorch is not installed.
    """

    if torch is None:
        raise RuntimeError('pytorch is not installed')
    assert torch.is_tensor(tensor) and tensor.ndim == 4
    channels = tensor.size(1)
    assert channels in [1, 3]
    if mean is None:
        mean = (0, ) * channels
    if std is None:
        std = (1, ) * channels
    assert (channels == len(mean) == len(std) == 3) or \
        (channels == len(mean) == len(std) == 1 and not to_rgb)

    num_imgs = tensor.size(0)
    mean = np.array(mean, dtype=np.float32)
    std = np.array(std, dtype=np.float32)
    imgs = []
    for img_id in range(num_imgs):
        # `detach()` is required because `numpy()` refuses tensors that are
        # part of an autograd graph; it is a no-op for plain tensors.
        img = tensor[img_id, ...].detach().cpu().numpy().transpose(1, 2, 0)
        img = mmcv.imdenormalize(
            img, mean, std, to_bgr=to_rgb).astype(np.uint8)
        imgs.append(np.ascontiguousarray(img))
    return imgs


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/image/photometric.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import cv2
import numpy as np

from ..utils import is_tuple_of
from .colorspace import bgr2gray, gray2bgr


def imnormalize(img, mean, std, to_rgb=True):
    """Return a normalized float32 copy of an image.

    The input is copied and cast to ``np.float32`` first, so the caller's
    array is left untouched; the actual arithmetic is delegated to
    :func:`imnormalize_`.

    Args:
        img (ndarray): Image to be normalized.
        mean (ndarray): The mean to be used for normalize.
        std (ndarray): The std to be used for normalize.
        to_rgb (bool): Whether to convert to rgb.

    Returns:
        ndarray: The normalized image.
    """
    float_copy = img.copy().astype(np.float32)
    normalized = imnormalize_(float_copy, mean, std, to_rgb)
    return normalized


def imnormalize_(img, mean, std, to_rgb=True):
    """Normalize an image in place: ``(img - mean) / std``.

    Args:
        img (ndarray): Image to be normalized. Must not be ``np.uint8``;
            it is overwritten with the result.
        mean (ndarray): The mean to be used for normalize.
        std (ndarray): The std to be used for normalize.
        to_rgb (bool): Whether to convert to rgb (BGR -> RGB) before the
            arithmetic.

    Returns:
        ndarray: The same array object as ``img``, now normalized.
    """
    # The in-place cv2 arithmetic below does not accept uint8 input.
    assert img.dtype != np.uint8
    mean_row = np.float64(mean.reshape(1, -1))
    # Dividing by std is performed as a multiplication by its reciprocal.
    inv_std_row = 1 / np.float64(std.reshape(1, -1))
    if to_rgb:
        cv2.cvtColor(img, cv2.COLOR_BGR2RGB, img)  # writes into img
    cv2.subtract(img, mean_row, img)  # writes into img
    cv2.multiply(img, inv_std_row, img)  # writes into img
    return img


def imdenormalize(img, mean, std, to_bgr=True):
    """Undo a mean/std normalization: ``img * std + mean``.

    Args:
        img (ndarray): Normalized image. Must not be ``np.uint8``. It is
            not modified; the result is a new array.
        mean (ndarray): The mean that was subtracted during normalization.
        std (ndarray): The std that was divided by during normalization.
        to_bgr (bool): Whether to apply an RGB -> BGR conversion after the
            arithmetic.

    Returns:
        ndarray: The denormalized image.
    """
    assert img.dtype != np.uint8
    shift = mean.reshape(1, -1).astype(np.float64)
    scale = std.reshape(1, -1).astype(np.float64)
    # Without a destination argument cv2.multiply allocates the output, so
    # the caller's array is preserved.
    restored = cv2.multiply(img, scale)
    cv2.add(restored, shift, restored)  # writes into restored
    if to_bgr:
        cv2.cvtColor(restored, cv2.COLOR_RGB2BGR, restored)  # in place
    return restored


def iminvert(img):
    """Invert (negate) an image by subtracting it from 255.

    Args:
        img (ndarray): Image to be inverted.

    Returns:
        ndarray: The inverted image.
    """
    # The constant 255 plane is created with the same shape and dtype as
    # the input before the subtraction.
    white = np.full_like(img, 255)
    return white - img


def solarize(img, thr=128):
    """Solarize an image: keep values below ``thr``, invert the others.

    Args:
        img (ndarray): Image to be solarized.
        thr (int): Threshold for solarizing (0 - 255).

    Returns:
        ndarray: The solarized image.
    """
    keep_mask = img < thr
    inverted = 255 - img
    return np.where(keep_mask, img, inverted)


def posterize(img, bits):
    """Posterize an image (reduce the number of bits for each color channel)

    The lowest ``8 - bits`` bits of every value are cleared by shifting
    right and then left again by that amount.

    Args:
        img (ndarray): Image to be posterized.
        bits (int): Number of bits (1 to 8) to use for posterizing.

    Returns:
        ndarray: The posterized image.
    """
    dropped_bits = 8 - bits
    truncated = np.right_shift(img, dropped_bits)
    return np.left_shift(truncated, dropped_bits)


def adjust_color(img, alpha=1, beta=None, gamma=0):
    r"""Blend the source image with its gray version:

    .. math::
        output = img * alpha + gray\_img * beta + gamma

    Args:
        img (ndarray): The input source image.
        alpha (int | float): Weight for the source image. Default 1.
        beta (int | float): Weight for the converted gray image.
            If None, it's assigned the value (1 - `alpha`).
        gamma (int | float): Scalar added to each sum.
            Same as :func:`cv2.addWeighted`. Default 0.

    Returns:
        ndarray: Colored image which has the same size and dtype as input.
    """
    gray = bgr2gray(img)
    # Repeat the single gray plane three times along a new last axis so it
    # can be blended with the 3-channel source.
    gray_3c = np.tile(gray[..., None], [1, 1, 3])
    gray_weight = (1 - alpha) if beta is None else beta
    blended = cv2.addWeighted(img, alpha, gray_3c, gray_weight, gamma)
    if blended.dtype != np.uint8:
        # For a non-uint8 result the values are not guaranteed to stay in
        # [0, 255], so they are clipped explicitly.
        blended = np.clip(blended, 0, 255)
    return blended


def imequalize(img):
    """Equalize the image histogram.

    Each of the three channels is remapped independently through a
    look-up table derived from that channel's cumulative histogram, so
    that the output gray levels are spread more uniformly.

    Args:
        img (ndarray): Image to be equalized.

    Returns:
        ndarray: The equalized image.
    """

    def _equalize_plane(image, channel):
        """Remap one channel through its equalization look-up table."""
        plane = image[:, :, channel]
        counts = np.histogram(plane, 256, (0, 255))[0]
        # The step size is computed from the occupied bins only, leaving
        # out the last occupied bin.
        occupied = counts[counts > 0]
        step = (np.sum(occupied) - occupied[-1]) // 255
        if step:
            # Cumulative histogram, shifted by step // 2 and normalized by
            # step, then shifted right by one entry with a leading 0.
            table = (np.cumsum(counts) + (step // 2)) // step
            table = np.concatenate([[0], table[:-1]], 0)
            # Guard against entries that exceed the valid range.
            table[table > 255] = 255
        else:
            table = np.arange(256)
        # A zero step leaves the channel unchanged; otherwise every pixel
        # is replaced by its table entry.
        return np.where(np.equal(step, 0), plane, table[plane])

    planes = [_equalize_plane(img, channel) for channel in range(3)]
    return np.stack(planes, axis=-1).astype(img.dtype)


def adjust_brightness(img, factor=1.):
    """Adjust image brightness.

    The source image is blended with an all-black image of the same
    shape. A factor of 0.0 gives a black image and a factor of 1.0 gives
    the original image:

    .. math::
        output = img * factor + degenerated * (1 - factor)

    Args:
        img (ndarray): Image to be brightened.
        factor (float): A value controls the enhancement.
            Factor 1.0 returns the original image, lower
            factors mean less color (brightness, contrast,
            etc), and higher values more. Default 1.

    Returns:
        ndarray: The brightened image.
    """
    black = np.zeros_like(img)
    # Both operands are cast to float32 before blending (beta = 1 - factor,
    # gamma = 0); the original notes this is done to get results as close
    # as possible to PIL.ImageEnhance.Brightness.
    blended = cv2.addWeighted(
        img.astype(np.float32), factor, black.astype(np.float32),
        1 - factor, 0)
    return np.clip(blended, 0, 255).astype(img.dtype)


def adjust_contrast(img, factor=1.):
    """Adjust image contrast.

    The source image is blended with a solid image filled with the
    rounded mean gray value of the source. A factor of 0.0 gives that
    solid grey image and a factor of 1.0 gives the original image:

    .. math::
        output = img * factor + degenerated * (1 - factor)

    Args:
        img (ndarray): Image to be contrasted. BGR order.
        factor (float): Same as :func:`mmcv.adjust_brightness`.

    Returns:
        ndarray: The contrasted image.
    """
    gray = bgr2gray(img)
    counts = np.histogram(gray, 256, (0, 255))[0]
    # Mean gray level: sum of gray values over the number of histogram
    # entries, rounded to the nearest integer.
    gray_mean = round(np.sum(gray) / np.sum(counts))
    flat_gray = (np.ones_like(img[..., 0]) * gray_mean).astype(img.dtype)
    flat_bgr = gray2bgr(flat_gray)
    blended = cv2.addWeighted(
        img.astype(np.float32), factor, flat_bgr.astype(np.float32),
        1 - factor, 0)
    return np.clip(blended, 0, 255).astype(img.dtype)


def auto_contrast(img, cutoff=0):
    """Auto adjust image contrast.

    For every channel, ``cutoff`` percent of the darkest and lightest
    pixels are removed from the histogram; the remaining range is then
    stretched linearly so that its darkest level maps to 0 and its
    lightest level maps to 255.

    Args:
        img (ndarray): Image to be contrasted. BGR order.
        cutoff (int | float | tuple): The cutoff percent of the lightest and
            darkest pixels to be removed. If given as tuple, it shall be
            (low, high). Otherwise, the single value will be used for both.
            Defaults to 0.

    Returns:
        ndarray: The contrasted image.
    """

    def _stretch_plane(image, channel, cut):
        """Stretch one channel to the full 0-255 range."""
        plane = image[:, :, channel]
        counts = np.histogram(plane, 256, (0, 255))[0]
        # Trim the requested percentage from both ends of the cumulative
        # histogram, then turn it back into per-bin counts.
        cumulative = np.cumsum(counts)
        total = cumulative[-1]
        lower = total * cut[0] // 100
        upper = total - total * cut[1] // 100
        cumulative = np.clip(cumulative, lower, upper) - lower
        counts = np.concatenate([[cumulative[0]], np.diff(cumulative)], 0)

        occupied = np.nonzero(counts)[0]
        low, high = occupied[0], occupied[-1]
        # Nothing to stretch when at most one level survives the cut.
        if low >= high:
            return plane
        scale = 255.0 / (high - low)
        offset = -low * scale
        table = np.clip(np.arange(256) * scale + offset, 0, 255)
        return table[plane]

    if not isinstance(cutoff, (int, float)):
        assert isinstance(cutoff, tuple), 'cutoff must be of type int, ' \
            f'float or tuple, but got {type(cutoff)} instead.'
    else:
        cutoff = (cutoff, cutoff)
    # Each channel is processed independently and the results are stacked.
    planes = [_stretch_plane(img, channel, cutoff) for channel in range(3)]
    return np.stack(planes, axis=-1).astype(img.dtype)


def adjust_sharpness(img, factor=1., kernel=None):
    """Adjust image sharpness.

    The source image is blended with a filtered ("degenerated") version
    of itself. With the default smoothing kernel, a factor of 0.0 gives
    a blurred image, 1.0 gives the original image and 2.0 gives a
    sharpened image:

    .. math::
        output = img * factor + degenerated * (1 - factor)

    Args:
        img (ndarray): Image to be sharpened. BGR order.
        factor (float): Same as :func:`mmcv.adjust_brightness`.
        kernel (np.ndarray, optional): Filter kernel to be applied on the img
            to obtain the degenerated img. Defaults to None.

    Note:
        No value sanity check is enforced on the kernel set by users. So with
        an inappropriate kernel, the ``adjust_sharpness`` may fail to perform
        the function its name indicates but end up performing whatever
        transform determined by the kernel.

    Returns:
        ndarray: The sharpened image.
    """

    if kernel is None:
        # Default smoothing kernel, adopted from PIL.ImageFilter.SMOOTH.
        kernel = np.array([[1., 1., 1.], [1., 5., 1.], [1., 1., 1.]]) / 13
    assert isinstance(kernel, np.ndarray), \
        f'kernel must be of type np.ndarray, but got {type(kernel)} instead.'
    assert kernel.ndim == 2, \
        f'kernel must have a dimension of 2, but got {kernel.ndim} instead.'

    filtered = cv2.filter2D(img, -1, kernel)
    blended = cv2.addWeighted(
        img.astype(np.float32), factor, filtered.astype(np.float32),
        1 - factor, 0)
    return np.clip(blended, 0, 255).astype(img.dtype)


def adjust_lighting(img, eigval, eigvec, alphastd=0.1, to_rgb=True):
    """AlexNet-style PCA jitter.

    This data augmentation is proposed in `ImageNet Classification with Deep
    Convolutional Neural Networks
    <https://dl.acm.org/doi/pdf/10.1145/3065386>`_.

    A random weight is drawn for every eigenvalue; the eigenvectors,
    scaled by weight and eigenvalue, are summed into one offset per
    channel, and that offset is added to every pixel.

    Args:
        img (ndarray): Image to be adjusted lighting. BGR order.
        eigval (ndarray): the eigenvalue of the covariance matrix of pixel
            values, respectively. 1-D.
        eigvec (ndarray): the eigenvector of the covariance matrix of pixel
            values, respectively. Shape ``(3, len(eigval))``.
        alphastd (float): The standard deviation for distribution of alpha.
            Defaults to 0.1
        to_rgb (bool): Whether to convert img to rgb.

    Returns:
        ndarray: The adjusted image.
    """
    assert isinstance(eigval, np.ndarray) and isinstance(eigvec, np.ndarray), \
        f'eigval and eigvec should both be of type np.ndarray, got ' \
        f'{type(eigval)} and {type(eigvec)} instead.'

    assert eigval.ndim == 1 and eigvec.ndim == 2
    assert eigvec.shape == (3, eigval.shape[0])
    n_eigval = eigval.shape[0]
    assert isinstance(alphastd, float), 'alphastd should be of type float, ' \
        f'got {type(alphastd)} instead.'

    work = img.copy().astype(np.float32)
    if to_rgb:
        cv2.cvtColor(work, cv2.COLOR_BGR2RGB, work)  # in place

    alpha = np.random.normal(0, alphastd, n_eigval)
    # Scale column j of eigvec by alpha[j] * eigval[j]; the row vectors
    # broadcast across the three rows of eigvec.
    weighted = eigvec * alpha.reshape(1, n_eigval)
    weighted = weighted * eigval.reshape(1, n_eigval)
    channel_offset = weighted.sum(axis=1).reshape(1, 1, 3)
    offset_map = np.broadcast_to(channel_offset, work.shape)
    return work + offset_map


def lut_transform(img, lut_table):
    """Transform array by look-up table.

    Every element of the input is used as an index into ``lut_table``
    and replaced by the entry found there. The input is converted to
    ``np.uint8`` before the lookup.

    Args:
        img (ndarray): Image to be transformed. All values must lie in
            [0, 255].
        lut_table (ndarray): look-up table of 256 elements; in case of
            multi-channel input array, the table should either have a single
            channel (in this case the same table is used for all channels) or
            the same number of channels as in the input array.

    Returns:
        ndarray: The transformed image.
    """
    assert isinstance(img, np.ndarray)
    assert 0 <= np.min(img) and np.max(img) <= 255
    assert isinstance(lut_table, np.ndarray)
    assert lut_table.shape == (256, )

    indices = np.array(img, dtype=np.uint8)
    return cv2.LUT(indices, lut_table)


def clahe(img, clip_limit=40.0, tile_grid_size=(8, 8)):
    """Use CLAHE method to process the image.

    See `ZUIDERVELD,K. Contrast Limited Adaptive Histogram Equalization[J].
    Graphics Gems, 1994:474-485.` for more information.

    Args:
        img (ndarray): Image to be processed. Must be 2-D; it is converted
            to ``np.uint8`` before processing.
        clip_limit (float): Threshold for contrast limiting. Default: 40.0.
        tile_grid_size (tuple[int]): Size of grid for histogram equalization.
            Input image will be divided into equally sized rectangular tiles.
            It defines the number of tiles in row and column. Default: (8, 8).

    Returns:
        ndarray: The processed image.
    """
    assert isinstance(img, np.ndarray)
    assert img.ndim == 2
    assert isinstance(clip_limit, (float, int))
    assert is_tuple_of(tile_grid_size, int)
    assert len(tile_grid_size) == 2

    equalizer = cv2.createCLAHE(clip_limit, tile_grid_size)
    gray_u8 = np.array(img, dtype=np.uint8)
    return equalizer.apply(gray_u8)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/onnx/__init__.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
from .info import is_custom_op_loaded
from .symbolic import register_extra_symbolics

__all__ = ['register_extra_symbolics', 'is_custom_op_loaded']


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/onnx/info.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import os

import torch


def is_custom_op_loaded():
    """Report whether a custom-op backend appears to be available.

    Three checks are made, in this order:

    1. the result of ``is_tensorrt_plugin_loaded()`` from the sibling
       ``tensorrt`` package;
    2. whether the path returned by ``get_onnxruntime_op_path()`` exists
       on disk;
    3. whether ``torch.__version__`` equals ``'parrots'``.

    An ``ImportError`` raised by check 1 or 2 counts as "not available".

    Returns:
        The first truthy check result, otherwise the outcome of check 3.
    """
    loaded = False
    try:
        from ..tensorrt import is_tensorrt_plugin_loaded
        loaded = is_tensorrt_plugin_loaded()
    except ImportError:  # also covers ModuleNotFoundError, its subclass
        pass
    if loaded:
        return loaded

    try:
        from ..ops import get_onnxruntime_op_path
        loaded = os.path.exists(get_onnxruntime_op_path())
    except ImportError:  # also covers ModuleNotFoundError, its subclass
        pass
    return loaded or torch.__version__ == 'parrots'


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/onnx/onnx_utils/__init__.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/onnx/onnx_utils/symbolic_helper.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
"""Modified from https://github.com/pytorch/pytorch."""
import warnings
from functools import wraps
from sys import maxsize

import torch
import torch.onnx
# This import monkey-patches graph manipulation methods on Graph, used for the
# ONNX symbolics
import torch.onnx.utils
from torch._C import ListType

# ---------------------------------------------------------------------------------
# Helper functions
# ---------------------------------------------------------------------------------

# Save some builtins as locals, because we'll shadow them below
_sum = sum


def _parse_arg(value, desc):
    if desc == 'none':
        return value
    if desc == 'v' or not _is_value(value):
        return value
    if value.node().mustBeNone():
        return None
    if value.node().kind() == 'onnx::Constant':
        tval = value.node()['value']
        if desc == 'i':
            return int(tval)
        elif desc == 'f':
            return float(tval)
        elif desc == 'b':
            return bool(tval)
        elif desc == 's':
            return str(tval)
        elif desc == 't':
            return tval
        elif desc == 'is':
            return [int(v) for v in tval]
        elif desc == 'fs':
            return [float(v) for v in tval]
        else:
            raise RuntimeError(
                "ONNX symbolic doesn't know to interpret Constant node")
    elif value.node().kind() == 'prim::ListConstruct':
        if desc == 'is':
            for v in value.node().inputs():
                if v.node().kind() != 'onnx::Constant':
                    raise RuntimeError(
                        "Failed to export an ONNX attribute '" +
                        v.node().kind() +
                        "', since it's not constant, please try to make "
                        'things (e.g., kernel size) static if possible')
            return [int(v.node()['value']) for v in value.node().inputs()]
        else:
            raise RuntimeError(
                "ONNX symbolic doesn't know to interpret ListConstruct node")

    raise RuntimeError('Unexpected node type: {}'.format(value.node().kind()))


def _maybe_get_const(value, desc):
    if _is_value(value) and value.node().kind() == 'onnx::Constant':
        return _parse_arg(value, desc)
    return value


def _maybe_get_scalar(value):
    value_t = _maybe_get_const(value, 't')
    if isinstance(value_t, torch.Tensor) and value_t.shape == ():
        return value_t
    return value


def _get_const(value, desc, arg_name):
    if _is_value(value) and value.node().kind() not in ('onnx::Constant',
                                                        'prim::Constant'):
        raise RuntimeError('ONNX symbolic expected a constant'
                           ' value of the {} argument, got `{}`'.format(
                               arg_name, value))
    return _parse_arg(value, desc)


def _unpack_list(list_value):
    list_node = list_value.node()
    assert list_node.kind() == 'prim::ListConstruct'
    return list(list_node.inputs())


# Check if list_value is output from prim::ListConstruct
# This is usually called before _unpack_list to ensure the list can be
# unpacked.
def _is_packed_list(list_value):
    return _is_value(
        list_value) and list_value.node().kind() == 'prim::ListConstruct'


def parse_args(*arg_descriptors):
    """Decorator factory that parses the arguments of a symbolic function.

    The decorated function is called as ``fn(g, *parsed_args)`` where
    every positional argument after ``g`` has been passed through
    :func:`_parse_arg` together with the descriptor at the same position.

    Args:
        *arg_descriptors (str): One descriptor per positional argument
            (excluding ``g``). Fewer arguments than descriptors may be
            supplied; supplying more triggers an assertion.

    Returns:
        callable: The decorator. It also stores the descriptors on the
        decorated function as ``_arg_descriptors``.
    """

    def decorator(fn):
        fn._arg_descriptors = arg_descriptors

        def wrapper(g, *args):
            # Trailing arguments may be optional, so there can be fewer
            # arguments than descriptors, but never more.
            assert len(arg_descriptors) >= len(args)
            parsed = []
            for arg, arg_desc in zip(args, arg_descriptors):
                parsed.append(_parse_arg(arg, arg_desc))
            return fn(g, *parsed)

        # Copying metadata is best effort: if functools.wraps fails for
        # `fn`, the plain wrapper is returned instead.
        try:
            wrapper = wraps(fn)(wrapper)
        except Exception:
            pass
        return wrapper

    return decorator


def _scalar(x):
    """Convert a scalar tensor into a Python value."""
    assert x.numel() == 1
    return x.item()


def _if_scalar_type_as(g, self, tensor):
    """Convert self into the same type of tensor, as necessary."""
    if isinstance(self, torch._C.Value):
        return self

    scalar_type = tensor.type().scalarType()
    if scalar_type:
        ty = scalar_type.lower()
        return getattr(self, ty)()

    return self


def _is_none(x):
    return x.node().mustBeNone()


def _is_value(x):
    return isinstance(x, torch._C.Value)


def _is_tensor_list(x):
    return x.type().isSubtypeOf(ListType.ofTensors())


def _unimplemented(op, msg):
    warnings.warn('ONNX export failed on ' + op + ' because ' + msg +
                  ' not supported')


def _try_get_scalar_type(*args):
    for arg in args:
        try:
            return arg.type().scalarType()
        except RuntimeError:
            pass
    return None


def _topk_helper(g, input, k, dim, largest=True, sorted=False, out=None):
    if out is not None:
        _unimplemented('TopK', 'Out parameter is not supported')
    if not _is_value(k):
        k = g.op('Constant', value_t=torch.tensor([k], dtype=torch.int64))
    else:
        k = g.op('Reshape', k, g.op('Constant', value_t=torch.tensor([1])))
    return g.op(
        'TopK',
        input,
        k,
        axis_i=dim,
        largest_i=largest,
        sorted_i=sorted,
        outputs=2)


def _slice_helper(g,
                  input,
                  axes,
                  starts,
                  ends,
                  steps=None,
                  dynamic_slice=False):
    """Forward a slice request to the opset 10 ``_slice`` symbolic.

    All arguments are passed through positionally and unchanged; the
    result of ``torch.onnx.symbolic_opset10._slice`` is returned.
    """
    # TODO(ruobing): add support for opset<10
    from torch.onnx.symbolic_opset10 import _slice as opset10_slice
    slice_args = (g, input, axes, starts, ends, steps, dynamic_slice)
    return opset10_slice(*slice_args)


def _unsqueeze_helper(g, input, dim):
    """Forward to the opset 9 ``unsqueeze`` symbolic and return its result."""
    from torch.onnx.symbolic_opset9 import unsqueeze as opset9_unsqueeze
    unsqueezed = opset9_unsqueeze(g, input, dim)
    return unsqueezed


def _interpolate_size_to_scales(g, input, output_size, dim):
    output_size = _maybe_get_const(output_size, 'is')
    if _is_value(output_size):
        offset = 2
        offsets = g.op(
            'Constant', value_t=torch.ones(offset, dtype=torch.float32))
        dividend = g.op(
            'Cast', output_size, to_i=cast_pytorch_to_onnx['Float'])
        divisor = _slice_helper(
            g, g.op('Shape', input), axes=[0], ends=[maxsize], starts=[offset])
        divisor = g.op('Cast', divisor, to_i=cast_pytorch_to_onnx['Float'])
        scale_dims = g.op('Div', dividend, divisor)
        scales = g.op('Concat', offsets, scale_dims, axis_i=0)
    else:
        scales_constant = [
            1. if i < 2 else float(output_size[-(dim - i)]) /
            float(input.type().sizes()[-(dim - i)]) for i in range(0, dim)
        ]
        scales = g.op(
            'Constant',
            value_t=torch.tensor(scales_constant, dtype=torch.float32))
    return scales


def _interpolate_get_scales_if_available(g, scales):
    if len(scales) == 0:
        return None
    # scales[0] is NoneType in Pytorch == 1.5.1
    # scales[0] is TensorType with sizes = [] in Pytorch == 1.6.0
    # scales[0] is ListType in Pytorch == 1.7.0
    # scales[0] is TensorType with sizes = [2] in Pytorch == 1.8.0
    scale_desc = 'fs' if scales[0].type().kind() == 'ListType' or (
        scales[0].type().kind() == 'TensorType' and
        (sum(scales[0].type().sizes()) > 1)) else 'f'
    available_scales = _maybe_get_const(
        scales[0], scale_desc) != -1 and not _is_none(scales[0])

    if not available_scales:
        return None

    offsets = g.op('Constant', value_t=torch.ones(2, dtype=torch.float32))
    if scale_desc == 'fs':
        scales_list = g.op(
            'Constant',
            value_t=torch.tensor(_maybe_get_const(scales[0], scale_desc)))
        # modify to support PyTorch==1.7.0
        # https://github.com/pytorch/pytorch/blob/75ee5756715e7161314ce037474843b68f69fc04/torch/onnx/symbolic_helper.py#L375 # noqa: E501
        scales = g.op('Concat', offsets, scales_list, axis_i=0)
    else:
        # for PyTorch < 1.7.0
        scales_list = []
        for scale in scales:
            unsqueezed_scale = _unsqueeze_helper(g, scale, 0)
            # ONNX only supports float for the scales. double -> float.
            unsqueezed_scale = g.op(
                'Cast', unsqueezed_scale, to_i=cast_pytorch_to_onnx['Float'])
            scales_list.append(unsqueezed_scale)
        scales = g.op('Concat', offsets, *scales_list, axis_i=0)
    return scales


def _get_interpolate_attributes(g, mode, args):
    if mode == 'nearest':
        align_corners = None
        scales = args[0:]
    else:
        align_corners = args[0]
        scales = args[1:]
    scales = _interpolate_get_scales_if_available(g, scales)
    return scales, align_corners


def _interpolate_get_scales(g, scale_factor, dim):
    offsets = g.op('Constant', value_t=torch.ones(2, dtype=torch.float32))
    if isinstance(scale_factor.type(), torch._C.ListType):
        return g.op('Concat', offsets, scale_factor, axis_i=0)
    else:
        scale_factor = _unsqueeze_helper(g, scale_factor, 0)
        scale_factor = g.op(
            'Cast', scale_factor, to_i=cast_pytorch_to_onnx['Float'])
        scales = [scale_factor for i in range(dim - 2)]
    scale_factor = g.op('Concat', offsets, *scales, axis_i=0)
    return scale_factor


def _size_helper(g, self, dim):
    """Return the entry at index ``dim`` of the shape of ``self``.

    A ``Shape`` node is created for ``self`` and the opset 9 ``select``
    symbolic picks index ``dim`` along axis 0 of it.
    """
    full_shape = g.op('Shape', self)
    from torch.onnx.symbolic_opset9 import select as opset9_select
    first_axis = g.op('Constant', value_t=torch.tensor([0]))
    return opset9_select(g, full_shape, first_axis, dim)


def _avgpool_helper(tuple_fn, padding, kernel_size, stride, divisor_override,
                    name):
    if divisor_override and divisor_override.node().kind() != 'prim::Constant':
        return _unimplemented(name, 'divisor_override')
    if not stride:
        stride = kernel_size
    padding = tuple(tuple_fn(padding))
    return padding


# Metaprogram symbolics for each ATen native specialized cast operator.
# E.g. we specify a function named `_cast_uint8_t` that instantiates an
# ONNX cast node with `to` attribute 'UINT8'.
#
# The table below maps a PyTorch scalar type name to the matching member of
# `torch.onnx.TensorProtoDataType`; within this module it is indexed with
# 'Float' to build the `to_i` attribute of `Cast` nodes.
#
# TODO: remove these once we support Type's in the JIT IR and we can once again
# use the unified toType operator
cast_pytorch_to_onnx = {
    'Byte': torch.onnx.TensorProtoDataType.UINT8,
    'Char': torch.onnx.TensorProtoDataType.INT8,
    'Double': torch.onnx.TensorProtoDataType.DOUBLE,
    'Float': torch.onnx.TensorProtoDataType.FLOAT,
    'Half': torch.onnx.TensorProtoDataType.FLOAT16,
    'Int': torch.onnx.TensorProtoDataType.INT32,
    'Long': torch.onnx.TensorProtoDataType.INT64,
    'Short': torch.onnx.TensorProtoDataType.INT16,
    'Bool': torch.onnx.TensorProtoDataType.BOOL,
    'ComplexFloat': torch.onnx.TensorProtoDataType.COMPLEX64,
    'ComplexDouble': torch.onnx.TensorProtoDataType.COMPLEX128,
    'Undefined': torch.onnx.TensorProtoDataType.UNDEFINED,
}

# Global set to store the quantized operators in the network.
# This is currently only used in the conversion of quantized ops from PT
# -> C2 via ONNX. It starts out empty and nothing in this module adds to it.
_quantized_ops = set()


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/onnx/symbolic.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
"""Modified from https://github.com/pytorch/pytorch."""
import os

import numpy as np
import torch
from torch.nn.modules.utils import _pair, _single, _triple
from torch.onnx.symbolic_helper import parse_args
from torch.onnx.symbolic_registry import register_op

from .onnx_utils import symbolic_helper as sym_help


def _interpolate(name, dim, interpolate_mode):

    def symbolic_fn(g, input, output_size, *args):
        scales, align_corners = sym_help._get_interpolate_attributes(
            g, interpolate_mode, args)
        align_corners = sym_help._maybe_get_scalar(align_corners)
        transformation_mode = 'asymmetric' \
            if interpolate_mode == 'nearest' \
            else 'align_corners' if align_corners else 'pytorch_half_pixel'
        empty_tensor = g.op(
            'Constant', value_t=torch.tensor([], dtype=torch.float32))

        if scales is None:
            if 'ONNX_BACKEND' in os.environ and os.environ[
                    'ONNX_BACKEND'] == 'TensorRT':
                input_size = input.type().sizes()
                # slice the first two dim
                input_size = input_size[:2]
                # convert output_size to int type
                output_size = sym_help._maybe_get_const(output_size, 'is')
                input_size.extend(output_size)
                output_size = g.op(
                    'Constant',
                    value_t=torch.tensor(input_size, dtype=torch.int64))
            else:
                input_size = g.op('Shape', input)
                input_size_beg = sym_help._slice_helper(
                    g, input_size, axes=[0], ends=[2], starts=[0])
                output_size = g.op(
                    'Cast',
                    output_size,
                    to_i=sym_help.cast_pytorch_to_onnx['Long'])
                output_size = g.op(
                    'Concat', input_size_beg, output_size, axis_i=0)
            scales = g.op(
                'Constant', value_t=torch.tensor([], dtype=torch.float32))
            return g.op(
                'Resize',
                input,
                empty_tensor,
                # roi only takes effect with
                # coordinate_transformation_mode="tf_crop_and_resize"
                scales,  # scales is not needed since we are sending out_size
                output_size,
                coordinate_transformation_mode_s=transformation_mode,
                cubic_coeff_a_f=-0.75,  # only valid when mode="cubic"
                mode_s=interpolate_mode,  # nearest, linear, or cubic
                nearest_mode_s='floor')  # only valid when mode="nearest"
        else:
            return g.op(
                'Resize',
                input,
                empty_tensor,
                # roi only takes effect with
                # coordinate_transformation_mode="tf_crop_and_resize"
                scales,  # scales is not needed since we are sending out_size
                coordinate_transformation_mode_s=transformation_mode,
                cubic_coeff_a_f=-0.75,  # only valid when mode="cubic"
                mode_s=interpolate_mode,  # nearest, linear, or cubic
                nearest_mode_s='floor')  # only valid when mode="nearest"

    return symbolic_fn


# Module-level symbolic functions, one per upsample operator. Each call
# passes the operator name, the input rank (3, 4 or 5) and the Resize mode
# ('nearest', 'linear' or 'cubic') to `_interpolate`.
upsample_nearest1d = _interpolate('upsample_nearest1d', 3, 'nearest')
upsample_nearest2d = _interpolate('upsample_nearest2d', 4, 'nearest')
upsample_nearest3d = _interpolate('upsample_nearest3d', 5, 'nearest')
upsample_linear1d = _interpolate('upsample_linear1d', 3, 'linear')
upsample_bilinear2d = _interpolate('upsample_bilinear2d', 4, 'linear')
upsample_trilinear3d = _interpolate('upsample_trilinear3d', 5, 'linear')
upsample_bicubic2d = _interpolate('upsample_bicubic2d', 4, 'cubic')


@parse_args('v', 'v', 'i', 'i', 'i', 'none')
def topk(g, self, k, dim, largest, sorted, out=None):
    """Symbolic for ``topk``; the work is delegated to
    ``sym_help._topk_helper`` with the same arguments."""
    helper_kwargs = dict(largest=largest, sorted=sorted, out=out)
    return sym_help._topk_helper(g, self, k, dim, **helper_kwargs)


def masked_select(g, self, mask):
    """Symbolic for ``masked_select``.

    The mask is expanded to the shape of ``self``, the positions of its
    non-zero entries are computed, and those positions are fed to an ONNX
    ``GatherND`` node that reads from ``self``.
    """
    from torch.onnx.symbolic_opset9 import expand_as, nonzero
    expanded_mask = expand_as(g, mask, self)
    selected_positions = nonzero(g, expanded_mask)
    return g.op('GatherND', self, selected_positions)


def _prepare_onnx_paddings(g, dim, pad):
    """Build the ``pads`` input of an ONNX ``Pad`` node from ``pad``.

    ``pad`` is first extended with zeros up to ``dim * 2`` entries. The
    result is viewed as rows of two values, the row order is reversed, and
    the rows are transposed and flattened, so that all first-column values
    come before all second-column values.

    Args:
        g: Graph object used to emit ONNX nodes.
        dim (int): Number of dimensions of the tensor being padded.
        pad: Graph value holding the padding amounts.

    Returns:
        Graph value with the re-ordered paddings, cast to ``Long``.
    """
    given_len = torch.onnx.symbolic_opset9.size(
        g, pad, g.op('Constant', value_t=torch.tensor([0])))

    # Number of zeros that have to be appended: dim * 2 - len(pad)
    rank_node = g.op(
        'Constant', value_t=torch.tensor(dim, dtype=torch.int64))
    two_node = g.op('Constant', value_t=torch.tensor(2, dtype=torch.int64))
    missing_len = g.op('Sub', g.op('Mul', rank_node, two_node), given_len)

    pad_as_long = g.op(
        'Cast', pad, to_i=sym_help.cast_pytorch_to_onnx['Long'])
    zero_tail = g.op(
        'ConstantOfShape',
        missing_len,
        value_t=torch.tensor([0], dtype=torch.int64))
    full_pad = g.op('Concat', pad_as_long, zero_tail, axis_i=0)

    # View as rows of two values, reverse the row order, then transpose.
    pair_shape = g.op('Constant', value_t=torch.tensor([-1, 2]))
    pairs = g.op('Reshape', full_pad, pair_shape)
    reversed_pairs = torch.onnx.symbolic_opset10.flip(g, pairs, [0])
    columns_first = g.op('Transpose', reversed_pairs, perm_i=[1, 0])

    flat_shape = g.op('Constant', value_t=torch.tensor([-1]))
    flattened = g.op('Reshape', columns_first, flat_shape)
    return g.op(
        'Cast', flattened, to_i=sym_help.cast_pytorch_to_onnx['Long'])


def constant_pad_nd(g, input, padding, value=None):
    """Symbolic for ``constant_pad_nd``: emits an ONNX ``Pad`` node in
    ``constant`` mode, with the fill value passed as a third input."""
    fill = sym_help._maybe_get_scalar(value)
    fill = sym_help._if_scalar_type_as(g, fill, input)
    onnx_pads = _prepare_onnx_paddings(g, input.type().dim(), padding)
    return g.op('Pad', input, onnx_pads, fill, mode_s='constant')


def reflection_pad(g, input, padding):
    """Symbolic shared by the reflection padding ops: emits an ONNX ``Pad``
    node in ``reflect`` mode."""
    onnx_pads = _prepare_onnx_paddings(g, input.type().dim(), padding)
    return g.op('Pad', input, onnx_pads, mode_s='reflect')


# The 1d/2d/3d variants all share the same symbolic.
reflection_pad1d = reflection_pad2d = reflection_pad3d = reflection_pad


def _avg_pool(name, tuple_fn):
    """Create the symbolic function for an ``avg_pool{1,2,3}d`` op.

    Args:
        name (str): Op name, forwarded to ``sym_help._avgpool_helper``.
        tuple_fn (callable): Applied to ``kernel_size`` and ``stride``
            before they are stored as node attributes.

    Returns:
        callable: Symbolic function that emits an ONNX ``AveragePool`` node.
    """

    @parse_args('v', 'is', 'is', 'is', 'i', 'i', 'none')
    def symbolic_fn(g,
                    input,
                    kernel_size,
                    stride,
                    padding,
                    ceil_mode,
                    count_include_pad,
                    divisor_override=None):
        padding = sym_help._avgpool_helper(tuple_fn, padding, kernel_size,
                                           stride, divisor_override, name)
        # An empty stride falls back to the kernel size.
        effective_stride = stride if stride else kernel_size
        if count_include_pad:
            # Pad explicitly (two leading zeros, then the pooling padding,
            # repeated twice) and pool without any padding afterwards.
            explicit_pads = torch.tensor(((0, ) * 2 + padding) * 2)
            input = g.op(
                'Pad',
                input,
                g.op('Constant', value_t=explicit_pads),
                mode_s='constant')
            padding = (0, ) * len(padding)
        return g.op(
            'AveragePool',
            input,
            kernel_shape_i=tuple_fn(kernel_size),
            strides_i=tuple_fn(effective_stride),
            pads_i=padding * 2,
            ceil_mode_i=ceil_mode)

    return symbolic_fn


avg_pool1d = _avg_pool('avg_pool1d', _single)
avg_pool2d = _avg_pool('avg_pool2d', _pair)
avg_pool3d = _avg_pool('avg_pool3d', _triple)


def _get_im2col_indices_along_dim(g, input_d, kernel_size_d, dilation_d,
                                  padding_d, stride_d):
    # Input is always 4-D (N, C, H, W)
    # Calculate indices of sliding blocks along spatial dimension
    # Slide kernel over input each dim d:
    # each dimension d ranges from 0 to
    # input[d]+2xpadding[d]-dilation[d]x(kernel_size[d]-1)
    # with steps = stride

    blocks_d = g.op('Add', input_d,
                    g.op('Constant', value_t=torch.tensor(padding_d * 2)))
    blocks_d = g.op(
        'Sub', blocks_d,
        g.op(
            'Constant',
            value_t=torch.tensor(dilation_d * (kernel_size_d - 1))))

    # Stride kernel over input and find starting indices along dim d
    blocks_d_indices = g.op('Range', g.op('Constant', value_t=torch.tensor(0)),
                            blocks_d,
                            g.op('Constant', value_t=torch.tensor(stride_d)))

    # Apply dilation on kernel and find its indices along dim d
    kernel_grid = np.arange(0, kernel_size_d * dilation_d, dilation_d)
    kernel_grid = g.op('Constant', value_t=torch.tensor([kernel_grid]))

    # Broadcast and add kernel staring positions (indices) with
    # kernel_grid along dim d, to get block indices along dim d
    blocks_d_indices = g.op(
        'Unsqueeze', blocks_d_indices, axes_i=[0])  # Reshape to [1, -1]
    kernel_mask = g.op('Reshape', kernel_grid,
                       g.op('Constant', value_t=torch.tensor([-1, 1])))
    block_mask = g.op('Add', blocks_d_indices, kernel_mask)

    return block_mask


def _get_im2col_padded_input(g, input, padding_h, padding_w):
    # Input is always 4-D tensor (N, C, H, W)
    # Padding tensor has the following format: (padding_h, padding_w)
    # Reshape the padding to follow ONNX format:
    # (dim1_begin, dim2_begin,...,dim1_end, dim2_end,...)
    pad = g.op(
        'Constant', value_t=torch.LongTensor([0, 0, padding_h, padding_w] * 2))
    return g.op('Pad', input, pad)


def _get_im2col_output_shape(g, input, kernel_h, kernel_w):
    """Build the target shape ``[N, C * kernel_h * kernel_w, -1]`` used to
    reshape the ``im2col`` result."""
    num_batch = size(g, input, g.op('Constant', value_t=torch.tensor(0)))
    num_channel = size(g, input, g.op('Constant', value_t=torch.tensor(1)))
    kernel_area = g.op('Constant', value_t=torch.tensor(kernel_h * kernel_w))
    unfolded_channel = g.op('Mul', num_channel, kernel_area)

    shape_parts = (
        g.op('Unsqueeze', num_batch, axes_i=[0]),
        g.op('Unsqueeze', unfolded_channel, axes_i=[0]),
        g.op('Constant', value_t=torch.tensor([-1])),
    )
    return g.op('Concat', *shape_parts, axis_i=0)


def size(g, self, dim=None):
    """Symbolic for ``size``: the whole shape when ``dim`` is ``None``,
    otherwise the result of ``sym_help._size_helper`` for that ``dim``."""
    if dim is not None:
        return sym_help._size_helper(g, self, dim)
    return g.op('Shape', self)


@parse_args('v', 'is', 'is', 'is', 'is')
def im2col(g, input, kernel_size, dilation, padding, stride):
    """Symbolic for ``im2col``.

    The padded input is gathered with the row block indices on axis 2 and
    the column block indices on axis 4, transposed, and reshaped to the
    shape produced by ``_get_im2col_output_shape``.
    """
    # Input is always a 4-D tensor (N, C, H, W); all other args are int[2].
    height = size(g, input, g.op('Constant', value_t=torch.tensor(2)))
    width = size(g, input, g.op('Constant', value_t=torch.tensor(3)))

    k_h, k_w = kernel_size[0], kernel_size[1]
    d_h, d_w = dilation[0], dilation[1]
    p_h, p_w = padding[0], padding[1]
    s_h, s_w = stride[0], stride[1]

    row_indices = _get_im2col_indices_along_dim(g, height, k_h, d_h, p_h,
                                                s_h)
    col_indices = _get_im2col_indices_along_dim(g, width, k_w, d_w, p_w, s_w)

    target_shape = _get_im2col_output_shape(g, input, k_h, k_w)
    padded = _get_im2col_padded_input(g, input, p_h, p_w)

    patches = g.op('Gather', padded, row_indices, axis_i=2)
    patches = g.op('Gather', patches, col_indices, axis_i=4)
    patches = g.op('Transpose', patches, perm_i=[0, 1, 2, 4, 3, 5])
    return g.op('Reshape', patches, target_shape)


@parse_args('v', 'i')
def one_hot(g, self, num_classes):
    """Symbolic for ``one_hot``: emits an ONNX ``OneHot`` node on the last
    axis with ``[0, 1]`` as its values input and ``num_classes`` as its
    depth input."""
    value_pair = g.op('Constant', value_t=torch.LongTensor([0, 1]))
    class_count = g.op('Constant', value_t=torch.LongTensor([num_classes]))
    return g.op('OneHot', self, class_count, value_pair, axis_i=-1)


@parse_args('v', 'i', 'none')
def softmax(g, input, dim, dtype=None):
    """Symbolic for ``softmax``.

    When the input rank is known and ``dim`` addresses the last axis, a
    single ONNX ``Softmax`` node is emitted. Otherwise the op is decomposed
    into ``ReduceMax``, ``Sub``, ``Exp``, ``ReduceSum`` and ``Div``. In both
    cases the result is cast when ``dtype`` is given and is not a
    ``prim::Constant`` node.
    """

    def _cast_if_requested(result):
        # Append a Cast node only when a non-constant dtype was supplied.
        if dtype and dtype.node().kind() != 'prim::Constant':
            target = sym_help._get_const(dtype, 'i', 'dtype')
            result = g.op(
                'Cast', result, to_i=sym_help.scalar_type_to_onnx[target])
        return result

    rank = input.type().dim()
    if rank:
        # TODO: remove this as onnx opset 11 spec allows negative axes
        if dim < 0:
            dim = rank + dim
        if rank == dim + 1:
            return _cast_if_requested(g.op('Softmax', input, axis_i=dim))

    # Subtract the maximum along ``dim`` before exponentiating.
    peak = g.op('ReduceMax', input, axes_i=[dim], keepdims_i=1)
    shifted = g.op('Sub', input, peak)
    exponent = g.op('Exp', shifted)
    total = g.op('ReduceSum', exponent, axes_i=[dim])
    return _cast_if_requested(g.op('Div', exponent, total))


def _adaptive_pool(name, type, tuple_fn, fn=None):
    """Create the symbolic function for an adaptive pooling op.

    Args:
        name (str): Op name; not used inside the generated function.
        type (str): ONNX pooling node type, ``'AveragePool'`` or
            ``'MaxPool'``.
        tuple_fn (callable): Applied to the computed kernel and stride
            before they are stored as node attributes.
        fn (callable, optional): Called instead of emitting the node when
            ``type`` is ``'MaxPool'``.

    Returns:
        callable: Symbolic function taking ``(g, input, output_size)``.
    """

    @parse_args('v', 'is')
    def symbolic_fn(g, input, output_size):
        wants_global = output_size == [1] * len(output_size)
        if wants_global and type == 'AveragePool':
            return g.op('GlobalAveragePool', input)
        if not input.isCompleteTensor():
            if not wants_global:
                raise NotImplementedError(
                    '[Adaptive pool]:input size not accessible')
            return g.op('GlobalMaxPool', input), None
        spatial = input.type().sizes()[2:]
        if wants_global and type == 'MaxPool':
            return g.op('GlobalMaxPool', input), None

        axes = range(len(spatial))
        # stride = floor(input_size / output_size)
        strides = [int(spatial[i] / output_size[i]) for i in axes]
        # kernel_size = input_size - (output_size - 1) * stride
        kernels = [
            spatial[i] - (output_size[i] - 1) * strides[i] for i in axes
        ]

        # MaxPool goes through ``fn`` so that indices are part of the output
        if type == 'MaxPool':
            no_padding = (0, ) * len(spatial)
            unit_dilation = (1, ) * len(spatial)
            return fn(g, input, kernels, kernels, no_padding, unit_dilation,
                      False)
        return g.op(
            type,
            input,
            kernel_shape_i=tuple_fn(kernels),
            strides_i=tuple_fn(strides),
            ceil_mode_i=False)

    return symbolic_fn


adaptive_avg_pool1d = _adaptive_pool('adaptive_avg_pool1d', 'AveragePool',
                                     _single)
adaptive_avg_pool2d = _adaptive_pool('adaptive_avg_pool2d', 'AveragePool',
                                     _pair)
adaptive_avg_pool3d = _adaptive_pool('adaptive_avg_pool3d', 'AveragePool',
                                     _triple)


def new_full(g,
             self,
             size,
             fill_value,
             dtype,
             layout,
             device,
             pin_memory=False):
    """Symbolic for ``new_full``.

    When ``dtype`` is ``None`` and ``self`` is a complete tensor, the dtype
    is derived from the scalar type of ``self``; the call is then forwarded
    to the opset 9 ``full`` symbolic.
    """
    from torch.onnx.symbolic_opset9 import full
    if dtype is None and self.isCompleteTensor():
        scalar_name = self.type().scalarType()
        onnx_type = sym_help.cast_pytorch_to_onnx[scalar_name]
        dtype = sym_help.scalar_type_to_onnx.index(onnx_type)
    return full(g, size, fill_value, dtype, layout, device, pin_memory)


@parse_args('v', 'v', 'i', 'i', 'i')
def grid_sampler(g,
                 input,
                 grid,
                 interpolation_mode,
                 padding_mode,
                 align_corners=False):
    """Symbolic for ``grid_sampler``: emits the custom
    ``mmcv::grid_sampler`` node with the three options stored as integer
    attributes."""
    node_attrs = dict(
        interpolation_mode_i=interpolation_mode,
        padding_mode_i=padding_mode,
        align_corners_i=align_corners)
    return g.op('mmcv::grid_sampler', input, grid, **node_attrs)


@parse_args('v', 'i')
def cummax(g, input, dim):
    """Symbolic for ``cummax``: emits the custom two-output
    ``mmcv::cummax`` node."""
    node_attrs = dict(dim_i=dim, outputs=2)
    return g.op('mmcv::cummax', input, **node_attrs)


@parse_args('v', 'i')
def cummin(g, input, dim):
    """Symbolic for ``cummin``: emits the custom two-output
    ``mmcv::cummin`` node."""
    node_attrs = dict(dim_i=dim, outputs=2)
    return g.op('mmcv::cummin', input, **node_attrs)


@parse_args('v', 'v', 'is')
def roll(g, input, shifts, dims):
    """Symbolic for ``roll`` built from slicing and concatenation.

    For every ``(dim, shift)`` pair the input is cut into two pieces along
    ``dim`` at ``slice_size`` and the pieces are concatenated in swapped
    order. When ``dims`` is empty the input is first reshaped to
    ``[1, -1]``, rolled along axis 1 and reshaped back afterwards.

    Args:
        g: Graph object used to emit ONNX nodes.
        input: Graph value of the tensor to roll.
        shifts: Graph value holding one shift per entry of ``dims``.
        dims (list[int]): Axes to roll along; may be empty.

    Returns:
        Graph value of the rolled tensor.
    """
    from torch.onnx.symbolic_opset9 import squeeze
    from packaging import version
    input_shape = g.op('Shape', input)

    need_flatten = len(dims) == 0
    # If dims is not specified, the tensor will be flattened before
    # rolling and then restored to the original shape.
    if need_flatten:
        resize_shape = input_shape
        input = g.op('Reshape', input,
                     g.op('Constant', value_t=torch.LongTensor([1, -1])))
        input_shape = g.op('Shape', input)
        dims = [1]

    for index, dim in enumerate(dims):
        # Size of the input along ``dim`` and the matching shift amount.
        end_size = sym_help._slice_helper(
            g, input_shape, axes=[0], ends=[dim + 1], starts=[dim])
        shift_size = sym_help._slice_helper(
            g, shifts, axes=[0], ends=[index + 1], starts=[index])
        slice_size = g.op('Sub', end_size, shift_size)

        # Can not use Mod because tensorrt does not support
        # The remainder is emulated as x - end_size * (x / end_size).
        div_size = g.op('Div', slice_size, end_size)
        slice_size = g.op('Sub', slice_size, g.op('Mul', end_size, div_size))

        if version.parse(torch.__version__) >= version.parse('1.7.0'):
            # add dim=0 for pytorch 1.9.0
            end_size = squeeze(g, end_size, 0)
            slice_size = squeeze(g, slice_size, 0)
        else:
            end_size = g.op('Squeeze', end_size)
            slice_size = g.op('Squeeze', slice_size)
            dim = torch.LongTensor([dim])

        # Piece [0, slice_size) along ``dim``.
        input_slice0 = sym_help._slice_helper(
            g,
            input,
            axes=dim,
            starts=torch.LongTensor([0]),
            ends=slice_size,
            dynamic_slice=True)
        # Piece [slice_size, end_size) along ``dim``.
        input_slice1 = sym_help._slice_helper(
            g,
            input,
            axes=dim,
            ends=end_size,
            starts=slice_size,
            dynamic_slice=True)

        # The second piece goes first, which performs the roll.
        input = g.op('Concat', input_slice1, input_slice0, axis_i=dim)

    if need_flatten:
        input = g.op('Reshape', input, resize_shape)

    return input


def register_extra_symbolics(opset=11):
    """Register every symbolic function of this module with
    ``register_op``.

    Args:
        opset (int): Opset version passed to ``register_op``. Defaults
            to 11.
    """
    # (op name, symbolic function) pairs, in registration order.
    symbolic_table = (
        ('one_hot', one_hot),
        ('im2col', im2col),
        ('topk', topk),
        ('softmax', softmax),
        ('constant_pad_nd', constant_pad_nd),
        ('reflection_pad1d', reflection_pad1d),
        ('reflection_pad2d', reflection_pad2d),
        ('reflection_pad3d', reflection_pad3d),
        ('avg_pool1d', avg_pool1d),
        ('avg_pool2d', avg_pool2d),
        ('avg_pool3d', avg_pool3d),
        ('adaptive_avg_pool1d', adaptive_avg_pool1d),
        ('adaptive_avg_pool2d', adaptive_avg_pool2d),
        ('adaptive_avg_pool3d', adaptive_avg_pool3d),
        ('masked_select', masked_select),
        ('upsample_nearest1d', upsample_nearest1d),
        ('upsample_nearest2d', upsample_nearest2d),
        ('upsample_nearest3d', upsample_nearest3d),
        ('upsample_linear1d', upsample_linear1d),
        ('upsample_bilinear2d', upsample_bilinear2d),
        ('upsample_trilinear3d', upsample_trilinear3d),
        ('upsample_bicubic2d', upsample_bicubic2d),
        ('new_full', new_full),
        ('grid_sampler', grid_sampler),
        ('cummax', cummax),
        ('cummin', cummin),
        ('roll', roll),
    )
    for op_name, symbolic in symbolic_table:
        register_op(op_name, symbolic, '', opset)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/__init__.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
from .active_rotated_filter import active_rotated_filter
from .assign_score_withk import assign_score_withk
from .ball_query import ball_query
from .bbox import bbox_overlaps
from .border_align import BorderAlign, border_align
from .box_iou_rotated import box_iou_rotated
from .carafe import CARAFE, CARAFENaive, CARAFEPack, carafe, carafe_naive
from .cc_attention import CrissCrossAttention
from .contour_expand import contour_expand
from .convex_iou import convex_giou, convex_iou
from .corner_pool import CornerPool
from .correlation import Correlation
from .deform_conv import DeformConv2d, DeformConv2dPack, deform_conv2d
from .deform_roi_pool import (DeformRoIPool, DeformRoIPoolPack,
                              ModulatedDeformRoIPoolPack, deform_roi_pool)
from .deprecated_wrappers import Conv2d_deprecated as Conv2d
from .deprecated_wrappers import ConvTranspose2d_deprecated as ConvTranspose2d
from .deprecated_wrappers import Linear_deprecated as Linear
from .deprecated_wrappers import MaxPool2d_deprecated as MaxPool2d
from .focal_loss import (SigmoidFocalLoss, SoftmaxFocalLoss,
                         sigmoid_focal_loss, softmax_focal_loss)
from .furthest_point_sample import (furthest_point_sample,
                                    furthest_point_sample_with_dist)
from .fused_bias_leakyrelu import FusedBiasLeakyReLU, fused_bias_leakyrelu
from .gather_points import gather_points
from .group_points import GroupAll, QueryAndGroup, grouping_operation
from .info import (get_compiler_version, get_compiling_cuda_version,
                   get_onnxruntime_op_path)
from .iou3d import boxes_iou_bev, nms_bev, nms_normal_bev
from .knn import knn
from .masked_conv import MaskedConv2d, masked_conv2d
from .min_area_polygons import min_area_polygons
from .modulated_deform_conv import (ModulatedDeformConv2d,
                                    ModulatedDeformConv2dPack,
                                    modulated_deform_conv2d)
from .multi_scale_deform_attn import MultiScaleDeformableAttention
from .nms import batched_nms, nms, nms_match, nms_rotated, soft_nms
from .pixel_group import pixel_group
from .point_sample import (SimpleRoIAlign, point_sample,
                           rel_roi_point_to_rel_img_point)
from .points_in_boxes import (points_in_boxes_all, points_in_boxes_cpu,
                              points_in_boxes_part)
from .points_in_polygons import points_in_polygons
from .points_sampler import PointsSampler
from .psa_mask import PSAMask
from .riroi_align_rotated import RiRoIAlignRotated, riroi_align_rotated
from .roi_align import RoIAlign, roi_align
from .roi_align_rotated import RoIAlignRotated, roi_align_rotated
from .roi_pool import RoIPool, roi_pool
from .roiaware_pool3d import RoIAwarePool3d
from .roipoint_pool3d import RoIPointPool3d
from .rotated_feature_align import rotated_feature_align
from .saconv import SAConv2d
from .scatter_points import DynamicScatter, dynamic_scatter
from .sync_bn import SyncBatchNorm
from .three_interpolate import three_interpolate
from .three_nn import three_nn
from .tin_shift import TINShift, tin_shift
from .upfirdn2d import upfirdn2d
from .voxelize import Voxelization, voxelization

# Names exported by ``from <this package> import *``; every entry is
# imported at the top of this file.
__all__ = [
    'bbox_overlaps', 'CARAFE', 'CARAFENaive', 'CARAFEPack', 'carafe',
    'carafe_naive', 'CornerPool', 'DeformConv2d', 'DeformConv2dPack',
    'deform_conv2d', 'DeformRoIPool', 'DeformRoIPoolPack',
    'ModulatedDeformRoIPoolPack', 'deform_roi_pool', 'SigmoidFocalLoss',
    'SoftmaxFocalLoss', 'sigmoid_focal_loss', 'softmax_focal_loss',
    'get_compiler_version', 'get_compiling_cuda_version',
    'get_onnxruntime_op_path', 'MaskedConv2d', 'masked_conv2d',
    'ModulatedDeformConv2d', 'ModulatedDeformConv2dPack',
    'modulated_deform_conv2d', 'batched_nms', 'nms', 'soft_nms', 'nms_match',
    'RoIAlign', 'roi_align', 'RoIPool', 'roi_pool', 'SyncBatchNorm', 'Conv2d',
    'ConvTranspose2d', 'Linear', 'MaxPool2d', 'CrissCrossAttention', 'PSAMask',
    'point_sample', 'rel_roi_point_to_rel_img_point', 'SimpleRoIAlign',
    'SAConv2d', 'TINShift', 'tin_shift', 'assign_score_withk',
    'box_iou_rotated', 'RoIPointPool3d', 'nms_rotated', 'knn', 'ball_query',
    'upfirdn2d', 'FusedBiasLeakyReLU', 'fused_bias_leakyrelu',
    'rotated_feature_align', 'RiRoIAlignRotated', 'riroi_align_rotated',
    'RoIAlignRotated', 'roi_align_rotated', 'pixel_group', 'QueryAndGroup',
    'GroupAll', 'grouping_operation', 'contour_expand', 'three_nn',
    'three_interpolate', 'MultiScaleDeformableAttention', 'BorderAlign',
    'border_align', 'gather_points', 'furthest_point_sample',
    'furthest_point_sample_with_dist', 'PointsSampler', 'Correlation',
    'boxes_iou_bev', 'nms_bev', 'nms_normal_bev', 'Voxelization',
    'voxelization', 'dynamic_scatter', 'DynamicScatter', 'RoIAwarePool3d',
    'points_in_boxes_part', 'points_in_boxes_cpu', 'points_in_boxes_all',
    'points_in_polygons', 'min_area_polygons', 'active_rotated_filter',
    'convex_iou', 'convex_giou'
]


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/active_rotated_filter.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import torch
from torch.autograd import Function
from torch.autograd.function import once_differentiable

from ..utils import ext_loader

ext_module = ext_loader.load_ext(
    '_ext',
    ['active_rotated_filter_forward', 'active_rotated_filter_backward'])


class ActiveRotatedFilterFunction(Function):
    """Autograd function that encodes orientation information and generates
    orientation-sensitive features.

    The details are described in the paper `Align Deep Features for Oriented
    Object Detection <https://arxiv.org/abs/2008.09397>`_.
    """

    @staticmethod
    def forward(ctx, input, indices):
        """
        Args:
            input (torch.Tensor): Input features with shape
                [num_output_planes, num_input_planes, num_orientations, H, W].
            indices (torch.Tensor): Indices with shape
                [num_orientations, H, W, num_rotations].

        Returns:
            torch.Tensor: Refined features with shape [num_output_planes *
            num_rotations, num_input_planes * num_orientations, H, W].
        """
        ctx.save_for_backward(input, indices)
        # ``input`` must be 5-D; only its two leading sizes are used, the
        # remaining sizes are taken from ``indices``.
        out_planes, in_planes, _, _, _ = input.size()
        orientations, height, width, rotations = indices.size()
        output_shape = (out_planes * rotations, in_planes * orientations,
                        height, width)
        output = input.new_zeros(output_shape)
        ext_module.active_rotated_filter_forward(input, indices, output)

        return output

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_out):
        """
        Args:
            grad_out (torch.Tensor): The gradient of output features
                with shape [num_output_planes * num_rotations,
                num_input_planes * num_orientations, H, W].

        Returns:
            tuple: The gradient of input features with shape
            [num_output_planes, num_input_planes, num_orientations, H, W],
            followed by ``None`` for ``indices``.
        """
        input, indices = ctx.saved_tensors
        grad_input = torch.zeros_like(input)
        ext_module.active_rotated_filter_backward(grad_out, indices,
                                                  grad_input)
        return grad_input, None


active_rotated_filter = ActiveRotatedFilterFunction.apply


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/assign_score_withk.py
================================================
from torch.autograd import Function

from ..utils import ext_loader

ext_module = ext_loader.load_ext(
    '_ext', ['assign_score_withk_forward', 'assign_score_withk_backward'])


class AssignScoreWithK(Function):
    r"""Perform weighted sum to generate output features according to scores.
    Modified from `PAConv <https://github.com/CVMI-Lab/PAConv/tree/main/
    scene_seg/lib/paconv_lib/src/gpu>`_.

    This is a memory-efficient CUDA implementation of assign_scores operation,
    which first transforms all point features with the weight bank, then
    assembles neighbor features with ``knn_idx`` and performs a weighted sum
    of ``scores``.

    See the `paper <https://arxiv.org/pdf/2103.14635.pdf>`_ appendix Sec. D
    for more detailed descriptions.

    Note:
        This implementation assumes using ``neighbor`` kernel input, which is
        (point_features - center_features, point_features).
        See https://github.com/CVMI-Lab/PAConv/blob/main/scene_seg/model/
        pointnet2/paconv.py#L128 for more details.
    """

    @staticmethod
    def forward(ctx,
                scores,
                point_features,
                center_features,
                knn_idx,
                aggregate='sum'):
        """
        Args:
            scores (torch.Tensor): (B, npoint, K, M), predicted scores to
                aggregate weight matrices in the weight bank.
                ``npoint`` is the number of sampled centers.
                ``K`` is the number of queried neighbors.
                ``M`` is the number of weight matrices in the weight bank.
            point_features (torch.Tensor): (B, N, M, out_dim)
                Pre-computed point features to be aggregated.
            center_features (torch.Tensor): (B, N, M, out_dim)
                Pre-computed center features to be aggregated.
            knn_idx (torch.Tensor): (B, npoint, K), index of sampled kNN.
                We assume the first idx in each row is the idx of the center.
            aggregate (str, optional): Aggregation method.
                Can be 'sum', 'avg' or 'max'. Defaults: 'sum'.

        Returns:
            torch.Tensor: (B, out_dim, npoint, K), the aggregated features.
        """
        # Integer codes handed to the extension for each aggregation method.
        agg_codes = {'sum': 0, 'avg': 1, 'max': 2}

        batch, num_points, num_mats, out_dim = point_features.size()
        _, npoint, num_neighbors, _ = scores.size()

        output = point_features.new_zeros(
            (batch, out_dim, npoint, num_neighbors))
        agg_code = agg_codes[aggregate]
        ext_module.assign_score_withk_forward(
            point_features.contiguous(),
            center_features.contiguous(),
            scores.contiguous(),
            knn_idx.contiguous(),
            output,
            B=batch,
            N0=num_points,
            N1=npoint,
            M=num_mats,
            K=num_neighbors,
            O=out_dim,
            aggregate=agg_code)

        ctx.save_for_backward(output, point_features, center_features, scores,
                              knn_idx)
        ctx.agg = agg_codes[aggregate]

        return output

    @staticmethod
    def backward(ctx, grad_out):
        """
        Args:
            grad_out (torch.Tensor): (B, out_dim, npoint, K)

        Returns:
            tuple[torch.Tensor]: A tuple contains five elements. The first one
            is the gradient of ``scores`` whose shape is (B, npoint, K, M). The
            second is the gradient of ``point_features`` whose shape is
            (B, N, M, out_dim). The third is the gradient of
            ``center_features`` with the shape of (B, N, M, out_dim). The last
            two are ``None``.
        """
        # The saved forward output is not needed here.
        _, point_features, center_features, scores, knn_idx = ctx.saved_tensors

        batch, num_points, num_mats, out_dim = point_features.size()
        _, npoint, num_neighbors, _ = scores.size()

        # Zero-filled buffers that are passed to the extension call below.
        grad_point_features = point_features.new_zeros(point_features.shape)
        grad_center_features = center_features.new_zeros(center_features.shape)
        grad_scores = scores.new_zeros(scores.shape)

        ext_module.assign_score_withk_backward(
            grad_out.contiguous(),
            point_features.contiguous(),
            center_features.contiguous(),
            scores.contiguous(),
            knn_idx.contiguous(),
            grad_point_features,
            grad_center_features,
            grad_scores,
            B=batch,
            N0=num_points,
            N1=npoint,
            M=num_mats,
            K=num_neighbors,
            O=out_dim,
            aggregate=ctx.agg)

        gradients = (grad_scores, grad_point_features, grad_center_features,
                     None, None)
        return gradients


assign_score_withk = AssignScoreWithK.apply


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/ball_query.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import torch
from torch.autograd import Function

from ..utils import ext_loader

ext_module = ext_loader.load_ext('_ext', ['ball_query_forward'])


class BallQuery(Function):
    """Find nearby points in spherical space."""

    @staticmethod
    def forward(ctx, min_radius: float, max_radius: float, sample_num: int,
                xyz: torch.Tensor, center_xyz: torch.Tensor) -> torch.Tensor:
        """
        Args:
            min_radius (float): minimum radius of the balls.
            max_radius (float): maximum radius of the balls.
            sample_num (int): maximum number of features in the balls.
            xyz (Tensor): (B, N, 3) xyz coordinates of the features.
            center_xyz (torch.Tensor): (B, npoint, 3) centers of the ball
                query.

        Returns:
            torch.Tensor: (B, npoint, nsample) tensor with the indices of the
            features that form the query balls.

        Raises:
            AssertionError: If ``xyz`` or ``center_xyz`` is not contiguous,
                or if ``min_radius`` is not smaller than ``max_radius``.
        """
        assert center_xyz.is_contiguous()
        assert xyz.is_contiguous()
        assert min_radius < max_radius

        B, N, _ = xyz.size()
        npoint = center_xyz.size(1)
        idx = xyz.new_zeros(B, npoint, sample_num, dtype=torch.int)

        ext_module.ball_query_forward(
            center_xyz,
            xyz,
            idx,
            b=B,
            n=N,
            m=npoint,
            min_radius=min_radius,
            max_radius=max_radius,
            nsample=sample_num)
        # The integer index output carries no gradient.
        if torch.__version__ != 'parrots':
            ctx.mark_non_differentiable(idx)
        return idx

    @staticmethod
    def backward(ctx, a=None):
        """Return one ``None`` gradient per ``forward`` input.

        ``forward`` takes five inputs (``min_radius``, ``max_radius``,
        ``sample_num``, ``xyz``, ``center_xyz``), so five values are
        returned to match the autograd contract.
        """
        return None, None, None, None, None


ball_query = BallQuery.apply


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/bbox.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
from ..utils import ext_loader

ext_module = ext_loader.load_ext('_ext', ['bbox_overlaps'])


def bbox_overlaps(bboxes1, bboxes2, mode='iou', aligned=False, offset=0):
    """Calculate overlap between two set of bboxes.

    If ``aligned`` is ``False``, then calculate the ious between each bbox
    of bboxes1 and bboxes2, otherwise the ious between each aligned pair of
    bboxes1 and bboxes2.

    Args:
        bboxes1 (torch.Tensor): shape (m, 4) in <x1, y1, x2, y2> format or
            empty.
        bboxes2 (torch.Tensor): shape (n, 4) in <x1, y1, x2, y2> format or
            empty. If aligned is ``True``, then m and n must be equal.
        mode (str): "iou" (intersection over union) or iof (intersection over
            foreground).
        aligned (bool): Whether to compute one value per aligned pair
            instead of the full (m, n) matrix. Defaults to False.
        offset (int): Must be 0 or 1; passed unchanged to the extension
            op. Defaults to 0.

    Returns:
        torch.Tensor: Return the ious betweens boxes. If ``aligned`` is
        ``False``, the shape of ious is (m, n). If ``aligned`` is ``True``,
        the shape is (m, ), except when there are no boxes, in which case
        a tensor of shape (m, 1) is returned.

    Example:
        >>> bboxes1 = torch.FloatTensor([
        >>>     [0, 0, 10, 10],
        >>>     [10, 10, 20, 20],
        >>>     [32, 32, 38, 42],
        >>> ])
        >>> bboxes2 = torch.FloatTensor([
        >>>     [0, 0, 10, 20],
        >>>     [0, 10, 10, 19],
        >>>     [10, 10, 20, 20],
        >>> ])
        >>> bbox_overlaps(bboxes1, bboxes2)
        tensor([[0.5000, 0.0000, 0.0000],
                [0.0000, 0.0000, 1.0000],
                [0.0000, 0.0000, 0.0000]])

    Example:
        >>> empty = torch.FloatTensor([])
        >>> nonempty = torch.FloatTensor([
        >>>     [0, 0, 10, 9],
        >>> ])
        >>> assert tuple(bbox_overlaps(empty, nonempty).shape) == (0, 1)
        >>> assert tuple(bbox_overlaps(nonempty, empty).shape) == (1, 0)
        >>> assert tuple(bbox_overlaps(empty, empty).shape) == (0, 0)
    """

    mode_flags = {'iou': 0, 'iof': 1}
    assert mode in mode_flags
    flag = mode_flags[mode]
    # Either the boxes are empty or the length of boxes' last dimension is 4
    for boxes in (bboxes1, bboxes2):
        assert (boxes.size(-1) == 4 or boxes.size(0) == 0)
    assert offset in (0, 1)

    num_rows, num_cols = bboxes1.size(0), bboxes2.size(0)
    if aligned:
        assert num_rows == num_cols

    # Nothing to compute when either set is empty.
    if num_rows * num_cols == 0:
        empty_shape = (num_rows, 1) if aligned else (num_rows, num_cols)
        return bboxes1.new(*empty_shape)

    result_shape = num_rows if aligned else (num_rows, num_cols)
    ious = bboxes1.new_zeros(result_shape)
    ext_module.bbox_overlaps(
        bboxes1, bboxes2, ious, mode=flag, aligned=aligned, offset=offset)
    return ious


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/border_align.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
# modified from
# https://github.com/Megvii-BaseDetection/cvpods/blob/master/cvpods/layers/border_align.py

import torch
import torch.nn as nn
from torch.autograd import Function
from torch.autograd.function import once_differentiable

from ..utils import ext_loader

# Handle returned by ext_loader.load_ext for '_ext'; the list names the two
# ops this file calls on it (border align forward and backward).
ext_module = ext_loader.load_ext(
    '_ext', ['border_align_forward', 'border_align_backward'])


class BorderAlignFunction(Function):
    """Autograd function around the ``border_align`` extension ops.

    ``forward`` delegates to ``ext_module.border_align_forward`` and
    ``backward`` to ``ext_module.border_align_backward``. Only ``input``
    receives a gradient; ``boxes`` and ``pool_size`` get ``None``.
    """

    @staticmethod
    def symbolic(g, input, boxes, pool_size):
        """Emit the ``mmcv::MMCVBorderAlign`` op on graph ``g``."""
        node = g.op(
            'mmcv::MMCVBorderAlign', input, boxes, pool_size_i=pool_size)
        return node

    @staticmethod
    def forward(ctx, input, boxes, pool_size):
        """Pool border features of ``input`` along ``boxes``.

        Args:
            ctx: Autograd context; stores ``pool_size``, the input shape and
                the tensors needed by ``backward``.
            input (torch.Tensor): Feature map whose channel count (dim 1)
                must be divisible by 4.
            boxes (torch.Tensor): 3-D tensor whose last dimension has size 4.
            pool_size (int): Forwarded unchanged to the extension op.

        Returns:
            torch.Tensor: Tensor of shape
            ``(input.size(0), input.size(1) // 4, boxes.size(1), 4)``.
        """
        ctx.pool_size = pool_size
        ctx.input_shape = input.size()

        assert boxes.ndim == 3, 'boxes must be with shape [B, H*W, 4]'
        assert boxes.size(2) == 4, \
            'the last dimension of boxes must be (x1, y1, x2, y2)'
        assert input.size(1) % 4 == 0, \
            'the channel for input feature must be divisible by factor 4'

        # Layout of the pooled result: [B, C//4, H*W, 4]
        batch_size, channels = input.size(0), input.size(1)
        pooled_shape = (batch_size, channels // 4, boxes.size(1), 4)
        pooled = input.new_zeros(pooled_shape)
        # Integer buffer filled by the forward op; it is consumed only by
        # the backward op.
        argmax_idx = input.new_zeros(pooled_shape).to(torch.int)

        ext_module.border_align_forward(
            input, boxes, pooled, argmax_idx, pool_size=ctx.pool_size)

        ctx.save_for_backward(boxes, argmax_idx)
        return pooled

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        """Compute the gradient w.r.t. ``input``.

        Returns:
            tuple: ``(grad_input, None, None)`` matching the arguments of
            ``forward``.
        """
        boxes, argmax_idx = ctx.saved_tensors
        grad_input = grad_output.new_zeros(ctx.input_shape)
        # A complex head architecture may hand us a non-contiguous gradient,
        # so make it contiguous before passing it to the extension op.
        contiguous_grad = grad_output.contiguous()
        ext_module.border_align_backward(
            contiguous_grad,
            boxes,
            argmax_idx,
            grad_input,
            pool_size=ctx.pool_size)
        return grad_input, None, None


# Functional entry point: border_align(input, boxes, pool_size) runs
# BorderAlignFunction through autograd's ``apply``.
border_align = BorderAlignFunction.apply


class BorderAlign(nn.Module):
    r"""Border align pooling layer.

    Runs ``border_align`` on the input feature map using predicted bboxes,
    as described in `BorderDet: Border Feature for Dense Object Detection
    <https://arxiv.org/abs/2007.11056>`_.

    For every border line (top, left, bottom or right) of every box,
    border_align:

    1. uniformly samples ``pool_size`` + 1 positions on the line, including
       its start and end points;
    2. computes the feature at each sampled position by bilinear
       interpolation;
    3. max-pools over those ``pool_size`` + 1 positions to obtain the pooled
       feature.

    Args:
        pool_size (int): number of positions sampled over the boxes' borders
            (e.g. top, bottom, left, right).
    """

    def __init__(self, pool_size):
        super().__init__()
        self.pool_size = pool_size

    def forward(self, input, boxes):
        """Pool border features.

        Args:
            input: Features with shape [N,4C,H,W]. The channel ranges [0,C),
                [C,2C), [2C,3C), [3C,4C) hold the top, left, bottom and
                right features respectively.
            boxes: Boxes with shape [N,H*W,4] in (x1,y1,x2,y2) format.

        Returns:
            torch.Tensor: Pooled features with shape [N,C,H*W,4]; the last
            dimension is ordered (top,left,bottom,right).
        """
        return border_align(input, boxes, self.pool_size)

    def __repr__(self):
        return f'{self.__class__.__name__}(pool_size={self.pool_size})'


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/box_iou_rotated.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
from ..utils import ext_loader

# Handle returned by ext_loader.load_ext for '_ext'; 'box_iou_rotated' is
# the only op this file calls on it.
ext_module = ext_loader.load_ext('_ext', ['box_iou_rotated'])


def box_iou_rotated(bboxes1,
                    bboxes2,
                    mode='iou',
                    aligned=False,
                    clockwise=True):
    """Return intersection-over-union (Jaccard index) of boxes.

    Both sets of boxes are expected to be in
    (x_center, y_center, width, height, angle) format.

    If ``aligned`` is ``False``, then calculate the ious between each bbox
    of bboxes1 and bboxes2, otherwise the ious between each aligned pair of
    bboxes1 and bboxes2.

    .. note::
        The operator assumes:

        1) The positive direction along x axis is left -> right.

        2) The positive direction along y axis is top -> down.

        3) The w border is in parallel with x axis when angle = 0.

        However, there are 2 opposite definitions of the positive angular
        direction, clockwise (CW) and counter-clockwise (CCW). MMCV supports
        both definitions and uses CW by default.

        Please set ``clockwise=False`` if you are using the CCW definition.

        The coordinate system when ``clockwise`` is ``True`` (default)

            .. code-block:: none

                0-------------------> x (0 rad)
                |  A-------------B
                |  |             |
                |  |     box     h
                |  |   angle=0   |
                |  D------w------C
                v
                y (pi/2 rad)

            In such coordinate system the rotation matrix is

            .. math::
                \\begin{pmatrix}
                \\cos\\alpha & -\\sin\\alpha \\\\
                \\sin\\alpha & \\cos\\alpha
                \\end{pmatrix}

            The coordinates of the corner point A can be calculated as:

            .. math::
                P_A=
                \\begin{pmatrix} x_A \\\\ y_A\\end{pmatrix}
                =
                \\begin{pmatrix} x_{center} \\\\ y_{center}\\end{pmatrix} +
                \\begin{pmatrix}\\cos\\alpha & -\\sin\\alpha \\\\
                \\sin\\alpha & \\cos\\alpha\\end{pmatrix}
                \\begin{pmatrix} -0.5w \\\\ -0.5h\\end{pmatrix} \\\\
                =
                \\begin{pmatrix} x_{center}-0.5w\\cos\\alpha+0.5h\\sin\\alpha
                \\\\
                y_{center}-0.5w\\sin\\alpha-0.5h\\cos\\alpha\\end{pmatrix}


        The coordinate system when ``clockwise`` is ``False``

            .. code-block:: none

                0-------------------> x (0 rad)
                |  A-------------B
                |  |             |
                |  |     box     h
                |  |   angle=0   |
                |  D------w------C
                v
                y (-pi/2 rad)

            In such coordinate system the rotation matrix is

            .. math::
                \\begin{pmatrix}
                \\cos\\alpha & \\sin\\alpha \\\\
                -\\sin\\alpha & \\cos\\alpha
                \\end{pmatrix}

            The coordinates of the corner point A can be calculated as:

            .. math::
                P_A=
                \\begin{pmatrix} x_A \\\\ y_A\\end{pmatrix}
                =
                \\begin{pmatrix} x_{center} \\\\ y_{center}\\end{pmatrix} +
                \\begin{pmatrix}\\cos\\alpha & \\sin\\alpha \\\\
                -\\sin\\alpha & \\cos\\alpha\\end{pmatrix}
                \\begin{pmatrix} -0.5w \\\\ -0.5h\\end{pmatrix} \\\\
                =
                \\begin{pmatrix} x_{center}-0.5w\\cos\\alpha-0.5h\\sin\\alpha
                \\\\
                y_{center}+0.5w\\sin\\alpha-0.5h\\cos\\alpha\\end{pmatrix}

    Args:
        bboxes1 (torch.Tensor): rotated bboxes 1. It has shape (N, 5),
            indicating (x, y, w, h, theta) for each row. Note that theta is in
            radian.
        bboxes2 (torch.Tensor): rotated bboxes 2. It has shape (M, 5),
            indicating (x, y, w, h, theta) for each row. Note that theta is in
            radian.
        mode (str): "iou" (intersection over union) or iof (intersection over
            foreground).
        aligned (bool): If ``True``, compute one value per aligned pair
            ``(bboxes1[i], bboxes2[i])`` instead of the full pairwise matrix.
            default False.
        clockwise (bool): flag indicating whether the positive angular
            orientation is clockwise. default True.
            `New in version 1.4.3.`

    Returns:
        torch.Tensor: Return the ious betweens boxes. If ``aligned`` is
        ``False``, the shape of ious is (N, M) else (N,).
    """
    assert mode in ['iou', 'iof']
    mode_dict = {'iou': 0, 'iof': 1}
    mode_flag = mode_dict[mode]
    rows = bboxes1.size(0)
    cols = bboxes2.size(0)
    if aligned:
        ious = bboxes1.new_zeros(rows)
    else:
        # Flat buffer for the extension op; reshaped to (rows, cols) below.
        ious = bboxes1.new_zeros((rows * cols))
    if ious.numel() == 0:
        # Nothing to compute: return the (empty) result with its documented
        # shape without invoking the extension op. This also avoids indexing
        # ``flip_mat[-1]`` below when ``bboxes1`` has a zero-sized last
        # dimension (e.g. ``torch.FloatTensor([])``).
        return ious if aligned else ious.view(rows, cols)
    if not clockwise:
        # Negate the last column (the angle) to convert CCW angles to the
        # CW convention used by the op.
        flip_mat = bboxes1.new_ones(bboxes1.shape[-1])
        flip_mat[-1] = -1
        bboxes1 = bboxes1 * flip_mat
        bboxes2 = bboxes2 * flip_mat
    bboxes1 = bboxes1.contiguous()
    bboxes2 = bboxes2.contiguous()
    ext_module.box_iou_rotated(
        bboxes1, bboxes2, ious, mode_flag=mode_flag, aligned=aligned)
    if not aligned:
        ious = ious.view(rows, cols)
    return ious


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/carafe.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Function
from torch.nn.modules.module import Module

from ..cnn import UPSAMPLE_LAYERS, normal_init, xavier_init
from ..utils import ext_loader

# Handle returned by ext_loader.load_ext for '_ext'; the list names the
# forward/backward ops used by CARAFENaiveFunction and CARAFEFunction.
ext_module = ext_loader.load_ext('_ext', [
    'carafe_naive_forward', 'carafe_naive_backward', 'carafe_forward',
    'carafe_backward'
])


class CARAFENaiveFunction(Function):
    """Autograd function around the ``carafe_naive_*`` extension ops."""

    @staticmethod
    def symbolic(g, features, masks, kernel_size, group_size, scale_factor):
        """Emit the ``mmcv::MMCVCARAFENaive`` op on graph ``g``."""
        attrs = dict(
            kernel_size_i=kernel_size,
            group_size_i=group_size,
            scale_factor_f=scale_factor)
        return g.op('mmcv::MMCVCARAFENaive', features, masks, **attrs)

    @staticmethod
    def forward(ctx, features, masks, kernel_size, group_size, scale_factor):
        """Upsample ``features`` by ``scale_factor`` using ``masks``.

        Args:
            ctx: Autograd context.
            features (torch.Tensor): 4-D tensor (n, c, h, w); ``c`` must be
                divisible by ``group_size``.
            masks (torch.Tensor): Tensor with
                ``kernel_size * kernel_size * group_size`` channels and the
                spatial size of ``features`` multiplied by ``scale_factor``.
            kernel_size (int): Odd, positive kernel size.
            group_size (int): Group size.
            scale_factor (int): Upsampling ratio, at least 1.

        Returns:
            torch.Tensor: Tensor of shape
            ``(n, c, h * scale_factor, w * scale_factor)``.
        """
        assert scale_factor >= 1
        assert masks.size(1) == kernel_size * kernel_size * group_size
        assert masks.size(-1) == features.size(-1) * scale_factor
        assert masks.size(-2) == features.size(-2) * scale_factor
        assert features.size(1) % group_size == 0
        assert (kernel_size - 1) % 2 == 0 and kernel_size >= 1

        # Remember the hyper-parameters and shapes on the context.
        ctx.kernel_size = kernel_size
        ctx.group_size = group_size
        ctx.scale_factor = scale_factor
        ctx.feature_size = features.size()
        ctx.mask_size = masks.size()

        batch, channels, height, width = features.size()
        upsampled_shape = (batch, channels, height * scale_factor,
                           width * scale_factor)
        output = features.new_zeros(upsampled_shape)
        ext_module.carafe_naive_forward(
            features,
            masks,
            output,
            kernel_size=kernel_size,
            group_size=group_size,
            scale_factor=scale_factor)

        # Inputs are only kept when at least one of them needs a gradient.
        needs_grad = features.requires_grad or masks.requires_grad
        if needs_grad:
            ctx.save_for_backward(features, masks)
        return output

    @staticmethod
    def backward(ctx, grad_output):
        """Return gradients for ``features`` and ``masks``.

        The three trailing ``None`` entries correspond to the non-tensor
        arguments of ``forward``.
        """
        assert grad_output.is_cuda

        features, masks = ctx.saved_tensors

        grad_input = torch.zeros_like(features)
        grad_masks = torch.zeros_like(masks)
        ext_module.carafe_naive_backward(
            grad_output.contiguous(),
            features,
            masks,
            grad_input,
            grad_masks,
            kernel_size=ctx.kernel_size,
            group_size=ctx.group_size,
            scale_factor=ctx.scale_factor)

        return grad_input, grad_masks, None, None, None


# Functional entry point: carafe_naive(features, masks, kernel_size,
# group_size, scale_factor) runs CARAFENaiveFunction through ``apply``.
carafe_naive = CARAFENaiveFunction.apply


class CARAFENaive(Module):
    """Module wrapper around ``carafe_naive``.

    Args:
        kernel_size (int): kernel size passed to ``carafe_naive``
        group_size (int): group size passed to ``carafe_naive``
        scale_factor (int): scale factor passed to ``carafe_naive``
    """

    def __init__(self, kernel_size, group_size, scale_factor):
        super().__init__()

        # All three hyper-parameters must be plain integers.
        assert all(
            isinstance(value, int)
            for value in (kernel_size, group_size, scale_factor))
        self.kernel_size = kernel_size
        self.group_size = group_size
        self.scale_factor = scale_factor

    def forward(self, features, masks):
        """Apply ``carafe_naive`` with the stored hyper-parameters."""
        hyper_params = (self.kernel_size, self.group_size, self.scale_factor)
        return carafe_naive(features, masks, *hyper_params)


class CARAFEFunction(Function):
    """Autograd function around the ``carafe_*`` extension ops."""

    @staticmethod
    def symbolic(g, features, masks, kernel_size, group_size, scale_factor):
        """Emit the ``mmcv::MMCVCARAFE`` op on graph ``g``."""
        attrs = dict(
            kernel_size_i=kernel_size,
            group_size_i=group_size,
            scale_factor_f=scale_factor)
        return g.op('mmcv::MMCVCARAFE', features, masks, **attrs)

    @staticmethod
    def forward(ctx, features, masks, kernel_size, group_size, scale_factor):
        """Upsample ``features`` by ``scale_factor`` using ``masks``.

        Args:
            ctx: Autograd context.
            features (torch.Tensor): 4-D tensor (n, c, h, w); ``c`` must be
                divisible by ``group_size``.
            masks (torch.Tensor): Tensor with
                ``kernel_size * kernel_size * group_size`` channels and the
                spatial size of ``features`` multiplied by ``scale_factor``.
            kernel_size (int): Odd, positive kernel size.
            group_size (int): Group size.
            scale_factor (int): Upsampling ratio, at least 1.

        Returns:
            torch.Tensor: Tensor of shape
            ``(n, c, h * scale_factor, w * scale_factor)``.
        """
        assert scale_factor >= 1
        assert masks.size(1) == kernel_size * kernel_size * group_size
        assert masks.size(-1) == features.size(-1) * scale_factor
        assert masks.size(-2) == features.size(-2) * scale_factor
        assert features.size(1) % group_size == 0
        assert (kernel_size - 1) % 2 == 0 and kernel_size >= 1

        # Remember the hyper-parameters and shapes on the context.
        ctx.kernel_size = kernel_size
        ctx.group_size = group_size
        ctx.scale_factor = scale_factor
        ctx.feature_size = features.size()
        ctx.mask_size = masks.size()

        batch, channels, height, width = features.size()
        upsampled_shape = (batch, channels, height * scale_factor,
                           width * scale_factor)
        output = features.new_zeros(upsampled_shape)
        # Zero-initialised work buffers handed to the extension op alongside
        # the real inputs/outputs; ``rfeatures`` is reused by ``backward``.
        routput = features.new_zeros(output.size(), requires_grad=False)
        rfeatures = features.new_zeros(features.size(), requires_grad=False)
        rmasks = masks.new_zeros(masks.size(), requires_grad=False)
        ext_module.carafe_forward(
            features,
            masks,
            rfeatures,
            routput,
            rmasks,
            output,
            kernel_size=kernel_size,
            group_size=group_size,
            scale_factor=scale_factor)

        # Tensors are only kept when at least one input needs a gradient.
        needs_grad = features.requires_grad or masks.requires_grad
        if needs_grad:
            ctx.save_for_backward(features, masks, rfeatures)
        return output

    @staticmethod
    def backward(ctx, grad_output):
        """Return gradients for ``features`` and ``masks``.

        The three trailing ``None`` entries correspond to the non-tensor
        arguments of ``forward``.
        """
        assert grad_output.is_cuda

        features, masks, rfeatures = ctx.saved_tensors

        # Work buffers for the extension op.
        rgrad_output = torch.zeros_like(grad_output, requires_grad=False)
        rgrad_input_hs = torch.zeros_like(grad_output, requires_grad=False)
        rgrad_input = torch.zeros_like(features, requires_grad=False)
        rgrad_masks = torch.zeros_like(masks, requires_grad=False)
        # Buffers that receive the actual gradients.
        grad_input = torch.zeros_like(features, requires_grad=False)
        grad_masks = torch.zeros_like(masks, requires_grad=False)
        ext_module.carafe_backward(
            grad_output.contiguous(),
            rfeatures,
            masks,
            rgrad_output,
            rgrad_input_hs,
            rgrad_input,
            rgrad_masks,
            grad_input,
            grad_masks,
            kernel_size=ctx.kernel_size,
            group_size=ctx.group_size,
            scale_factor=ctx.scale_factor)
        return grad_input, grad_masks, None, None, None


# Functional entry point: carafe(features, masks, kernel_size, group_size,
# scale_factor) runs CARAFEFunction through ``apply``.
carafe = CARAFEFunction.apply


class CARAFE(Module):
    """ CARAFE: Content-Aware ReAssembly of FEatures

    See `CARAFE: Content-Aware ReAssembly of FEatures
    <https://arxiv.org/abs/1905.02188>`_ for the details of the operator.

    Args:
        kernel_size (int): reassemble kernel size
        group_size (int): reassemble group size
        scale_factor (int): upsample ratio

    Returns:
        upsampled feature map
    """

    def __init__(self, kernel_size, group_size, scale_factor):
        super().__init__()

        # All three hyper-parameters must be plain integers.
        assert all(
            isinstance(value, int)
            for value in (kernel_size, group_size, scale_factor))
        self.kernel_size = kernel_size
        self.group_size = group_size
        self.scale_factor = scale_factor

    def forward(self, features, masks):
        """Apply ``carafe`` with the stored hyper-parameters."""
        hyper_params = (self.kernel_size, self.group_size, self.scale_factor)
        return carafe(features, masks, *hyper_params)


@UPSAMPLE_LAYERS.register_module(name='carafe')
class CARAFEPack(nn.Module):
    """CARAFE upsampler bundling three stages: 1) channel compressor,
    2) content encoder, 3) CARAFE op.

    Official implementation of ICCV 2019 paper
    `CARAFE: Content-Aware ReAssembly of FEatures
    <https://arxiv.org/abs/1905.02188>`_.

    Args:
        channels (int): input feature channels
        scale_factor (int): upsample ratio
        up_kernel (int): kernel size of CARAFE op
        up_group (int): group size of CARAFE op
        encoder_kernel (int): kernel size of content encoder
        encoder_dilation (int): dilation of content encoder
        compressed_channels (int): output channels of channels compressor

    Returns:
        upsampled feature map
    """

    def __init__(self,
                 channels,
                 scale_factor,
                 up_kernel=5,
                 up_group=1,
                 encoder_kernel=3,
                 encoder_dilation=1,
                 compressed_channels=64):
        super().__init__()
        self.channels = channels
        self.scale_factor = scale_factor
        self.up_kernel = up_kernel
        self.up_group = up_group
        self.encoder_kernel = encoder_kernel
        self.encoder_dilation = encoder_dilation
        self.compressed_channels = compressed_channels

        # 1x1 convolution reducing the channel count before mask prediction.
        self.channel_compressor = nn.Conv2d(channels, self.compressed_channels,
                                            1)

        # The encoder predicts one reassembly kernel per group for every
        # position of the upsampled map.
        mask_channels = (
            self.up_kernel * self.up_kernel * self.up_group *
            self.scale_factor * self.scale_factor)
        encoder_padding = int(
            (self.encoder_kernel - 1) * self.encoder_dilation / 2)
        self.content_encoder = nn.Conv2d(
            self.compressed_channels,
            mask_channels,
            self.encoder_kernel,
            padding=encoder_padding,
            dilation=self.encoder_dilation,
            groups=1)
        self.init_weights()

    def init_weights(self):
        """Xavier-initialise every conv, then re-initialise the content
        encoder with a small-std normal distribution."""
        conv_layers = (m for m in self.modules() if isinstance(m, nn.Conv2d))
        for conv in conv_layers:
            xavier_init(conv, distribution='uniform')
        normal_init(self.content_encoder, std=0.001)

    def kernel_normalizer(self, mask):
        """Pixel-shuffle the raw mask and softmax-normalise each kernel."""
        mask = F.pixel_shuffle(mask, self.scale_factor)
        n, mask_c, h, w = mask.size()
        # Float division is used on purpose, to avoid inconsistency while
        # exporting to onnx.
        mask_channel = int(mask_c / float(self.up_kernel**2))
        grouped = mask.view(n, mask_channel, -1, h, w)

        normalized = F.softmax(grouped, dim=2, dtype=grouped.dtype)
        return normalized.view(n, mask_c, h, w).contiguous()

    def feature_reassemble(self, x, mask):
        """Run the CARAFE op on ``x`` with the normalised ``mask``."""
        return carafe(x, mask, self.up_kernel, self.up_group,
                      self.scale_factor)

    def forward(self, x):
        """Compress channels, predict and normalise the reassembly kernels,
        then reassemble ``x`` into the upsampled feature map."""
        raw_mask = self.content_encoder(self.channel_compressor(x))
        mask = self.kernel_normalizer(raw_mask)
        return self.feature_reassemble(x, mask)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/cc_attention.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import torch
import torch.nn as nn
import torch.nn.functional as F

from mmcv.cnn import PLUGIN_LAYERS, Scale


def NEG_INF_DIAG(n, device):
    """Build an [n, n] matrix with "-inf" on the diagonal and 0 elsewhere.

    Adding it to an attention energy masks the diagonal entries, so the
    overlapped element in the Criss-Cross is not counted twice.
    """
    neg_inf = torch.tensor(float('-inf')).to(device)
    diagonal = neg_inf.repeat(n)
    return torch.diag(diagonal, 0)


@PLUGIN_LAYERS.register_module()
class CrissCrossAttention(nn.Module):
    """Criss-Cross Attention Module.

    .. note::
        Before v1.3.13, we use a CUDA op. Since v1.3.13, we switch
        to a pure PyTorch and equivalent implementation. For more
        details, please refer to https://github.com/open-mmlab/mmcv/pull/1201.

        Speed comparison for one forward pass

        - Input size: [2,512,97,97]
        - Device: 1 NVIDIA GeForce RTX 2080 Ti

        +-----------------------+---------------+------------+---------------+
        |                       |PyTorch version|CUDA version|Relative speed |
        +=======================+===============+============+===============+
        |with torch.no_grad()   |0.00554402 s   |0.0299619 s |5.4x           |
        +-----------------------+---------------+------------+---------------+
        |no with torch.no_grad()|0.00562803 s   |0.0301349 s |5.4x           |
        +-----------------------+---------------+------------+---------------+

    Args:
        in_channels (int): Channels of the input feature map.
    """

    def __init__(self, in_channels):
        super().__init__()
        # Query and key are projected to in_channels // 8 channels; the
        # value projection keeps the full channel count.
        reduced_channels = in_channels // 8
        self.query_conv = nn.Conv2d(in_channels, reduced_channels, 1)
        self.key_conv = nn.Conv2d(in_channels, reduced_channels, 1)
        self.value_conv = nn.Conv2d(in_channels, in_channels, 1)
        self.gamma = Scale(0.)
        self.in_channels = in_channels

    def forward(self, x):
        """forward function of Criss-Cross Attention.

        Args:
            x (torch.Tensor): Input feature with the shape of
                (batch_size, in_channels, height, width).

        Returns:
            torch.Tensor: Output of the layer, with the shape of
            (batch_size, in_channels, height, width)
        """
        _, _, height, _ = x.size()
        query = self.query_conv(x)
        key = self.key_conv(x)
        value = self.value_conv(x)

        # Energy along the height axis. The -inf diagonal masks the position
        # itself, which is already covered by the width-axis energy.
        column_energy = torch.einsum('bchw,bciw->bwhi', query, key)
        column_energy = column_energy + NEG_INF_DIAG(height, query.device)
        column_energy = column_energy.transpose(1, 2)
        # Energy along the width axis.
        row_energy = torch.einsum('bchw,bchj->bhwj', query, key)

        # Joint softmax over both axes: [B,H,W,(H+W)]
        joint_energy = torch.cat([column_energy, row_energy], dim=-1)
        attn = F.softmax(joint_energy, dim=-1)

        out = torch.einsum('bciw,bhwi->bchw', value, attn[..., :height])
        out += torch.einsum('bchj,bhwj->bchw', value, attn[..., height:])

        # Scaled residual connection.
        out = self.gamma(out) + x
        return out.contiguous()

    def __repr__(self):
        return f'{self.__class__.__name__}(in_channels={self.in_channels})'


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/contour_expand.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import numpy as np
import torch

from ..utils import ext_loader

# Handle returned by ext_loader.load_ext for '_ext'; 'contour_expand' is the
# only op this file calls on it.
ext_module = ext_loader.load_ext('_ext', ['contour_expand'])


def contour_expand(kernel_mask, internal_kernel_label, min_kernel_area,
                   kernel_num):
    """Expand kernel contours so that foreground pixels are assigned into
    instances.

    NumPy inputs are converted to tensors before the extension op is called.

    Args:
        kernel_mask (np.array or torch.Tensor): The instance kernel mask with
            size hxw.
        internal_kernel_label (np.array or torch.Tensor): The instance internal
            kernel label with size hxw.
        min_kernel_area (int): The minimum kernel area.
        kernel_num (int): The instance kernel number.

    Returns:
        list: The instance index map with size hxw.
    """
    array_types = (torch.Tensor, np.ndarray)
    assert isinstance(kernel_mask, array_types)
    assert isinstance(internal_kernel_label, array_types)
    assert isinstance(min_kernel_area, int)
    assert isinstance(kernel_num, int)

    if isinstance(kernel_mask, np.ndarray):
        kernel_mask = torch.from_numpy(kernel_mask)
    if isinstance(internal_kernel_label, np.ndarray):
        internal_kernel_label = torch.from_numpy(internal_kernel_label)

    if torch.__version__ != 'parrots':
        # Regular PyTorch build: positional call, result returned as-is.
        return ext_module.contour_expand(kernel_mask, internal_kernel_label,
                                         min_kernel_area, kernel_num)

    # parrots build: empty inputs short-circuit to an empty list.
    if kernel_mask.shape[0] == 0 or internal_kernel_label.shape[0] == 0:
        return []
    expanded = ext_module.contour_expand(
        kernel_mask,
        internal_kernel_label,
        min_kernel_area=min_kernel_area,
        kernel_num=kernel_num)
    return expanded.tolist()


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/convex_iou.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
from ..utils import ext_loader

# Handle returned by ext_loader.load_ext for '_ext'; the list names the two
# ops called by convex_iou and convex_giou below.
ext_module = ext_loader.load_ext('_ext', ['convex_iou', 'convex_giou'])


def convex_giou(pointsets, polygons):
    """Return generalized intersection-over-union (Jaccard index) between point
    sets and polygons.

    The extension op fills a single (N, 19) buffer: the last column is
    returned as the gious and the first 18 columns as the point-set gradient.

    Args:
        pointsets (torch.Tensor): It has shape (N, 18),
            indicating (x1, y1, x2, y2, ..., x9, y9) for each row.
        polygons (torch.Tensor): It has shape (N, 8),
            indicating (x1, y1, x2, y2, x3, y3, x4, y4) for each row.

    Returns:
        tuple[torch.Tensor, torch.Tensor]: The first element is the gious
        between point sets and polygons with the shape (N,). The second
        element is the gradient of point sets with the shape (N, 18).
    """
    num_pointsets = pointsets.size(0)
    packed = pointsets.new_zeros((num_pointsets, 19))
    ext_module.convex_giou(pointsets, polygons, packed)
    gious = packed[:, -1]
    points_grad = packed[:, 0:-1]
    return gious, points_grad


def convex_iou(pointsets, polygons):
    """Return intersection-over-union (Jaccard index) between point sets and
    polygons.

    Args:
        pointsets (torch.Tensor): It has shape (N, 18),
            indicating (x1, y1, x2, y2, ..., x9, y9) for each row.
        polygons (torch.Tensor): It has shape (K, 8),
            indicating (x1, y1, x2, y2, x3, y3, x4, y4) for each row.

    Returns:
        torch.Tensor: Return the ious between point sets and polygons with the
        shape (N, K).
    """
    # One row per point set, one column per polygon; the extension op writes
    # the result into this buffer.
    result_shape = (pointsets.size(0), polygons.size(0))
    ious = pointsets.new_zeros(result_shape)
    ext_module.convex_iou(pointsets, polygons, ious)
    return ious


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/corner_pool.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import torch
from torch import nn
from torch.autograd import Function

from ..utils import ext_loader

# Handle returned by ext_loader.load_ext for '_ext'; the list names the
# forward/backward ops of the four pooling directions used by the
# *PoolFunction classes below.
ext_module = ext_loader.load_ext('_ext', [
    'top_pool_forward', 'top_pool_backward', 'bottom_pool_forward',
    'bottom_pool_backward', 'left_pool_forward', 'left_pool_backward',
    'right_pool_forward', 'right_pool_backward'
])

# Integer code passed as the ``mode_i`` attribute of the
# 'mmcv::MMCVCornerPool' symbolic op for each pooling direction.
_mode_dict = {'top': 0, 'bottom': 1, 'left': 2, 'right': 3}


class TopPoolFunction(Function):
    """Autograd function around the ``top_pool_*`` extension ops."""

    @staticmethod
    def symbolic(g, input):
        """Emit ``mmcv::MMCVCornerPool`` with the 'top' mode code."""
        mode_code = int(_mode_dict['top'])
        return g.op('mmcv::MMCVCornerPool', input, mode_i=mode_code)

    @staticmethod
    def forward(ctx, input):
        """Run the forward op and keep ``input`` for the backward pass."""
        pooled = ext_module.top_pool_forward(input)
        ctx.save_for_backward(input)
        return pooled

    @staticmethod
    def backward(ctx, grad_output):
        """Return the gradient w.r.t. ``input`` computed by the backward op."""
        (saved_input, ) = ctx.saved_tensors
        return ext_module.top_pool_backward(saved_input, grad_output)


class BottomPoolFunction(Function):
    """Autograd function around the ``bottom_pool_*`` extension ops."""

    @staticmethod
    def symbolic(g, input):
        """Emit ``mmcv::MMCVCornerPool`` with the 'bottom' mode code."""
        mode_code = int(_mode_dict['bottom'])
        return g.op('mmcv::MMCVCornerPool', input, mode_i=mode_code)

    @staticmethod
    def forward(ctx, input):
        """Run the forward op and keep ``input`` for the backward pass."""
        pooled = ext_module.bottom_pool_forward(input)
        ctx.save_for_backward(input)
        return pooled

    @staticmethod
    def backward(ctx, grad_output):
        """Return the gradient w.r.t. ``input`` computed by the backward op."""
        (saved_input, ) = ctx.saved_tensors
        return ext_module.bottom_pool_backward(saved_input, grad_output)


class LeftPoolFunction(Function):
    """Autograd function around the ``left_pool_*`` extension ops."""

    @staticmethod
    def symbolic(g, input):
        """Emit ``mmcv::MMCVCornerPool`` with the 'left' mode code."""
        mode_code = int(_mode_dict['left'])
        return g.op('mmcv::MMCVCornerPool', input, mode_i=mode_code)

    @staticmethod
    def forward(ctx, input):
        """Run the forward op and keep ``input`` for the backward pass."""
        pooled = ext_module.left_pool_forward(input)
        ctx.save_for_backward(input)
        return pooled

    @staticmethod
    def backward(ctx, grad_output):
        """Return the gradient w.r.t. ``input`` computed by the backward op."""
        (saved_input, ) = ctx.saved_tensors
        return ext_module.left_pool_backward(saved_input, grad_output)


class RightPoolFunction(Function):
    """Autograd function around the ``right_pool_*`` extension ops."""

    @staticmethod
    def symbolic(g, input):
        """Emit ``mmcv::MMCVCornerPool`` with the 'right' mode code."""
        mode_code = int(_mode_dict['right'])
        return g.op('mmcv::MMCVCornerPool', input, mode_i=mode_code)

    @staticmethod
    def forward(ctx, input):
        """Run the forward op and keep ``input`` for the backward pass."""
        pooled = ext_module.right_pool_forward(input)
        ctx.save_for_backward(input)
        return pooled

    @staticmethod
    def backward(ctx, grad_output):
        """Return the gradient w.r.t. ``input`` computed by the backward op."""
        (saved_input, ) = ctx.saved_tensors
        return ext_module.right_pool_backward(saved_input, grad_output)


class CornerPool(nn.Module):
    """Corner Pooling.

    Corner Pooling is a new type of pooling layer that helps a
    convolutional network better localize corners of bounding boxes.

    Please refer to `CornerNet: Detecting Objects as Paired Keypoints
    <https://arxiv.org/abs/1808.01244>`_ for more details.

    Code is modified from https://github.com/princeton-vl/CornerNet-Lite.

    Args:
        mode (str): Pooling orientation for the pooling layer

            - 'bottom': Bottom Pooling
            - 'left': Left Pooling
            - 'right': Right Pooling
            - 'top': Top Pooling

    Returns:
        Feature map after pooling.
    """

    # Extension-backed autograd function used on the fallback path.
    pool_functions = {
        'bottom': BottomPoolFunction,
        'left': LeftPoolFunction,
        'right': RightPoolFunction,
        'top': TopPoolFunction,
    }

    # (dimension, flip) pairs for the ``torch.cummax`` path: the running
    # maximum is taken along ``dimension``, after flipping the input along
    # that dimension when ``flip`` is True.
    cummax_dim_flip = {
        'bottom': (2, False),
        'left': (3, True),
        'right': (3, False),
        'top': (2, True),
    }

    def __init__(self, mode):
        super(CornerPool, self).__init__()
        assert mode in self.pool_functions
        self.mode = mode
        self.corner_pool = self.pool_functions[mode]

    @staticmethod
    def _numeric_version(version):
        """Return the leading numeric release components of ``version``.

        ``'1.10.0a0+git123'`` becomes ``(1, 10, 0)``; a string without a
        leading number (e.g. ``'parrots'``) becomes ``()``.
        """
        release = []
        for piece in str(version).split('.')[:3]:
            digits = ''
            for char in piece:
                if char not in '0123456789':
                    break
                digits += char
            if not digits:
                break
            release.append(int(digits))
        return tuple(release)

    def forward(self, x):
        """Apply corner pooling to ``x`` along the configured direction.

        Uses ``torch.cummax`` when the installed PyTorch is at least 1.5 and
        falls back to the extension op otherwise (including parrots).
        """
        # Compare versions numerically: a plain string comparison orders
        # '1.10.0' before '1.5.0', so the gates below must not depend on
        # ``torch.__version__`` providing its own comparison operators.
        torch_version = self._numeric_version(torch.__version__)
        if torch.__version__ != 'parrots' and torch_version >= (1, 5):
            if torch.onnx.is_in_onnx_export():
                assert torch_version >= (1, 7), \
                    'When `cummax` serves as an intermediate component whose '\
                    'outputs is used as inputs for another modules, it\'s '\
                    'expected that pytorch version must be >= 1.7.0, '\
                    'otherwise Error appears like: `RuntimeError: tuple '\
                    'appears in op that does not forward tuples, unsupported '\
                    'kind: prim::PythonOp`.'

            dim, flip = self.cummax_dim_flip[self.mode]
            if flip:
                x = x.flip(dim)
            pool_tensor, _ = torch.cummax(x, dim=dim)
            if flip:
                pool_tensor = pool_tensor.flip(dim)
            return pool_tensor
        else:
            return self.corner_pool.apply(x)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/correlation.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import torch
from torch import Tensor, nn
from torch.autograd import Function
from torch.autograd.function import once_differentiable
from torch.nn.modules.utils import _pair

from ..utils import ext_loader

# Handle returned by ext_loader.load_ext for '_ext'; the list names the two
# ops called by CorrelationFunction's forward and backward passes.
ext_module = ext_loader.load_ext(
    '_ext', ['correlation_forward', 'correlation_backward'])


class CorrelationFunction(Function):
    """Autograd bridge to the compiled ``correlation_forward`` and
    ``correlation_backward`` extension ops."""

    @staticmethod
    def forward(ctx,
                input1,
                input2,
                kernel_size=1,
                max_displacement=1,
                stride=1,
                padding=1,
                dilation=1,
                dilation_patch=1):
        """Compute the correlation volume of ``input1`` and ``input2``.

        Every size-like argument is normalised with ``_pair`` and stored on
        ``ctx`` so that ``backward`` and ``_output_size`` can read it back.

        Returns:
            Tensor: A tensor created with ``input1.new_zeros`` whose shape
            comes from ``_output_size`` and whose content is written by the
            extension op.
        """
        ctx.save_for_backward(input1, input2)

        ctx.kernel_size = _pair(kernel_size)
        ctx.patch_size = max_displacement * 2 + 1
        ctx.stride = _pair(stride)
        ctx.padding = _pair(padding)
        ctx.dilation = _pair(dilation)
        ctx.dilation_patch = _pair(dilation_patch)

        kernel_h, kernel_w = ctx.kernel_size
        stride_h, stride_w = ctx.stride
        pad_h, pad_w = ctx.padding
        dil_h, dil_w = ctx.dilation
        patch_dil_h, patch_dil_w = ctx.dilation_patch

        output = input1.new_zeros(
            CorrelationFunction._output_size(ctx, input1))

        # The extension fills ``output`` in place.
        ext_module.correlation_forward(
            input1,
            input2,
            output,
            kH=kernel_h,
            kW=kernel_w,
            patchH=ctx.patch_size,
            patchW=ctx.patch_size,
            padH=pad_h,
            padW=pad_w,
            dilationH=dil_h,
            dilationW=dil_w,
            dilation_patchH=patch_dil_h,
            dilation_patchW=patch_dil_w,
            dH=stride_h,
            dW=stride_w)

        return output

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        """Return gradients for ``input1`` and ``input2``.

        The six hyper-parameters of ``forward`` are not differentiable, hence
        the trailing ``None`` entries.
        """
        input1, input2 = ctx.saved_tensors

        kernel_h, kernel_w = ctx.kernel_size
        pad_h, pad_w = ctx.padding
        dil_h, dil_w = ctx.dilation
        patch_dil_h, patch_dil_w = ctx.dilation_patch
        stride_h, stride_w = ctx.stride

        # Zero-initialised buffers that the extension writes into.
        grad_input1 = torch.zeros_like(input1)
        grad_input2 = torch.zeros_like(input2)

        ext_module.correlation_backward(
            grad_output,
            input1,
            input2,
            grad_input1,
            grad_input2,
            kH=kernel_h,
            kW=kernel_w,
            patchH=ctx.patch_size,
            patchW=ctx.patch_size,
            padH=pad_h,
            padW=pad_w,
            dilationH=dil_h,
            dilationW=dil_w,
            dilation_patchH=patch_dil_h,
            dilation_patchW=patch_dil_w,
            dH=stride_h,
            dW=stride_w)
        return grad_input1, grad_input2, None, None, None, None, None, None

    @staticmethod
    def _output_size(ctx, input1):
        """Shape of the correlation volume for ``input1``.

        Returns:
            tuple: ``(batch, patch_size, patch_size, out_h, out_w)``, where
            the spatial sizes are derived from the padded input size, the
            dilated kernel extent and the stride stored on ``ctx``.
        """
        batch_size = input1.size(0)
        in_h, in_w = input1.size(2), input1.size(3)
        kernel_h, kernel_w = ctx.kernel_size
        stride_h, stride_w = ctx.stride
        pad_h, pad_w = ctx.padding
        dil_h, dil_w = ctx.dilation

        # Extent covered by a dilated kernel along each axis.
        span_h = (kernel_h - 1) * dil_h + 1
        span_w = (kernel_w - 1) * dil_w + 1

        out_h = int((in_h + 2 * pad_h - span_h) / stride_h + 1)
        out_w = int((in_w + 2 * pad_w - span_w) / stride_w + 1)

        return (batch_size, ctx.patch_size, ctx.patch_size, out_h, out_w)


class Correlation(nn.Module):
    r"""Correlation operator

    This correlation operator works for optical flow correlation computation.

    It takes two batched tensors with shape :math:`(N, C, H, W)` and produces
    a correlation output with shape :math:`(N, max\_displacement \times
    2 + 1, max\_displacement \times 2 + 1, H_{out}, W_{out})`

    where

    .. math::
        H_{out} = \left\lfloor\frac{H_{in}  + 2 \times padding -
            dilation \times (kernel\_size - 1) - 1}
            {stride} + 1\right\rfloor

    .. math::
        W_{out} = \left\lfloor\frac{W_{in}  + 2 \times padding - dilation
            \times (kernel\_size - 1) - 1}
            {stride} + 1\right\rfloor

    The correlation item :math:`(N_i, dy, dx)` is formed by taking the sliding
    window convolution between input1 and shifted input2,

    .. math::
        Corr(N_i, dy, dx) =
        \sum_{c=0}^{C-1}
        input1(N_i, c) \star
        \mathcal{S}(input2(N_i, c), dy, dx)

    where :math:`\star` is the valid 2d sliding window convolution operator,
    :math:`\mathcal{S}` means shifting the input features (auto-complete
    zero marginal), and :math:`dx, dy` are the shifting distances,
    :math:`dx, dy \in [-max\_displacement \times dilation\_patch,
    max\_displacement \times dilation\_patch]`.

    Args:
        kernel_size (int): The size of sliding window i.e. local neighborhood
            representing the center points and involved in correlation
            computation. Defaults to 1.
        max_displacement (int): The radius for computing correlation volume,
            but the actual working space can be dilated by dilation_patch.
            Defaults to 1.
        stride (int): The stride of the sliding blocks in the input spatial
            dimensions. Defaults to 1.
        padding (int): Zero padding added to all four sides of the input1.
            Defaults to 0.
        dilation (int): The spacing of the local neighborhood that is
            involved in correlation. Defaults to 1.
        dilation_patch (int): The spacing between positions for which
            correlation is computed. Defaults to 1.
    """

    def __init__(self,
                 kernel_size: int = 1,
                 max_displacement: int = 1,
                 stride: int = 1,
                 padding: int = 0,
                 dilation: int = 1,
                 dilation_patch: int = 1) -> None:
        super().__init__()
        # Plain hyper-parameters; they are forwarded unchanged to
        # ``CorrelationFunction.apply`` in ``forward``.
        self.kernel_size = kernel_size
        self.max_displacement = max_displacement
        self.stride = stride
        self.padding = padding
        self.dilation = dilation
        self.dilation_patch = dilation_patch

    def forward(self, input1: Tensor, input2: Tensor) -> Tensor:
        """Run ``CorrelationFunction`` with this module's settings."""
        settings = (self.kernel_size, self.max_displacement, self.stride,
                    self.padding, self.dilation, self.dilation_patch)
        return CorrelationFunction.apply(input1, input2, *settings)

    def __repr__(self) -> str:
        """Render ``ClassName(name=value, ...)`` for all six settings."""
        field_names = ('kernel_size', 'max_displacement', 'stride',
                       'padding', 'dilation', 'dilation_patch')
        rendered = ', '.join(f'{name}={getattr(self, name)}'
                             for name in field_names)
        return f'{self.__class__.__name__}({rendered})'


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/README.md
================================================
# Code Structure of CUDA operators

This folder contains all non-python code for MMCV custom ops. Please follow the same architecture if you want to add new ops.

## Directories Tree

```folder
.
├── common
│   ├── box_iou_rotated_utils.hpp
│   ├── parrots_cpp_helper.hpp
│   ├── parrots_cuda_helper.hpp
│   ├── pytorch_cpp_helper.hpp
│   ├── pytorch_cuda_helper.hpp
│   ├── pytorch_device_registry.hpp
│   └── cuda
│       ├── common_cuda_helper.hpp
│       ├── parrots_cudawarpfunction.cuh
│       ├── ...
│       └── ops_cuda_kernel.cuh
├── onnxruntime
│   ├── onnxruntime_register.h
│   ├── onnxruntime_session_options_config_keys.h
│   ├── ort_mmcv_utils.h
│   ├── ...
│   ├── onnx_ops.h
│   └── cpu
│       ├── onnxruntime_register.cpp
│       ├── ...
│       └── onnx_ops_impl.cpp
├── parrots
│   ├── ...
│   ├── ops.cpp
│   ├── ops_parrots.cpp
│   └── ops_pytorch.h
├── pytorch
│   ├── info.cpp
│   ├── pybind.cpp
│   ├── ...
│   ├── ops.cpp
│   ├── cuda
│   │   ├── ...
│   │   └── ops_cuda.cu
│   └── cpu
│       ├── ...
│       └── ops.cpp
└── tensorrt
    ├── trt_cuda_helper.cuh
    ├── trt_plugin_helper.hpp
    ├── trt_plugin.hpp
    ├── trt_serialize.hpp
    ├── ...
    ├── trt_ops.hpp
    └── plugins
        ├── trt_cuda_helper.cu
        ├── trt_plugin.cpp
        ├── ...
        ├── trt_ops.cpp
        └── trt_ops_kernel.cu
```

## Components

- `common`: This directory contains all tools and shared codes.
  - `cuda`: The cuda kernels which can be shared by all backends. **HIP** kernel is also here since they have similar syntax.
- `onnxruntime`: **ONNX Runtime** support for custom ops.
  - `cpu`: CPU implementation of supported ops.
- `parrots`: **Parrots** is a deep learning framework for model training and inference. Parrots custom ops are placed in this directory.
- `pytorch`: **PyTorch** custom ops are supported by binding C++ to Python with **pybind11**. The ops implementation and binding codes are placed in this directory.
  - `cuda`: This directory contains cuda kernel launchers, which feed memory pointers of tensor to the cuda kernel in `common/cuda`. The launchers provide c++ interface of cuda implementation of corresponding custom ops.
  - `cpu`: This directory contains the CPU implementations of the corresponding custom ops.
- `tensorrt`: **TensorRT** support for custom ops.
  - `plugins`: This directory contains the implementation of the supported custom ops. Some ops might also use shared cuda kernel in `common/cuda`.

## How to add new PyTorch ops?

1. (Optional) Add a shared kernel in `common` to support special hardware platforms.

    ```c++
    // src/common/cuda/new_ops_cuda_kernel.cuh

    template <typename T>
    __global__ void new_ops_forward_cuda_kernel(const T* input, T* output, ...) {
        // forward here
    }

    ```

    Add cuda kernel launcher in `pytorch/cuda`.

    ```c++
    // src/pytorch/cuda
    #include <new_ops_cuda_kernel.cuh>

    void NewOpsForwardCUDAKernelLauncher(Tensor input, Tensor output, ...){
        // initialize
        at::cuda::CUDAGuard device_guard(input.device());
        cudaStream_t stream = at::cuda::getCurrentCUDAStream();
        ...
        AT_DISPATCH_FLOATING_TYPES_AND_HALF(
            input.scalar_type(), "new_ops_forward_cuda_kernel", ([&] {
                new_ops_forward_cuda_kernel<scalar_t>
                    <<<GET_BLOCKS(output_size), THREADS_PER_BLOCK, 0, stream>>>(
                        input.data_ptr<scalar_t>(), output.data_ptr<scalar_t>(),...);
            }));
        AT_CUDA_CHECK(cudaGetLastError());
    }
    ```

2. Register implementation for different devices.

    ```c++
    // src/pytorch/cuda/cudabind.cpp
    ...

    Tensor new_ops_forward_cuda(Tensor input, Tensor output, ...){
        // implement cuda forward here
        // use `NewOpsForwardCUDAKernelLauncher` here
    }
    // declare interface here.
    Tensor new_ops_forward_impl(Tensor input, Tensor output, ...);
    // register the implementation for given device (CUDA here).
    REGISTER_DEVICE_IMPL(new_ops_forward_impl, CUDA, new_ops_forward_cuda);
    ```

3. Add ops implementation in `pytorch` directory. Select different implementations according to device type.

    ```c++
    // src/pytorch/new_ops.cpp
    Tensor new_ops_forward_impl(Tensor input, Tensor output, ...){
        // dispatch the implementation according to the device type of input.
        DISPATCH_DEVICE_IMPL(new_ops_forward_impl, input, output, ...);
    }
    ...

    Tensor new_ops_forward(Tensor input, Tensor output, ...){
        return new_ops_forward_impl(input, output, ...);
    }
    ```

4. Bind the implementation in `pytorch/pybind.cpp`.

    ```c++
    // src/pytorch/pybind.cpp

    ...

    Tensor new_ops_forward(Tensor input, Tensor output, ...);

    ...

    // bind with pybind11
    m.def("new_ops_forward", &new_ops_forward, "new_ops_forward",
            py::arg("input"), py::arg("output"), ...);

    ...

    ```

5. Build MMCV again. Enjoy new ops in python

    ```python
    from ..utils import ext_loader
    ext_module = ext_loader.load_ext('_ext', ['new_ops_forward'])

    ...

    ext_module.new_ops_forward(input, output, ...)

    ```


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/box_iou_rotated_utils.hpp
================================================
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
// modified from
// https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_utils.h
#pragma once
#include <cassert>
#include <cmath>

#ifdef __CUDACC__
// Designates functions callable from the host (CPU) and the device (GPU)
#define HOST_DEVICE __host__ __device__
#define HOST_DEVICE_INLINE HOST_DEVICE __forceinline__
#else
#include <algorithm>
#define HOST_DEVICE
#define HOST_DEVICE_INLINE HOST_DEVICE inline
#endif

namespace {

// Rotated rectangle described by its centre, its two side lengths and a
// rotation angle. get_rotated_vertices() passes `a` straight to cos()/sin(),
// i.e. the angle is consumed without any unit conversion.
template <typename T>
struct RotatedBox {
  T x_ctr;  // centre x
  T y_ctr;  // centre y
  T w;      // side length paired with cos(a) along x
  T h;      // side length paired with cos(a) along y
  T a;      // rotation angle
};

// Minimal 2-D point / vector type with the arithmetic used by the routines
// in this header. Default-constructs to (0, 0).
template <typename T>
struct Point {
  T x, y;

  HOST_DEVICE_INLINE Point(const T& px = 0, const T& py = 0) : x(px), y(py) {}

  // Component-wise sum.
  HOST_DEVICE_INLINE Point operator+(const Point& rhs) const {
    return Point(x + rhs.x, y + rhs.y);
  }

  // In-place component-wise sum; returns *this.
  HOST_DEVICE_INLINE Point& operator+=(const Point& rhs) {
    x += rhs.x;
    y += rhs.y;
    return *this;
  }

  // Component-wise difference.
  HOST_DEVICE_INLINE Point operator-(const Point& rhs) const {
    return Point(x - rhs.x, y - rhs.y);
  }

  // Scale both components by `coeff`.
  HOST_DEVICE_INLINE Point operator*(const T coeff) const {
    return Point(x * coeff, y * coeff);
  }
};

// Dot product of two 2-D vectors.
template <typename T>
HOST_DEVICE_INLINE T dot_2d(const Point<T>& lhs, const Point<T>& rhs) {
  return lhs.x * rhs.x + lhs.y * rhs.y;
}

// z-component of the cross product of two 2-D vectors; positive when `rhs`
// is counter-clockwise from `lhs` in a right-handed x/y frame.
template <typename T>
HOST_DEVICE_INLINE T cross_2d(const Point<T>& lhs, const Point<T>& rhs) {
  return lhs.x * rhs.y - rhs.x * lhs.y;
}

// Writes the four corners of `box` into `pts`. Corners 2 and 3 are the
// reflections of corners 0 and 1 through the box centre, so consecutive
// entries of `pts` are joined by an edge of the rectangle.
template <typename T>
HOST_DEVICE_INLINE void get_rotated_vertices(const RotatedBox<T>& box,
                                             Point<T> (&pts)[4]) {
  // The upstream code scaled the angle by M_PI / 180 (0.01745329251); this
  // copy was modified to use box.a as given.
  const double angle = box.a;
  const T half_cos = (T)cos(angle) * 0.5f;
  const T half_sin = (T)sin(angle) * 0.5f;

  // y: top --> down; x: left --> right
  Point<T>& first = pts[0];
  Point<T>& second = pts[1];
  first.x = box.x_ctr - half_sin * box.h - half_cos * box.w;
  first.y = box.y_ctr + half_cos * box.h - half_sin * box.w;
  second.x = box.x_ctr + half_sin * box.h - half_cos * box.w;
  second.y = box.y_ctr - half_cos * box.h - half_sin * box.w;

  // Remaining corners are point reflections through the centre.
  pts[2].x = 2 * box.x_ctr - first.x;
  pts[2].y = 2 * box.y_ctr - first.y;
  pts[3].x = 2 * box.x_ctr - second.x;
  pts[3].y = 2 * box.y_ctr - second.y;
}

// Collects the candidate vertices of the overlap polygon of two
// quadrilaterals whose edges run between corners i and (i + 1) % 4.
//
// Three groups of points are appended to `intersections`, in this order:
//   1. crossings of an edge of pts1 with an edge of pts2 (at most 4 x 4),
//   2. corners of pts1 lying inside or on the boundary of pts2 (at most 4),
//   3. corners of pts2 lying inside or on the boundary of pts1 (at most 4),
// hence the 24-slot output array. Duplicates are not removed here.
//
// Returns the number of points written to `intersections`.
template <typename T>
HOST_DEVICE_INLINE int get_intersection_points(const Point<T> (&pts1)[4],
                                               const Point<T> (&pts2)[4],
                                               Point<T> (&intersections)[24]) {
  // Line vector
  // A line from p1 to p2 is: p1 + (p2-p1)*t, t=[0,1]
  Point<T> vec1[4], vec2[4];
  for (int i = 0; i < 4; i++) {
    vec1[i] = pts1[(i + 1) % 4] - pts1[i];
    vec2[i] = pts2[(i + 1) % 4] - pts2[i];
  }

  // Line test - test all line combos for intersection
  int num = 0;  // number of intersections
  for (int i = 0; i < 4; i++) {
    for (int j = 0; j < 4; j++) {
      // Solve for 2x2 Ax=b
      // i.e. pts1[i] + vec1[i] * t1 == pts2[j] + vec2[j] * t2
      T det = cross_2d<T>(vec2[j], vec1[i]);

      // This takes care of parallel lines
      if (fabs(det) <= 1e-14) {
        continue;
      }

      auto vec12 = pts2[j] - pts1[i];

      // t1: parameter along edge i of pts1; t2: parameter along edge j of
      // pts2 (Cramer's rule on the system above).
      T t1 = cross_2d<T>(vec2[j], vec12) / det;
      T t2 = cross_2d<T>(vec1[i], vec12) / det;

      // Keep the crossing only if it lies on both segments (ends included).
      if (t1 >= 0.0f && t1 <= 1.0f && t2 >= 0.0f && t2 <= 1.0f) {
        intersections[num++] = pts1[i] + vec1[i] * t1;
      }
    }
  }

  // Check for vertices of rect1 inside rect2
  {
    const auto& AB = vec2[0];
    const auto& DA = vec2[3];
    auto ABdotAB = dot_2d<T>(AB, AB);
    auto ADdotAD = dot_2d<T>(DA, DA);
    for (int i = 0; i < 4; i++) {
      // assume ABCD is the rectangle, and P is the point to be judged
      // P is inside ABCD iff. P's projection on AB lies within AB
      // and P's projection on AD lies within AD

      auto AP = pts1[i] - pts2[0];

      auto APdotAB = dot_2d<T>(AP, AB);
      // DA points from D to A, so AD == -DA; negate to project onto AD.
      auto APdotAD = -dot_2d<T>(AP, DA);

      if ((APdotAB >= 0) && (APdotAD >= 0) && (APdotAB <= ABdotAB) &&
          (APdotAD <= ADdotAD)) {
        intersections[num++] = pts1[i];
      }
    }
  }

  // Reverse the check - check for vertices of rect2 inside rect1
  {
    const auto& AB = vec1[0];
    const auto& DA = vec1[3];
    auto ABdotAB = dot_2d<T>(AB, AB);
    auto ADdotAD = dot_2d<T>(DA, DA);
    for (int i = 0; i < 4; i++) {
      auto AP = pts2[i] - pts1[0];

      auto APdotAB = dot_2d<T>(AP, AB);
      // Same sign flip as above: AD == -DA.
      auto APdotAD = -dot_2d<T>(AP, DA);

      if ((APdotAB >= 0) && (APdotAD >= 0) && (APdotAB <= ABdotAB) &&
          (APdotAD <= ADdotAD)) {
        intersections[num++] = pts2[i];
      }
    }
  }

  return num;
}

// Graham-scan convex hull of the first `num_in` points of `p`.
//
// The hull vertices are written to the front of `q` and their count is
// returned. `q` is also used as scratch space for all `num_in` shifted
// points, so entries beyond the returned count are not meaningful.
//
// Parameters:
//   p             input points (only the first num_in entries are read)
//   num_in        number of valid input points; asserted to be >= 2
//   q             output / scratch array
//   shift_to_zero when true, the hull is left translated so that the
//                 starting point (minimum y, then minimum x) is the origin;
//                 when false, the translation is undone before returning.
//
// Note: if all points coincide with the starting point, q[0] is set to the
// untranslated starting point and 1 is returned regardless of shift_to_zero.
template <typename T>
HOST_DEVICE_INLINE int convex_hull_graham(const Point<T> (&p)[24],
                                          const int& num_in, Point<T> (&q)[24],
                                          bool shift_to_zero = false) {
  assert(num_in >= 2);

  // Step 1:
  // Find point with minimum y
  // if more than 1 points have the same minimum y,
  // pick the one with the minimum x.
  int t = 0;
  for (int i = 1; i < num_in; i++) {
    if (p[i].y < p[t].y || (p[i].y == p[t].y && p[i].x < p[t].x)) {
      t = i;
    }
  }
  auto& start = p[t];  // starting point

  // Step 2:
  // Subtract starting point from every points (for sorting in the next step)
  for (int i = 0; i < num_in; i++) {
    q[i] = p[i] - start;
  }

  // Swap the starting point to position 0
  auto tmp = q[0];
  q[0] = q[t];
  q[t] = tmp;

  // Step 3:
  // Sort point 1 ~ num_in according to their relative cross-product values
  // (essentially sorting according to angles)
  // If the angles are the same, sort according to their distance to origin
  T dist[24];  // squared distance of each shifted point to the origin
  for (int i = 0; i < num_in; i++) {
    dist[i] = dot_2d<T>(q[i], q[i]);
  }

#ifdef __CUDACC__
  // CUDA version
  // In the future, we can potentially use thrust
  // for sorting here to improve speed (though not guaranteed)
  // Hand-written O(n^2) exchange sort; dist[] is swapped together with q[]
  // so that it stays aligned with the reordered points.
  for (int i = 1; i < num_in - 1; i++) {
    for (int j = i + 1; j < num_in; j++) {
      T crossProduct = cross_2d<T>(q[i], q[j]);
      if ((crossProduct < -1e-6) ||
          (fabs(crossProduct) < 1e-6 && dist[i] > dist[j])) {
        auto q_tmp = q[i];
        q[i] = q[j];
        q[j] = q_tmp;
        auto dist_tmp = dist[i];
        dist[i] = dist[j];
        dist[j] = dist_tmp;
      }
    }
  }
#else
  // CPU version
  // Same ordering criterion as the CUDA branch, expressed as a comparator.
  std::sort(q + 1, q + num_in,
            [](const Point<T>& A, const Point<T>& B) -> bool {
              T temp = cross_2d<T>(A, B);
              if (fabs(temp) < 1e-6) {
                return dot_2d<T>(A, A) < dot_2d<T>(B, B);
              } else {
                return temp > 0;
              }
            });
  // compute distance to origin after sort, since the points are now different.
  for (int i = 0; i < num_in; i++) {
    dist[i] = dot_2d<T>(q[i], q[i]);
  }
#endif

  // Step 4:
  // Make sure there are at least 2 points (that don't overlap with each other)
  // in the stack
  int k;  // index of the non-overlapped second point
  for (k = 1; k < num_in; k++) {
    if (dist[k] > 1e-8) {
      break;
    }
  }
  if (k == num_in) {
    // We reach the end, which means the convex hull is just one point
    q[0] = p[t];
    return 1;
  }
  q[1] = q[k];
  int m = 2;  // 2 points in the stack
  // Step 5:
  // Finally we can start the scanning process.
  // When a non-convex relationship between the 3 points is found
  // (either concave shape or duplicated points),
  // we pop the previous point from the stack
  // until the 3-point relationship is convex again, or
  // until the stack only contains two points
  // The stack lives in q[0..m); since m <= i it never overwrites a point
  // that has not been visited yet.
  for (int i = k + 1; i < num_in; i++) {
    while (m > 1 && cross_2d<T>(q[i] - q[m - 2], q[m - 1] - q[m - 2]) >= 0) {
      m--;
    }
    q[m++] = q[i];
  }

  // Step 6 (Optional):
  // In general sense we need the original coordinates, so we
  // need to shift the points back (reverting Step 2)
  // But if we're only interested in getting the area/perimeter of the shape
  // We can simply return.
  if (!shift_to_zero) {
    for (int i = 0; i < m; i++) {
      q[i] += start;
    }
  }

  return m;
}

// Area of the polygon given by the first `m` points of `q`, computed as a
// triangle fan anchored at q[0]. Fewer than three points yield 0.
template <typename T>
HOST_DEVICE_INLINE T polygon_area(const Point<T> (&q)[24], const int& m) {
  if (m <= 2) {
    return 0;
  }

  // Accumulate twice the (unsigned) area of every fan triangle
  // (q[0], q[i], q[i + 1]).
  T doubled_area = 0;
  for (int i = 1; i + 1 < m; ++i) {
    const Point<T> edge_a = q[i] - q[0];
    const Point<T> edge_b = q[i + 1] - q[0];
    doubled_area += fabs(cross_2d<T>(edge_a, edge_b));
  }

  return doubled_area / 2.0;
}

// Area of the overlap between two rotated boxes.
template <typename T>
HOST_DEVICE_INLINE T rotated_boxes_intersection(const RotatedBox<T>& box1,
                                                const RotatedBox<T>& box2) {
  Point<T> corners1[4];
  Point<T> corners2[4];
  get_rotated_vertices<T>(box1, corners1);
  get_rotated_vertices<T>(box2, corners2);

  // get_intersection_points can return up to 4 x 4 + 4 + 4 = 24 points
  // (duplicates included).
  Point<T> candidates[24], hull[24];
  const int num_candidates =
      get_intersection_points<T>(corners1, corners2, candidates);

  // Two or fewer points cannot enclose any area.
  if (num_candidates <= 2) {
    return 0.0;
  }

  // The convex hull puts the candidate points into a consistent order along
  // the overlap contour. Only the area is needed, so the hull may stay
  // shifted to the origin.
  const int hull_size =
      convex_hull_graham<T>(candidates, num_candidates, hull, true);
  return polygon_area<T>(hull, hull_size);
}

}  // namespace

// Overlap ratio of two rotated boxes given as raw 5-element arrays laid out
// as (x_ctr, y_ctr, w, h, a).
//
// mode_flag == 0: intersection / (area1 + area2 - intersection)
// mode_flag == 1: intersection / area1
// any other value: the intersection area itself (divided by 1).
// Returns 0 when either box has an area below 1e-14.
template <typename T>
HOST_DEVICE_INLINE T single_box_iou_rotated(T const* const box1_raw,
                                            T const* const box2_raw,
                                            const int mode_flag) {
  // shift center to the middle point to achieve higher precision in result
  const auto shift_x = (box1_raw[0] + box2_raw[0]) / 2.0;
  const auto shift_y = (box1_raw[1] + box2_raw[1]) / 2.0;

  RotatedBox<T> box1, box2;

  box1.x_ctr = box1_raw[0] - shift_x;
  box1.y_ctr = box1_raw[1] - shift_y;
  box1.w = box1_raw[2];
  box1.h = box1_raw[3];
  box1.a = box1_raw[4];

  box2.x_ctr = box2_raw[0] - shift_x;
  box2.y_ctr = box2_raw[1] - shift_y;
  box2.w = box2_raw[2];
  box2.h = box2_raw[3];
  box2.a = box2_raw[4];

  const T area1 = box1.w * box1.h;
  const T area2 = box2.w * box2.h;
  // Degenerate boxes never overlap anything.
  if (area1 < 1e-14 || area2 < 1e-14) {
    return 0.f;
  }

  const T intersection = rotated_boxes_intersection<T>(box1, box2);

  T denominator = 1.0;
  switch (mode_flag) {
    case 0:
      denominator = (area1 + area2 - intersection);
      break;
    case 1:
      denominator = area1;
      break;
    default:
      break;
  }
  return intersection / denominator;
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/active_rotated_filter_cuda_kernel.cuh
================================================
// Copyright (c) OpenMMLab. All rights reserved.
// Modified from
// https://github.com/csuhan/s2anet/blob/master/mmdet/ops/orn/src/cuda/ActiveRotatingFilter_cuda.cu
#ifndef ACTIVE_ROTATED_FILTER_CUDA_KERNEL_CUH
#define ACTIVE_ROTATED_FILTER_CUDA_KERNEL_CUH

#ifdef MMCV_USE_PARROTS
#include "parrots_cuda_helper.hpp"
#else
#include "pytorch_cuda_helper.hpp"
#endif

// Scatters every weight value into `output_data` once per rotation.
//
// For flat weight index `index`, decomposed as (outer, in_plane, entry) with
// `entry` fastest, and for each rotation r, the value is stored at
//   output[outer][r][in_plane][indices[entry][r] - 1].
// `num_output_planes` and `num_orientations` are accepted but not read here.
// CUDA_1D_KERNEL_LOOP comes from the included helper header.
template <typename scalar_t>
__global__ void active_rotated_filter_forward_cuda_kernel(
    const int nthreads, const scalar_t* weight_data, const int* indices_data,
    const int num_input_planes, const int num_output_planes,
    const int num_orientations, const int num_rotations, const int nEntry,
    scalar_t* output_data) {
  CUDA_1D_KERNEL_LOOP(index, nthreads) {
    const int entry = index % nEntry;
    const int in_plane = (index / nEntry) % num_input_planes;
    const int outer = index / nEntry / num_input_planes;
    const scalar_t value = *(weight_data + index);

    for (int rot = 0; rot < num_rotations; ++rot) {
      // The index table value minus one is used as the destination offset.
      const int dst_entry =
          (int)(*(indices_data + entry * num_rotations + rot)) - 1;
      scalar_t* dst = output_data +
                      outer * (num_rotations * num_input_planes * nEntry) +
                      rot * (num_input_planes * nEntry) +
                      in_plane * (nEntry) + dst_entry;
      *dst = value;
    }
  }
}

// Gathers gradients back onto the weights: for each flat weight index the
// result is the sum, over all rotations r, of
//   gradWeight[outer][r][in_plane][indices[entry][r] - 1],
// using the same (outer, in_plane, entry) decomposition as the forward
// kernel. `num_output_planes` and `num_orientations` are not read here.
// CUDA_1D_KERNEL_LOOP comes from the included helper header.
template <typename scalar_t>
__global__ void active_rotated_filter_backward_cuda_kernel(
    const int nthreads, const scalar_t* gradWeight_data,
    const int* indices_data, const int num_input_planes,
    const int num_output_planes, const int num_orientations,
    const int num_rotations, const int nEntry, scalar_t* weight_data) {
  CUDA_1D_KERNEL_LOOP(index, nthreads) {
    const int entry = index % nEntry;
    const int in_plane = (index / nEntry) % num_input_planes;
    const int outer = index / nEntry / num_input_planes;

    scalar_t* dst = weight_data + index;
    *dst = 0;

    scalar_t accumulated = 0;
    for (int rot = 0; rot < num_rotations; ++rot) {
      // The index table value minus one is used as the source offset.
      const int src_entry =
          (int)(*(indices_data + entry * num_rotations + rot)) - 1;
      const scalar_t contribution =
          *(gradWeight_data +
            outer * (num_rotations * num_input_planes * nEntry) +
            rot * (num_input_planes * nEntry) + in_plane * (nEntry) +
            src_entry);
      accumulated = accumulated + contribution;
    }
    *dst = accumulated;
  }
}
#endif  // ACTIVE_ROTATED_FILTER_CUDA_KERNEL_CUH


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/assign_score_withk_cuda_kernel.cuh
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef ASSIGN_SCORE_WITHK_CUDA_KERNEL_CUH
#define ASSIGN_SCORE_WITHK_CUDA_KERNEL_CUH

#ifdef MMCV_USE_PARROTS
#include "parrots_cuda_helper.hpp"
#else
#include "pytorch_cuda_helper.hpp"
#endif

// input: points(B,N0,M,O), centers(B,N0,M,O), scores(B,N1,K,M), knn_idx(B,N1,K)
// output: fout(B,O,N)
// algo: fout(b,i,k,j) = s(b,i,k,m)*p(b,c(i),k,m,j) =  s(b,i,k,m)*p(b,i(k),m,j)
//       i(k) = idx(b,i,k)
//      sum: fout(b,i,j) = fout(b,i,j) + s(b,i,k,m)*p(b,i,k,m,j)
//      avg: fout(b,i,j) = sum(fout(b,i,k,j)) / k
//      max: fout(b,i,j) = max(fout(b,i,k,j), sum(s(b,i,k,m)*p(b,i,k,m,j)))

// Forward pass of assign_score_withk.
//
// Each flat index i in [0, B * O * N1 * K) is decomposed into (b, o, n, k)
// and the kernel accumulates, on top of the value already in `output`,
//   sum_m scores[b, n, k, m] * (points[b, kn, m, o] - centers[b, cn, m, o])
// where kn = knn_idx[b, n, k] and cn = knn_idx[b, n, 0].
// `aggregate` is accepted but not read by this kernel.
//
// CUDA_1D_KERNEL_LOOP is provided by the included helper header (not shown
// here); it is expected to be a strided `for` loop, so one thread may run the
// body for several values of i.
template <typename T>
__global__ void assign_score_withk_forward_cuda_kernel(
    const int B, const int N0, const int N1, const int M, const int K,
    const int O, const int aggregate, const T* points, const T* centers,
    const T* scores, const int64_t* knn_idx, T* output) {
  // ----- parallel loop for B, N1, K and O ---------
  CUDA_1D_KERNEL_LOOP(i, B * O * N1 * K) {
    // ------- loop for M ----------
    const int b = (int)(i / (O * N1 * K));
    const int o = (int)(i % (O * N1 * K) / (N1 * K));
    const int n = (int)(i % (N1 * K) / K);
    const int k = (int)(i % K);
    const int cn = (int)knn_idx[b * K * N1 + n * K +
                                0];  // The first neighbor is the center point
    const int kn = (int)knn_idx[b * K * N1 + n * K + k];
    if (kn >= N0 ||
        kn < 0) {  // if index overflows, it is out of the neighborhood range
      // Skip only this element. A `return` here would make the thread drop
      // every later index it is responsible for in the strided loop.
      continue;
    }
    assert(b < B);
    assert(kn < N0);
    assert(cn < N0);
    assert(o < O);
    assert(n < N1);
    const int out_idx = b * N1 * O * K + o * N1 * K + n * K + k;
    T val = output[out_idx];
    for (int m = 0; m < M; m++) {
      val += points[b * N0 * M * O + kn * M * O + m * O + o] *
                 scores[b * N1 * K * M + n * K * M + k * M + m] -
             centers[b * N0 * M * O + cn * M * O + m * O + o] *
                 scores[b * N1 * K * M + n * K * M + k * M + m];
    }
    output[out_idx] = val;
  }
}

// Backward pass of assign_score_withk with respect to `points` and
// `centers`.
//
// Each flat index i in [0, B * M * O) is decomposed into (b, m, o). For every
// (n, k) with a valid neighbour kn = knn_idx[b, n, k], the product
//   scores[b, n, k, m] * grad_out[b, o, n, k]
// is atomically added to grad_points[b, kn, m, o] and atomically subtracted
// from grad_centers[b, cn, m, o], where cn = knn_idx[b, n, 0].
// `aggregate` is accepted but not read by this kernel.
template <typename T>
__global__ void assign_score_withk_points_backward_cuda_kernel(
    const int B, const int N0, const int N, const int M, const int K,
    const int O, const int aggregate, const T* grad_out, const T* scores,
    const int64_t* knn_idx, T* grad_points, T* grad_centers) {
  // ----- parallel loop for B, M, O ---------
  CUDA_1D_KERNEL_LOOP(i, B * M * O) {
    const int b = (int)(i / (M * O));
    const int m = (int)(i % (M * O) / O);
    const int o = (int)(i % O);

    // ----- loop for N,K ---------
    for (int n = 0; n < N; ++n) {
      for (int k = 0; k < K; ++k) {
        const int kn = knn_idx[b * N * K + n * K + k];
        const int cn = knn_idx[b * N * K + n * K + 0];
        // Neighbours outside [0, N0) are out of the neighborhood range.
        if (kn < 0 || kn >= N0) {
          continue;
        }
        const T weighted_grad =
            scores[b * N * K * M + n * K * M + k * M + m] *
            grad_out[b * O * N * K + o * N * K + n * K + k];
        atomicAdd(grad_points + b * N0 * M * O + kn * M * O + m * O + o,
                  weighted_grad);
        atomicAdd(grad_centers + b * N0 * M * O + cn * M * O + m * O + o,
                  -weighted_grad);
      }
    }
  }
}

// Backward pass of assign_score_withk with respect to `scores`.
//
// Each flat index i in [0, B * N * K * M) is decomposed into (b, n, k, m) and
// the kernel accumulates, on top of the value already in `grad_scores`,
//   sum_o (points[b, kn, m, o] - centers[b, cn, m, o]) * grad_out[b, o, n, k]
// where kn = knn_idx[b, n, k] and cn = knn_idx[b, n, 0].
// `aggregate` is accepted but not read by this kernel.
//
// CUDA_1D_KERNEL_LOOP is provided by the included helper header (not shown
// here); it is expected to be a strided `for` loop, so one thread may run the
// body for several values of i.
template <typename T>
__global__ void assign_score_withk_scores_backward_cuda_kernel(
    const int B, const int N0, const int N, const int M, const int K,
    const int O, const int aggregate, const T* grad_out, const T* points,
    const T* centers, const int64_t* knn_idx, T* grad_scores) {
  // ----- parallel loop for B, N, K, M ---------
  CUDA_1D_KERNEL_LOOP(i, B * N * K * M) {
    const int b = (int)(i / (N * M * K));
    const int n = (int)(i % (N * M * K) / M / K);
    const int k = (int)(i % (M * K) / M);
    const int m = (int)(i % M);
    const int cn = knn_idx[b * N * K + n * K + 0];
    const int kn = knn_idx[b * N * K + n * K + k];
    if (kn >= N0 ||
        kn < 0) {  // if index overflows, it is out of the neighborhood range
      // Skip only this element. A `return` here would make the thread drop
      // every later index it is responsible for in the strided loop.
      continue;
    }

    // -------------- loop for O ------------------------
    const int out_idx = b * N * K * M + n * K * M + k * M + m;
    T val = grad_scores[out_idx];
    for (int o = 0; o < O; o++) {
      val += (points[b * N0 * M * O + kn * M * O + m * O + o] -
              centers[b * N0 * M * O + cn * M * O + m * O + o]) *
             grad_out[b * O * N * K + o * N * K + n * K + k];
    }
    grad_scores[out_idx] = val;
  }
}

#endif  // ASSIGN_SCORE_WITHK_CUDA_KERNEL_CUH


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/ball_query_cuda_kernel.cuh
================================================
// Copyright (c) OpenMMLab. All rights reserved
// Modified from
// https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/ball_query_gpu.cu
#ifndef BALL_QUERY_CUDA_KERNEL_CUH
#define BALL_QUERY_CUDA_KERNEL_CUH

#ifdef MMCV_USE_PARROTS
#include "parrots_cuda_helper.hpp"
#else
#include "pytorch_cuda_helper.hpp"
#endif

// For every query point, records the indices of up to `nsample` points of
// the same batch whose squared distance d2 satisfies
//   d2 == 0  ||  (min_radius^2 <= d2 < max_radius^2).
// When the first match is found, all `nsample` slots are pre-filled with its
// index, so queries with fewer than `nsample` matches repeat the first one.
// Slots of a query without any match are left untouched.
//
// The batch index is taken from blockIdx.y; the query index comes from
// CUDA_1D_KERNEL_LOOP (provided by the included helper header, expected to be
// a strided `for` loop, so one thread may handle several query points).
template <typename T>
__global__ void ball_query_forward_cuda_kernel(int b, int n, int m,
                                               float min_radius,
                                               float max_radius, int nsample,
                                               const T* new_xyz, const T* xyz,
                                               int* idx) {
  // new_xyz: (B, M, 3)
  // xyz: (B, N, 3)
  // output:
  //      idx: (B, M, nsample)
  const int bs_idx = blockIdx.y;
  if (bs_idx >= b) return;

  const float max_radius2 = max_radius * max_radius;
  const float min_radius2 = min_radius * min_radius;

  // All candidate points of this batch element.
  const T* batch_xyz = xyz + bs_idx * n * 3;

  CUDA_1D_KERNEL_LOOP(pt_idx, m) {
    // Derive per-query pointers from the untouched base pointers. Advancing
    // the kernel arguments in place would accumulate offsets whenever a
    // thread executes this body more than once.
    const T* query = new_xyz + bs_idx * m * 3 + pt_idx * 3;
    int* query_idx = idx + bs_idx * m * nsample + pt_idx * nsample;

    const T new_x = query[0];
    const T new_y = query[1];
    const T new_z = query[2];

    int cnt = 0;
    for (int k = 0; k < n; ++k) {
      const T x = batch_xyz[k * 3 + 0];
      const T y = batch_xyz[k * 3 + 1];
      const T z = batch_xyz[k * 3 + 2];
      const T d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) +
                   (new_z - z) * (new_z - z);
      if (d2 == 0 || (d2 >= min_radius2 && d2 < max_radius2)) {
        if (cnt == 0) {
          // Pad every slot with the first hit.
          for (int l = 0; l < nsample; ++l) {
            query_idx[l] = k;
          }
        }
        query_idx[cnt] = k;
        ++cnt;
        if (cnt >= nsample) break;
      }
    }
  }
}

#endif  // BALL_QUERY_CUDA_KERNEL_CUH


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/bbox_overlaps_cuda_kernel.cuh
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef BBOX_OVERLAPS_CUDA_KERNEL_CUH
#define BBOX_OVERLAPS_CUDA_KERNEL_CUH

#ifdef MMCV_USE_PARROTS
#include "parrots_cuda_helper.hpp"
#else
#include "pytorch_cuda_helper.hpp"
#endif

template <typename T>
__global__ void bbox_overlaps_cuda_kernel(const T* bbox1, const T* bbox2,
                                          T* ious, const int num_bbox1,
                                          const int num_bbox2, const int mode,
                                          const bool aligned,
                                          const int offset) {
  // Overlap ratio between boxes stored as 4 consecutive values
  // (x1, y1, x2, y2).
  //   aligned:   ious[i] pairs bbox1[i] with bbox2[i], i in [0, num_bbox1)
  //   otherwise: ious[i * num_bbox2 + j] pairs bbox1[i] with bbox2[j]
  //   mode 0 divides the intersection by (area1 + area2 - intersection),
  //   mode 1 divides it by area1, any other mode divides it by 1.
  // `offset` is added to every width/height and is also the lower bound of
  // the divisor in modes 0 and 1.
  const int num_pairs = aligned ? num_bbox1 : num_bbox1 * num_bbox2;
  CUDA_1D_KERNEL_LOOP(index, num_pairs) {
    // Which box of each set this output element refers to.
    const int b1 = aligned ? index : index / num_bbox2;
    const int b2 = aligned ? index : index % num_bbox2;

    const T* box_a = bbox1 + b1 * 4;
    const T a_x1 = box_a[0];
    const T a_y1 = box_a[1];
    const T a_x2 = box_a[2];
    const T a_y2 = box_a[3];
    const T a_area = (a_x2 - a_x1 + offset) * (a_y2 - a_y1 + offset);

    const T* box_b = bbox2 + b2 * 4;
    const T b_x1 = box_b[0];
    const T b_y1 = box_b[1];
    const T b_x2 = box_b[2];
    const T b_y2 = box_b[3];
    const T b_area = (b_x2 - b_x1 + offset) * (b_y2 - b_y1 + offset);

    // Intersection rectangle, with its extent clamped at zero.
    const T left = fmaxf(a_x1, b_x1);
    const T right = fminf(a_x2, b_x2);
    const T top = fmaxf(a_y1, b_y1);
    const T bottom = fminf(a_y2, b_y2);
    const T width = fmaxf(right - left + offset, 0.f);
    const T height = fmaxf(bottom - top + offset, 0.f);
    const T interS = width * height;

    T baseS = 1.0;
    switch (mode) {
      case 0:
        baseS = fmaxf(a_area + b_area - interS, T(offset));
        break;
      case 1:
        baseS = fmaxf(a_area, T(offset));
        break;
      default:
        break;
    }
    ious[index] = interS / baseS;
  }
}

#endif  // BBOX_OVERLAPS_CUDA_KERNEL_CUH


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/border_align_cuda_kernel.cuh
================================================
// Copyright (c) OpenMMLab. All rights reserved
// modified from
// https://github.com/Megvii-BaseDetection/cvpods/blob/master/cvpods/layers/csrc/border_align/border_align_kernel.cu.
// the main difference: (1) use `argmax_idx` for fast computing of gradient
// during the backward. (2) `wh` is directly computed by `boxes`, rather than
// passing it as argument to forward or backward functions.

#ifndef BORDER_ALIGN_CUDA_KERNEL_CUH
#define BORDER_ALIGN_CUDA_KERNEL_CUH

#include <float.h>
#ifdef MMCV_WITH_TRT
#include "common_cuda_helper.hpp"
#else  // MMCV_WITH_TRT
#ifdef MMCV_USE_PARROTS
#include "parrots_cuda_helper.hpp"
#else  // MMCV_USE_PARROTS
#include "pytorch_cuda_helper.hpp"
#endif  // MMCV_USE_PARROTS
#endif  // MMCV_WITH_TRT

// Border of a box handled by one thread. The kernels in this header compare
// these values against `extreme_idx`, which they read from threadIdx.y.
enum BorderMode { Top = 0, Left = 1, Bottom = 2, Right = 3 };

/*** Forward ***/
template <typename T>
__global__ void border_align_forward_cuda_kernel(
    const int nthreads, const T* input, const T* boxes, T* output,
    int* argmax_idx, const int channels, const int box_size, const int height,
    const int width, const int pool_size) {
  // Max-pools bilinearly sampled values along one border of each box.
  //   input:      (N, 4C, h, w); channel ranges [0,C), [C,2C), [2C,3C),
  //               [3C,4C) hold the top, left, bottom and right features
  //   boxes:      (N, box_size, 4) as (x1, y1, x2, y2)
  //   output:     (N, C, box_size, 4), maximum per border
  //   argmax_idx: same layout as output, step index of the maximum
  // `index` enumerates (batch, channel, box); the border is threadIdx.y.
  CUDA_1D_KERNEL_LOOP(index, nthreads) {
    const int extreme_idx = threadIdx.y;
    const int batch_idx = index / channels / box_size;
    const int box_idx = index % box_size + batch_idx * box_size;
    const int c_idx = (index / box_size) % channels;

    const T* offset_box = boxes + box_idx * 4;
    const T box_width = *(offset_box + 2) - *offset_box;
    const T box_height = *(offset_box + 3) - *(offset_box + 1);

    // Output and argmax share the same flat position.
    const int out_pos = index * 4 + extreme_idx;

    // Feature plane that belongs to this border and channel.
    const T* offset_input =
        input + (batch_idx * channels * 4 + extreme_idx * channels + c_idx) *
                    height * width;

    // Borders 0/1 start at (x1, y1), borders 2/3 start at (x2, y2).
    const T* start_corner = offset_box + extreme_idx / 2 * 2;
    T x = start_corner[0];
    T y = start_corner[1];

    // Step taken between consecutive samples along the border.
    T x_stride, y_stride;
    switch (extreme_idx) {
      case BorderMode::Top:
        x_stride = box_width / pool_size;
        y_stride = 0;
        break;
      case BorderMode::Left:
        x_stride = 0;
        y_stride = box_height / pool_size;
        break;
      case BorderMode::Bottom:
        x_stride = -(box_width / pool_size);
        y_stride = 0;
        break;
      case BorderMode::Right:
        x_stride = 0;
        y_stride = -(box_height / pool_size);
        break;
    }

    // Sample 0 is the start corner; samples 1..pool_size walk the border.
    T maxval = bilinear_interpolate(offset_input, height, width, y, x, index);
    int maxidx = 0;
    for (int i = 1; i <= pool_size; i++) {
      x += x_stride;
      y += y_stride;
      const T val =
          bilinear_interpolate(offset_input, height, width, y, x, index);
      if (val > maxval) {
        maxval = val;
        maxidx = i;
      }
    }

    output[out_pos] = maxval;
    argmax_idx[out_pos] = maxidx;
  }
}

/*** Backward ***/
template <typename T>
__global__ void border_align_backward_cuda_kernel(
    const int nthreads, const T* grad_output, const T* boxes,
    const int* argmax_idx, T* grad_input, const int channels,
    const int box_size, const int height, const int width,
    const int pool_size) {
  // Scatters each output gradient back to the four input pixels that were
  // bilinearly blended at the position recorded in `argmax_idx`.
  //   grad_output / argmax_idx: indexed as index * 4 + border
  //   boxes:      4 values per box, (x1, y1, x2, y2)
  //   grad_input: accumulated with atomicAdd; per batch the channel ranges
  //               [0,C), [C,2C), [2C,3C), [3C,4C) hold the top, left, bottom
  //               and right feature gradients
  CUDA_1D_KERNEL_LOOP(index, nthreads) {
    // (batch_idx, c_idx, box_idx) is an element paralleled for computing
    // output, and `extreme_idx` is in range [0,3]
    const int extreme_idx = threadIdx.y;
    const int batch_idx = index / channels / box_size;
    const int box_idx = index % box_size + batch_idx * box_size;
    const int c_idx = (index / box_size) % channels;

    const T* offset_box = boxes + box_idx * 4;
    const T box_width = *(offset_box + 2) - *offset_box;
    const T box_height = *(offset_box + 3) - *(offset_box + 1);
    const T* offset_grad_output = grad_output + index * 4 + extreme_idx;
    const int* offset_argmax_idx = argmax_idx + index * 4 + extreme_idx;
    T* offset_grad_input = grad_input + (batch_idx * channels * 4 +
                                         extreme_idx * channels + c_idx) *
                                            height * width;

    // extreme_idx in [0,1] -> offset_box_x indexed at x1
    // extreme_idx in [2,3] -> offset_box_x indexed at x2
    const T* offset_box_x = offset_box + extreme_idx / 2 * 2;

    T stride, x_stride, y_stride;
    switch (extreme_idx) {
      // top
      case BorderMode::Top:
        stride = box_width / pool_size;
        x_stride = stride;
        y_stride = 0;
        break;
      // left
      case BorderMode::Left:
        stride = box_height / pool_size;
        x_stride = 0;
        y_stride = stride;
        break;
      // bottom
      case BorderMode::Bottom:
        stride = box_width / pool_size;
        x_stride = -stride;
        y_stride = 0;
        break;
      // right
      case BorderMode::Right:
        stride = box_height / pool_size;
        x_stride = 0;
        y_stride = -stride;
        break;
    }

    // get position (x,y) which has maximum value during forward
    T x = *offset_box_x;
    T y = *(offset_box_x + 1);
    x += x_stride * (T)(*offset_argmax_idx);
    y += y_stride * (T)(*offset_argmax_idx);

    T w1, w2, w3, w4;
    int x_low, x_high, y_low, y_high;
    bilinear_interpolate_gradient(height, width, y, x, w1, w2, w3, w4, x_low,
                                  x_high, y_low, y_high, index);

    // bilinear_interpolate_gradient reports a sample outside the feature map
    // by setting all four indices to -1 (with zero weights). Skip the scatter
    // in that case: the negative offsets would address memory in front of
    // this plane, and a non-finite gradient times the zero weight would
    // still write NaN there.
    if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0) {
      // update grad_input
      atomicAdd(offset_grad_input + y_low * width + x_low,
                *offset_grad_output * w1);
      atomicAdd(offset_grad_input + y_low * width + x_high,
                *offset_grad_output * w2);
      atomicAdd(offset_grad_input + y_high * width + x_low,
                *offset_grad_output * w3);
      atomicAdd(offset_grad_input + y_high * width + x_high,
                *offset_grad_output * w4);
    }
  }
}

#endif  // BORDER_ALIGN_CUDA_KERNEL_CUH


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/box_iou_rotated_cuda.cuh
================================================
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
// modified from
// https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cuda.cu
#ifndef BOX_IOU_ROTATED_CUDA_CUH
#define BOX_IOU_ROTATED_CUDA_CUH

#ifdef MMCV_USE_PARROTS
#include "parrots_cuda_helper.hpp"
#else
#include "pytorch_cuda_helper.hpp"
#endif
#include "box_iou_rotated_utils.hpp"

// 2D block with 32 * 16 = 512 threads per block
// NOTE(review): neither constant is referenced by the kernel in this header;
// presumably they are consumed by the host-side launcher - confirm.
const int BLOCK_DIM_X = 32;
const int BLOCK_DIM_Y = 16;

// Integer division of x by y with the quotient rounded up, computed as
// (x + y - 1) / y.
inline int divideUP(const int x, const int y) {
  const int padded = x + y - 1;
  return padded / y;
}

template <typename T>
__global__ void box_iou_rotated_cuda_kernel(
    const int n_boxes1, const int n_boxes2, const T* dev_boxes1,
    const T* dev_boxes2, T* dev_ious, const int mode_flag, const bool aligned) {
  // Each box occupies 5 consecutive values in its array.
  //   aligned:   dev_ious[i] pairs box i of both arrays, i in [0, n_boxes1)
  //   otherwise: dev_ious[i * n_boxes2 + j] pairs box i of dev_boxes1 with
  //              box j of dev_boxes2
  // The overlap itself is delegated to single_box_iou_rotated<T>.
  const int n_pairs = aligned ? n_boxes1 : n_boxes1 * n_boxes2;
  CUDA_1D_KERNEL_LOOP(index, n_pairs) {
    const int b1 = aligned ? index : index / n_boxes2;
    const int b2 = aligned ? index : index % n_boxes2;

    const int base1 = b1 * 5;
    const int base2 = b2 * 5;

    // Thread-local copies of the two boxes.
    float block_boxes1[5];
    float block_boxes2[5];
    for (int k = 0; k < 5; ++k) {
      block_boxes1[k] = dev_boxes1[base1 + k];
      block_boxes2[k] = dev_boxes2[base2 + k];
    }

    dev_ious[index] =
        single_box_iou_rotated<T>(block_boxes1, block_boxes2, mode_flag);
  }
}

#endif


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/carafe_cuda_kernel.cuh
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef CARAFE_CUDA_KERNEL_CUH
#define CARAFE_CUDA_KERNEL_CUH

#ifdef MMCV_USE_PARROTS
#include "parrots_cuda_helper.hpp"
#else
#include "pytorch_cuda_helper.hpp"
#endif

// Number of lanes reduced together by warpReduceSum; 64 when building with
// HIP_DIFF, 32 otherwise. Also used as the row stride of `shared_mask`.
#ifdef HIP_DIFF
#define WARP_SIZE 64
#else
#define WARP_SIZE 32
#endif
// Threads that cooperate on one output pixel, splitting its channels.
#define THREADS_PER_PIXEL 32
// NOTE(review): not referenced in this header; presumably the per-block
// shared-memory budget in bytes - confirm.
#define MAX_SHARED_MEMORY 49152
// Element count used to size the `shared_mask` arrays of the kernels below.
#define MAX_SHARED_SCALAR_T 6144  // 49152 / 8 = 6144
// When true, `shared_mask` is declared as float with twice
// MAX_SHARED_SCALAR_T elements instead of scalar_t with MAX_SHARED_SCALAR_T.
#define MAXIMIZE_KERNEL_SIZE true
// Tile edge length and row step used by BatchTranspose2DCUDAKernel.
#define kTileDim 32
#define kBlockRows 8
// Lane mask passed to __shfl_down_sync (all 32 bits set).
#define FULL_MASK 0xffffffff

// Integer division of x by y with the quotient rounded up, computed as
// (x + y - 1) / y.
inline int divideUP(const int x, const int y) {
  const int padded = x + y - 1;
  return padded / y;
}

// Flattens the 4-D coordinate (n, c, h, w) of an array whose three inner
// extents are (channel_num, height, width) into a linear offset:
//   ((n * channel_num + c) * height + h) * width + w
__device__ inline int Loc2Index(const int n, const int c, const int h,
                                const int w, const int channel_num,
                                const int height, const int width) {
  const int plane = c + n * channel_num;
  const int row = h + plane * height;
  return w + row * width;
}
#ifndef HIP_DIFF
/* TODO: move this to a common place */
// Device-side minimum of two values of the same type; returns `b` when the
// two compare equal or are unordered.
template <typename scalar_t>
__device__ inline scalar_t min(scalar_t a, scalar_t b) {
  if (a < b) {
    return a;
  }
  return b;
}

// Device-side maximum of two values of the same type; returns `b` when the
// two compare equal or are unordered.
template <typename scalar_t>
__device__ inline scalar_t max(scalar_t a, scalar_t b) {
  if (a > b) {
    return a;
  }
  return b;
}
#endif
// Adds up `val` across lanes with a shuffle-down ladder: the lane distance
// starts at WARP_SIZE / 2 and is halved until it reaches zero. Each lane
// returns its own accumulated value.
template <typename scalar_t>
__device__ __forceinline__ scalar_t warpReduceSum(scalar_t val) {
  int offset = WARP_SIZE / 2;
  while (offset > 0) {
#ifdef HIP_DIFF
    val += __shfl_down(val, offset);
#else
    val += __shfl_down_sync(FULL_MASK, val, offset);
#endif
    offset /= 2;
  }
  return val;
}

// Specialization of warpReduceSum for `phalf`. The running sum is updated
// through the __PHALF(val) accessor rather than on `val` directly, using the
// same halving shuffle-down ladder as the generic template.
template <>
__device__ __forceinline__ phalf warpReduceSum(phalf val) {
  for (int offset = WARP_SIZE / 2; offset > 0; offset /= 2)
#ifdef HIP_DIFF
    // NOTE(review): the generic template calls __shfl_down(val, offset)
    // under HIP_DIFF, whereas this call passes FULL_MASK as the first
    // argument. Verify against the HIP __shfl_down signature - this looks
    // like the value and mask arguments are misplaced.
    __PHALF(val) += __shfl_down(FULL_MASK, val, offset);
#else
    // The shuffled operand is converted to __half explicitly.
    __PHALF(val) +=
        __shfl_down_sync(FULL_MASK, static_cast<__half>(__PHALF(val)), offset);
#endif
  return val;
}

// Batched 2-D transpose: X holds N matrices of H x W, Y receives the matching
// W x H matrices. Every matrix is cut into kTileDim x kTileDim tiles laid out
// on a dh x dw grid, and blockIdx.x selects (matrix, tile). A block stages its
// tile in shared memory (padded by one column) and writes it back transposed,
// each thread covering rows threadIdx.y, threadIdx.y + kBlockRows, ...
// Reference https://devblogs.nvidia.com/efficient-matrix-transpose-cuda-cc/
template <typename scalar_t>
__global__ void BatchTranspose2DCUDAKernel(const int N, const int H,
                                           const int W, const int dh,
                                           const int dw,
                                           const scalar_t *__restrict__ X,
                                           scalar_t *__restrict__ Y) {
  __shared__ scalar_t tile[kTileDim][kTileDim + 1];

  // Locate the matrix and the tile handled by this block.
  const int tiles_per_matrix = dh * dw;
  const int matrix_id = blockIdx.x / tiles_per_matrix;
  const int tile_id = blockIdx.x % tiles_per_matrix;
  const int tile_row = tile_id / dw;
  const int tile_col = tile_id % dw;
  const int base = matrix_id * H * W;

  // Stage the source tile, skipping elements outside the H x W matrix.
  const int src_col = tile_col * kTileDim + threadIdx.x;
  const int src_row = tile_row * kTileDim + threadIdx.y;
  if (src_col < W) {
    for (int i = 0; threadIdx.y + i < kTileDim && src_row + i < H;
         i += kBlockRows) {
      tile[threadIdx.y + i][threadIdx.x] =
          X[base + (src_row + i) * W + src_col];
    }
  }
  __syncthreads();

  // Write the tile back with rows and columns exchanged.
  const int dst_col = tile_row * kTileDim + threadIdx.x;
  const int dst_row = tile_col * kTileDim + threadIdx.y;
  if (dst_col < H) {
    for (int i = 0; threadIdx.y + i < kTileDim && dst_row + i < W;
         i += kBlockRows) {
      Y[base + (dst_row + i) * H + dst_col] =
          tile[threadIdx.x][threadIdx.y + i];
    }
  }
}
// CARAFE forward pass. THREADS_PER_PIXEL threads share one output pixel
// (n, ph, pw) and split its channels between them. All three arrays are
// addressed channel-last, i.e. Loc2Index is called as (n, y, x, c).
//   bottom_data:  features at the down-sampled resolution
//   bottom_masks: reassembly weights at the output resolution
//   top_data:     output features
// The weights of the pixel are first staged in shared memory, then every
// output channel is the weighted sum over a kernel_size x kernel_size window
// of bottom_data centred on (ph / scale_factor, pw / scale_factor); window
// positions outside the down-sampled map are skipped.
template <typename scalar_t>
__global__ void CARAFEForward(
    const int num_kernels, const scalar_t *__restrict__ bottom_data,
    const scalar_t *__restrict__ bottom_masks, const int kernel_size,
    const int group_size, const int scale_factor, const int channels,
    const int down_height, const int down_width, const int height,
    const int width, const int mask_channels, scalar_t *__restrict__ top_data) {
#if MAXIMIZE_KERNEL_SIZE
  __shared__ float shared_mask[MAX_SHARED_SCALAR_T * 2];
#else
  __shared__ scalar_t shared_mask[MAX_SHARED_SCALAR_T];
#endif

  const int thread_id = threadIdx.x + blockIdx.x * blockDim.x;
  if (thread_id >= num_kernels) {
    return;
  }

  // Slot of this pixel inside the block and of this thread inside the pixel.
  const int pixel_id = threadIdx.x / THREADS_PER_PIXEL;
  const int split_id = threadIdx.x % THREADS_PER_PIXEL;

  // Decode the output pixel.
  const int pixel = thread_id / THREADS_PER_PIXEL;
  const int pw = pixel % width;
  const int ph = (pixel / width) % height;
  const int n = pixel / width / height;

  // Window on the down-sampled feature map.
  const int down_pw = pw / scale_factor;
  const int down_ph = ph / scale_factor;
  const int half_k = (kernel_size - 1) / 2;
  const int start_w = down_pw - half_k;
  const int end_w = down_pw + half_k + 1;
  const int start_h = down_ph - half_k;
  const int end_h = down_ph + half_k + 1;

  // Stage this pixel's weights; mask channel c lives at c * WARP_SIZE +
  // pixel_id.
  for (int c = split_id; c < mask_channels; c += THREADS_PER_PIXEL) {
    shared_mask[c * WARP_SIZE + pixel_id] =
        bottom_masks[Loc2Index(n, ph, pw, c, height, width, mask_channels)];
  }
  __syncthreads();

  const int channels_per_group = ceilf(channels / (float)group_size);
#pragma unroll
  for (int c = split_id; c < channels; c += THREADS_PER_PIXEL) {
    const int mask_group = c / channels_per_group;
    scalar_t output_val = 0;
#pragma unroll
    for (int iy = start_h; iy < end_h; iy++) {
      if (iy < 0 || iy > down_height - 1) {
        continue;
      }
      // Row of the window, counted from its top edge.
      const int mask_iy = iy - start_h;
#pragma unroll
      for (int ix = start_w; ix < end_w; ix++) {
        if (ix < 0 || ix > down_width - 1) {
          continue;
        }
        const int mask_ix = ix - start_w;
        const int mask_c =
            (mask_group * kernel_size + mask_iy) * kernel_size + mask_ix;
        const int feat_index =
            Loc2Index(n, iy, ix, c, down_height, down_width, channels);

        output_val += bottom_data[feat_index] *
                      shared_mask[mask_c * WARP_SIZE + pixel_id];
      }
    }

    top_data[Loc2Index(n, ph, pw, c, height, width, channels)] = output_val;
  }
}

// CARAFE backward pass with respect to the features, evaluated at the output
// resolution. THREADS_PER_PIXEL threads share one pixel (n, ph, pw) and split
// its channels. For that pixel the kernel first gathers, into shared memory,
// one weight per mask channel from bottom_masks (read as (n, c, y, x)) at
// positions spaced `scale_factor` apart around the pixel, with the order
// inside each group reversed; positions outside the map contribute 0. Each
// channel of bottom_diff is then the weighted sum of top_diff (read
// channel-last) over the same strided window.
template <typename scalar_t>
__global__ void CARAFEBackward_Feature(
    const int num_kernels, const scalar_t *__restrict__ top_diff,
    const scalar_t *__restrict__ bottom_masks, const int kernel_size,
    const int group_size, const int scale_factor, const int channels,
    const int down_height, const int down_width, const int height,
    const int width, const int mask_channels,
    scalar_t *__restrict__ bottom_diff) {
#if MAXIMIZE_KERNEL_SIZE
  __shared__ float shared_mask[MAX_SHARED_SCALAR_T * 2];
#else
  __shared__ scalar_t shared_mask[MAX_SHARED_SCALAR_T];
#endif

  const int thread_id = threadIdx.x + blockIdx.x * blockDim.x;
  if (thread_id >= num_kernels) {
    return;
  }

  const int pixel_id = threadIdx.x / THREADS_PER_PIXEL;
  const int split_id = threadIdx.x % THREADS_PER_PIXEL;

  // Decode the pixel handled by this thread group.
  const int pixel = thread_id / THREADS_PER_PIXEL;
  const int pw = pixel % width;
  const int ph = (pixel / width) % height;
  const int n = pixel / width / height;

  // Half extent of the strided window, in output-resolution pixels.
  const int reach = (kernel_size - 1) * scale_factor / 2;
  const int start_w = pw - reach;
  const int end_w = pw + reach + 1;
  const int start_h = ph - reach;
  const int end_h = ph + reach + 1;

  const int taps = kernel_size * kernel_size;
  for (int c = split_id; c < mask_channels; c += THREADS_PER_PIXEL) {
    const int mask_x = start_w + (c % kernel_size) * scale_factor;
    const int mask_y = start_h + (c / kernel_size % kernel_size) * scale_factor;
    const bool outside =
        mask_y < 0 || mask_y > height - 1 || mask_x < 0 || mask_x > width - 1;
    if (outside) {
      shared_mask[c * WARP_SIZE + pixel_id] = 0;
    } else {
      // Mirror the tap index inside its group.
      const int mask_group = c / taps;
      const int mask_c = (2 * mask_group + 1) * taps - c - 1;
      shared_mask[c * WARP_SIZE + pixel_id] = bottom_masks[Loc2Index(
          n, mask_c, mask_y, mask_x, mask_channels, height, width)];
    }
  }
  __syncthreads();

  const int channels_per_group = ceilf(channels / (float)group_size);
#pragma unroll
  for (int c = split_id; c < channels; c += THREADS_PER_PIXEL) {
    const int mask_group = c / channels_per_group;
    scalar_t output_val = 0;
#pragma unroll
    for (int iy = start_h; iy < end_h; iy += scale_factor) {
      if (iy < 0 || iy > height - 1) {
        continue;
      }
      const int mask_iy = (iy - start_h) / scale_factor;
#pragma unroll
      for (int ix = start_w; ix < end_w; ix += scale_factor) {
        if (ix < 0 || ix > width - 1) {
          continue;
        }
        const int mask_ix = (ix - start_w) / scale_factor;
        const int mask_c =
            (mask_group * kernel_size + mask_iy) * kernel_size + mask_ix;
        const int feat_index = Loc2Index(n, iy, ix, c, height, width, channels);
        output_val +=
            shared_mask[mask_c * WARP_SIZE + pixel_id] * top_diff[feat_index];
      }
    }
    bottom_diff[Loc2Index(n, ph, pw, c, height, width, channels)] = output_val;
  }
}

// Sums every scale_factor x scale_factor patch of `input_data` into one
// element of `output_data`. Both arrays are addressed channel-last; the input
// has extents (height * scale_factor, width * scale_factor) and the output
// (height, width). THREADS_PER_PIXEL threads share an output pixel and split
// its channels.
template <typename scalar_t>
__global__ void FeatureSum(const int num_kernels,
                           const scalar_t *__restrict__ input_data,
                           const int scale_factor, const int channels,
                           const int height, const int width,
                           scalar_t *__restrict__ output_data) {
  const int thread_id = threadIdx.x + blockIdx.x * blockDim.x;
  if (thread_id >= num_kernels) {
    return;
  }

  const int split_id = threadIdx.x % THREADS_PER_PIXEL;
  const int pixel = thread_id / THREADS_PER_PIXEL;
  const int pw = pixel % width;
  const int ph = (pixel / width) % height;
  const int n = pixel / width / height;

  // Extents of the input map and the patch that feeds this output pixel.
  const int in_height = height * scale_factor;
  const int in_width = width * scale_factor;
  const int y_begin = ph * scale_factor;
  const int y_end = (ph + 1) * scale_factor;
  const int x_begin = pw * scale_factor;
  const int x_end = (pw + 1) * scale_factor;

  for (int c = split_id; c < channels; c += THREADS_PER_PIXEL) {
    scalar_t output_val = 0;
    for (int iy = y_begin; iy < y_end; iy++) {
      for (int ix = x_begin; ix < x_end; ix++) {
        output_val +=
            input_data[Loc2Index(n, iy, ix, c, in_height, in_width, channels)];
      }
    }
    output_data[Loc2Index(n, ph, pw, c, height, width, channels)] = output_val;
  }
}

// CARAFE backward pass with respect to the reassembly weights. WARP_SIZE
// consecutive threads share one (n, ph, pw, mask_c) element of mask_diff:
// each lane accumulates top_diff * bottom_data over a strided subset of the
// channels belonging to the weight's group, the partial sums are combined
// with warpReduceSum, and lane 0 stores the result. All arrays are addressed
// channel-last. A weight whose source position lies outside the down-sampled
// map receives 0.
template <typename scalar_t>
__global__ void CARAFEBackward_Mask(const int num_kernels,
                                    const scalar_t *__restrict__ top_diff,
                                    const scalar_t *__restrict__ bottom_data,
                                    const int kernel_size, const int group_size,
                                    const int scale_factor, const int channels,
                                    const int down_height, const int down_width,
                                    const int height, const int width,
                                    const int mask_channels,
                                    scalar_t *__restrict__ mask_diff) {
  const int thread_id = threadIdx.x + blockIdx.x * blockDim.x;
  if (thread_id >= num_kernels) {
    return;
  }

  // thread_id decomposes, fastest first, into lane, mask channel, pw, ph, n.
  const int lane_id = thread_id % WARP_SIZE;
  int rest = thread_id / WARP_SIZE;
  const int mask_c = rest % mask_channels;
  rest = rest / mask_channels;
  const int pw = rest % width;
  const int ph = (rest / width) % height;
  const int n = rest / width / height;

  // Group and in-window position addressed by this weight.
  const int taps = kernel_size * kernel_size;
  const int half_k = (kernel_size - 1) / 2;
  const int mask_group = mask_c / taps;
  const int mask_loc = mask_c % taps;
  const int offset_x = mask_loc % kernel_size - half_k;
  const int offset_y = mask_loc / kernel_size % kernel_size - half_k;

  // Source position on the down-sampled feature map.
  const int down_x = pw / scale_factor + offset_x;
  const int down_y = ph / scale_factor + offset_y;

  scalar_t output_val = 0;

  const bool inside = down_y >= 0 && down_y <= down_height - 1 &&
                      down_x >= 0 && down_x <= down_width - 1;
  if (inside) {
    const int channels_per_mask = ceilf(channels / (float)group_size);
    const int start = channels_per_mask * mask_group;
    const int end = min(channels_per_mask * (mask_group + 1), channels);
    for (int c = start + lane_id; c < end; c += WARP_SIZE) {
      const int bottom_id =
          Loc2Index(n, down_y, down_x, c, down_height, down_width, channels);
      const int top_id = Loc2Index(n, ph, pw, c, height, width, channels);
      output_val += top_diff[top_id] * bottom_data[bottom_id];
    }
  }
#ifdef HIP_DIFF
  __syncthreads();
#else
  __syncwarp();
#endif
  output_val = warpReduceSum(output_val);
  if (lane_id == 0) {
    mask_diff[Loc2Index(n, ph, pw, mask_c, height, width, mask_channels)] =
        output_val;
  }
}

#endif  // CARAFE_CUDA_KERNEL_CUH


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/carafe_naive_cuda_kernel.cuh
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef CARAFE_NAIVE_CUDA_KERNEL_CUH
#define CARAFE_NAIVE_CUDA_KERNEL_CUH

#ifdef MMCV_USE_PARROTS
#include "parrots_cuda_helper.hpp"
#else
#include "pytorch_cuda_helper.hpp"
#endif

// Flattens the 4-D coordinate (n, c, h, w) of an array whose three inner
// extents are (channel_num, height, width) into a linear offset:
//   ((n * channel_num + c) * height + h) * width + w
__device__ inline int Loc2Index(const int n, const int c, const int h,
                                const int w, const int channel_num,
                                const int height, const int width) {
  const int plane = c + n * channel_num;
  const int row = h + plane * height;
  return w + row * width;
}

// Naive CARAFE forward pass: one loop iteration per output element
// (n, c, ph, pw). The output is the sum, over a kernel_size x kernel_size
// window of bottom_data centred on (ph / scale_factor, pw / scale_factor),
// of feature * weight, where the weights come from bottom_masks at the output
// pixel. Window positions outside the down-sampled map are skipped. All
// arrays are addressed as (n, c, y, x).
template <typename scalar_t>
__global__ void carafe_naive_forward_cuda_kernel(
    const int nthreads, const scalar_t *bottom_data,
    const scalar_t *bottom_masks, scalar_t *top_data, const int kernel_size,
    const int group_size, const int scale_factor, const int channels,
    const int height, const int width) {
  CUDA_1D_KERNEL_LOOP(index, nthreads) {
    // Decode the output element, fastest dimension first.
    int rest = index;
    const int pw = rest % width;
    rest /= width;
    const int ph = rest % height;
    rest /= height;
    const int c = rest % channels;
    const int n = rest / channels;

    const int mask_channels = kernel_size * kernel_size * group_size;
    const int mask_group = c / (channels / group_size);

    // Geometry of the down-sampled map and of the window on it.
    const int down_width = width / scale_factor;
    const int down_height = height / scale_factor;
    const int down_pw = pw / scale_factor;
    const int down_ph = ph / scale_factor;
    const int half_k = (kernel_size - 1) / 2;
    const int start_w = down_pw - half_k;
    const int end_w = down_pw + half_k + 1;
    const int start_h = down_ph - half_k;
    const int end_h = down_ph + half_k + 1;

    scalar_t output_val = 0;
    for (int iy = start_h; iy < end_h; iy++) {
      if (iy < 0 || iy > down_height - 1) {
        continue;
      }
      const int mask_iy = iy - start_h;
      for (int ix = start_w; ix < end_w; ix++) {
        if (ix < 0 || ix > down_width - 1) {
          continue;
        }
        const int mask_ix = ix - start_w;
        const int mask_c =
            (mask_group * kernel_size + mask_iy) * kernel_size + mask_ix;
        const int feat_index =
            Loc2Index(n, c, iy, ix, channels, down_height, down_width);
        const int mask_index =
            Loc2Index(n, mask_c, ph, pw, mask_channels, height, width);
        output_val += bottom_data[feat_index] * bottom_masks[mask_index];
      }
    }
    top_data[index] = output_val;
  }
}

// Naive CARAFE backward pass: one loop iteration per element (n, c, ph, pw)
// of top_diff. For every in-range position of the kernel_size x kernel_size
// window on the down-sampled map it accumulates, with atomicAdd,
//   bottom_diff[feature] += weight  * top_diff[index]
//   mask_diff[weight]    += feature * top_diff[index]
// All arrays are addressed as (n, c, y, x).
template <typename scalar_t>
__global__ void carafe_naive_backward_cuda_kernel(
    const int nthreads, const scalar_t *top_diff, const scalar_t *bottom_data,
    const scalar_t *bottom_masks, scalar_t *bottom_diff, scalar_t *mask_diff,
    const int kernel_size, const int group_size, const int scale_factor,
    const int channels, const int height, const int width) {
  CUDA_1D_KERNEL_LOOP(index, nthreads) {
    // Decode the element, fastest dimension first.
    int rest = index;
    const int pw = rest % width;
    rest /= width;
    const int ph = rest % height;
    rest /= height;
    const int c = rest % channels;
    const int n = rest / channels;

    const int mask_channels = kernel_size * kernel_size * group_size;
    const int mask_group = c / (channels / group_size);

    // Geometry of the down-sampled map and of the window on it.
    const int down_width = width / scale_factor;
    const int down_height = height / scale_factor;
    const int down_pw = pw / scale_factor;
    const int down_ph = ph / scale_factor;
    const int half_k = (kernel_size - 1) / 2;
    const int start_w = down_pw - half_k;
    const int end_w = down_pw + half_k + 1;
    const int start_h = down_ph - half_k;
    const int end_h = down_ph + half_k + 1;

    for (int iy = start_h; iy < end_h; iy++) {
      if (iy < 0 || iy > down_height - 1) {
        continue;
      }
      const int mask_iy = iy - start_h;
      for (int ix = start_w; ix < end_w; ix++) {
        if (ix < 0 || ix > down_width - 1) {
          continue;
        }
        const int mask_ix = ix - start_w;
        const int mask_c =
            (mask_group * kernel_size + mask_iy) * kernel_size + mask_ix;
        const int feat_index =
            Loc2Index(n, c, iy, ix, channels, down_height, down_width);
        const int mask_index =
            Loc2Index(n, mask_c, ph, pw, mask_channels, height, width);
        atomicAdd(bottom_diff + feat_index,
                  bottom_masks[mask_index] * top_diff[index]);
        atomicAdd(mask_diff + mask_index,
                  bottom_data[feat_index] * top_diff[index]);
      }
    }
  }
}

#endif  // CARAFE_NAIVE_CUDA_KERNEL_CUH


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/common_cuda_helper.hpp
================================================
#ifndef COMMON_CUDA_HELPER
#define COMMON_CUDA_HELPER

#include <cuda.h>

// Loop header over [0, n): `i` starts at the thread's global x index and
// advances by the total number of threads along x (blockDim.x * gridDim.x),
// so a thread may execute the body more than once. `i` is an int.
#define CUDA_1D_KERNEL_LOOP(i, n)                              \
  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \
       i += blockDim.x * gridDim.x)

// Two nested loop headers of the same kind: `i` covers [0, n) along x and
// `j` covers [0, m) along y. Both counters are size_t.
#define CUDA_2D_KERNEL_LOOP(i, n, j, m)                             \
  for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (n);   \
       i += blockDim.x * gridDim.x)                                 \
    for (size_t j = blockIdx.y * blockDim.y + threadIdx.y; j < (m); \
         j += blockDim.y * gridDim.y)

// Block-level variant: `i` and `j` start at the block indices and advance by
// the grid dimensions, so every thread of a block sees the same (i, j).
#define CUDA_2D_KERNEL_BLOCK_LOOP(i, n, j, m)          \
  for (size_t i = blockIdx.x; i < (n); i += gridDim.x) \
    for (size_t j = blockIdx.y; j < (m); j += gridDim.y)

#define THREADS_PER_BLOCK 512

// Number of blocks to launch for N work items with `num_threads` threads per
// block: ceil(N / num_threads), capped at 4096.
//
// The rounding-up is done in 64-bit arithmetic. Computing
// N + num_threads - 1 in `int` overflows once N is within num_threads of
// INT_MAX, which is undefined behaviour and in practice yields a negative
// block count. Results for all other inputs are unchanged.
inline int GET_BLOCKS(const int N, const int num_threads = THREADS_PER_BLOCK) {
  const int max_block_num = 4096;
  const long long optimal_block_num =
      (static_cast<long long>(N) + num_threads - 1) / num_threads;
  return optimal_block_num < max_block_num
             ? static_cast<int>(optimal_block_num)
             : max_block_num;
}

// Bilinearly samples the height x width row-major map `input` at the
// fractional position (y, x).
//   - positions with y < -1, y > height, x < -1 or x > width yield 0
//   - coordinates <= 0 are clamped to 0
//   - a coordinate whose integer part reaches the last row/column is snapped
//     onto that row/column
// `index` is accepted for debugging only and is not used.
template <typename T>
__device__ T bilinear_interpolate(const T* input, const int height,
                                  const int width, T y, T x,
                                  const int index /* index for debug only*/) {
  const bool outside = y < -1.0 || y > height || x < -1.0 || x > width;
  if (outside) return 0;

  if (y <= 0) y = 0;
  if (x <= 0) x = 0;

  // Rows and columns of the four neighbouring pixels.
  int row_lo = (int)y;
  int col_lo = (int)x;
  int row_hi = row_lo + 1;
  int col_hi = col_lo + 1;

  if (row_lo >= height - 1) {
    row_lo = row_hi = height - 1;
    y = (T)row_lo;
  }
  if (col_lo >= width - 1) {
    col_lo = col_hi = width - 1;
    x = (T)col_lo;
  }

  // Fractional distances to the low / high neighbours.
  const T ly = y - row_lo;
  const T lx = x - col_lo;
  const T hy = 1. - ly;
  const T hx = 1. - lx;

  const T v1 = input[row_lo * width + col_lo];
  const T v2 = input[row_lo * width + col_hi];
  const T v3 = input[row_hi * width + col_lo];
  const T v4 = input[row_hi * width + col_hi];

  const T w1 = hy * hx;
  const T w2 = hy * lx;
  const T w3 = ly * hx;
  const T w4 = ly * lx;

  return (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
}

// Computes the four bilinear weights and the integer corner coordinates for
// the location (y, x) in a `height` x `width` map, using the same clamping
// rules as bilinear_interpolate.
//
// Outputs (by reference): w1..w4 are the weights of the (y_low, x_low),
// (y_low, x_high), (y_high, x_low) and (y_high, x_high) corners. When the
// location is more than one pixel outside the map, all weights are set to 0
// and all four indices to -1. `index` is unused (kept for debugging).
template <typename T>
__device__ void bilinear_interpolate_gradient(
    const int height, const int width, T y, T x, T& w1, T& w2, T& w3, T& w4,
    int& x_low, int& x_high, int& y_low, int& y_high,
    const int index /* index for debug only*/) {
  const bool outside = y < -1.0 || y > height || x < -1.0 || x > width;
  if (outside) {
    // Nothing to sample: signal with zero weights and invalid indices.
    w1 = w2 = w3 = w4 = 0.;
    x_low = x_high = y_low = y_high = -1;
    return;
  }

  if (y <= 0) y = 0;
  if (x <= 0) x = 0;

  y_low = (int)y;
  x_low = (int)x;

  const bool on_last_row = y_low >= height - 1;
  if (on_last_row) {
    y_high = y_low = height - 1;
    y = (T)y_low;
  } else {
    y_high = y_low + 1;
  }

  const bool on_last_col = x_low >= width - 1;
  if (on_last_col) {
    x_high = x_low = width - 1;
    x = (T)x_low;
  } else {
    x_high = x_low + 1;
  }

  // Fractional offsets inside the cell and their complements.
  T ly = y - y_low;
  T lx = x - x_low;
  T hy = 1. - ly, hx = 1. - lx;

  // The forward pass computes w1*v1 + w2*v2 + w3*v3 + w4*v4 with
  //   v1 = input[y_low * width + x_low],  v2 = input[y_low * width + x_high],
  //   v3 = input[y_high * width + x_low], v4 = input[y_high * width + x_high].
  w1 = hy * hx;
  w2 = hy * lx;
  w3 = ly * hx;
  w4 = ly * lx;
}
#endif  // COMMON_CUDA_HELPER


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/convex_iou_cuda_kernel.cuh
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef CONVEX_IOU_CUDA_KERNEL_CUH
#define CONVEX_IOU_CUDA_KERNEL_CUH

#ifdef MMCV_USE_PARROTS
#include "parrots_cuda_helper.hpp"
#else
#include "pytorch_cuda_helper.hpp"
#endif

// Capacity of the local Point / gradient scratch arrays declared in this file.
#define MAXN 100
// Capacity of the index stack used by Jarvis().
#define NMAX 512
// Tolerance used by sig() when classifying a double as negative/zero/positive.
__device__ const double EPS = 1E-8;

// Three-way sign with tolerance: 1 if d > EPS, -1 if d < -EPS, otherwise 0.
__device__ inline int sig(double d) {
  if (d > EPS) return 1;
  if (d < -EPS) return -1;
  return 0;
}

// 2-D point with double-precision coordinates.
struct Point {
  double x, y;
  // Default construction leaves x and y uninitialized.
  __device__ Point() {}
  // Constructs the point (px, py).
  __device__ Point(double px, double py) : x(px), y(py) {}
};

// True when both coordinate differences of a and b are within EPS of zero.
__device__ inline bool point_same(Point& a, Point& b) {
  const bool same_x = sig(a.x - b.x) == 0;
  const bool same_y = sig(a.y - b.y) == 0;
  return same_x && same_y;
}

// Exchanges the coordinates of the two points referenced by a and b.
__device__ inline void swap1(Point* a, Point* b) {
  const double ax = a->x;
  const double ay = a->y;

  a->x = b->x;
  a->y = b->y;

  b->x = ax;
  b->y = ay;
}

// Reverses the first n points of `a` in place.
__device__ inline void reverse1(Point* a, const int n) {
  int front = 0;
  int back = n - 1;
  // Swap symmetric pairs while walking inwards from both ends.
  while (front < back) {
    swap1(&a[front], &a[back]);
    front++;
    back--;
  }
}

// 2-D cross product of the vectors (a - o) and (b - o).
__device__ inline double cross(Point o, Point a, Point b) {
  const double oa_x = a.x - o.x;
  const double oa_y = a.y - o.y;
  const double ob_x = b.x - o.x;
  const double ob_y = b.y - o.y;
  return oa_x * ob_y - ob_x * oa_y;
}

// Squared Euclidean distance between a and b (no square root is taken).
__device__ inline double dis(Point a, Point b) {
  const double dx = a.x - b.x;
  const double dy = a.y - b.y;
  return dx * dx + dy * dy;
}
// Signed area of the polygon ps[0..n-1] via the shoelace sum.
// Side effect: writes ps[n] = ps[0], so `ps` must have room for n + 1 points.
__device__ inline double area(Point* ps, int n) {
  ps[n] = ps[0];  // close the polygon so ps[i + 1] is valid for i = n - 1
  double twice_area = 0;
  for (int i = 0; i < n; i++) {
    const Point& cur = ps[i];
    const Point& nxt = ps[i + 1];
    twice_area += cur.x * nxt.y - cur.y * nxt.x;
  }
  return twice_area / 2.0;
}
// Signed shoelace area of ps[0..n-1] plus its gradient w.r.t. selected
// vertices.
//
// polygon_to_pred_index holds two halves of length n_pred: entry j is a
// polygon vertex index, and entry j + n_pred is the destination slot. For
// each polygon vertex i found in the first half, the x and y area derivatives
// are written to grad_C[2 * slot] and grad_C[2 * slot + 1].
// Side effect: writes ps[n] = ps[0].
__device__ inline double polygon_area_grad(Point* ps, int n,
                                           int* polygon_to_pred_index,
                                           int n_pred, double* grad_C) {
  ps[n] = ps[0];
  // Per vertex i, four entries starting at 4 * i:
  //   [-y_prev, x_prev, y_next, -x_next]
  double vertex_terms[4 * 30 + 2];
  double twice_area = 0;
  for (int i = 0; i < n; i++) {
    const Point cur = ps[i];
    const Point nxt = ps[i + 1];
    twice_area += cur.x * nxt.y - cur.y * nxt.x;
    vertex_terms[i * 4 + 2] = nxt.y;
    vertex_terms[i * 4 + 3] = -nxt.x;
    if (i != n - 1) {
      // The current vertex is the "previous" neighbour of vertex i + 1.
      vertex_terms[i * 4 + 4] = -cur.y;
      vertex_terms[i * 4 + 5] = cur.x;
    } else {
      // The last vertex is the "previous" neighbour of vertex 0.
      vertex_terms[0] = -cur.y;
      vertex_terms[1] = cur.x;
    }
  }

  for (int i = 0; i < n; i++) {
    for (int j = 0; j < n_pred; j++) {
      if (i != polygon_to_pred_index[j]) continue;
      // Only the first match is used.
      const int slot = polygon_to_pred_index[j + n_pred];
      grad_C[2 * slot] = (vertex_terms[i * 4] + vertex_terms[i * 4 + 2]) / 2;
      grad_C[2 * slot + 1] =
          (vertex_terms[i * 4 + 1] + vertex_terms[i * 4 + 1 + 2]) / 2;
      break;
    }
  }

  return twice_area / 2.0;
}

// Intersects line a-b with line c-d and records the derivatives of the
// intersection point w.r.t. c and d.
//
// Returns 2 when both c and d lie on line a-b (within EPS), 0 when
// sig(s2 - s1) is zero, and 1 otherwise. Only on return value 1 are `p` and
// `cut_grad` written. The derivatives go into the row of `cut_grad` that
// starts at 4 * n * m: four entries (dx/dx, dy/dx, dx/dy, dy/dy) at column
// 4 * i for c, and at column 4 * (i + 1) for d, wrapping to column 0 when
// i == n - 1.
__device__ inline int lineCross(Point a, Point b, Point c, Point d, Point& p,
                                double* cut_grad, int m, int n, int i) {
  double s1, s2;
  double s2_s1_2;
  double ds1_dxc, ds1_dyc, ds2_dxd, ds2_dyd;
  double dxp_dxc, dxp_dyc, dxp_dxd, dxp_dyd, dyp_dxc, dyp_dyc, dyp_dxd, dyp_dyd;
  s1 = cross(a, b, c);
  s2 = cross(a, b, d);

  // s1 depends on c and s2 on d through the same linear form.
  ds1_dxc = -(b.y - a.y);
  ds1_dyc = b.x - a.x;
  ds2_dxd = ds1_dxc;
  ds2_dyd = ds1_dyc;
  s2_s1_2 = (s2 - s1) * (s2 - s1);

  const bool c_on_line = sig(s1) == 0;
  const bool d_on_line = sig(s2) == 0;
  if (c_on_line && d_on_line) return 2;
  if (sig(s2 - s1) == 0) return 0;

  // Quotient-rule derivatives of p = (c * s2 - d * s1) / (s2 - s1).
  dxp_dxc =
      ((s2 - d.x * ds1_dxc) * (s2 - s1) - (c.x * s2 - d.x * s1) * (-ds1_dxc)) /
      (s2_s1_2);
  dxp_dyc =
      ((0 - d.x * ds1_dyc) * (s2 - s1) - (c.x * s2 - d.x * s1) * (-ds1_dyc)) /
      (s2_s1_2);
  dxp_dxd =
      ((c.x * ds2_dxd - s1) * (s2 - s1) - (c.x * s2 - d.x * s1) * (ds2_dxd)) /
      (s2_s1_2);
  dxp_dyd =
      ((c.x * ds2_dyd - 0) * (s2 - s1) - (c.x * s2 - d.x * s1) * (ds2_dyd)) /
      (s2_s1_2);

  dyp_dxc =
      ((0 - d.y * ds1_dxc) * (s2 - s1) - (c.y * s2 - d.y * s1) * (-ds1_dxc)) /
      (s2_s1_2);
  dyp_dyc =
      ((s2 - d.y * ds1_dyc) * (s2 - s1) - (c.y * s2 - d.y * s1) * (-ds1_dyc)) /
      (s2_s1_2);
  dyp_dxd =
      ((c.y * ds2_dxd - 0) * (s2 - s1) - (c.y * s2 - d.y * s1) * (ds2_dxd)) /
      (s2_s1_2);
  dyp_dyd =
      ((c.y * ds2_dyd - s1) * (s2 - s1) - (c.y * s2 - d.y * s1) * (ds2_dyd)) /
      (s2_s1_2);

  p.x = (c.x * s2 - d.x * s1) / (s2 - s1);
  p.y = (c.y * s2 - d.y * s1) / (s2 - s1);

  // Row of the gradient table belonging to output point m.
  double* row = cut_grad + 4 * n * m;
  // c is input vertex i; d is the next input vertex, wrapping to vertex 0.
  const int c_col = 4 * i;
  const int d_col = (i == n - 1) ? 0 : 4 * (i + 1);

  row[c_col] = dxp_dxc;
  row[c_col + 1] = dyp_dxc;
  row[c_col + 2] = dxp_dyc;
  row[c_col + 3] = dyp_dyc;

  row[d_col] = dxp_dxd;
  row[d_col + 1] = dyp_dxd;
  row[d_col + 2] = dxp_dyd;
  row[d_col + 3] = dyp_dyd;

  return 1;
}
// Clips polygon p[0..n-1] in place against line a-b, keeping the side where
// cross(a, b, .) is positive, and records how each output vertex depends on
// the input vertices.
//
// On return `n` is the clipped vertex count. `cut_grad` holds one row of
// 4 * (input n) doubles per output vertex: kept vertices get 1.0 on their own
// dx/dx and dy/dy entries, and intersection vertices get the derivatives
// written by lineCross. Side effect: writes p[n] = p[0] before clipping.
__device__ inline void polygon_cut(Point* p, int& n, Point a, Point b,
                                   double* cut_grad) {
  Point clipped[MAXN];
  double clipped_grad[MAXN] = {};
  const int n_in = n;
  int n_out = 0;
  p[n_in] = p[0];
  for (int i = 0; i < n_in; i++) {
    const int side_cur = sig(cross(a, b, p[i]));
    const int side_next = sig(cross(a, b, p[i + 1]));
    if (side_cur > 0) {
      // Vertex strictly inside: copy it and mark an identity dependency.
      clipped[n_out] = p[i];
      clipped_grad[4 * n_in * n_out + 4 * i] = 1.0;
      clipped_grad[4 * n_in * n_out + 4 * i + 3] = 1.0;
      n_out++;
    }
    if (side_cur != side_next) {
      // Edge i -> i + 1 changes side: add its intersection with line a-b.
      lineCross(a, b, p[i], p[i + 1], clipped[n_out], clipped_grad, n_out,
                n_in, i);
      n_out++;
    }
  }

  // Copy back, dropping any vertex equal to its predecessor.
  n = 0;
  const int row_len = 4 * n_in;
  for (int i = 0; i < n_out; i++) {
    if (i && point_same(clipped[i], clipped[i - 1])) continue;
    p[n] = clipped[i];
    for (int j = 0; j < row_len; j++) {
      cut_grad[row_len * n + j] = clipped_grad[row_len * i + j];
    }
    n++;
  }

  // Drop trailing vertices that coincide with the first one.
  while (n > 1 && point_same(p[n - 1], p[0])) n--;
}

// Signed intersection area of triangles (o, a, b) and (o, c, d), where o is
// the origin. The gradient of that area w.r.t. a and b is accumulated into
// grad_AB.
//
// `order` is the index of vertex a in the caller's polygon and `convex_n` is
// that polygon's vertex count. The gradient of a is added at
// grad_AB[2 * order .. 2 * order + 1] and the gradient of b at the following
// vertex slot, wrapping to slot 0 when order == convex_n - 1.
// Returns 0 without touching grad_AB if either triangle is degenerate.
__device__ inline double intersectArea(Point a, Point b, Point c, Point d,
                                       double* grad_AB, int order,
                                       int convex_n) {
  Point o(0, 0);
  const int s1 = sig(cross(o, a, b));
  const int s2 = sig(cross(o, c, d));
  if (s1 == 0 || s2 == 0) return 0.0;

  // Swap endpoints where needed so both cross products become positive.
  const bool ab_swapped = (s1 == -1);
  if (ab_swapped) swap1(&a, &b);
  if (s2 == -1) swap1(&c, &d);

  Point p[10] = {o, a, b};
  const int n0 = 3;
  int n = n0;
  int n1, n2, n3;
  double cut_grad1[MAXN] = {};
  double cut_grad2[MAXN] = {};
  double cut_grad3[MAXN] = {};
  // Jacobians between consecutive clipping stages. Rows are output
  // coordinates (2 per vertex), columns are input coordinates.
  double p1_p_grad[10][10] = {};
  double p2_p1_grad[10][10] = {};
  double p3_p2_grad[10][10] = {};

  double p3_p1_grad[10][10] = {};
  double p3_p_grad[10][10] = {};

  // Stage 1: clip against line o-c. In each cut_grad row, even entries
  // belong to the x output and odd entries to the y output.
  polygon_cut(p, n, o, c, cut_grad1);
  n1 = n;
  for (int i = 0; i < n1; i++) {
    for (int j = 0; j < 4 * n0; j++) {
      p1_p_grad[2 * i + (j % 2)][j / 2] = cut_grad1[4 * n0 * i + j];
    }
  }

  // Stage 2: clip against line c-d.
  polygon_cut(p, n, c, d, cut_grad2);
  n2 = n;
  for (int i = 0; i < n2; i++) {
    for (int j = 0; j < 4 * n1; j++) {
      p2_p1_grad[2 * i + (j % 2)][j / 2] = cut_grad2[4 * n1 * i + j];
    }
  }

  // Stage 3: clip against line d-o.
  polygon_cut(p, n, d, o, cut_grad3);
  n3 = n;
  for (int i = 0; i < n3; i++) {
    for (int j = 0; j < 4 * n2; j++) {
      p3_p2_grad[2 * i + (j % 2)][j / 2] = cut_grad3[4 * n2 * i + j];
    }
  }

  // Chain rule: p3_p2 (2*n3 x 2*n2) * p2_p1 (2*n2 x 2*n1) = p3_p1.
  for (int r = 0; r < 2 * n3; r++) {
    for (int col = 0; col < 2 * n1; col++) {
      double sum = 0.0;
      for (int m = 0; m < 2 * n2; m++) {
        sum = sum + p3_p2_grad[r][m] * p2_p1_grad[m][col];
      }
      p3_p1_grad[r][col] = sum;
    }
  }

  // Chain rule: p3_p1 (2*n3 x 2*n1) * p1_p (2*n1 x 2*n0) = p3_p.
  for (int r = 0; r < 2 * n3; r++) {
    for (int col = 0; col < 2 * n0; col++) {
      double sum = 0.0;
      for (int m = 0; m < 2 * n1; m++) {
        sum = sum + p3_p1_grad[r][m] * p1_p_grad[m][col];
      }
      p3_p_grad[r][col] = sum;
    }
  }

  // Area of the clipped polygon and its gradient w.r.t. its own vertices
  // (identity vertex-to-slot mapping).
  int polygon_index_box_index[20];
  double grad_polygon[20];
  double S_grad[6];

  for (int i = 0; i < n3; i++) {
    polygon_index_box_index[i] = i;
    polygon_index_box_index[i + n3] = i;
  }

  double res =
      polygon_area_grad(p, n3, polygon_index_box_index, n3, grad_polygon);

  // Opposite original orientations give a negative contribution.
  const bool opposite = (s1 * s2 == -1);

  // Gradient of the signed area w.r.t. (o, a, b), in that column order.
  for (int j = 0; j < 2 * 3; j++) {
    double sum = 0.0;
    for (int m = 0; m < 2 * n3; m++) {
      if (opposite) {
        sum = sum - grad_polygon[m] * p3_p_grad[m][j];
      } else {
        sum = sum + grad_polygon[m] * p3_p_grad[m][j];
      }
    }
    S_grad[j] = sum;
  }

  // Columns 2-3 of S_grad belong to the (possibly swapped) a and columns 4-5
  // to b. Undo the swap when scattering back to the caller's vertices.
  const int cur_src = ab_swapped ? 4 : 2;
  const int next_src = ab_swapped ? 2 : 4;
  const int cur_dst = 2 * order;
  const int next_dst = (order != convex_n - 1) ? 2 * order + 2 : 0;

  grad_AB[cur_dst] += S_grad[cur_src];
  grad_AB[cur_dst + 1] += S_grad[cur_src + 1];
  grad_AB[next_dst] += S_grad[next_src];
  grad_AB[next_dst + 1] += S_grad[next_src + 1];

  if (opposite) res = -res;
  return res;
}

// Intersection area of polygons ps1 (n1 vertices) and ps2 (n2 vertices).
// It sums intersectArea over every pair of edges, which also accumulates the
// gradient w.r.t. the ps1 vertices into grad_AB.
// Side effects: a polygon with negative signed area is reversed in place, and
// ps1[n1] / ps2[n2] are overwritten with the first vertex.
__device__ inline double intersectAreaO(Point* ps1, int n1, Point* ps2, int n2,
                                        double* grad_AB) {
  if (area(ps1, n1) < 0) reverse1(ps1, n1);
  if (area(ps2, n2) < 0) reverse1(ps2, n2);
  // Close both polygons so edge i -> i + 1 exists for the last vertex.
  ps1[n1] = ps1[0];
  ps2[n2] = ps2[0];

  double total = 0;
  for (int e1 = 0; e1 < n1; e1++) {
    for (int e2 = 0; e2 < n2; e2++) {
      total += intersectArea(ps1[e1], ps1[e1 + 1], ps2[e2], ps2[e2 + 1],
                             grad_AB, e1, n1);
    }
  }
  return total;
}

// Rewrites in_poly[0..n_poly-1] in place with a sequence of hull vertices
// found by two wrapping passes, and updates n_poly to the new count
// (top1 + top2).
//
// NOTE(review): the local chains right_point / left_point hold 10 points
// each, and callers appear to pass at most 13 points. Confirm a chain can
// never exceed that capacity.
__device__ inline void Jarvis(Point* in_poly, int& n_poly) {
  Point p_max, p_k;
  int max_index, k_index;
  int Stack[NMAX] = {}, top1, top2;
  double sign;
  Point right_point[10], left_point[10];

  // Single pass: move the point with the smallest y (ties: smallest x) to
  // index 0 by swapping, and track the point with the largest y (ties:
  // largest x) in p_max / max_index.
  for (int i = 0; i < n_poly; i++) {
    if (in_poly[i].y < in_poly[0].y ||
        in_poly[i].y == in_poly[0].y && in_poly[i].x < in_poly[0].x) {
      Point* j = &(in_poly[0]);
      Point* k = &(in_poly[i]);
      swap1(j, k);
    }
    if (i == 0) {
      p_max = in_poly[0];
      max_index = 0;
    }
    if (in_poly[i].y > p_max.y ||
        in_poly[i].y == p_max.y && in_poly[i].x > p_max.x) {
      p_max = in_poly[i];
      max_index = i;
    }
  }

  // If the tracked maximum is still index 0, fall back to index 1 so the
  // wrapping loops below have a target different from the start.
  if (max_index == 0) {
    max_index = 1;
    p_max = in_poly[max_index];
  }

  // First chain: starting at index 0, repeatedly pick the candidate for which
  // cross(current, candidate, best) > 0 (ties: the farther one), until
  // max_index is reached. Stack[0..top1] records the visited indices.
  k_index = 0, Stack[0] = 0, top1 = 0;
  while (k_index != max_index) {
    p_k = p_max;
    k_index = max_index;
    for (int i = 1; i < n_poly; i++) {
      sign = cross(in_poly[Stack[top1]], in_poly[i], p_k);
      if ((sign > 0) || ((sign == 0) && (dis(in_poly[Stack[top1]], in_poly[i]) >
                                         dis(in_poly[Stack[top1]], p_k)))) {
        p_k = in_poly[i];
        k_index = i;
      }
    }
    top1++;
    Stack[top1] = k_index;
  }
  for (int i = 0; i <= top1; i++) right_point[i] = in_poly[Stack[i]];

  // Second chain: same walk with the opposite turn test (cross < 0).
  k_index = 0, Stack[0] = 0, top2 = 0;

  while (k_index != max_index) {
    p_k = p_max;
    k_index = max_index;
    for (int i = 1; i < n_poly; i++) {
      sign = cross(in_poly[Stack[top2]], in_poly[i], p_k);
      if ((sign < 0) || (sign == 0) && (dis(in_poly[Stack[top2]], in_poly[i]) >
                                        dis(in_poly[Stack[top2]], p_k))) {
        p_k = in_poly[i];
        k_index = i;
      }
    }
    top2++;
    Stack[top2] = k_index;
  }
  // The chain end (index top2) is skipped: it is max_index, which is already
  // the last entry of right_point.
  for (int i = top2 - 1; i >= 0; i--) left_point[i] = in_poly[Stack[i]];

  // Output: the first chain in order, then the second chain in reverse. Its
  // element 0 (the start point) is not repeated.
  for (int i = 0; i < top1 + top2; i++) {
    if (i <= top1) {
      in_poly[i] = right_point[i];
    } else {
      in_poly[i] = left_point[top2 - (i - top1)];
    }
  }
  n_poly = top1 + top2;
}

// Absolute area of the hull (computed by Jarvis) around the points of ps1 and
// ps2 together, plus the gradient of that area w.r.t. the ps1 vertices.
//
// Points of ps2 that coincide with a ps1 point are removed from ps2 first,
// which modifies ps2 in place. grad_C receives two entries per ps1 vertex
// that lies on the hull. If no ps1 vertex is on the hull, all 18 entries of
// grad_C are set to zero.
__device__ inline double intersectAreaPoly(Point* ps1, int n1, Point* ps2,
                                           int n2, double* grad_C) {
  Point polygon[MAXN];
  // The scan and shift bounds use the incoming ps2 count, even after n2 has
  // been decremented.
  const int n2_in = n2;
  for (int i = 0; i < n1; i++) {
    for (int j = 0; j < n2_in; j++) {
      if (!point_same(ps1[i], ps2[j])) continue;
      // Remove ps2[j] by shifting the tail one slot to the left.
      for (int k = j; k < n2_in - 1; k++) {
        ps2[k] = ps2[k + 1];
      }
      n2--;
      break;
    }
  }

  // Concatenate ps1 and the remaining ps2 points.
  int n_poly = n1 + n2;
  for (int i = 0; i < n_poly; i++) {
    polygon[i] = (i < n1) ? ps1[i] : ps2[i - n1];
  }

  Jarvis(polygon, n_poly);

  // First half: hull indices that are ps1 vertices.
  // Second half (offset n1): the matching ps1 indices.
  int polygon_to_pred_index[18] = {-1, -1, -1, -1, -1, -1, -1, -1, -1,
                                   -1, -1, -1, -1, -1, -1, -1, -1, -1};
  int n_pred = 0;
  for (int i = 0; i < n_poly; i++) {
    for (int j = 0; j < n1; j++) {
      const bool exact_match =
          polygon[i].x == ps1[j].x && polygon[i].y == ps1[j].y;
      if (!exact_match) continue;
      polygon_to_pred_index[n_pred] = i;
      polygon_to_pred_index[n_pred + n1] = j;
      n_pred += 1;
      break;
    }
  }

  if (n_pred == 0) {
    // The hull does not depend on ps1: zero gradient.
    double polygon_area = fabs(area(polygon, n_poly));
    for (int i = 0; i < 18; i++) {
      grad_C[i] = 0.0;
    }
    return polygon_area;
  }

  double polygon_area =
      polygon_area_grad(polygon, n_poly, polygon_to_pred_index, n1, grad_C);
  if (polygon_area < 0) {
    // The absolute value is returned, so flip the gradient with the sign.
    for (int i = 0; i < 18; i++) {
      grad_C[i] = -grad_C[i];
    }
  }
  return fabs(polygon_area);
}

// Same hull construction as Jarvis(), but it also reports where each output
// vertex came from in the input.
//
// in_poly[0..n_poly-1] is rewritten in place with the hull vertices and
// n_poly is updated to their count. For each output vertex i,
// points_to_convex_ind[i] is set to the index of the first input point that
// matches it under point_same(); entries with no match are left untouched.
//
// NOTE(review): local buffers hold 20 input points, 20 stack entries and 10
// points per chain. Callers in this file pass 9 points. Confirm the limits
// before using larger inputs.
__device__ inline void Jarvis_and_index(Point* in_poly, int& n_poly,
                                        int* points_to_convex_ind) {
  // Keep a copy of the input order, because in_poly is permuted below.
  int n_input = n_poly;
  Point input_poly[20];
  for (int i = 0; i < n_input; i++) {
    input_poly[i].x = in_poly[i].x;
    input_poly[i].y = in_poly[i].y;
  }
  Point p_max, p_k;
  int max_index, k_index;
  int Stack[20], top1, top2;
  double sign;
  Point right_point[10], left_point[10];

  // Single pass: move the point with the smallest y (ties: smallest x) to
  // index 0 by swapping, and track the point with the largest y (ties:
  // largest x) in p_max / max_index.
  for (int i = 0; i < n_poly; i++) {
    if (in_poly[i].y < in_poly[0].y ||
        in_poly[i].y == in_poly[0].y && in_poly[i].x < in_poly[0].x) {
      Point* j = &(in_poly[0]);
      Point* k = &(in_poly[i]);
      swap1(j, k);
    }
    if (i == 0) {
      p_max = in_poly[0];
      max_index = 0;
    }
    if (in_poly[i].y > p_max.y ||
        in_poly[i].y == p_max.y && in_poly[i].x > p_max.x) {
      p_max = in_poly[i];
      max_index = i;
    }
  }
  // If the tracked maximum is still index 0, fall back to index 1 so the
  // wrapping loops below have a target different from the start.
  if (max_index == 0) {
    max_index = 1;
    p_max = in_poly[max_index];
  }

  // First chain: starting at index 0, repeatedly pick the candidate for which
  // cross(current, candidate, best) > 0 (ties: the farther one), until
  // max_index is reached.
  k_index = 0, Stack[0] = 0, top1 = 0;
  while (k_index != max_index) {
    p_k = p_max;
    k_index = max_index;
    for (int i = 1; i < n_poly; i++) {
      sign = cross(in_poly[Stack[top1]], in_poly[i], p_k);
      if ((sign > 0) || ((sign == 0) && (dis(in_poly[Stack[top1]], in_poly[i]) >
                                         dis(in_poly[Stack[top1]], p_k)))) {
        p_k = in_poly[i];
        k_index = i;
      }
    }
    top1++;
    Stack[top1] = k_index;
  }
  for (int i = 0; i <= top1; i++) {
    right_point[i] = in_poly[Stack[i]];
  }

  // Second chain: same walk with the opposite turn test (cross < 0).
  k_index = 0, Stack[0] = 0, top2 = 0;

  while (k_index != max_index) {
    p_k = p_max;
    k_index = max_index;
    for (int i = 1; i < n_poly; i++) {
      sign = cross(in_poly[Stack[top2]], in_poly[i], p_k);
      if ((sign < 0) || (sign == 0) && (dis(in_poly[Stack[top2]], in_poly[i]) >
                                        dis(in_poly[Stack[top2]], p_k))) {
        p_k = in_poly[i];
        k_index = i;
      }
    }
    top2++;
    Stack[top2] = k_index;
  }

  // The chain end (index top2) is skipped: it is max_index, which is already
  // the last entry of right_point.
  for (int i = top2 - 1; i >= 0; i--) {
    left_point[i] = in_poly[Stack[i]];
  }

  // Output: the first chain in order, then the second chain in reverse. Its
  // element 0 (the start point) is not repeated.
  for (int i = 0; i < top1 + top2; i++) {
    if (i <= top1) {
      in_poly[i] = right_point[i];
    } else {
      in_poly[i] = left_point[top2 - (i - top1)];
    }
  }
  n_poly = top1 + top2;
  // Map every hull vertex back to the first matching input point.
  for (int i = 0; i < n_poly; i++) {
    for (int j = 0; j < n_input; j++) {
      if (point_same(in_poly[i], input_poly[j])) {
        points_to_convex_ind[i] = j;
        break;
      }
    }
  }
}

// Computes a GIoU-style score between the hull of a 9-point set `p` (18
// values, x/y interleaved) and a quadrilateral `q` (8 values), together with
// the gradient of that score w.r.t. the 9 input points.
//
// point_grad receives 18 values in the input point order. Points that are not
// hull vertices get zero gradient. `idx` is unused.
template <typename T>
__device__ inline float devrIoU(T const* const p, T const* const q,
                                T* point_grad, const int idx) {
  Point ps1[MAXN], ps2[MAXN];

  // Hull of the predicted point set, with a map back to input indices.
  Point hull[MAXN];
  for (int i = 0; i < 9; i++) {
    hull[i].x = (double)p[i * 2];
    hull[i].y = (double)p[i * 2 + 1];
  }
  int n_hull = 9;
  int hull_to_input[9] = {-1, -1, -1, -1, -1, -1, -1, -1, -1};
  Jarvis_and_index(hull, n_hull, hull_to_input);

  int n1 = n_hull;
  int n2 = 4;

  for (int i = 0; i < n1; i++) {
    ps1[i].x = (double)hull[i].x;
    ps1[i].y = (double)hull[i].y;
  }

  for (int i = 0; i < n2; i++) {
    ps2[i].x = (double)q[i * 2];
    ps2[i].y = (double)q[i * 2 + 1];
  }

  // Identity mapping: hull vertex i writes gradient slot i.
  int identity_index[18];
  for (int i = 0; i < n1; i++) {
    identity_index[i] = i;
    identity_index[i + n1] = i;
  }

  double grad_A[18] = {};   // d(hull area) / d(hull vertices)
  double grad_AB[18] = {};  // d(intersection area) / d(hull vertices)
  double grad_C[18] = {};   // d(enclosing area) / d(hull vertices)

  double inter_area = intersectAreaO(ps1, n1, ps2, n2, grad_AB);
  double S_pred = polygon_area_grad(ps1, n1, identity_index, n1, grad_A);
  if (S_pred < 0) {
    // |S_pred| is used below, so flip the gradient with the sign.
    for (int i = 0; i < n_hull * 2; i++) {
      grad_A[i] = -grad_A[i];
    }
  }
  double union_area = fabs(S_pred) + fabs(area(ps2, n2)) - inter_area;

  double iou = inter_area / union_area;
  double polygon_area = intersectAreaPoly(ps1, n1, ps2, n2, grad_C);

  //    printf("%d:live\n", idx);
  double rot_giou = iou - (polygon_area - union_area) / polygon_area;

  // Scatter per-hull-vertex gradients back to the input point order.
  float scattered[18] = {};

  for (int i = 0; i < n_hull; i++) {
    const int src_point = hull_to_input[i];
    for (int axis = 0; axis < 2; axis++) {
      const int g = 2 * i + axis;
      scattered[2 * src_point + axis] =
          (float)((union_area + inter_area) / (union_area * union_area) *
                      grad_AB[g] -
                  iou / union_area * grad_A[g] -
                  1 / polygon_area * (grad_AB[g] - grad_A[g]) -
                  (union_area) / polygon_area / polygon_area * grad_C[g]);
    }
  }

  for (int k = 0; k < 2 * 9; k++) {
    point_grad[k] = scattered[k];
  }
  return (float)rot_giou;
}

// For each of the ex_n_boxes predicted point sets (18 values each), computes
// the score and point gradients against the ground-truth box at the same
// index (8 values each).
//
// Each output row of point_grad has 19 values: 18 gradient entries followed
// by the score. gt_n_boxes is not used by this kernel.
template <typename T>
__global__ void convex_giou_cuda_kernel(const int ex_n_boxes,
                                        const int gt_n_boxes, const T* ex_boxes,
                                        const T* gt_boxes, T* point_grad) {
  CUDA_1D_KERNEL_LOOP(index, ex_n_boxes) {
    const T* pred_points = ex_boxes + index * 18;
    const T* gt_corners = gt_boxes + index * 8;
    T* out_row = point_grad + index * 19;
    // The score is stored through T, after the 18 gradient entries.
    const T score = devrIoU(pred_points, gt_corners, out_row, threadIdx.x);
    out_row[18] = score;
  }
}

// Intersects line a-b with line c-d.
// Returns 2 when both c and d lie on line a-b (within EPS), 0 when
// sig(s2 - s1) is zero, and 1 otherwise. `p` is written only on return
// value 1.
__device__ inline int lineCross(Point a, Point b, Point c, Point d, Point& p) {
  const double s1 = cross(a, b, c);
  const double s2 = cross(a, b, d);
  const bool c_on_line = sig(s1) == 0;
  const bool d_on_line = sig(s2) == 0;
  if (c_on_line && d_on_line) return 2;
  if (sig(s2 - s1) == 0) return 0;
  // Interpolate between c and d using their signed offsets from line a-b.
  p.x = (c.x * s2 - d.x * s1) / (s2 - s1);
  p.y = (c.y * s2 - d.y * s1) / (s2 - s1);
  return 1;
}

// Clips polygon p[0..n-1] in place against line a-b, keeping the side where
// cross(a, b, .) is positive. On return `n` is the clipped vertex count.
// Side effect: writes p[n] = p[0] before clipping.
__device__ inline void polygon_cut(Point* p, int& n, Point a, Point b) {
  Point clipped[MAXN];
  int n_out = 0;
  p[n] = p[0];
  for (int i = 0; i < n; i++) {
    const int side_cur = sig(cross(a, b, p[i]));
    const int side_next = sig(cross(a, b, p[i + 1]));
    if (side_cur > 0) {
      clipped[n_out] = p[i];
      n_out++;
    }
    if (side_cur != side_next) {
      // Edge i -> i + 1 changes side: add its intersection with line a-b.
      lineCross(a, b, p[i], p[i + 1], clipped[n_out]);
      n_out++;
    }
  }

  // Copy back, dropping any vertex equal to its predecessor.
  n = 0;
  for (int i = 0; i < n_out; i++) {
    if (i && point_same(clipped[i], clipped[i - 1])) continue;
    p[n] = clipped[i];
    n++;
  }

  // Drop trailing vertices that coincide with the first one.
  while (n > 1 && point_same(p[n - 1], p[0])) n--;
}

// Signed intersection area of triangles (o, a, b) and (o, c, d), where o is
// the origin. The result is negated when the two triangles had opposite
// orientation. Returns 0 if either triangle is degenerate.
__device__ inline double intersectArea(Point a, Point b, Point c, Point d) {
  Point o(0, 0);
  const int s1 = sig(cross(o, a, b));
  const int s2 = sig(cross(o, c, d));
  if (s1 == 0 || s2 == 0) return 0.0;

  // Swap endpoints where needed so both cross products become positive.
  if (s1 == -1) swap1(&a, &b);
  if (s2 == -1) swap1(&c, &d);

  Point p[10] = {o, a, b};
  int n = 3;

  // Clip triangle (o, a, b) against the three edges of (o, c, d).
  polygon_cut(p, n, o, c);
  polygon_cut(p, n, c, d);
  polygon_cut(p, n, d, o);

  const double clipped_area = area(p, n);
  return (s1 * s2 == -1) ? -clipped_area : clipped_area;
}
// Intersection area of polygons ps1 (n1 vertices) and ps2 (n2 vertices),
// computed by summing intersectArea over every pair of edges.
// Side effects: a polygon with negative signed area is reversed in place, and
// ps1[n1] / ps2[n2] are overwritten with the first vertex.
__device__ inline double intersectAreaO(Point* ps1, int n1, Point* ps2,
                                        int n2) {
  if (area(ps1, n1) < 0) reverse1(ps1, n1);
  if (area(ps2, n2) < 0) reverse1(ps2, n2);
  // Close both polygons so edge i -> i + 1 exists for the last vertex.
  ps1[n1] = ps1[0];
  ps2[n2] = ps2[0];

  double total = 0;
  for (int e1 = 0; e1 < n1; e1++) {
    for (int e2 = 0; e2 < n2; e2++) {
      total += intersectArea(ps1[e1], ps1[e1 + 1], ps2[e2], ps2[e2 + 1]);
    }
  }
  return total;
}

// IoU between the hull of a 9-point set `p` (18 values, x/y interleaved) and
// a quadrilateral `q` (8 values, x/y interleaved).
template <typename T>
__device__ inline float devrIoU(T const* const p, T const* const q) {
  Point ps1[MAXN], ps2[MAXN];

  // Hull of the predicted points. The index map is computed but not used.
  Point hull[MAXN];
  for (int i = 0; i < 9; i++) {
    hull[i].x = (double)p[i * 2];
    hull[i].y = (double)p[i * 2 + 1];
  }
  int n_hull = 9;
  int hull_to_input[9] = {-1, -1, -1, -1, -1, -1, -1, -1, -1};
  Jarvis_and_index(hull, n_hull, hull_to_input);

  int n1 = n_hull;
  for (int i = 0; i < n1; i++) {
    ps1[i].x = (double)hull[i].x;
    ps1[i].y = (double)hull[i].y;
  }

  int n2 = 4;
  for (int i = 0; i < n2; i++) {
    ps2[i].x = (double)q[i * 2];
    ps2[i].y = (double)q[i * 2 + 1];
  }

  const double inter_area = intersectAreaO(ps1, n1, ps2, n2);
  const double S_pred = area(ps1, n1);
  const double union_area = fabs(S_pred) + fabs(area(ps2, n2)) - inter_area;
  const double iou = inter_area / union_area;
  return (float)iou;
}

// Fills the ex_n_boxes x gt_n_boxes row-major matrix `iou` with the IoU
// between every predicted point set (18 values each) and every ground-truth
// box (8 values each).
template <typename T>
__global__ void convex_iou_cuda_kernel(const int ex_n_boxes,
                                       const int gt_n_boxes, const T* ex_boxes,
                                       const T* gt_boxes, T* iou) {
  CUDA_1D_KERNEL_LOOP(index, ex_n_boxes) {
    const T* pred_points = ex_boxes + index * 18;
    for (int gt = 0; gt < gt_n_boxes; gt++) {
      const T* gt_corners = gt_boxes + gt * 8;
      iou[index * gt_n_boxes + gt] = devrIoU(pred_points, gt_corners);
    }
  }
}
#endif  // CONVEX_IOU_CUDA_KERNEL_CUH


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/correlation_cuda.cuh
================================================
// Copyright (c) OpenMMLab. All rights reserved.
// Modified from
// https://github.com/ClementPinard/Pytorch-Correlation-extension/blob/master/Correlation_Module/correlation_cuda_kernel.cu
// Original licence: Under MIT License

#ifndef CORRELATION_CUDA
#define CORRELATION_CUDA

#ifdef MMCV_USE_PARROTS
#include "parrots_cuda_helper.hpp"
#else
#include "pytorch_cuda_helper.hpp"
#endif

#include <cuda.h>
#include <cuda_runtime.h>
// Using <torch/extension.h> is recommended in the official documentation in
// https://pytorch.org/tutorials/advanced/cpp_extension.html#writing-the-c-op.
// However, we use <torch/types.h> for compatibility with CUDA 9.0
// Read https://github.com/pytorch/extension-cpp/issues/35 for more details.
#include <torch/types.h>

#include <iostream>
#include <vector>

using namespace torch;

// Shorthand for 4-D / 5-D PackedTensorAccessor32 types over `scalar_t` with
// RestrictPtrTraits. `scalar_t` must be in scope where these are used.
#define TensorAcc4R PackedTensorAccessor32<scalar_t, 4, RestrictPtrTraits>
#define TensorAcc5R PackedTensorAccessor32<scalar_t, 5, RestrictPtrTraits>
// True when 0 <= x < H and 0 <= y < W. The expansion is wrapped in
// parentheses, but the individual arguments are not, so pass simple
// expressions only.
#define WITHIN_BOUNDS(x, y, H, W) (x >= 0 && x < H && y >= 0 && y < W)

// Length of the shared reduction buffer in the forward kernel, and the stride
// of its per-thread channel loop.
#define THREADS_FORWARD 32
// Side length of the square shared reduction buffer in the backward kernels,
// and the stride of their per-thread patch loops.
#define THREADS_BACKWARD 16

// Forward correlation kernel.
//
// Each block handles one output location (n = blockIdx.x, h = blockIdx.y,
// w = blockIdx.z). For every patch displacement (ph, pw), the threads split
// the channel dimension and accumulate the products of rInput1 and the
// shifted rInput2 over the kH x kW kernel window. Thread 0 then sums the
// per-thread partials and writes output[n][ph][pw][h][w].
//
// rInput1 / rInput2 are indexed as [n][row][col][channel]. The shared buffer
// has THREADS_FORWARD entries and is indexed by threadIdx.x, so the kernel
// presumably expects blockDim.x == THREADS_FORWARD (confirm at the launch
// site).
template <typename scalar_t>
__global__ void correlation_forward_cuda_kernel(
    const TensorAcc4R rInput1, const TensorAcc4R rInput2, TensorAcc5R output,
    int kH, int kW, int patchH, int patchW, int padH, int padW, int dilationH,
    int dilationW, int dilation_patchH, int dilation_patchW, int dH, int dW) {
  const int iH = rInput1.size(1);
  const int iW = rInput1.size(2);
  const int C = rInput1.size(3);

  const int n = blockIdx.x;
  const int h = blockIdx.y;
  const int w = blockIdx.z;
  const int thread = threadIdx.x;

  // Top-left corner of the kernel window in input coordinates.
  const int start_i = -padH + h * dH;
  const int start_j = -padW + w * dW;

  // Offset that centres the dilated patch around the window.
  const int patchRadH = dilation_patchH * (patchH - 1) / 2;
  const int patchRadW = dilation_patchW * (patchW - 1) / 2;

  __shared__ scalar_t prod_sum[THREADS_FORWARD];

  for (int ph = 0; ph < patchH; ++ph) {
    int ph_dilated = ph * dilation_patchH - patchRadH;
    for (int pw = 0; pw < patchW; ++pw) {
      int pw_dilated = pw * dilation_patchW - patchRadW;
      prod_sum[thread] = 0;
      for (int i = 0; i < kH; ++i) {
        int i1 = start_i + i * dilationH;
        int i2 = i1 + ph_dilated;
        // Both rows must be inside the input; i1 and i2 are both row indices.
        if (WITHIN_BOUNDS(i1, i2, iH, iH)) {
          for (int j = 0; j < kW; ++j) {
            int j1 = start_j + j * dilationW;
            int j2 = j1 + pw_dilated;
            if (WITHIN_BOUNDS(j1, j2, iW, iW)) {
              for (int c = thread; c < C; c += THREADS_FORWARD) {
                scalar_t v1 = rInput1[n][i1][j1][c];
                scalar_t v2 = rInput2[n][i2][j2][c];
                prod_sum[thread] += v1 * v2;
              }
            }
          }
        }
      }
      // Make every partial sum visible before thread 0 reads them.
      __syncthreads();
      if (thread == 0) {
        scalar_t reduce_sum = 0;
        for (int index = 0; index < THREADS_FORWARD; ++index) {
          reduce_sum += prod_sum[index];
        }
        output[n][ph][pw][h][w] = reduce_sum;
      }
      // Second barrier: the next iteration resets prod_sum[thread]. Without
      // it, a thread could clear its slot while thread 0 is still summing the
      // current values. The loop bounds are uniform across the block, so
      // every thread reaches this barrier.
      __syncthreads();
    }
  }
}

// Backward correlation kernel for the gradient w.r.t. the first input.
//
// Each block handles one element grad_input1[n][c][h][w] (n = `batch`,
// c = blockIdx.x, h = blockIdx.y, w = blockIdx.z). Threads (threadIdx.x,
// threadIdx.y) split the patch displacements (ph, pw). For each displacement
// a thread reads the matching input2 value and accumulates it against every
// grad_output location whose kernel window covers (h, w). Thread (0, 0) then
// sums the per-thread partials.
//
// input2 is indexed as [n][c][row][col]. The shared buffer is
// THREADS_BACKWARD x THREADS_BACKWARD and indexed by threadIdx.x / .y, so the
// kernel presumably expects that block shape (confirm at the launch site).
template <typename scalar_t>
__global__ void correlation_backward_cuda_kernel_input1(
    const TensorAcc5R grad_output, const TensorAcc4R input2,
    TensorAcc4R grad_input1, const int kH, const int kW, const int patchH,
    const int patchW, const int padH, const int padW, const int dilationH,
    const int dilationW, const int dilation_patchH, const int dilation_patchW,
    const int dH, const int dW, const int batch) {
  const int iH = input2.size(2);
  const int iW = input2.size(3);

  const int H = grad_output.size(3);
  const int W = grad_output.size(4);

  // Patch centring offset, computed exactly as in the forward kernel:
  // dilation * (patch - 1) / 2. The earlier form dilation * ((patch - 1) / 2)
  // gives a different offset for even patch sizes with patch dilation > 1,
  // so the gradient did not match the forward pass.
  const int patchRadH = dilation_patchH * (patchH - 1) / 2;
  const int patchRadW = dilation_patchW * (patchW - 1) / 2;

  const int n = batch;
  const int c = blockIdx.x;
  const int h = blockIdx.y;
  const int w = blockIdx.z;
  const int ph_off = threadIdx.x;
  const int pw_off = threadIdx.y;

  // Range of padded coordinates whose kernel window can contain (h, w).
  const int h_2 = h + padH;
  const int w_2 = w + padW;
  const int min_h = h_2 - kH * dilationH;
  const int min_w = w_2 - kW * dilationW;

  __shared__ scalar_t prod_sum[THREADS_BACKWARD][THREADS_BACKWARD];
  prod_sum[ph_off][pw_off] = 0;

  for (int ph = ph_off; ph < patchH; ph += THREADS_BACKWARD) {
    // input2 row that the forward pass paired with input1 row h for this ph.
    int i1 = h + ph * dilation_patchH - patchRadH;
    for (int pw = pw_off; pw < patchW; pw += THREADS_BACKWARD) {
      int j1 = w + pw * dilation_patchW - patchRadW;
      if (WITHIN_BOUNDS(i1, j1, iH, iW)) {
        scalar_t val = input2[n][c][i1][j1];
        for (int h_3 = h_2; h_3 > min_h; h_3 -= dilationH) {
          int i2 = (h_3) / dH;
          // Skip positions that do not fall on the output stride.
          if (i2 * dH != h_3) continue;
          for (int w_3 = w_2; w_3 > min_w; w_3 -= dilationW) {
            int j2 = (w_3) / dW;
            if (j2 * dW != w_3) continue;
            if (WITHIN_BOUNDS(i2, j2, H, W)) {
              prod_sum[ph_off][pw_off] +=
                  grad_output[n][ph][pw][i2][j2] * val;
            }
          }
        }
      }
    }
  }

  // Make every partial sum visible before thread (0, 0) reads them.
  __syncthreads();

  if (ph_off == 0 && pw_off == 0) {
    scalar_t reduce_sum = 0;
    for (int ph = 0; ph < THREADS_BACKWARD; ++ph) {
      for (int pw = 0; pw < THREADS_BACKWARD; ++pw) {
        reduce_sum += prod_sum[ph][pw];
      }
    }
    grad_input1[n][c][h][w] = reduce_sum;
  }
}

// Backward correlation kernel for the gradient w.r.t. the second input.
//
// Each block handles one element grad_input2[n][c][h][w] (n = `batch`,
// c = blockIdx.x, h = blockIdx.y, w = blockIdx.z). Threads (threadIdx.x,
// threadIdx.y) split the patch displacements (ph, pw). For each displacement
// a thread finds the input1 position that the forward pass paired with
// (h, w), then accumulates it against every grad_output location whose
// kernel window covers that input1 position. Thread (0, 0) then sums the
// per-thread partials.
//
// input1 is indexed as [n][c][row][col]. The shared buffer is
// THREADS_BACKWARD x THREADS_BACKWARD and indexed by threadIdx.x / .y, so the
// kernel presumably expects that block shape (confirm at the launch site).
template <typename scalar_t>
__global__ void correlation_backward_cuda_kernel_input2(
    const TensorAcc5R grad_output, const TensorAcc4R input1,
    TensorAcc4R grad_input2, int kH, int kW, int patchH, int patchW, int padH,
    int padW, int dilationH, int dilationW, int dilation_patchH,
    int dilation_patchW, int dH, int dW, int batch) {
  const int iH = input1.size(2);
  const int iW = input1.size(3);

  // Patch centring offset, computed exactly as in the forward kernel:
  // dilation * (patch - 1) / 2. The earlier form dilation * ((patch - 1) / 2)
  // gives a different offset for even patch sizes with patch dilation > 1,
  // so the gradient did not match the forward pass.
  const int patchRadH = dilation_patchH * (patchH - 1) / 2;
  const int patchRadW = dilation_patchW * (patchW - 1) / 2;

  const int H = grad_output.size(3);
  const int W = grad_output.size(4);

  const int dilatedKH = kH * dilationH;
  const int dilatedKW = kW * dilationW;

  const int n = batch;
  const int c = blockIdx.x;
  const int h = blockIdx.y;
  const int w = blockIdx.z;
  const int ph_off = threadIdx.x;
  const int pw_off = threadIdx.y;

  __shared__ scalar_t prod_sum[THREADS_BACKWARD][THREADS_BACKWARD];
  prod_sum[ph_off][pw_off] = 0;

  for (int ph = ph_off; ph < patchH; ph += THREADS_BACKWARD) {
    // input1 row whose forward-pass partner (for this ph) is input2 row h.
    int i1 = h - (ph * dilation_patchH - patchRadH);
    for (int pw = pw_off; pw < patchW; pw += THREADS_BACKWARD) {
      int j1 = w - (pw * dilation_patchW - patchRadW);
      if (WITHIN_BOUNDS(i1, j1, iH, iW)) {
        scalar_t val = input1[n][c][i1][j1];

        // Range of padded coordinates whose kernel window can contain
        // (i1, j1).
        const int h_2 = i1 + padH;
        const int w_2 = j1 + padW;
        const int min_h = h_2 - dilatedKH;
        const int min_w = w_2 - dilatedKW;

        for (int h_3 = h_2; h_3 > min_h; h_3 -= dilationH) {
          int i2 = (h_3) / dH;
          // Skip positions that do not fall on the output stride.
          if (i2 * dH != h_3) continue;
          for (int w_3 = w_2; w_3 > min_w; w_3 -= dilationW) {
            int j2 = (w_3) / dW;
            if (j2 * dW != w_3) continue;
            if (WITHIN_BOUNDS(i2, j2, H, W)) {
              prod_sum[ph_off][pw_off] +=
                  grad_output[n][ph][pw][i2][j2] * val;
            }
          }
        }
      }
    }
  }

  // Make every partial sum visible before thread (0, 0) reads them.
  __syncthreads();

  if (ph_off == 0 && pw_off == 0) {
    scalar_t reduce_sum = 0;
    for (int ph = 0; ph < THREADS_BACKWARD; ++ph) {
      for (int pw = 0; pw < THREADS_BACKWARD; ++pw) {
        reduce_sum += prod_sum[ph][pw];
      }
    }
    grad_input2[n][c][h][w] = reduce_sum;
  }
}
#endif


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/deform_conv_cuda_kernel.cuh
================================================
/*!
 ******************* BEGIN Caffe Copyright Notice and Disclaimer
 *****************
 *
 * COPYRIGHT
 *
 * All contributions by the University of California:
 * Copyright (c) 2014-2017 The Regents of the University of California (Regents)
 * All rights reserved.
 *
 * All other contributions:
 * Copyright (c) 2014-2017, the respective contributors
 * All rights reserved.
 *
 * Caffe uses a shared copyright model: each contributor holds copyright over
 * their contributions to Caffe. The project versioning records all such
 * contribution and copyright details. If a contributor wants to further mark
 * their specific copyright on a particular contribution, they should indicate
 * their copyright solely in the commit message of the change when it is
 * committed.
 *
 * LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 *this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 *AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 *IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
 *FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 *DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 *SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 *CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 *OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * CONTRIBUTION AGREEMENT
 *
 * By contributing to the BVLC/caffe repository through pull-request, comment,
 * or otherwise, the contributor releases their content to the
 * license and copyright terms herein.
 *
 ***************** END Caffe Copyright Notice and Disclaimer
 *********************
 *
 * Copyright (c) 2018 Microsoft
 * Licensed under The MIT License [see LICENSE for details]
 * \file modulated_deformable_im2col.cuh
 * \brief Function definitions of converting an image to
 * column matrix based on kernel, padding, dilation, and offset.
 * These functions are mainly used in deformable convolution operators.
 * \ref: https://arxiv.org/abs/1703.06211
 * \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu, Dazhi Cheng
 */

// modified from
// https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/deform_conv_cuda_kernel.cu

#ifndef DEFORM_CONV_CUDA_KERNEL_CUH
#define DEFORM_CONV_CUDA_KERNEL_CUH

#include <float.h>
#ifdef MMCV_WITH_TRT
#include "common_cuda_helper.hpp"
#else  // MMCV_WITH_TRT
#ifdef MMCV_USE_PARROTS
#include "parrots_cuda_helper.hpp"
#else  // MMCV_USE_PARROTS
#include "pytorch_cuda_helper.hpp"
#endif  // MMCV_USE_PARROTS
#endif  // MMCV_WITH_TRT

// Bilinearly samples a 2-D map at the fractional position (h, w).
//
// input      : pointer to the map; element (y, x) is read at
//              input[y * data_width + x].
// data_width : row pitch used when indexing `input`.
// height,
// width      : valid extent of the map; any of the four neighbouring pixels
//              that falls outside [0, height - 1] x [0, width - 1]
//              contributes 0.
// h, w       : sampling position.
//
// Returns 0 when (h, w) is outside the open range (-1, height) x (-1, width),
// otherwise the weighted sum of the (up to) four surrounding pixels.
template <typename T>
__device__ T deformable_im2col_bilinear(const T *input, const int data_width,
                                        const int height, const int width, T h,
                                        T w) {
  if (h <= -1 || height <= h || w <= -1 || width <= w) {
    return 0;
  }

  // Integer corners enclosing the sample point.
  const int row_lo = floorf(h);
  const int col_lo = floorf(w);
  const int row_hi = row_lo + 1;
  const int col_hi = col_lo + 1;

  // Fractional distance to the low corner and its complement, per axis.
  const T frac_h = h - row_lo;
  const T frac_w = w - col_lo;
  const T rest_h = 1 - frac_h;
  const T rest_w = 1 - frac_w;

  // Which corner rows / columns actually lie inside the map.
  const bool row_lo_ok = row_lo >= 0;
  const bool col_lo_ok = col_lo >= 0;
  const bool row_hi_ok = row_hi <= height - 1;
  const bool col_hi_ok = col_hi <= width - 1;

  // Corner values; corners outside the map stay at 0.
  T top_left = 0;
  if (row_lo_ok && col_lo_ok) {
    top_left = input[row_lo * data_width + col_lo];
  }
  T top_right = 0;
  if (row_lo_ok && col_hi_ok) {
    top_right = input[row_lo * data_width + col_hi];
  }
  T bottom_left = 0;
  if (row_hi_ok && col_lo_ok) {
    bottom_left = input[row_hi * data_width + col_lo];
  }
  T bottom_right = 0;
  if (row_hi_ok && col_hi_ok) {
    bottom_right = input[row_hi * data_width + col_hi];
  }

  // Bilinear weights of the four corners.
  const T wt_tl = rest_h * rest_w;
  const T wt_tr = rest_h * frac_w;
  const T wt_bl = frac_h * rest_w;
  const T wt_br = frac_h * frac_w;

  T sampled = (wt_tl * top_left + wt_tr * top_right + wt_bl * bottom_left +
               wt_br * bottom_right);
  return sampled;
}

// Returns the bilinear weight that the integer pixel (h, w) receives from a
// sample taken at the fractional position (argmax_h, argmax_w).
//
// The result is 0 when the sample position is outside the open range
// (-1, height) x (-1, width), or when (h, w) is not one of the four integer
// corners surrounding the sample position.
template <typename T>
__device__ T get_gradient_weight(T argmax_h, T argmax_w, const int h,
                                 const int w, const int height,
                                 const int width) {
  const bool outside = argmax_h <= -1 || argmax_h >= height ||
                       argmax_w <= -1 || argmax_w >= width;
  if (outside) {
    return 0;
  }

  // Integer corners around the sample position.
  const int row_lo = floorf(argmax_h);
  const int col_lo = floorf(argmax_w);
  const int row_hi = row_lo + 1;
  const int col_hi = col_lo + 1;

  const bool on_row_lo = (h == row_lo);
  const bool on_row_hi = (h == row_hi);
  const bool on_col_lo = (w == col_lo);
  const bool on_col_hi = (w == col_hi);

  // At most one of the four cases matches, since lo and hi differ by one.
  T corner_weight = 0;
  if (on_row_lo && on_col_lo) {
    corner_weight = (h + 1 - argmax_h) * (w + 1 - argmax_w);
  }
  if (on_row_lo && on_col_hi) {
    corner_weight = (h + 1 - argmax_h) * (argmax_w + 1 - w);
  }
  if (on_row_hi && on_col_lo) {
    corner_weight = (argmax_h + 1 - h) * (w + 1 - argmax_w);
  }
  if (on_row_hi && on_col_hi) {
    corner_weight = (argmax_h + 1 - h) * (argmax_w + 1 - w);
  }
  return corner_weight;
}

// Returns the partial derivative of a bilinear sample of `im_data` taken at
// (argmax_h, argmax_w) with respect to one of the two coordinates.
//
// im_data    : map being sampled; element (y, x) is read at
//              im_data[y * data_width + x].
// bp_dir     : 0 -> derivative with respect to the h coordinate
//              (high-row corners minus low-row corners),
//              1 -> derivative with respect to the w coordinate
//              (high-column corners minus low-column corners).
//              Any other value yields 0.
//
// Returns 0 when the position is outside the open range
// (-1, height) x (-1, width); corners outside the map are skipped.
template <typename T>
__device__ T get_coordinate_weight(T argmax_h, T argmax_w, const int height,
                                   const int width, const T *im_data,
                                   const int data_width, const int bp_dir) {
  const bool outside = argmax_h <= -1 || argmax_h >= height ||
                       argmax_w <= -1 || argmax_w >= width;
  if (outside) {
    return 0;
  }

  // Integer corners around the sample position.
  const int row_lo = floorf(argmax_h);
  const int col_lo = floorf(argmax_w);
  const int row_hi = row_lo + 1;
  const int col_hi = col_lo + 1;

  // Which corner rows / columns are inside the map.
  const bool row_lo_ok = row_lo >= 0;
  const bool col_lo_ok = col_lo >= 0;
  const bool row_hi_ok = row_hi <= height - 1;
  const bool col_hi_ok = col_hi <= width - 1;

  T grad = 0;

  switch (bp_dir) {
    case 0:
      // d/dh: low-row corners enter negatively, high-row corners positively.
      if (row_lo_ok && col_lo_ok) {
        grad += -1 * (col_lo + 1 - argmax_w) *
                im_data[row_lo * data_width + col_lo];
      }
      if (row_lo_ok && col_hi_ok) {
        grad += -1 * (argmax_w - col_lo) *
                im_data[row_lo * data_width + col_hi];
      }
      if (row_hi_ok && col_lo_ok) {
        grad += (col_lo + 1 - argmax_w) *
                im_data[row_hi * data_width + col_lo];
      }
      if (row_hi_ok && col_hi_ok) {
        grad += (argmax_w - col_lo) * im_data[row_hi * data_width + col_hi];
      }
      break;
    case 1:
      // d/dw: low-column corners enter negatively, high-column positively.
      if (row_lo_ok && col_lo_ok) {
        grad += -1 * (row_lo + 1 - argmax_h) *
                im_data[row_lo * data_width + col_lo];
      }
      if (row_lo_ok && col_hi_ok) {
        grad += (row_lo + 1 - argmax_h) *
                im_data[row_lo * data_width + col_hi];
      }
      if (row_hi_ok && col_lo_ok) {
        grad += -1 * (argmax_h - row_lo) *
                im_data[row_hi * data_width + col_lo];
      }
      if (row_hi_ok && col_hi_ok) {
        grad += (argmax_h - row_lo) * im_data[row_hi * data_width + col_hi];
      }
      break;
    default:
      break;
  }

  return grad;
}

// Deformable im2col: for every (channel, batch, output row, output column)
// handled by the kernel loop, samples the input image at each kernel tap
// position displaced by the learned offset and writes the value into the
// column buffer.
//
// n            : number of flat indices to process.
// data_im      : input image; indexed here as
//                ((b * num_channels + c) * height + y) * width + x.
// data_offset  : offsets; per (batch, deformable group) there are
//                2 * kernel_h * kernel_w planes of size height_col x width_col,
//                with the h-offset plane of a tap followed by its w-offset.
// data_col     : output column buffer; indexed here as
//                ((col_channel * batch_size + b) * height_col + y) * width_col
//                + x, where col_channel = c * kernel_h * kernel_w + tap.
template <typename T>
__global__ void deformable_im2col_gpu_kernel(
    const int n, const T *data_im, const T *data_offset, const int height,
    const int width, const int kernel_h, const int kernel_w, const int pad_h,
    const int pad_w, const int stride_h, const int stride_w,
    const int dilation_h, const int dilation_w,
    const int channel_per_deformable_group, const int batch_size,
    const int num_channels, const int deformable_group, const int height_col,
    const int width_col, T *data_col) {
  CUDA_1D_KERNEL_LOOP(index, n) {
    // Unpack the flat index as (c_im, b_col, h_col, w_col), w_col fastest.
    const int w_col = index % width_col;
    const int above_w = index / width_col;
    const int h_col = above_w % height_col;
    const int above_h = above_w / height_col;
    const int b_col = above_h % batch_size;
    const int c_im = above_h / batch_size;

    // First column-buffer channel belonging to this image channel.
    const int c_col = c_im * kernel_h * kernel_w;

    // Deformable group that owns this image channel.
    const int group_idx = c_im / channel_per_deformable_group;

    // Top-left input coordinate of the receptive field (before offsets).
    const int h_in = h_col * stride_h - pad_h;
    const int w_in = w_col * stride_w - pad_w;

    T *col_out =
        data_col +
        ((c_col * batch_size + b_col) * height_col + h_col) * width_col + w_col;
    const T *im_plane =
        data_im + (b_col * num_channels + c_im) * height * width;
    const T *offset_base =
        data_offset + (b_col * deformable_group + group_idx) * 2 * kernel_h *
                          kernel_w * height_col * width_col;

    // Distance between consecutive column channels for a fixed (b, h, w).
    const int col_channel_stride = batch_size * height_col * width_col;

    for (int ky = 0; ky < kernel_h; ++ky) {
      for (int kx = 0; kx < kernel_w; ++kx) {
        const int tap = ky * kernel_w + kx;
        // Offsets are stored as interleaved (h, w) planes per tap.
        const int off_h_idx =
            ((2 * tap) * height_col + h_col) * width_col + w_col;
        const int off_w_idx =
            ((2 * tap + 1) * height_col + h_col) * width_col + w_col;
        const T offset_h = offset_base[off_h_idx];
        const T offset_w = offset_base[off_w_idx];

        // Fractional sampling position in the input image.
        const T h_im = h_in + ky * dilation_h + offset_h;
        const T w_im = w_in + kx * dilation_w + offset_w;

        T sampled = static_cast<T>(0);
        if (h_im > -1 && w_im > -1 && h_im < height && w_im < width) {
          sampled = deformable_im2col_bilinear(im_plane, width, height, width,
                                               h_im, w_im);
        }
        *col_out = sampled;
        col_out += col_channel_stride;
      }
    }
  }
}

// Deformable col2im: scatters each column-buffer gradient back onto the input
// image gradient `grad_im`, weighting it by the bilinear weight of every
// integer pixel that lies within distance < 1 (per axis) of the sampling
// position.
//
// n           : number of column-buffer entries to process.
// data_col    : gradient with respect to the column buffer; read at
//               data_col[index].
// data_offset : offsets; per (batch, deformable group) there are
//               2 * kernel_h * kernel_w planes of size height_col x width_col.
// grad_im     : accumulated output; indexed here as
//               ((b * channels + c) * height + y) * width + x.
//               NOTE(review): the kernel only adds into grad_im, so the caller
//               presumably zero-initialises it -- confirm at the launch site.
template <typename T>
__global__ void deformable_col2im_gpu_kernel(
    const int n, const T *data_col, const T *data_offset, const int channels,
    const int height, const int width, const int kernel_h, const int kernel_w,
    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
    const int dilation_h, const int dilation_w,
    const int channel_per_deformable_group, const int batch_size,
    const int deformable_group, const int height_col, const int width_col,
    T *grad_im) {
  CUDA_1D_KERNEL_LOOP(index, n) {
    // Unpack the flat index as (c, i, j, b, h_out, w_out), w_out fastest:
    // (i, j) is the kernel tap and c the image channel.
    const int j = (index / width_col / height_col / batch_size) % kernel_w;
    const int i =
        (index / width_col / height_col / batch_size / kernel_w) % kernel_h;
    const int c =
        index / width_col / height_col / batch_size / kernel_w / kernel_h;
    // compute the start and end of the output

    // Deformable group that owns image channel c.
    const int deformable_group_index = c / channel_per_deformable_group;

    int w_out = index % width_col;
    int h_out = (index / width_col) % height_col;
    int b = (index / width_col / height_col) % batch_size;
    // Top-left input coordinate of the receptive field (before offsets).
    int w_in = w_out * stride_w - pad_w;
    int h_in = h_out * stride_h - pad_h;

    // Offsets for this (batch, group); each tap has an h plane followed by a
    // w plane.
    const T *data_offset_ptr =
        data_offset + (b * deformable_group + deformable_group_index) * 2 *
                          kernel_h * kernel_w * height_col * width_col;
    const int data_offset_h_ptr =
        ((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out;
    const int data_offset_w_ptr =
        ((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out;
    const T offset_h = data_offset_ptr[data_offset_h_ptr];
    const T offset_w = data_offset_ptr[data_offset_w_ptr];
    // Fractional position in the input image that this column entry sampled.
    const T cur_inv_h_data = h_in + i * dilation_h + offset_h;
    const T cur_inv_w_data = w_in + j * dilation_w + offset_w;

    const T cur_top_grad = data_col[index];
    // The casts truncate toward zero; the +/-2 search window below covers the
    // integer pixels around the fractional position regardless of its sign.
    const int cur_h = (int)cur_inv_h_data;
    const int cur_w = (int)cur_inv_w_data;
    for (int dy = -2; dy <= 2; dy++) {
      for (int dx = -2; dx <= 2; dx++) {
        // Keep only in-bounds pixels closer than 1 to the sample on both axes.
        if (cur_h + dy >= 0 && cur_h + dy < height && cur_w + dx >= 0 &&
            cur_w + dx < width && abs(cur_inv_h_data - (cur_h + dy)) < 1 &&
            abs(cur_inv_w_data - (cur_w + dx)) < 1) {
          int cur_bottom_grad_pos =
              ((b * channels + c) * height + cur_h + dy) * width + cur_w + dx;
          T weight = get_gradient_weight(cur_inv_h_data, cur_inv_w_data,
                                         cur_h + dy, cur_w + dx, height, width);
          // The destination depends on the offsets, so different column
          // entries can land on the same pixel; accumulate atomically.
          atomicAdd(grad_im + cur_bottom_grad_pos, weight * cur_top_grad);
        }
      }
    }
  }
}

// Computes the gradient with respect to the offsets: each flat index selects
// one offset element (b, offset channel c, h, w) and accumulates, over the
// column channels that use this offset, the column gradient times the
// derivative of the bilinear sample with respect to the offset's coordinate.
//
// n           : number of offset elements to process.
// data_col    : gradient with respect to the column buffer; read at
//               ((col_c * batch_size + b) * height_col + h) * width_col + w
//               relative to the start of the deformable group.
// data_im     : input image that was sampled in the forward pass.
// data_offset : offsets; per (batch, deformable group) there are
//               2 * kernel_h * kernel_w planes of size height_col x width_col.
// grad_offset : output, written (not accumulated) at grad_offset[index].
//
// NOTE(review): data_im_ptr divides channel_per_deformable_group by
// kernel_h * kernel_w, so in this kernel that argument appears to count
// column channels (image channels * kernel_h * kernel_w) -- confirm against
// the launch site.
template <typename T>
__global__ void deformable_col2im_coord_gpu_kernel(
    const int n, const T *data_col, const T *data_im, const T *data_offset,
    const int channels, const int height, const int width, const int kernel_h,
    const int kernel_w, const int pad_h, const int pad_w, const int stride_h,
    const int stride_w, const int dilation_h, const int dilation_w,
    const int channel_per_deformable_group, const int batch_size,
    const int offset_channels, const int deformable_group, const int height_col,
    const int width_col, T *grad_offset) {
  CUDA_1D_KERNEL_LOOP(index, n) {
    T val = 0;
    // Unpack the flat index as (b, c, h, w), w fastest; c is the offset
    // channel.
    int w = index % width_col;
    int h = (index / width_col) % height_col;
    int c = (index / width_col / height_col) % offset_channels;
    int b = (index / width_col / height_col) / offset_channels;
    // compute the start and end of the output

    // Each deformable group owns 2 * kernel_h * kernel_w offset channels.
    const int deformable_group_index = c / (2 * kernel_h * kernel_w);
    // Column channels that share the same kernel tap are col_step apart.
    const int col_step = kernel_h * kernel_w;
    // Counts loop iterations; used below to step through the image channels
    // of the group.
    int cnt = 0;
    const T *data_col_ptr = data_col + deformable_group_index *
                                           channel_per_deformable_group *
                                           batch_size * width_col * height_col;
    const T *data_im_ptr =
        data_im + (b * deformable_group + deformable_group_index) *
                      channel_per_deformable_group / kernel_h / kernel_w *
                      height * width;
    const T *data_offset_ptr =
        data_offset + (b * deformable_group + deformable_group_index) * 2 *
                          kernel_h * kernel_w * height_col * width_col;

    // Offset channel relative to its group: offset_c / 2 is the kernel tap,
    // offset_c % 2 selects the h (0) or w (1) component.
    const int offset_c = c - deformable_group_index * 2 * kernel_h * kernel_w;

    for (int col_c = (offset_c / 2); col_c < channel_per_deformable_group;
         col_c += col_step) {
      const int col_pos =
          (((col_c * batch_size + b) * height_col) + h) * width_col + w;
      const int bp_dir = offset_c % 2;

      // Recover the kernel tap (i, j) and output position from col_pos.
      int j = (col_pos / width_col / height_col / batch_size) % kernel_w;
      int i =
          (col_pos / width_col / height_col / batch_size / kernel_w) % kernel_h;
      int w_out = col_pos % width_col;
      int h_out = (col_pos / width_col) % height_col;
      int w_in = w_out * stride_w - pad_w;
      int h_in = h_out * stride_h - pad_h;
      const int data_offset_h_ptr =
          (((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out);
      const int data_offset_w_ptr =
          (((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col +
           w_out);
      const T offset_h = data_offset_ptr[data_offset_h_ptr];
      const T offset_w = data_offset_ptr[data_offset_w_ptr];
      // Fractional sampling position used in the forward pass.
      T inv_h = h_in + i * dilation_h + offset_h;
      T inv_w = w_in + j * dilation_w + offset_w;
      // Out-of-range samples are moved to (-2, -2), for which
      // get_coordinate_weight returns 0.
      if (inv_h <= -1 || inv_w <= -1 || inv_h >= height || inv_w >= width)
        inv_h = inv_w = -2;
      // cnt selects the image channel plane matching this column channel.
      const T weight = get_coordinate_weight(inv_h, inv_w, height, width,
                                             data_im_ptr + cnt * height * width,
                                             width, bp_dir);
      val += weight * data_col_ptr[col_pos];
      cnt += 1;
    }

    grad_offset[index] = val;
  }
}

#endif  // DEFORM_CONV_CUDA_KERNEL_CUH


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/deform_roi_pool_cuda_kernel.cuh
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef DEFORM_ROI_POOL_CUDA_KERNEL_CUH
#define DEFORM_ROI_POOL_CUDA_KERNEL_CUH

#ifdef MMCV_USE_PARROTS
#include "parrots_cuda_helper.hpp"
#else
#include "pytorch_cuda_helper.hpp"
#endif

// Forward pass of deformable RoI pooling: every flat index is one pooled
// output element (n, c, ph, pw); its value is the average of bilinear samples
// taken on a regular grid inside the (optionally offset-shifted) bin.
//
// nthreads       : number of output elements.
// input          : feature map; indexed here as
//                  ((batch * channels + c) * height + y) * width + x.
// rois           : 5 values per RoI; element 0 is the batch index and
//                  elements 1..4 are read as start_w, start_h, end_w, end_h.
// offset         : optional (may be NULL); per RoI two planes of size
//                  pooled_height x pooled_width, the w plane first.
// output         : pooled result, written at output[index].
// spatial_scale  : factor applied to the RoI coordinates.
// sampling_ratio : samples per bin and axis; when <= 0 the count is
//                  ceil(roi_size / pooled_size) per axis.
// gamma          : scale applied to the offsets.
//
// bilinear_interpolate comes from the included helper header.
template <typename T>
__global__ void deform_roi_pool_forward_cuda_kernel(
    const int nthreads, const T* input, const T* rois, const T* offset,
    T* output, const int pooled_height, const int pooled_width,
    const T spatial_scale, const int sampling_ratio, const T gamma,
    const int channels, const int height, const int width) {
  CUDA_1D_KERNEL_LOOP(index, nthreads) {
    // Unpack the flat index as (n, c, ph, pw), pw fastest.
    const int pw = index % pooled_width;
    const int above_pw = index / pooled_width;
    const int ph = above_pw % pooled_height;
    const int above_ph = above_pw / pooled_height;
    const int c = above_ph % channels;
    const int n = above_ph / channels;

    const T* roi = rois + n * 5;
    const int batch_ind = roi[0];

    // No rounding of the scaled coordinates; this detail is critical.
    T start_w = roi[1] * spatial_scale - 0.5;
    T start_h = roi[2] * spatial_scale - 0.5;
    const T end_w = roi[3] * spatial_scale - 0.5;
    const T end_h = roi[4] * spatial_scale - 0.5;

    const T extent_w = end_w - start_w;
    const T extent_h = end_h - start_h;

    const T bin_h = static_cast<T>(extent_h) / static_cast<T>(pooled_height);
    const T bin_w = static_cast<T>(extent_w) / static_cast<T>(pooled_width);

    const T* feature_plane =
        input + (batch_ind * channels + c) * height * width;

    // Sampling grid inside one bin, used to approximate the integral.
    int grid_h;
    int grid_w;
    if (sampling_ratio > 0) {
      grid_h = sampling_ratio;
      grid_w = sampling_ratio;
    } else {
      grid_h = static_cast<int>(ceilf(extent_h / pooled_height));
      grid_w = static_cast<int>(ceilf(extent_w / pooled_width));
    }

    // Shift the RoI origin by the learned, gamma-scaled offset of this bin.
    if (offset != NULL) {
      const T* bin_offset = offset + n * pooled_width * pooled_height * 2 +
                            ph * pooled_width + pw;
      T shift_w = gamma * extent_w * bin_offset[0];
      T shift_h = gamma * extent_h * bin_offset[pooled_width * pooled_height];
      start_w += shift_w;
      start_h += shift_h;
    }

    // Average pooling over the samples; the divisor is clamped to >= 1.
    const T sample_count = max(grid_h * grid_w, 1);
    T accum = 0.;
    for (int sy = 0; sy < grid_h; sy++) {
      const T y = start_h + ph * bin_h +
                  static_cast<T>(sy + .5f) * bin_h / static_cast<T>(grid_h);
      for (int sx = 0; sx < grid_w; sx++) {
        const T x = start_w + pw * bin_w +
                    static_cast<T>(sx + .5f) * bin_w / static_cast<T>(grid_w);
        T sample = bilinear_interpolate(feature_plane, height, width, y, x,
                                        index);
        accum += sample;
      }
    }
    output[index] = accum / sample_count;
  }
}

// Backward pass of deformable RoI pooling: every flat index is one pooled
// output element (n, c, ph, pw). Its incoming gradient is split evenly over
// the bin's sample points and scattered (a) onto the four input pixels around
// each sample, weighted bilinearly, and (b) when offsets are used, onto the
// two offset components of the bin.
//
// nthreads       : number of pooled output elements.
// grad_output    : gradient of the pooled output, read at grad_output[index].
// input          : feature map from the forward pass; indexed here as
//                  ((batch * channels + c) * height + y) * width + x.
// rois           : 5 values per RoI; element 0 is the batch index and
//                  elements 1..4 are read as start_w, start_h, end_w, end_h.
// offset         : optional (may be NULL); per RoI two planes of size
//                  pooled_height x pooled_width, the w plane first.
// grad_input     : accumulated gradient of `input` (same layout).
// grad_offset    : accumulated gradient of `offset` (same layout); only
//                  touched when offset != NULL.
// spatial_scale, sampling_ratio, gamma : see the uses below.
//
// NOTE(review): both outputs are only added to via atomicAdd, so the caller
// presumably zero-initialises them -- confirm at the launch site.
template <typename T>
__global__ void deform_roi_pool_backward_cuda_kernel(
    const int nthreads, const T* grad_output, const T* input, const T* rois,
    const T* offset, T* grad_input, T* grad_offset, const int pooled_height,
    const int pooled_width, const T spatial_scale, const int sampling_ratio,
    const T gamma, const int channels, const int height, const int width) {
  CUDA_1D_KERNEL_LOOP(index, nthreads) {
    // (n, c, ph, pw) is an element in the pooled output
    int pw = index % pooled_width;
    int ph = (index / pooled_width) % pooled_height;
    int c = (index / pooled_width / pooled_height) % channels;
    int n = index / pooled_width / pooled_height / channels;

    const T* offset_rois = rois + n * 5;
    int roi_batch_ind = offset_rois[0];
    // Feature plane and matching gradient plane for (batch, channel).
    const T* offset_input =
        input + ((roi_batch_ind * channels + c) * height * width);
    T* offset_grad_input =
        grad_input + ((roi_batch_ind * channels + c) * height * width);

    // Do not round the scaled coordinates; this implementation detail is
    // critical
    T roi_start_w = offset_rois[1] * spatial_scale - 0.5;
    T roi_start_h = offset_rois[2] * spatial_scale - 0.5;
    T roi_end_w = offset_rois[3] * spatial_scale - 0.5;
    T roi_end_h = offset_rois[4] * spatial_scale - 0.5;

    T roi_width = roi_end_w - roi_start_w;
    T roi_height = roi_end_h - roi_start_h;

    T bin_size_h = static_cast<T>(roi_height) / static_cast<T>(pooled_height);
    T bin_size_w = static_cast<T>(roi_width) / static_cast<T>(pooled_width);

    // We use roi_bin_grid to sample the grid and mimic integral
    int roi_bin_grid_h =
        (sampling_ratio > 0)
            ? sampling_ratio
            : static_cast<int>(ceilf(roi_height / pooled_height));
    int roi_bin_grid_w =
        (sampling_ratio > 0)
            ? sampling_ratio
            : static_cast<int>(ceilf(roi_width / pooled_width));

    // Compute roi offset: shift the RoI origin by the gamma-scaled offset of
    // this bin (w plane first, h plane pooled_width * pooled_height later).
    if (offset != NULL) {
      const T* offset_cur_w = offset + n * pooled_width * pooled_height * 2 +
                              ph * pooled_width + pw;
      T offset_roi_w = gamma * roi_width * offset_cur_w[0];
      T offset_roi_h =
          gamma * roi_height * offset_cur_w[pooled_width * pooled_height];
      roi_start_w += offset_roi_w;
      roi_start_h += offset_roi_h;
    }

    // We do average (integral) pooling inside a bin
    // Unlike the forward kernel, count is not clamped to >= 1 here; when it
    // is 0 one of the grid sizes is 0, so the loops below do not execute and
    // the quotient is never used.
    const T count = roi_bin_grid_h * roi_bin_grid_w;  // e.g. = 4
    const T grad_output_this_bin = grad_output[index] / count;

    for (int iy = 0; iy < roi_bin_grid_h; iy++) {
      const T y = roi_start_h + ph * bin_size_h +
                  static_cast<T>(iy + .5f) * bin_size_h /
                      static_cast<T>(roi_bin_grid_h);
      for (int ix = 0; ix < roi_bin_grid_w; ix++) {
        const T x = roi_start_w + pw * bin_size_w +
                    static_cast<T>(ix + .5f) * bin_size_w /
                        static_cast<T>(roi_bin_grid_w);

        // Filled in by the helper from the included header.
        // NOTE(review): the >= 0 test below suggests the helper marks
        // out-of-range samples with negative indices -- confirm in the helper.
        T w1, w2, w3, w4;
        int x_low, x_high, y_low, y_high;
        bilinear_interpolate_gradient(height, width, y, x, w1, w2, w3, w4,
                                      x_low, x_high, y_low, y_high, index);

        if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0) {
          // Scatter onto the four surrounding input pixels.
          atomicAdd(offset_grad_input + y_low * width + x_low,
                    grad_output_this_bin * w1);
          atomicAdd(offset_grad_input + y_low * width + x_high,
                    grad_output_this_bin * w2);
          atomicAdd(offset_grad_input + y_high * width + x_low,
                    grad_output_this_bin * w3);
          atomicAdd(offset_grad_input + y_high * width + x_high,
                    grad_output_this_bin * w4);
          if (offset != NULL) {
            // Naming: input_<x-corner><y-corner>, i.e. input_10 is
            // (y_low, x_high) and input_01 is (y_high, x_low).
            T input_00 = offset_input[y_low * width + x_low];
            T input_10 = offset_input[y_low * width + x_high];
            T input_01 = offset_input[y_high * width + x_low];
            T input_11 = offset_input[y_high * width + x_high];
            // ogx: derivative of the bilinear sample with respect to x,
            // scaled by gamma * roi_width (the factor applied to the w
            // offset above).
            T ogx = gamma * roi_width * grad_output_this_bin *
                    (input_11 * (y - y_low) + input_10 * (y_high - y) +
                     input_01 * (y_low - y) + input_00 * (y - y_high));
            // ogy: the same with respect to y, scaled by gamma * roi_height.
            T ogy = gamma * roi_height * grad_output_this_bin *
                    (input_11 * (x - x_low) + input_01 * (x_high - x) +
                     input_10 * (x_low - x) + input_00 * (x - x_high));
            // grad_offset mirrors the offset layout: w plane, then h plane.
            // The target address does not depend on c, so indices that
            // differ only in channel add into the same element.
            atomicAdd(grad_offset + n * pooled_width * pooled_height * 2 +
                          ph * pooled_width + pw,
                      ogx);
            atomicAdd(grad_offset + n * pooled_width * pooled_height * 2 +
                          pooled_width * pooled_height + ph * pooled_width + pw,
                      ogy);
          }
        }
      }
    }
  }
}

#endif  // DEFORM_ROI_POOL_CUDA_KERNEL_CUH


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/furthest_point_sample_cuda_kernel.cuh
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef FURTHEST_POINT_SAMPLE_CUDA_KERNEL_CUH
#define FURTHEST_POINT_SAMPLE_CUDA_KERNEL_CUH

#ifdef MMCV_USE_PARROTS
#include "parrots_cuda_helper.hpp"
#else
#include "pytorch_cuda_helper.hpp"
#endif

// Reduction step for an arg-max over (distance, index) pairs: slot idx1
// receives the larger of the two distances and the index that belongs to it.
// On a tie (or when the comparison is false) slot idx1 keeps its own index.
__device__ void __update(float *__restrict__ dists, int *__restrict__ dists_i,
                         int idx1, int idx2) {
  const float dist_a = dists[idx1];
  const float dist_b = dists[idx2];
  const int index_a = dists_i[idx1];
  const int index_b = dists_i[idx2];

  const bool b_is_farther = dist_b > dist_a;
  dists[idx1] = max(dist_a, dist_b);
  dists_i[idx1] = b_is_farther ? index_b : index_a;
}

// Iterative furthest point sampling on 3-D points. One thread block handles
// the batch element given by blockIdx.x: starting from point 0, it picks
// m indices, each time choosing the point whose running minimum squared
// distance to the already selected points is largest.
//
// block_size : size of the shared reduction buffers and stride of the
//              per-thread loop over points.
//              NOTE(review): presumably equal to blockDim.x and a power of
//              two <= 1024 -- confirm at the launch site.
// b          : batch size (not read in the body; the batch element comes from
//              blockIdx.x).
// n          : number of points per batch element.
// m          : number of indices to select; the kernel returns at once when
//              m <= 0.
// dataset    : (B, N, 3) point coordinates.
// temp       : (B, N) running minimum squared distance, updated in place.
//              NOTE(review): presumably initialised by the caller to a large
//              value -- confirm.
// idxs       : (B, M) output indices.
template <unsigned int block_size>
__global__ void furthest_point_sampling_forward_cuda_kernel(
    int b, int n, int m, const float *__restrict__ dataset,
    float *__restrict__ temp, int *__restrict__ idxs) {
  // dataset: (B, N, 3)
  // tmp: (B, N)
  // output:
  //      idx: (B, M)

  if (m <= 0) return;
  __shared__ float dists[block_size];
  __shared__ int dists_i[block_size];

  // Move all pointers to this block's batch element.
  int batch_index = blockIdx.x;
  dataset += batch_index * n * 3;
  temp += batch_index * n;
  idxs += batch_index * m;

  int tid = threadIdx.x;
  const int stride = block_size;

  // The first selected point is always index 0.
  int old = 0;
  if (threadIdx.x == 0) idxs[0] = old;

  __syncthreads();
  for (int j = 1; j < m; j++) {
    int besti = 0;
    float best = -1;
    // Coordinates of the most recently selected point.
    float x1 = dataset[old * 3 + 0];
    float y1 = dataset[old * 3 + 1];
    float z1 = dataset[old * 3 + 2];
    // Each thread scans its strided subset of points, refreshes their running
    // minimum distance and remembers its local arg-max.
    for (int k = tid; k < n; k += stride) {
      float x2, y2, z2;
      x2 = dataset[k * 3 + 0];
      y2 = dataset[k * 3 + 1];
      z2 = dataset[k * 3 + 2];

      float d =
          (x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1) + (z2 - z1) * (z2 - z1);
      float d2 = min(d, temp[k]);
      temp[k] = d2;
      besti = d2 > best ? k : besti;
      best = d2 > best ? d2 : best;
    }
    dists[tid] = best;
    dists_i[tid] = besti;
    __syncthreads();

    // Tree reduction of the per-thread candidates into slot 0.
#pragma unroll
    for (int block_size_thres = 1024; block_size_thres >= 2;
         block_size_thres >>= 1) {
      const int tid_thres = block_size_thres / 2;
      if (block_size >= block_size_thres && tid < tid_thres) {
        __update(dists, dists_i, tid, tid + tid_thres);
      }
      __syncthreads();
    }

    old = dists_i[0];
    if (tid == 0) idxs[j] = old;

    // Every thread must have read dists_i[0] before any thread overwrites
    // its shared slot in the next iteration; without this barrier a fast
    // thread 0 could clobber slot 0 while other threads still need it.
    __syncthreads();
  }
}

// Modified from
// https://github.com/qiqihaer/3DSSD-pytorch/blob/master/lib/pointnet2/src/sampling_gpu.cu
// Iterative furthest point sampling driven by a precomputed pairwise distance
// matrix. One thread block handles the batch element given by blockIdx.x:
// starting from point 0, it picks m indices, each time choosing the point
// whose running minimum distance to the already selected points is largest.
//
// block_size : size of the shared reduction buffers and stride of the
//              per-thread loop over points.
//              NOTE(review): presumably equal to blockDim.x and a power of
//              two <= 1024 -- confirm at the launch site.
// b          : batch size (not read in the body; the batch element comes from
//              blockIdx.x).
// n          : number of points per batch element.
// m          : number of indices to select; the kernel returns at once when
//              m <= 0.
// dataset    : (B, N, N) distances; entry [i * n + k] is read as the distance
//              between points i and k.
// temp       : (B, N) running minimum distance, updated in place.
//              NOTE(review): presumably initialised by the caller to a large
//              value -- confirm.
// idxs       : (B, M) output indices.
template <unsigned int block_size>
__global__ void furthest_point_sampling_with_dist_forward_cuda_kernel(
    int b, int n, int m, const float *__restrict__ dataset,
    float *__restrict__ temp, int *__restrict__ idxs) {
  // dataset: (B, N, N)
  // tmp: (B, N)
  // output:
  //      idx: (B, M)

  if (m <= 0) return;
  __shared__ float dists[block_size];
  __shared__ int dists_i[block_size];

  // Move all pointers to this block's batch element.
  int batch_index = blockIdx.x;
  dataset += batch_index * n * n;
  temp += batch_index * n;
  idxs += batch_index * m;

  int tid = threadIdx.x;
  const int stride = block_size;

  // The first selected point is always index 0.
  int old = 0;
  if (threadIdx.x == 0) idxs[0] = old;

  __syncthreads();
  for (int j = 1; j < m; j++) {
    int besti = 0;
    float best = -1;
    // Each thread scans its strided subset of points, refreshes their running
    // minimum distance and remembers its local arg-max.
    for (int k = tid; k < n; k += stride) {
      // Distance from the most recently selected point to point k.
      float d = dataset[old * n + k];

      float d2 = min(d, temp[k]);
      temp[k] = d2;
      besti = d2 > best ? k : besti;
      best = d2 > best ? d2 : best;
    }
    dists[tid] = best;
    dists_i[tid] = besti;
    __syncthreads();

    // Tree reduction of the per-thread candidates into slot 0.
#pragma unroll
    for (int block_size_thres = 1024; block_size_thres >= 2;
         block_size_thres >>= 1) {
      const int tid_thres = block_size_thres / 2;
      if (block_size >= block_size_thres && tid < tid_thres) {
        __update(dists, dists_i, tid, tid + tid_thres);
      }
      __syncthreads();
    }

    old = dists_i[0];
    if (tid == 0) idxs[j] = old;

    // Every thread must have read dists_i[0] before any thread overwrites
    // its shared slot in the next iteration; without this barrier a fast
    // thread 0 could clobber slot 0 while other threads still need it.
    __syncthreads();
  }
}

#endif  // FURTHEST_POINT_SAMPLE_CUDA_KERNEL_CUH


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/gather_points_cuda_kernel.cuh
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef GATHER_POINTS_CUDA_KERNEL_CUH
#define GATHER_POINTS_CUDA_KERNEL_CUH

#ifdef MMCV_USE_PARROTS
#include "parrots_cuda_helper.hpp"
#else
#include "pytorch_cuda_helper.hpp"
#endif

#define TOTAL_THREADS 1024

// Gathers points along the last axis: out[b, c, j] = points[b, c, idx[b, j]].
// The batch element and channel come from blockIdx.z / blockIdx.y; the kernel
// loop runs over the m gathered positions.
//
// b, c, n, m : batch size, channels, points per batch element, indices per
//              batch element.
// points     : (B, C, N) input.
// idx        : (B, M) indices into the N axis.
// out        : (B, C, M) output.
template <typename T>
__global__ void gather_points_forward_cuda_kernel(int b, int c, int n, int m,
                                                  const T *points,
                                                  const int *__restrict__ idx,
                                                  T *out) {
  // points: (B, C, N)
  // idx: (B, M)
  // output:
  //      out: (B, C, M)

  const int bs_idx = blockIdx.z;
  const int c_idx = blockIdx.y;
  if (bs_idx >= b || c_idx >= c) return;

  // Base pointers of this (batch, channel) row. They are computed once and
  // never advanced inside the loop, so the addressing stays correct even if
  // the kernel loop executes more than one iteration in the same thread
  // (advancing the arguments in place would accumulate the offsets).
  const T *points_row = points + (bs_idx * c * n + c_idx * n);
  const int *idx_row = idx + bs_idx * m;
  T *out_row = out + (bs_idx * c * m + c_idx * m);

  CUDA_1D_KERNEL_LOOP(pt_idx, m) {
    out_row[pt_idx] = points_row[idx_row[pt_idx]];
  }
}

// Backward pass of the point gather:
// grad_points[b, c, idx[b, j]] += grad_out[b, c, j].
// The batch element and channel come from blockIdx.z / blockIdx.y; the kernel
// loop runs over the m gathered positions.
//
// b, c, n, m  : batch size, channels, points per batch element, indices per
//               batch element.
// grad_out    : (B, C, M) gradient of the gathered output.
// idx         : (B, M) indices into the N axis.
// grad_points : (B, C, N) accumulated gradient. The kernel only adds into it.
//               NOTE(review): presumably zero-initialised by the caller --
//               confirm at the launch site.
template <typename T>
__global__ void gather_points_backward_cuda_kernel(int b, int c, int n, int m,
                                                   const T *grad_out,
                                                   const int *__restrict__ idx,
                                                   T *grad_points) {
  // grad_out: (B, C, M)
  // idx: (B, M)
  // output:
  //      grad_points: (B, C, N)

  const int bs_idx = blockIdx.z;
  const int c_idx = blockIdx.y;
  if (bs_idx >= b || c_idx >= c) return;

  // Base pointers of this (batch, channel) row. They are computed once and
  // never advanced inside the loop, so the addressing stays correct even if
  // the kernel loop executes more than one iteration in the same thread
  // (advancing the arguments in place would accumulate the offsets).
  const T *grad_out_row = grad_out + (bs_idx * c * m + c_idx * m);
  const int *idx_row = idx + bs_idx * m;
  T *grad_points_row = grad_points + (bs_idx * c * n + c_idx * n);

  CUDA_1D_KERNEL_LOOP(pt_idx, m) {
    // Several positions may reference the same source point, hence atomic.
    atomicAdd(grad_points_row + idx_row[pt_idx], grad_out_row[pt_idx]);
  }
}

#endif  // GATHER_POINTS_CUDA_KERNEL_CUH


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/group_points_cuda_kernel.cuh
================================================
// Copyright (c) OpenMMLab. All rights reserved.
// Modified from
// https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/group_points_gpu.cu
#ifndef GROUP_POINTS_CUDA_KERNEL_CUH
#define GROUP_POINTS_CUDA_KERNEL_CUH

#ifdef MMCV_USE_PARROTS
#include "parrots_cuda_helper.hpp"
#else
#include "pytorch_cuda_helper.hpp"
#endif

// Groups points by index:
// out[b, c, p, s] = points[b, c, idx[b, p, s]].
// The batch element and channel come from blockIdx.z / blockIdx.y; the kernel
// loop runs over the npoints * nsample (p, s) pairs.
//
// b, c, n  : batch size, channels, points per batch element.
// npoints,
// nsample  : number of groups and samples per group.
// points   : (B, C, N) input.
// idx      : (B, npoints, nsample) indices into the N axis.
// out      : (B, C, npoints, nsample) output.
template <typename T>
__global__ void group_points_forward_cuda_kernel(int b, int c, int n,
                                                 int npoints, int nsample,
                                                 const T *points,
                                                 const int *__restrict__ idx,
                                                 T *out) {
  // points: (B, C, N)
  // idx: (B, npoints, nsample)
  // output:
  //      out: (B, C, npoints, nsample)
  const int bs_idx = blockIdx.z;
  const int c_idx = blockIdx.y;
  if (bs_idx >= b || c_idx >= c) return;

  CUDA_1D_KERNEL_LOOP(index, npoints * nsample) {
    const int pt_idx = index / nsample;
    const int sample_idx = index % nsample;

    // The position in `idx` is recomputed from scratch every iteration
    // instead of advancing the pointer, so the lookup stays correct even if
    // the kernel loop executes more than one iteration in the same thread.
    const int idx_pos =
        bs_idx * npoints * nsample + pt_idx * nsample + sample_idx;
    const int in_idx = bs_idx * c * n + c_idx * n + idx[idx_pos];
    const int out_idx = bs_idx * c * npoints * nsample +
                        c_idx * npoints * nsample + pt_idx * nsample +
                        sample_idx;

    out[out_idx] = points[in_idx];
  }
}

template <typename T>
__global__ void group_points_backward_cuda_kernel(int b, int c, int n,
                                                  int npoints, int nsample,
                                                  const T *grad_out,
                                                  const int *__restrict__ idx,
                                                  T *grad_points) {
  // Scatters the gradient of every grouped entry back onto the point it was
  // gathered from. Several entries may reference the same point, hence the
  // atomic accumulation.
  //
  // grad_out: (B, C, npoints, nsample)
  // idx: (B, npoints, nsample)
  // output:
  //      grad_points: (B, C, N)
  int bs_idx = blockIdx.z;
  int c_idx = blockIdx.y;
  // The guard does not depend on the loop variable, so check it once.
  if (bs_idx >= b || c_idx >= c) return;

  CUDA_1D_KERNEL_LOOP(index, npoints * nsample) {
    int pt_idx = index / nsample;
    int sample_idx = index % nsample;

    // Use local offsets instead of advancing the `grad_out` / `idx`
    // parameters: mutating them inside the loop body would accumulate the
    // offsets whenever a thread executes the body more than once.
    int grad_out_offset = bs_idx * c * npoints * nsample +
                          c_idx * npoints * nsample + pt_idx * nsample +
                          sample_idx;
    int idx_offset =
        bs_idx * npoints * nsample + pt_idx * nsample + sample_idx;

    atomicAdd(grad_points + bs_idx * c * n + c_idx * n + idx[idx_offset],
              grad_out[grad_out_offset]);
  }
}

#endif  // GROUP_POINTS_CUDA_KERNEL_CUH


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/iou3d_cuda_kernel.cuh
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef IOU3D_CUDA_KERNEL_CUH
#define IOU3D_CUDA_KERNEL_CUH

#ifdef MMCV_USE_PARROTS
#include "parrots_cuda_helper.hpp"
#else
#include "pytorch_cuda_helper.hpp"
#endif

// Block-size constant for the IoU kernels; it is not referenced by the code
// in this header (presumably used by the launch code -- verify at call site).
const int THREADS_PER_BLOCK_IOU3D = 16;
// Number of bits in an unsigned long long: the NMS kernels below store one
// suppression bit per box of a tile in a single unsigned long long word.
const int THREADS_PER_BLOCK_NMS = sizeof(unsigned long long) * 8;
// Small tolerance used to detect a near-zero denominator in intersection()
// and as the lower bound of the IoU denominators.
__device__ const float EPS = 1e-8;

// Single-precision 2D point / vector used by the rotated-box routines.
struct Point {
  float x, y;

  // Default construction leaves both coordinates uninitialized.
  __device__ Point() {}

  // Accepts doubles and narrows them to the float members.
  __device__ Point(double _x, double _y) : x(_x), y(_y) {}

  // Overwrites both coordinates.
  __device__ void set(float _x, float _y) {
    x = _x;
    y = _y;
  }

  // Component-wise sum.
  __device__ Point operator+(const Point &b) const {
    Point sum;
    sum.set(x + b.x, y + b.y);
    return sum;
  }

  // Component-wise difference.
  __device__ Point operator-(const Point &b) const {
    Point diff;
    diff.set(x - b.x, y - b.y);
    return diff;
  }
};

// Returns the 2D cross product (z component) of vectors a and b:
// a.x * b.y - a.y * b.x.
__device__ inline float cross(const Point &a, const Point &b) {
  return a.x * b.y - a.y * b.x;
}

// Returns the 2D cross product of the vectors (p1 - p0) and (p2 - p0).
// The sign tells on which side of the line p0->p1 the point p2 lies.
__device__ inline float cross(const Point &p1, const Point &p2,
                              const Point &p0) {
  return (p1.x - p0.x) * (p2.y - p0.y) - (p2.x - p0.x) * (p1.y - p0.y);
}

// Returns 1 when the axis-aligned bounding rectangles of segment p1-p2 and
// segment q1-q2 overlap (touching counts), 0 otherwise.
__device__ int check_rect_cross(const Point &p1, const Point &p2,
                                const Point &q1, const Point &q2) {
  // Each test is written as a negated "<=" so that comparisons involving NaN
  // are rejected exactly as a plain conjunction of "<=" tests would.
  if (!(min(p1.x, p2.x) <= max(q1.x, q2.x))) return 0;
  if (!(min(q1.x, q2.x) <= max(p1.x, p2.x))) return 0;
  if (!(min(p1.y, p2.y) <= max(q1.y, q2.y))) return 0;
  if (!(min(q1.y, q2.y) <= max(p1.y, p2.y))) return 0;
  return 1;
}

// Returns non-zero when point p lies inside the rotated box, allowing a small
// tolerance (MARGIN) on every side.
__device__ inline int check_in_box2d(const float *box, const Point &p) {
  // params: box (5) [x1, y1, x2, y2, angle]
  const float MARGIN = 1e-5;

  // The box is rotated about the centre of its [x1, y1, x2, y2] extent.
  float center_x = (box[0] + box[2]) / 2;
  float center_y = (box[1] + box[3]) / 2;
  float angle_cos = cos(-box[4]),
        angle_sin =
            sin(-box[4]);  // rotate the point in the opposite direction of box
  // Rotating p by -angle about the centre lets the test below be a plain
  // axis-aligned comparison.
  float rot_x =
      (p.x - center_x) * angle_cos - (p.y - center_y) * angle_sin + center_x;
  float rot_y =
      (p.x - center_x) * angle_sin + (p.y - center_y) * angle_cos + center_y;

  return (rot_x > box[0] - MARGIN && rot_x < box[2] + MARGIN &&
          rot_y > box[1] - MARGIN && rot_y < box[3] + MARGIN);
}

// Computes the intersection of segment p0-p1 with segment q0-q1.
// Returns 1 and writes the crossing point to ans_point when the segments
// cross strictly (both sign tests below are > 0); returns 0 otherwise and
// leaves ans_point untouched.
__device__ inline int intersection(const Point &p1, const Point &p0,
                                   const Point &q1, const Point &q0,
                                   Point &ans_point) {
  // fast exclusion
  if (check_rect_cross(p0, p1, q0, q1) == 0) return 0;

  // check cross standing
  float s1 = cross(q0, p1, p0);
  float s2 = cross(p1, q1, p0);
  float s3 = cross(p0, q1, q0);
  float s4 = cross(q1, p1, q0);

  // Products that are zero or negative (touching, collinear or
  // non-crossing configurations) are reported as "no intersection".
  if (!(s1 * s2 > 0 && s3 * s4 > 0)) return 0;

  // calculate intersection of two lines
  float s5 = cross(q1, p1, p0);
  if (fabs(s5 - s1) > EPS) {
    // Weighted combination of q0 and q1 using the two cross products.
    ans_point.x = (s5 * q0.x - s1 * q1.x) / (s5 - s1);
    ans_point.y = (s5 * q0.y - s1 * q1.y) / (s5 - s1);

  } else {
    // Fallback when s5 - s1 is too small: solve the two line equations
    // a*x + b*y + c = 0 directly.
    // NOTE(review): D is not checked against zero here.
    float a0 = p0.y - p1.y, b0 = p1.x - p0.x, c0 = p0.x * p1.y - p1.x * p0.y;
    float a1 = q0.y - q1.y, b1 = q1.x - q0.x, c1 = q0.x * q1.y - q1.x * q0.y;
    float D = a0 * b1 - a1 * b0;

    ans_point.x = (b0 * c1 - b1 * c0) / D;
    ans_point.y = (a1 * c0 - a0 * c1) / D;
  }

  return 1;
}

// Rotates point p in place about `center`, using the precomputed cosine and
// sine of the rotation angle.
__device__ inline void rotate_around_center(const Point &center,
                                            const float angle_cos,
                                            const float angle_sin, Point &p) {
  float new_x =
      (p.x - center.x) * angle_cos - (p.y - center.y) * angle_sin + center.x;
  float new_y =
      (p.x - center.x) * angle_sin + (p.y - center.y) * angle_cos + center.y;
  // Both coordinates are computed from the old p before either is written.
  p.set(new_x, new_y);
}

// Returns non-zero when the polar angle of a about `center` is strictly
// greater than that of b; used to order polygon vertices by angle.
__device__ inline int point_cmp(const Point &a, const Point &b,
                                const Point &center) {
  return atan2(a.y - center.y, a.x - center.x) >
         atan2(b.y - center.y, b.x - center.x);
}

// Returns the area of the intersection of two rotated boxes.
// The intersection polygon is built from the edge/edge crossing points plus
// every corner of one box that lies inside the other, its vertices are sorted
// by angle about their mean, and the area is summed as a triangle fan.
__device__ inline float box_overlap(const float *box_a, const float *box_b) {
  // params: box_a (5) [x1, y1, x2, y2, angle]
  // params: box_b (5) [x1, y1, x2, y2, angle]

  float a_x1 = box_a[0], a_y1 = box_a[1], a_x2 = box_a[2], a_y2 = box_a[3],
        a_angle = box_a[4];
  float b_x1 = box_b[0], b_y1 = box_b[1], b_x2 = box_b[2], b_y2 = box_b[3],
        b_angle = box_b[4];

  Point center_a((a_x1 + a_x2) / 2, (a_y1 + a_y2) / 2);
  Point center_b((b_x1 + b_x2) / 2, (b_y1 + b_y2) / 2);

  // Five slots: four corners plus a copy of the first one (set below) so that
  // edge i can always be addressed as corners[i] -> corners[i + 1].
  Point box_a_corners[5];
  box_a_corners[0].set(a_x1, a_y1);
  box_a_corners[1].set(a_x2, a_y1);
  box_a_corners[2].set(a_x2, a_y2);
  box_a_corners[3].set(a_x1, a_y2);

  Point box_b_corners[5];
  box_b_corners[0].set(b_x1, b_y1);
  box_b_corners[1].set(b_x2, b_y1);
  box_b_corners[2].set(b_x2, b_y2);
  box_b_corners[3].set(b_x1, b_y2);

  // get oriented corners
  float a_angle_cos = cos(a_angle), a_angle_sin = sin(a_angle);
  float b_angle_cos = cos(b_angle), b_angle_sin = sin(b_angle);

  for (int k = 0; k < 4; k++) {
    rotate_around_center(center_a, a_angle_cos, a_angle_sin, box_a_corners[k]);
    rotate_around_center(center_b, b_angle_cos, b_angle_sin, box_b_corners[k]);
  }

  box_a_corners[4] = box_a_corners[0];
  box_b_corners[4] = box_b_corners[0];

  // get intersection of lines
  // NOTE(review): cross_points holds 16 entries; the loops below do not
  // bound-check cnt against that capacity.
  Point cross_points[16];
  Point poly_center;
  int cnt = 0, flag = 0;

  poly_center.set(0, 0);
  for (int i = 0; i < 4; i++) {
    for (int j = 0; j < 4; j++) {
      // intersection() writes into cross_points[cnt] only when it returns 1,
      // so the slot is kept only in that case.
      flag = intersection(box_a_corners[i + 1], box_a_corners[i],
                          box_b_corners[j + 1], box_b_corners[j],
                          cross_points[cnt]);
      if (flag) {
        poly_center = poly_center + cross_points[cnt];
        cnt++;
      }
    }
  }

  // check corners
  for (int k = 0; k < 4; k++) {
    if (check_in_box2d(box_a, box_b_corners[k])) {
      poly_center = poly_center + box_b_corners[k];
      cross_points[cnt] = box_b_corners[k];
      cnt++;
    }
    if (check_in_box2d(box_b, box_a_corners[k])) {
      poly_center = poly_center + box_a_corners[k];
      cross_points[cnt] = box_a_corners[k];
      cnt++;
    }
  }

  // Mean of the collected vertices.
  // NOTE(review): when cnt == 0 this divides 0 by 0; the result is unused in
  // that case because the sort and area loops below do not execute.
  poly_center.x /= cnt;
  poly_center.y /= cnt;

  // sort the points of polygon
  // (bubble sort by polar angle about poly_center, see point_cmp)
  Point temp;
  for (int j = 0; j < cnt - 1; j++) {
    for (int i = 0; i < cnt - j - 1; i++) {
      if (point_cmp(cross_points[i], cross_points[i + 1], poly_center)) {
        temp = cross_points[i];
        cross_points[i] = cross_points[i + 1];
        cross_points[i + 1] = temp;
      }
    }
  }

  // get the overlap areas
  // Triangle fan anchored at cross_points[0]; each cross product is twice the
  // signed area of one triangle.
  float area = 0;
  for (int k = 0; k < cnt - 1; k++) {
    area += cross(cross_points[k] - cross_points[0],
                  cross_points[k + 1] - cross_points[0]);
  }

  return fabs(area) / 2.0;
}

// Returns the intersection-over-union of two rotated boxes.
__device__ inline float iou_bev(const float *box_a, const float *box_b) {
  // params: box_a (5) [x1, y1, x2, y2, angle]
  // params: box_b (5) [x1, y1, x2, y2, angle]
  // Box areas are taken from the [x1, y1, x2, y2] extents.
  float sa = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1]);
  float sb = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1]);
  float s_overlap = box_overlap(box_a, box_b);
  // The union is clamped to at least EPS to avoid dividing by zero.
  return s_overlap / fmaxf(sa + sb - s_overlap, EPS);
}

// Fills ans_overlap (row-major, num_a x num_b) with the overlap area of every
// pair (box a_idx of boxes_a, box b_idx of boxes_b). Each box occupies five
// consecutive floats.
__global__ void iou3d_boxes_overlap_bev_forward_cuda_kernel(
    const int num_a, const float *boxes_a, const int num_b,
    const float *boxes_b, float *ans_overlap) {
  CUDA_2D_KERNEL_LOOP(b_idx, num_b, a_idx, num_a) {
    // Defensive bounds check on both indices.
    if (!(a_idx < num_a && b_idx < num_b)) {
      return;
    }
    ans_overlap[a_idx * num_b + b_idx] =
        box_overlap(boxes_a + a_idx * 5, boxes_b + b_idx * 5);
  }
}

// Fills ans_iou (row-major, num_a x num_b) with the IoU of every pair
// (box a_idx of boxes_a, box b_idx of boxes_b). Each box occupies five
// consecutive floats.
__global__ void iou3d_boxes_iou_bev_forward_cuda_kernel(const int num_a,
                                                        const float *boxes_a,
                                                        const int num_b,
                                                        const float *boxes_b,
                                                        float *ans_iou) {
  CUDA_2D_KERNEL_LOOP(b_idx, num_b, a_idx, num_a) {
    // Defensive bounds check on both indices.
    if (!(a_idx < num_a && b_idx < num_b)) {
      return;
    }

    ans_iou[a_idx * num_b + b_idx] =
        iou_bev(boxes_a + a_idx * 5, boxes_b + b_idx * 5);
  }
}

// Builds the pairwise suppression bitmask for rotated-box NMS.
// Boxes are processed in tiles of THREADS_PER_BLOCK_NMS. For a (row tile,
// column tile) pair, thread t handles row box t and sets bit i of its output
// word when its IoU with column box i exceeds nms_overlap_thresh.
__global__ void nms_forward_cuda_kernel(const int boxes_num,
                                        const float nms_overlap_thresh,
                                        const float *boxes,
                                        unsigned long long *mask) {
  // params: boxes (N, 5) [x1, y1, x2, y2, ry]
  // params: mask (N, N/THREADS_PER_BLOCK_NMS)
  const int blocks =
      (boxes_num + THREADS_PER_BLOCK_NMS - 1) / THREADS_PER_BLOCK_NMS;
  CUDA_2D_KERNEL_BLOCK_LOOP(col_start, blocks, row_start, blocks) {
    // if (row_start > col_start) return;

    // The last tile in each direction may be only partially filled.
    const int row_size = fminf(boxes_num - row_start * THREADS_PER_BLOCK_NMS,
                               THREADS_PER_BLOCK_NMS);
    const int col_size = fminf(boxes_num - col_start * THREADS_PER_BLOCK_NMS,
                               THREADS_PER_BLOCK_NMS);

    // Tile of column boxes shared by all threads of the block.
    __shared__ float block_boxes[THREADS_PER_BLOCK_NMS * 5];

    if (threadIdx.x < col_size) {
      const float *src =
          boxes + (THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 5;
      for (int k = 0; k < 5; k++) {
        block_boxes[threadIdx.x * 5 + k] = src[k];
      }
    }
    // Make the whole tile visible before any thread reads it.
    __syncthreads();

    if (threadIdx.x < row_size) {
      const int cur_box_idx = THREADS_PER_BLOCK_NMS * row_start + threadIdx.x;
      const float *cur_box = boxes + cur_box_idx * 5;

      unsigned long long t = 0;
      // On the diagonal tile only compare against boxes that come after the
      // current one (skips self-comparison and mirrored pairs).
      int start = 0;
      if (row_start == col_start) {
        start = threadIdx.x + 1;
      }
      for (int i = start; i < col_size; i++) {
        if (iou_bev(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
          t |= 1ULL << i;
        }
      }
      // One word per (box, column tile); `blocks` is the number of column
      // tiles.
      mask[cur_box_idx * blocks + col_start] = t;
    }

    // Wait until every thread has finished reading block_boxes before the
    // next loop iteration overwrites the shared tile. Without this barrier a
    // fast thread could start loading the next tile while a slower thread is
    // still computing IoUs against the current one.
    __syncthreads();
  }
}

// Returns the IoU of two axis-aligned boxes given as [x1, y1, x2, y2, ...];
// only the first four values of each box are read.
__device__ inline float iou_normal(float const *const a, float const *const b) {
  // Intersection rectangle; width/height are clamped to 0 when the boxes do
  // not overlap.
  float left = fmaxf(a[0], b[0]), right = fminf(a[2], b[2]);
  float top = fmaxf(a[1], b[1]), bottom = fminf(a[3], b[3]);
  float width = fmaxf(right - left, 0.f), height = fmaxf(bottom - top, 0.f);
  float interS = width * height;
  float Sa = (a[2] - a[0]) * (a[3] - a[1]);
  float Sb = (b[2] - b[0]) * (b[3] - b[1]);
  // The union is clamped to at least EPS to avoid dividing by zero.
  return interS / fmaxf(Sa + Sb - interS, EPS);
}

// Builds the pairwise suppression bitmask for axis-aligned NMS.
// Same tiling scheme as nms_forward_cuda_kernel, but the overlap test uses
// iou_normal (the rotation value of each box is loaded but not used by it).
__global__ void nms_normal_forward_cuda_kernel(const int boxes_num,
                                               const float nms_overlap_thresh,
                                               const float *boxes,
                                               unsigned long long *mask) {
  // params: boxes (N, 5) [x1, y1, x2, y2, ry]
  // params: mask (N, N/THREADS_PER_BLOCK_NMS)

  const int blocks =
      (boxes_num + THREADS_PER_BLOCK_NMS - 1) / THREADS_PER_BLOCK_NMS;
  CUDA_2D_KERNEL_BLOCK_LOOP(col_start, blocks, row_start, blocks) {
    // if (row_start > col_start) return;

    // The last tile in each direction may be only partially filled.
    const int row_size = fminf(boxes_num - row_start * THREADS_PER_BLOCK_NMS,
                               THREADS_PER_BLOCK_NMS);
    const int col_size = fminf(boxes_num - col_start * THREADS_PER_BLOCK_NMS,
                               THREADS_PER_BLOCK_NMS);

    // Tile of column boxes shared by all threads of the block.
    __shared__ float block_boxes[THREADS_PER_BLOCK_NMS * 5];

    if (threadIdx.x < col_size) {
      const float *src =
          boxes + (THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 5;
      for (int k = 0; k < 5; k++) {
        block_boxes[threadIdx.x * 5 + k] = src[k];
      }
    }
    // Make the whole tile visible before any thread reads it.
    __syncthreads();

    if (threadIdx.x < row_size) {
      const int cur_box_idx = THREADS_PER_BLOCK_NMS * row_start + threadIdx.x;
      const float *cur_box = boxes + cur_box_idx * 5;

      unsigned long long t = 0;
      // On the diagonal tile only compare against boxes that come after the
      // current one (skips self-comparison and mirrored pairs).
      int start = 0;
      if (row_start == col_start) {
        start = threadIdx.x + 1;
      }
      for (int i = start; i < col_size; i++) {
        if (iou_normal(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
          t |= 1ULL << i;
        }
      }
      // One word per (box, column tile); `blocks` is the number of column
      // tiles.
      mask[cur_box_idx * blocks + col_start] = t;
    }

    // Wait until every thread has finished reading block_boxes before the
    // next loop iteration overwrites the shared tile. Without this barrier a
    // fast thread could start loading the next tile while a slower thread is
    // still computing IoUs against the current one.
    __syncthreads();
  }
}

#endif  // IOU3D_CUDA_KERNEL_CUH


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/knn_cuda_kernel.cuh
================================================
// Copyright (c) OpenMMLab. All rights reserved
// Modified from
// https://github.com/CVMI-Lab/PAConv/tree/main/scene_seg/lib/pointops/src/knnquery_heap
#ifndef KNN_CUDA_KERNEL_CUH
#define KNN_CUDA_KERNEL_CUH

#ifdef MMCV_USE_PARROTS
#include "parrots_cuda_helper.hpp"
#else
#include "pytorch_cuda_helper.hpp"
#endif

// Exchanges the two floats pointed to by x and y.
inline __device__ void swap_float(float *x, float *y) {
  const float held = *y;
  *y = *x;
  *x = held;
}

// Exchanges the two ints pointed to by x and y.
inline __device__ void swap_int(int *x, int *y) {
  const int held = *y;
  *y = *x;
  *x = held;
}

// Sifts the root of a max-heap of size k down to its place. `dist` holds the
// keys and `idx` the payload that travels with each key.
__device__ void reheap(float *dist, int *idx, int k) {
  for (int parent = 0, kid = 1; kid < k; parent = kid, kid = parent * 2 + 1) {
    // Pick the larger of the two children (the right one only if it exists).
    const int right = kid + 1;
    if (right < k && dist[right] > dist[kid]) kid = right;
    // Heap property already holds: nothing more to do.
    if (dist[parent] > dist[kid]) return;
    swap_float(dist + parent, dist + kid);
    swap_int(idx + parent, idx + kid);
  }
}

// Sorts a max-heap of size k in place into ascending key order, keeping idx
// aligned with dist.
__device__ void heap_sort(float *dist, int *idx, int k) {
  int last = k - 1;
  while (last > 0) {
    // Move the current maximum to the end of the unsorted prefix ...
    swap_float(dist, dist + last);
    swap_int(idx, idx + last);
    // ... and restore the heap on the remaining `last` elements.
    reheap(dist, idx, last);
    last--;
  }
}

// input: xyz (b, n, 3) new_xyz (b, m, 3)
// output: idx (b, m, nsample) dist2 (b, m, nsample)
template <typename T>
__global__ void knn_forward_cuda_kernel(int b, int n, int m, int nsample,
                                        const T *xyz, const T *new_xyz,
                                        int *__restrict__ idx, T *dist2) {
  // For every query point of new_xyz, scans all n points of the same batch in
  // xyz and keeps the nsample smallest squared distances in a max-heap. The
  // results are written in ascending distance order to idx / dist2.
  //
  // blockIdx.y selects the batch; the loop macro walks the m query points.
  //
  // NOTE: the per-thread scratch arrays hold 100 entries, so nsample must not
  // exceed 100; this kernel does not check it.
  int bs_idx = blockIdx.y;
  // The guard does not depend on the loop variable, so check it once.
  if (bs_idx >= b) return;

  // Use local pointers instead of advancing the kernel parameters: mutating
  // them inside the loop body would accumulate the offsets whenever a thread
  // executes the body more than once.
  const T *batch_xyz = xyz + bs_idx * n * 3;

  CUDA_1D_KERNEL_LOOP(pt_idx, m) {
    const T *query = new_xyz + bs_idx * m * 3 + pt_idx * 3;
    int *out_idx = idx + bs_idx * m * nsample + pt_idx * nsample;
    T *out_dist2 = dist2 + bs_idx * m * nsample + pt_idx * nsample;

    T new_x = query[0];
    T new_y = query[1];
    T new_z = query[2];

    float best_dist[100];
    int best_idx[100];
    // Start with a heap full of "infinitely far" placeholders.
    for (int i = 0; i < nsample; i++) {
      best_dist[i] = 1e10;
      best_idx[i] = 0;
    }
    for (int i = 0; i < n; i++) {
      T x = batch_xyz[i * 3 + 0];
      T y = batch_xyz[i * 3 + 1];
      T z = batch_xyz[i * 3 + 2];
      T d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) +
             (new_z - z) * (new_z - z);
      // The heap root is the current worst candidate; replace it when a
      // closer point is found and restore the heap.
      if (d2 < best_dist[0]) {
        best_dist[0] = d2;
        best_idx[0] = i;
        reheap(best_dist, best_idx, nsample);
      }
    }
    heap_sort(best_dist, best_idx, nsample);
    for (int i = 0; i < nsample; i++) {
      out_idx[i] = best_idx[i];
      out_dist2[i] = best_dist[i];
    }
  }
}

#endif  // KNN_CUDA_KERNEL_CUH


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/masked_conv2d_cuda_kernel.cuh
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef MASKED_CONV2D_CUDA_KERNEL_CUH
#define MASKED_CONV2D_CUDA_KERNEL_CUH

#ifdef MMCV_USE_PARROTS
#include "parrots_cuda_helper.hpp"
#else
#include "pytorch_cuda_helper.hpp"
#endif

// im2col restricted to masked positions: for every (channel, masked position)
// pair, copies the kernel_h x kernel_w window around the position into
// data_col, writing zeros for taps that fall outside the image.
template <typename scalar_t>
__global__ void MaskedIm2colForward(const int n, const scalar_t *data_im,
                                    const int height, const int width,
                                    const int kernel_h, const int kernel_w,
                                    const int pad_h, const int pad_w,
                                    const int64_t *mask_h_idx,
                                    const int64_t *mask_w_idx,
                                    const int mask_cnt, scalar_t *data_col) {
  // mask_cnt * channels
  CUDA_1D_KERNEL_LOOP(index, n) {
    // index enumerates (channel, masked position) pairs, position fastest.
    const int c_im = index / mask_cnt;
    const int m_index = index % mask_cnt;
    const int h_col = mask_h_idx[m_index];
    const int w_col = mask_w_idx[m_index];
    const int c_col = c_im * kernel_h * kernel_w;
    // Top-left corner of the window in image coordinates.
    const int h_offset = h_col - pad_h;
    const int w_offset = w_col - pad_w;

    // Consecutive window taps are mask_cnt elements apart in data_col.
    scalar_t *dst = data_col + c_col * mask_cnt + m_index;
    for (int i = 0; i < kernel_h; ++i) {
      const int h_im = h_offset + i;
      const bool row_inside = h_im >= 0 && h_im < height;
      for (int j = 0; j < kernel_w; ++j, dst += mask_cnt) {
        const int w_im = w_offset + j;
        if (row_inside && w_im >= 0 && w_im < width) {
          *dst = (scalar_t)data_im[(c_im * height + h_im) * width + w_im];
        } else {
          *dst = 0.0;
        }
      }
    }
  }
}

// Scatters data_col back into the image: element `index` is written to the
// masked position index % mask_cnt of channel index / mask_cnt. The
// `channels` parameter is not read by the body.
template <typename scalar_t>
__global__ void MaskedCol2imForward(const int n, const scalar_t *data_col,
                                    const int height, const int width,
                                    const int channels,
                                    const int64_t *mask_h_idx,
                                    const int64_t *mask_w_idx,
                                    const int mask_cnt, scalar_t *data_im) {
  CUDA_1D_KERNEL_LOOP(index, n) {
    const int c_im = index / mask_cnt;
    const int m_index = index % mask_cnt;
    // Target pixel of this masked position.
    const int h_im = mask_h_idx[m_index];
    const int w_im = mask_w_idx[m_index];
    scalar_t *dst = data_im + (c_im * height + h_im) * width + w_im;
    *dst = data_col[index];
  }
}

#endif  // MASKED_CONV2D_CUDA_KERNEL_CUH


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/min_area_polygons_cuda.cuh
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef MIN_AREA_POLYGONS_CUDA_KERNEL_CUH
#define MIN_AREA_POLYGONS_CUDA_KERNEL_CUH

#ifdef MMCV_USE_PARROTS
#include "parrots_cuda_helper.hpp"
#else
#include "pytorch_cuda_helper.hpp"
#endif

// Capacity of the fixed-size per-thread point/edge/angle arrays used below.
#define MAXN 20
// Single-precision approximation of pi used for the angle reduction in
// minBoundingRect.
__device__ const float PI = 3.1415926;

// Single-precision 2D point used by the convex-hull / min-area-box routines.
struct Point {
  float x, y;
  // Default construction leaves both coordinates uninitialized.
  __device__ Point() {}
  // Builds a point from explicit coordinates.
  __device__ Point(float px, float py) : x(px), y(py) {}
};

// Exchanges the coordinates of the two points pointed to by a and b.
__device__ inline void swap1(Point *a, Point *b) {
  const float held_x = a->x;
  const float held_y = a->y;

  a->x = b->x;
  a->y = b->y;

  b->x = held_x;
  b->y = held_y;
}
// Returns the 2D cross product of the vectors (a - o) and (b - o).
__device__ inline float cross(Point o, Point a, Point b) {
  return (a.x - o.x) * (b.y - o.y) - (b.x - o.x) * (a.y - o.y);
}

// Returns the squared Euclidean distance between a and b (no square root).
__device__ inline float dis(Point a, Point b) {
  return (a.x - b.x) * (a.x - b.x) + (a.y - b.y) * (a.y - b.y);
}
// Searches for the minimum-area bounding rectangle of the polyline ps.
// Every distinct edge direction is tried as the rectangle orientation, and
// the smallest axis-aligned box found in a rotated frame is kept.
//
// ps:       n_points points; consecutive points form the edges
//           (n_points - 1 edges).
// minbox:   output, 5 floats [angle, xmin, ymin, xmax, ymax], where the
//           extents are expressed in the frame rotated by `angle`.
//
// NOTE(review): all scratch arrays hold MAXN entries; n_points is not checked
// against MAXN here.
__device__ inline void minBoundingRect(Point *ps, int n_points, float *minbox) {
  // Row 0 holds the x coordinates, row 1 the y coordinates.
  float convex_points[2][MAXN];
  for (int j = 0; j < n_points; j++) {
    convex_points[0][j] = ps[j].x;
  }
  for (int j = 0; j < n_points; j++) {
    convex_points[1][j] = ps[j].y;
  }

  Point edges[MAXN];
  float edges_angles[MAXN];
  float unique_angles[MAXN];
  int n_edges = n_points - 1;
  int n_unique = 0;
  int unique_flag = 0;

  // Edge vectors between consecutive points.
  for (int i = 0; i < n_edges; i++) {
    edges[i].x = ps[i + 1].x - ps[i].x;
    edges[i].y = ps[i + 1].y - ps[i].y;
  }
  // Edge directions, reduced modulo PI / 2 (a rectangle aligned with an angle
  // is also aligned with that angle plus any multiple of PI / 2).
  for (int i = 0; i < n_edges; i++) {
    edges_angles[i] = atan2((double)edges[i].y, (double)edges[i].x);
    if (edges_angles[i] >= 0) {
      edges_angles[i] = fmod((double)edges_angles[i], (double)PI / 2);
    } else {
      // Negative angles are shifted up by a whole number of quarter turns.
      edges_angles[i] =
          edges_angles[i] - (int)(edges_angles[i] / (PI / 2) - 1) * (PI / 2);
    }
  }
  // Collect the distinct angles (exact floating-point equality).
  // NOTE(review): edges_angles[0] is read even when n_edges is 0.
  unique_angles[0] = edges_angles[0];
  n_unique += 1;
  for (int i = 1; i < n_edges; i++) {
    for (int j = 0; j < n_unique; j++) {
      if (edges_angles[i] == unique_angles[j]) {
        unique_flag += 1;
      }
    }
    if (unique_flag == 0) {
      unique_angles[n_unique] = edges_angles[i];
      n_unique += 1;
      unique_flag = 0;
    } else {
      unique_flag = 0;
    }
  }

  float minarea = 1e12;
  for (int i = 0; i < n_unique; i++) {
    // Rotation matrix for the candidate angle.
    float R[2][2];
    float rot_points[2][MAXN];
    R[0][0] = cos(unique_angles[i]);
    R[0][1] = sin(unique_angles[i]);
    R[1][0] = -sin(unique_angles[i]);
    R[1][1] = cos(unique_angles[i]);
    // R x Points
    for (int m = 0; m < 2; m++) {
      for (int n = 0; n < n_points; n++) {
        float sum = 0.0;
        for (int k = 0; k < 2; k++) {
          sum = sum + R[m][k] * convex_points[k][n];
        }
        rot_points[m][n] = sum;
      }
    }

    // Axis-aligned extents of the rotated points; inf / NaN coordinates are
    // ignored.
    // xmin;
    float xmin, ymin, xmax, ymax;
    xmin = 1e12;
    for (int j = 0; j < n_points; j++) {
      if (isinf(rot_points[0][j]) || isnan(rot_points[0][j])) {
        continue;
      } else {
        if (rot_points[0][j] < xmin) {
          xmin = rot_points[0][j];
        }
      }
    }
    // ymin
    ymin = 1e12;
    for (int j = 0; j < n_points; j++) {
      if (isinf(rot_points[1][j]) || isnan(rot_points[1][j])) {
        continue;
      } else {
        if (rot_points[1][j] < ymin) {
          ymin = rot_points[1][j];
        }
      }
    }
    // xmax
    xmax = -1e12;
    for (int j = 0; j < n_points; j++) {
      if (isinf(rot_points[0][j]) || isnan(rot_points[0][j])) {
        continue;
      } else {
        if (rot_points[0][j] > xmax) {
          xmax = rot_points[0][j];
        }
      }
    }
    // ymax
    ymax = -1e12;
    for (int j = 0; j < n_points; j++) {
      if (isinf(rot_points[1][j]) || isnan(rot_points[1][j])) {
        continue;
      } else {
        if (rot_points[1][j] > ymax) {
          ymax = rot_points[1][j];
        }
      }
    }
    // Keep the orientation with the smallest box area so far.
    float area = (xmax - xmin) * (ymax - ymin);
    if (area < minarea) {
      minarea = area;
      minbox[0] = unique_angles[i];
      minbox[1] = xmin;
      minbox[2] = ymin;
      minbox[3] = xmax;
      minbox[4] = ymax;
    }
  }
}

// convex_find
// Convex hull by gift wrapping, computed in place.
// in_poly: on entry the n_poly input points; on exit the hull vertices.
// n_poly:  on entry the number of input points; on exit the number of hull
//          vertices written (top1 + top2).
//
// The hull is built as two chains from the lowest point (in_poly[0] after the
// first loop) to the highest point p_max: one chain selected with sign > 0,
// the other with sign < 0. The chains are then concatenated.
//
// NOTE(review): right_point / left_point hold 10 entries and Stack 20; none
// of the loops check these capacities.
__device__ inline void Jarvis(Point *in_poly, int &n_poly) {
  // NOTE(review): n_input and input_poly are filled here but never read
  // afterwards in this function.
  int n_input = n_poly;
  Point input_poly[20];
  for (int i = 0; i < n_input; i++) {
    input_poly[i].x = in_poly[i].x;
    input_poly[i].y = in_poly[i].y;
  }
  Point p_max, p_k;
  int max_index, k_index;
  int Stack[20], top1, top2;
  // float sign;
  double sign;
  Point right_point[10], left_point[10];

  // Single pass that (a) swaps the lowest point (ties: smaller x) into
  // in_poly[0] and (b) tracks the highest point (ties: larger x) as p_max.
  for (int i = 0; i < n_poly; i++) {
    if (in_poly[i].y < in_poly[0].y ||
        in_poly[i].y == in_poly[0].y && in_poly[i].x < in_poly[0].x) {
      Point *j = &(in_poly[0]);
      Point *k = &(in_poly[i]);
      swap1(j, k);
    }
    if (i == 0) {
      p_max = in_poly[0];
      max_index = 0;
    }
    if (in_poly[i].y > p_max.y ||
        in_poly[i].y == p_max.y && in_poly[i].x > p_max.x) {
      p_max = in_poly[i];
      max_index = i;
    }
  }
  // If the top point ended up at index 0, use index 1 as the chain target so
  // the loops below have a target different from the start.
  if (max_index == 0) {
    max_index = 1;
    p_max = in_poly[max_index];
  }

  // First chain: starting from in_poly[0], repeatedly pick the point for
  // which the cross product is positive (ties: the farther one) until p_max
  // is reached. Stack records the chosen indices.
  k_index = 0, Stack[0] = 0, top1 = 0;
  while (k_index != max_index) {
    p_k = p_max;
    k_index = max_index;
    for (int i = 1; i < n_poly; i++) {
      sign = cross(in_poly[Stack[top1]], in_poly[i], p_k);
      if ((sign > 0) || ((sign == 0) && (dis(in_poly[Stack[top1]], in_poly[i]) >
                                         dis(in_poly[Stack[top1]], p_k)))) {
        p_k = in_poly[i];
        k_index = i;
      }
    }
    top1++;
    Stack[top1] = k_index;
  }

  // Save the first chain (start point through p_max) before Stack is reused.
  for (int i = 0; i <= top1; i++) {
    right_point[i] = in_poly[Stack[i]];
  }

  // Second chain: same walk with the opposite sign test.
  k_index = 0, Stack[0] = 0, top2 = 0;

  while (k_index != max_index) {
    p_k = p_max;
    k_index = max_index;
    for (int i = 1; i < n_poly; i++) {
      sign = cross(in_poly[Stack[top2]], in_poly[i], p_k);
      if ((sign < 0) || (sign == 0) && (dis(in_poly[Stack[top2]], in_poly[i]) >
                                        dis(in_poly[Stack[top2]], p_k))) {
        p_k = in_poly[i];
        k_index = i;
      }
    }
    top2++;
    Stack[top2] = k_index;
  }

  // Save the second chain without its last entry (p_max, already part of the
  // first chain).
  for (int i = top2 - 1; i >= 0; i--) {
    left_point[i] = in_poly[Stack[i]];
  }

  // Output: the first chain in order, followed by the second chain in reverse
  // order (indices top2 - 1 down to 1; index 0 is the shared start point).
  for (int i = 0; i < top1 + top2; i++) {
    if (i <= top1) {
      in_poly[i] = right_point[i];
    } else {
      in_poly[i] = left_point[top2 - (i - top1)];
    }
  }
  n_poly = top1 + top2;
}

// Computes the minimum-area rectangle enclosing a set of 9 points.
// p:         18 values, read as 9 interleaved (x, y) pairs.
// minpoints: output, 8 values holding the 4 rectangle corners as (x, y)
//            pairs.
template <typename T>
__device__ inline void Findminbox(T const *const p, T *minpoints) {
  Point ps1[MAXN];
  Point convex[MAXN];
  for (int i = 0; i < 9; i++) {
    convex[i].x = p[i * 2];
    convex[i].y = p[i * 2 + 1];
  }
  // Reduce the points to their convex hull (in place; n_convex is updated).
  int n_convex = 9;
  Jarvis(convex, n_convex);
  int n1 = n_convex;
  for (int i = 0; i < n1; i++) {
    ps1[i].x = convex[i].x;
    ps1[i].y = convex[i].y;
  }
  // Append the first vertex again so the last edge closes the polygon.
  ps1[n1].x = convex[0].x;
  ps1[n1].y = convex[0].y;

  // minbbox = [angle, xmin, ymin, xmax, ymax] in the rotated frame.
  float minbbox[5] = {0};
  minBoundingRect(ps1, n1 + 1, minbbox);
  float angle = minbbox[0];
  float xmin = minbbox[1];
  float ymin = minbbox[2];
  float xmax = minbbox[3];
  float ymax = minbbox[4];
  float R[2][2];

  R[0][0] = cos(angle);
  R[0][1] = sin(angle);
  R[1][0] = -sin(angle);
  R[1][1] = cos(angle);

  // Map the four box corners back from the rotated frame by multiplying each
  // corner (as a row vector) with R.
  minpoints[0] = xmax * R[0][0] + ymin * R[1][0];
  minpoints[1] = xmax * R[0][1] + ymin * R[1][1];
  minpoints[2] = xmin * R[0][0] + ymin * R[1][0];
  minpoints[3] = xmin * R[0][1] + ymin * R[1][1];
  minpoints[4] = xmin * R[0][0] + ymax * R[1][0];
  minpoints[5] = xmin * R[0][1] + ymax * R[1][1];
  minpoints[6] = xmax * R[0][0] + ymax * R[1][0];
  minpoints[7] = xmax * R[0][1] + ymax * R[1][1];
}

// Runs Findminbox on each of the ex_n_boxes inputs: box `index` reads 18
// values from ex_boxes and writes 8 values to minbox.
template <typename T>
__global__ void min_area_polygons_cuda_kernel(const int ex_n_boxes,
                                              const T *ex_boxes, T *minbox) {
  CUDA_1D_KERNEL_LOOP(index, ex_n_boxes) {
    Findminbox(ex_boxes + index * 18, minbox + index * 8);
  }
}

#endif  // MIN_AREA_POLYGONS_CUDA_KERNEL_CUH


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/modulated_deform_conv_cuda_kernel.cuh
================================================
/*!
 ******************* BEGIN Caffe Copyright Notice and Disclaimer
 *****************
 *
 * COPYRIGHT
 *
 * All contributions by the University of California:
 * Copyright (c) 2014-2017 The Regents of the University of California (Regents)
 * All rights reserved.
 *
 * All other contributions:
 * Copyright (c) 2014-2017, the respective contributors
 * All rights reserved.
 *
 * Caffe uses a shared copyright model: each contributor holds copyright over
 * their contributions to Caffe. The project versioning records all such
 * contribution and copyright details. If a contributor wants to further mark
 * their specific copyright on a particular contribution, they should indicate
 * their copyright solely in the commit message of the change when it is
 * committed.
 *
 * LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 *this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 *AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 *IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
 *FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 *DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 *SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 *CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 *OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * CONTRIBUTION AGREEMENT
 *
 * By contributing to the BVLC/caffe repository through pull-request, comment,
 * or otherwise, the contributor releases their content to the
 * license and copyright terms herein.
 *
 ***************** END Caffe Copyright Notice and Disclaimer
 *********************
 *
 * Copyright (c) 2018 Microsoft
 * Licensed under The MIT License [see LICENSE for details]
 * \file modulated_deformable_im2col.cuh
 * \brief Function definitions of converting an image to
 * column matrix based on kernel, padding, dilation, and offset.
 * These functions are mainly used in deformable convolution operators.
 * \ref: https://arxiv.org/abs/1703.06211
 * \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu, Dazhi Cheng
 */

// modified from
// https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/deform_conv_cuda_kernel.cu

#ifndef MODULATED_DEFORM_CONV_CUDA_KERNEL_CUH
#define MODULATED_DEFORM_CONV_CUDA_KERNEL_CUH

#include <float.h>
#ifdef MMCV_WITH_TRT
#include "common_cuda_helper.hpp"
#else  // MMCV_WITH_TRT
#ifdef MMCV_USE_PARROTS
#include "parrots_cuda_helper.hpp"
#else  // MMCV_USE_PARROTS
#include "pytorch_cuda_helper.hpp"
#endif  // MMCV_USE_PARROTS
#endif  // MMCV_WITH_TRT

// Bilinearly interpolates `input` at the fractional location (h, w).
// input:      pointer to the first element of the 2D plane being sampled
// data_width: row stride (in elements) used to index `input`
// height, width: bounds used to decide which neighbours are valid
// Neighbours that fall outside the bounds contribute 0.
template <typename T>
__device__ T dmcn_im2col_bilinear(const T *input, const int data_width,
                                  const int height, const int width, T h, T w) {
  // The four integer neighbours surrounding (h, w).
  int h_low = floorf(h);
  int w_low = floorf(w);
  int h_high = h_low + 1;
  int w_high = w_low + 1;

  // Fractional offsets from the low corner and their complements.
  T lh = h - h_low;
  T lw = w - w_low;
  T hh = 1 - lh, hw = 1 - lw;

  // Only the side that can be violated is checked for each neighbour
  // (low >= 0, high <= size - 1).
  T v1 = 0;
  if (h_low >= 0 && w_low >= 0) v1 = input[h_low * data_width + w_low];
  T v2 = 0;
  if (h_low >= 0 && w_high <= width - 1)
    v2 = input[h_low * data_width + w_high];
  T v3 = 0;
  if (h_high <= height - 1 && w_low >= 0)
    v3 = input[h_high * data_width + w_low];
  T v4 = 0;
  if (h_high <= height - 1 && w_high <= width - 1)
    v4 = input[h_high * data_width + w_high];

  // Bilinear weights of the four neighbours.
  T w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw;

  T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
  return val;
}

// Returns the bilinear weight with which the integer pixel (h, w) contributes
// to a sample taken at the fractional position (argmax_h, argmax_w).
//
// The result is 0 when the sample position lies outside the open range
// (-1, height) x (-1, width), or when (h, w) is not one of the four integer
// neighbours of the sample position.
template <typename T>
__device__ T dmcn_get_gradient_weight(T argmax_h, T argmax_w, const int h,
                                      const int w, const int height,
                                      const int width) {
  if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 ||
      argmax_w >= width) {
    // empty
    return 0;
  }

  int argmax_h_low = floorf(argmax_h);
  int argmax_w_low = floorf(argmax_w);
  // floorf() narrows a double argument to float before flooring, which can
  // round it up across an integer boundary and select the wrong neighbour
  // pair. Step back so that *_low <= coordinate always holds (never triggers
  // for float or half inputs).
  if (argmax_h_low > argmax_h) argmax_h_low -= 1;
  if (argmax_w_low > argmax_w) argmax_w_low -= 1;
  int argmax_h_high = argmax_h_low + 1;
  int argmax_w_high = argmax_w_low + 1;

  // Match (h, w) against each of the four neighbours in turn.
  T weight = 0;
  if (h == argmax_h_low && w == argmax_w_low)
    weight = (h + 1 - argmax_h) * (w + 1 - argmax_w);
  if (h == argmax_h_low && w == argmax_w_high)
    weight = (h + 1 - argmax_h) * (argmax_w + 1 - w);
  if (h == argmax_h_high && w == argmax_w_low)
    weight = (argmax_h + 1 - h) * (w + 1 - argmax_w);
  if (h == argmax_h_high && w == argmax_w_high)
    weight = (argmax_h + 1 - h) * (argmax_w + 1 - w);
  return weight;
}

// Returns the derivative of the bilinearly interpolated value of `im_data` at
// (argmax_h, argmax_w) with respect to one of the two coordinates.
//
// `im_data` is addressed as im_data[row * data_width + col].
// bp_dir == 0 selects the derivative along h, bp_dir == 1 the derivative along
// w; any other value yields 0. Positions outside the open range
// (-1, height) x (-1, width) also yield 0, and neighbours outside the image
// are skipped.
template <typename T>
__device__ T dmcn_get_coordinate_weight(T argmax_h, T argmax_w,
                                        const int height, const int width,
                                        const T *im_data, const int data_width,
                                        const int bp_dir) {
  if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 ||
      argmax_w >= width) {
    // empty
    return 0;
  }

  int argmax_h_low = floorf(argmax_h);
  int argmax_w_low = floorf(argmax_w);
  // floorf() narrows a double argument to float before flooring, so a value
  // just below `height`/`width` can come back as `height`/`width`; the
  // low-neighbour loads below are only guarded by ">= 0" and would then read
  // past the image. Step back so that *_low <= coordinate always holds (never
  // triggers for float or half inputs).
  if (argmax_h_low > argmax_h) argmax_h_low -= 1;
  if (argmax_w_low > argmax_w) argmax_w_low -= 1;
  int argmax_h_high = argmax_h_low + 1;
  int argmax_w_high = argmax_w_low + 1;

  T weight = 0;

  if (bp_dir == 0) {
    // d/dh: low-row neighbours enter negatively, high-row ones positively,
    // each scaled by its horizontal interpolation weight.
    if (argmax_h_low >= 0 && argmax_w_low >= 0)
      weight += -1 * (argmax_w_low + 1 - argmax_w) *
                im_data[argmax_h_low * data_width + argmax_w_low];
    if (argmax_h_low >= 0 && argmax_w_high <= width - 1)
      weight += -1 * (argmax_w - argmax_w_low) *
                im_data[argmax_h_low * data_width + argmax_w_high];
    if (argmax_h_high <= height - 1 && argmax_w_low >= 0)
      weight += (argmax_w_low + 1 - argmax_w) *
                im_data[argmax_h_high * data_width + argmax_w_low];
    if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1)
      weight += (argmax_w - argmax_w_low) *
                im_data[argmax_h_high * data_width + argmax_w_high];
  } else if (bp_dir == 1) {
    // d/dw: low-column neighbours enter negatively, high-column ones
    // positively, each scaled by its vertical interpolation weight.
    if (argmax_h_low >= 0 && argmax_w_low >= 0)
      weight += -1 * (argmax_h_low + 1 - argmax_h) *
                im_data[argmax_h_low * data_width + argmax_w_low];
    if (argmax_h_low >= 0 && argmax_w_high <= width - 1)
      weight += (argmax_h_low + 1 - argmax_h) *
                im_data[argmax_h_low * data_width + argmax_w_high];
    if (argmax_h_high <= height - 1 && argmax_w_low >= 0)
      weight += -1 * (argmax_h - argmax_h_low) *
                im_data[argmax_h_high * data_width + argmax_w_low];
    if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1)
      weight += (argmax_h - argmax_h_low) *
                im_data[argmax_h_high * data_width + argmax_w_high];
  }

  return weight;
}

// Modulated deformable im2col.
//
// Every loop index addresses one (input channel, batch item, output row,
// output column) tuple. For that tuple the kernel visits all
// kernel_h * kernel_w taps, samples the input bilinearly at the tap position
// shifted by the learned offset, scales the sample by the modulation mask and
// stores it in the column buffer `data_col`.
template <typename T>
__global__ void modulated_deformable_im2col_gpu_kernel(
    const int n, const T *data_im, const T *data_offset, const T *data_mask,
    const int height, const int width, const int kernel_h, const int kernel_w,
    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
    const int dilation_h, const int dilation_w,
    const int channel_per_deformable_group, const int batch_size,
    const int num_channels, const int deformable_group, const int height_col,
    const int width_col, T *data_col) {
  CUDA_1D_KERNEL_LOOP(index, n) {
    // Unravel the flat index: output column is the fastest dimension, then
    // output row, batch item and finally the input channel.
    const int out_x = index % width_col;
    const int out_y = (index / width_col) % height_col;
    const int batch = (index / width_col / height_col) % batch_size;
    const int chan = (index / width_col / height_col) / batch_size;

    // Deformable group this input channel belongs to.
    const int group = chan / channel_per_deformable_group;

    // Top-left corner of the undeformed receptive field in input coordinates.
    const int top = out_y * stride_h - pad_h;
    const int left = out_x * stride_w - pad_w;

    const T *im_plane =
        data_im + (batch * num_channels + chan) * height * width;
    const T *offset_base =
        data_offset + (batch * deformable_group + group) * 2 * kernel_h *
                          kernel_w * height_col * width_col;
    const T *mask_base =
        data_mask + (batch * deformable_group + group) * kernel_h * kernel_w *
                        height_col * width_col;

    // Column-buffer slot of the first tap; consecutive taps are one
    // (batch_size * height_col * width_col) plane apart.
    T *col_out = data_col +
                 ((chan * kernel_h * kernel_w * batch_size + batch) *
                      height_col +
                  out_y) *
                     width_col +
                 out_x;
    const int tap_stride = batch_size * height_col * width_col;

    for (int ky = 0; ky < kernel_h; ++ky) {
      for (int kx = 0; kx < kernel_w; ++kx) {
        const int tap = ky * kernel_w + kx;

        // Offsets are stored as interleaved (h, w) planes per tap, the mask
        // as a single plane per tap.
        const int off_h_idx =
            ((2 * tap) * height_col + out_y) * width_col + out_x;
        const int off_w_idx =
            ((2 * tap + 1) * height_col + out_y) * width_col + out_x;
        const int mask_idx = (tap * height_col + out_y) * width_col + out_x;

        const T sample_h = top + ky * dilation_h + offset_base[off_h_idx];
        const T sample_w = left + kx * dilation_w + offset_base[off_w_idx];

        // Samples that fall entirely outside the image contribute zero.
        T sampled = static_cast<T>(0);
        const bool inside = sample_h > -1 && sample_w > -1 &&
                            sample_h < height && sample_w < width;
        if (inside) {
          sampled = dmcn_im2col_bilinear(im_plane, width, height, width,
                                         sample_h, sample_w);
        }

        *col_out = sampled * mask_base[mask_idx];
        col_out += tap_stride;
      }
    }
  }
}

// Modulated deformable col2im: scatters column-buffer gradients back onto the
// input-image gradient `grad_im`.
//
// Every loop index addresses one column-buffer element, i.e. one
// (channel, kernel row, kernel column, batch item, output row, output column)
// tuple. Its gradient is scaled by the modulation mask and distributed with
// bilinear weights to the integer pixels next to the sampling position.
// Several elements can hit the same pixel, hence the atomicAdd.
template <typename T>
__global__ void modulated_deformable_col2im_gpu_kernel(
    const int n, const T *data_col, const T *data_offset, const T *data_mask,
    const int channels, const int height, const int width, const int kernel_h,
    const int kernel_w, const int pad_h, const int pad_w, const int stride_h,
    const int stride_w, const int dilation_h, const int dilation_w,
    const int channel_per_deformable_group, const int batch_size,
    const int deformable_group, const int height_col, const int width_col,
    T *grad_im) {
  CUDA_1D_KERNEL_LOOP(index, n) {
    // Unravel the flat index: output column is fastest, then output row,
    // batch item, kernel column, kernel row and finally the channel.
    const int out_x = index % width_col;
    const int out_y = (index / width_col) % height_col;
    const int batch = (index / width_col / height_col) % batch_size;
    const int kx = (index / width_col / height_col / batch_size) % kernel_w;
    const int ky =
        (index / width_col / height_col / batch_size / kernel_w) % kernel_h;
    const int chan =
        index / width_col / height_col / batch_size / kernel_w / kernel_h;

    const int group = chan / channel_per_deformable_group;
    const int tap = ky * kernel_w + kx;

    // Top-left corner of the undeformed receptive field.
    const int left = out_x * stride_w - pad_w;
    const int top = out_y * stride_h - pad_h;

    const T *offset_base =
        data_offset + (batch * deformable_group + group) * 2 * kernel_h *
                          kernel_w * height_col * width_col;
    const T *mask_base =
        data_mask + (batch * deformable_group + group) * kernel_h * kernel_w *
                        height_col * width_col;

    const T off_h =
        offset_base[((2 * tap) * height_col + out_y) * width_col + out_x];
    const T off_w =
        offset_base[((2 * tap + 1) * height_col + out_y) * width_col + out_x];
    const T mask = mask_base[(tap * height_col + out_y) * width_col + out_x];

    // Fractional sampling position used by the forward pass.
    const T sample_h = top + ky * dilation_h + off_h;
    const T sample_w = left + kx * dilation_w + off_w;

    const T scaled_grad = data_col[index] * mask;

    // Scan a 5x5 window around the truncated position and keep only the
    // in-image pixels that are less than one pixel away in both directions.
    const int base_y = static_cast<int>(sample_h);
    const int base_x = static_cast<int>(sample_w);
    for (int dy = -2; dy <= 2; dy++) {
      const int y = base_y + dy;
      if (y < 0 || y >= height) continue;
      for (int dx = -2; dx <= 2; dx++) {
        const int x = base_x + dx;
        if (x < 0 || x >= width) continue;
        if (!(abs(sample_h - y) < 1 && abs(sample_w - x) < 1)) continue;

        const int grad_pos =
            ((batch * channels + chan) * height + y) * width + x;
        const T pixel_weight = dmcn_get_gradient_weight(
            sample_h, sample_w, y, x, height, width);
        atomicAdd(grad_im + grad_pos, pixel_weight * scaled_grad);
      }
    }
  }
}

// Backward pass of modulated deformable convolution with respect to the
// sampling offsets and the modulation mask.
//
// Every loop index addresses one element of `grad_offset`, i.e. one
// (batch item, offset channel, output row, output column) tuple. The kernel
// accumulates, over the column-buffer channels that use this offset, the
// column gradient times the derivative of the bilinear sample with respect to
// the offset coordinate (-> grad_offset) and, for even offset channels only,
// the column gradient times the sampled value itself (-> grad_mask).
template <typename T>
__global__ void modulated_deformable_col2im_coord_gpu_kernel(
    const int n, const T *data_col, const T *data_im, const T *data_offset,
    const T *data_mask, const int channels, const int height, const int width,
    const int kernel_h, const int kernel_w, const int pad_h, const int pad_w,
    const int stride_h, const int stride_w, const int dilation_h,
    const int dilation_w, const int channel_per_deformable_group,
    const int batch_size, const int offset_channels, const int deformable_group,
    const int height_col, const int width_col, T *grad_offset, T *grad_mask) {
  CUDA_1D_KERNEL_LOOP(index, n) {
    // val accumulates the offset gradient, mval the mask gradient.
    T val = 0, mval = 0;
    // Unravel the flat index: output column is fastest, then output row,
    // offset channel and batch item.
    int w = index % width_col;
    int h = (index / width_col) % height_col;
    int c = (index / width_col / height_col) % offset_channels;
    int b = (index / width_col / height_col) / offset_channels;
    // compute the start and end of the output

    // Each deformable group owns 2 * kernel_h * kernel_w offset channels.
    const int deformable_group_index = c / (2 * kernel_h * kernel_w);
    const int col_step = kernel_h * kernel_w;
    // Counts loop iterations below; selects the image plane inside the group.
    int cnt = 0;
    // NOTE(review): the division by kernel_h / kernel_w in data_im_ptr below
    // suggests channel_per_deformable_group is counted in column-buffer
    // channels (image channels * kernel_h * kernel_w) here - confirm against
    // the launcher.
    const T *data_col_ptr = data_col + deformable_group_index *
                                           channel_per_deformable_group *
                                           batch_size * width_col * height_col;
    const T *data_im_ptr =
        data_im + (b * deformable_group + deformable_group_index) *
                      channel_per_deformable_group / kernel_h / kernel_w *
                      height * width;
    const T *data_offset_ptr =
        data_offset + (b * deformable_group + deformable_group_index) * 2 *
                          kernel_h * kernel_w * height_col * width_col;
    const T *data_mask_ptr =
        data_mask + (b * deformable_group + deformable_group_index) * kernel_h *
                        kernel_w * height_col * width_col;

    // Offset channel relative to the start of its deformable group.
    const int offset_c = c - deformable_group_index * 2 * kernel_h * kernel_w;

    // Visit every column-buffer channel of the group that belongs to kernel
    // tap offset_c / 2; these are kernel_h * kernel_w channels apart.
    for (int col_c = (offset_c / 2); col_c < channel_per_deformable_group;
         col_c += col_step) {
      const int col_pos =
          (((col_c * batch_size + b) * height_col) + h) * width_col + w;
      // 0: gradient w.r.t. the h offset, 1: gradient w.r.t. the w offset.
      const int bp_dir = offset_c % 2;

      // Recover the kernel tap and the output position from col_pos.
      int j = (col_pos / width_col / height_col / batch_size) % kernel_w;
      int i =
          (col_pos / width_col / height_col / batch_size / kernel_w) % kernel_h;
      int w_out = col_pos % width_col;
      int h_out = (col_pos / width_col) % height_col;
      int w_in = w_out * stride_w - pad_w;
      int h_in = h_out * stride_h - pad_h;
      const int data_offset_h_ptr =
          (((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out);
      const int data_offset_w_ptr =
          (((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col +
           w_out);
      const int data_mask_hw_ptr =
          (((i * kernel_w + j) * height_col + h_out) * width_col + w_out);
      const T offset_h = data_offset_ptr[data_offset_h_ptr];
      const T offset_w = data_offset_ptr[data_offset_w_ptr];
      const T mask = data_mask_ptr[data_mask_hw_ptr];
      // Fractional sampling position used by the forward pass.
      T inv_h = h_in + i * dilation_h + offset_h;
      T inv_w = w_in + j * dilation_w + offset_w;
      // Out-of-image samples are moved to (-2, -2), for which
      // dmcn_get_coordinate_weight returns 0; they add nothing to mval either.
      if (inv_h <= -1 || inv_w <= -1 || inv_h >= height || inv_w >= width)
        inv_h = inv_w = -2;
      else
        mval += data_col_ptr[col_pos] *
                dmcn_im2col_bilinear(data_im_ptr + cnt * height * width, width,
                                     height, width, inv_h, inv_w);
      const T weight = dmcn_get_coordinate_weight(
          inv_h, inv_w, height, width, data_im_ptr + cnt * height * width,
          width, bp_dir);
      val += weight * data_col_ptr[col_pos] * mask;
      cnt += 1;
    }
    // KERNEL_ASSIGN(grad_offset[index], offset_req, val);
    grad_offset[index] = val;
    // The h and w offset channels of a tap share one mask entry, so only the
    // even (h) channel writes the mask gradient.
    if (offset_c % 2 == 0)
      // KERNEL_ASSIGN(grad_mask[(((b * deformable_group +
      // deformable_group_index) * kernel_h * kernel_w + offset_c / 2) *
      // height_col + h) * width_col + w], mask_req, mval);
      grad_mask[(((b * deformable_group + deformable_group_index) * kernel_h *
                      kernel_w +
                  offset_c / 2) *
                     height_col +
                 h) *
                    width_col +
                w] = mval;
  }
}

#endif  // MODULATED_DEFORM_CONV_CUDA_KERNEL_CUH


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/ms_deform_attn_cuda_kernel.cuh
================================================
/*!
**************************************************************************************************
* Deformable DETR
* Copyright (c) 2020 SenseTime. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 [see LICENSE for details]
**************************************************************************************************
* Modified from
*https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
**************************************************************************************************
*/
#ifndef DEFORM_ATTN_CUDA_KERNEL
#define DEFORM_ATTN_CUDA_KERNEL

#include "common_cuda_helper.hpp"
#include "pytorch_cuda_helper.hpp"

// Thread-count constant. Presumably the default number of threads per block
// for launching the kernels of this op; it is not referenced in the part of
// the header reviewed here, so confirm against the launch code.
const int CUDA_NUM_THREADS = 1024;

// Bilinearly interpolates one channel of one attention head of a feature map
// at the fractional position (h, w).
//
// `bottom_data` is addressed as
//   bottom_data[((row * width + col) * nheads + m) * channels + c].
// The two "low" neighbours are only checked against the lower bound and the
// two "high" neighbours only against the upper bound; a neighbour that fails
// its check contributes zero. The function therefore expects
// -1 < h < height and -1 < w < width.
//
// Returns the weighted sum of the (up to four) surrounding samples.
template <typename scalar_t>
__device__ scalar_t ms_deform_attn_im2col_bilinear(
    const scalar_t *&bottom_data, const int &height, const int &width,
    const int &nheads, const int &channels, const scalar_t &h,
    const scalar_t &w, const int &m, const int &c) {
  // floorf() narrows a double coordinate to float before flooring, which can
  // round it up across an integer boundary (e.g. height - 1e-10 -> height) and
  // make the v1 load below read past the feature map. Step back so that
  // *_low <= coordinate always holds; never triggers for float or half.
  const int h_floor = floorf(h);
  const int w_floor = floorf(w);
  const int h_low = (h_floor > h) ? h_floor - 1 : h_floor;
  const int w_low = (w_floor > w) ? w_floor - 1 : w_floor;
  const int h_high = h_low + 1;
  const int w_high = w_low + 1;

  // Fractional distances to the low neighbours and their complements.
  const scalar_t lh = h - h_low;
  const scalar_t lw = w - w_low;
  const scalar_t hh = 1 - lh, hw = 1 - lw;

  // Element strides of one column step and one row step.
  const int w_stride = nheads * channels;
  const int h_stride = width * w_stride;
  const int h_low_ptr_offset = h_low * h_stride;
  const int h_high_ptr_offset = h_low_ptr_offset + h_stride;
  const int w_low_ptr_offset = w_low * w_stride;
  const int w_high_ptr_offset = w_low_ptr_offset + w_stride;
  const int base_ptr = m * channels + c;

  scalar_t v1 = 0;
  if (h_low >= 0 && w_low >= 0) {
    const int ptr1 = h_low_ptr_offset + w_low_ptr_offset + base_ptr;
    v1 = bottom_data[ptr1];
  }
  scalar_t v2 = 0;
  if (h_low >= 0 && w_high <= width - 1) {
    const int ptr2 = h_low_ptr_offset + w_high_ptr_offset + base_ptr;
    v2 = bottom_data[ptr2];
  }
  scalar_t v3 = 0;
  if (h_high <= height - 1 && w_low >= 0) {
    const int ptr3 = h_high_ptr_offset + w_low_ptr_offset + base_ptr;
    v3 = bottom_data[ptr3];
  }
  scalar_t v4 = 0;
  if (h_high <= height - 1 && w_high <= width - 1) {
    const int ptr4 = h_high_ptr_offset + w_high_ptr_offset + base_ptr;
    v4 = bottom_data[ptr4];
  }

  const scalar_t w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw;

  const scalar_t val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
  return val;
}

// Backward step of one bilinear sample of multi-scale deformable attention.
//
// For the sample at (h, w) of head `m`, channel `c` it
//   * atomically adds w_k * top_grad * attn_weight to `grad_value` at each
//     in-bounds neighbour k,
//   * stores top_grad * interpolated value in *grad_attn_weight,
//   * stores the gradient w.r.t. the sampling location in
//     grad_sampling_loc[0] (w direction, scaled by `width`) and
//     grad_sampling_loc[1] (h direction, scaled by `height`).
// The three location/weight outputs are plain stores (they overwrite), so the
// caller is expected to hand in slots that belong to this thread only.
//
// `bottom_data` / `grad_value` are addressed as
//   [((row * width + col) * nheads + m) * channels + c].
// Expects -1 < h < height and -1 < w < width.
template <typename scalar_t>
__device__ void ms_deform_attn_col2im_bilinear(
    const scalar_t *&bottom_data, const int &height, const int &width,
    const int &nheads, const int &channels, const scalar_t &h,
    const scalar_t &w, const int &m, const int &c, const scalar_t &top_grad,
    const scalar_t &attn_weight, scalar_t *&grad_value,
    scalar_t *grad_sampling_loc, scalar_t *grad_attn_weight) {
  // floorf() narrows a double coordinate to float before flooring, which can
  // round it up across an integer boundary (e.g. height - 1e-10 -> height).
  // The low-neighbour accesses below are only guarded by ">= 0", so that would
  // read bottom_data and atomically write grad_value out of bounds. Step back
  // so that *_low <= coordinate always holds; never triggers for float/half.
  const int h_floor = floorf(h);
  const int w_floor = floorf(w);
  const int h_low = (h_floor > h) ? h_floor - 1 : h_floor;
  const int w_low = (w_floor > w) ? w_floor - 1 : w_floor;
  const int h_high = h_low + 1;
  const int w_high = w_low + 1;

  // Fractional distances to the low neighbours and their complements.
  const scalar_t lh = h - h_low;
  const scalar_t lw = w - w_low;
  const scalar_t hh = 1 - lh, hw = 1 - lw;

  // Element strides of one column step and one row step.
  const int w_stride = nheads * channels;
  const int h_stride = width * w_stride;
  const int h_low_ptr_offset = h_low * h_stride;
  const int h_high_ptr_offset = h_low_ptr_offset + h_stride;
  const int w_low_ptr_offset = w_low * w_stride;
  const int w_high_ptr_offset = w_low_ptr_offset + w_stride;
  const int base_ptr = m * channels + c;

  const scalar_t w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw;
  const scalar_t top_grad_value = top_grad * attn_weight;
  // Derivatives of the interpolated value w.r.t. h and w (in pixel units).
  scalar_t grad_h_weight = 0, grad_w_weight = 0;

  scalar_t v1 = 0;
  if (h_low >= 0 && w_low >= 0) {
    const int ptr1 = h_low_ptr_offset + w_low_ptr_offset + base_ptr;
    v1 = bottom_data[ptr1];
    grad_h_weight -= hw * v1;
    grad_w_weight -= hh * v1;
    atomicAdd(grad_value + ptr1, w1 * top_grad_value);
  }
  scalar_t v2 = 0;
  if (h_low >= 0 && w_high <= width - 1) {
    const int ptr2 = h_low_ptr_offset + w_high_ptr_offset + base_ptr;
    v2 = bottom_data[ptr2];
    grad_h_weight -= lw * v2;
    grad_w_weight += hh * v2;
    atomicAdd(grad_value + ptr2, w2 * top_grad_value);
  }
  scalar_t v3 = 0;
  if (h_high <= height - 1 && w_low >= 0) {
    const int ptr3 = h_high_ptr_offset + w_low_ptr_offset + base_ptr;
    v3 = bottom_data[ptr3];
    grad_h_weight += hw * v3;
    grad_w_weight -= lh * v3;
    atomicAdd(grad_value + ptr3, w3 * top_grad_value);
  }
  scalar_t v4 = 0;
  if (h_high <= height - 1 && w_high <= width - 1) {
    const int ptr4 = h_high_ptr_offset + w_high_ptr_offset + base_ptr;
    v4 = bottom_data[ptr4];
    grad_h_weight += lw * v4;
    grad_w_weight += lh * v4;
    atomicAdd(grad_value + ptr4, w4 * top_grad_value);
  }

  const scalar_t val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
  *grad_attn_weight = top_grad * val;
  *grad_sampling_loc = width * grad_w_weight * top_grad_value;
  *(grad_sampling_loc + 1) = height * grad_h_weight * top_grad_value;
}

// Backward step of one bilinear sample of multi-scale deformable attention,
// variant that accumulates every output with atomicAdd.
//
// For the sample at (h, w) of head `m`, channel `c` it atomically adds
//   * w_k * top_grad * attn_weight to `grad_value` at each in-bounds
//     neighbour k,
//   * top_grad * interpolated value to *grad_attn_weight,
//   * the gradient w.r.t. the sampling location to grad_sampling_loc[0]
//     (w direction, scaled by `width`) and grad_sampling_loc[1]
//     (h direction, scaled by `height`).
//
// `bottom_data` / `grad_value` are addressed as
//   [((row * width + col) * nheads + m) * channels + c].
// Expects -1 < h < height and -1 < w < width.
template <typename scalar_t>
__device__ void ms_deform_attn_col2im_bilinear_gm(
    const scalar_t *&bottom_data, const int &height, const int &width,
    const int &nheads, const int &channels, const scalar_t &h,
    const scalar_t &w, const int &m, const int &c, const scalar_t &top_grad,
    const scalar_t &attn_weight, scalar_t *&grad_value,
    scalar_t *grad_sampling_loc, scalar_t *grad_attn_weight) {
  // floorf() narrows a double coordinate to float before flooring, which can
  // round it up across an integer boundary (e.g. height - 1e-10 -> height).
  // The low-neighbour accesses below are only guarded by ">= 0", so that would
  // read bottom_data and atomically write grad_value out of bounds. Step back
  // so that *_low <= coordinate always holds; never triggers for float/half.
  const int h_floor = floorf(h);
  const int w_floor = floorf(w);
  const int h_low = (h_floor > h) ? h_floor - 1 : h_floor;
  const int w_low = (w_floor > w) ? w_floor - 1 : w_floor;
  const int h_high = h_low + 1;
  const int w_high = w_low + 1;

  // Fractional distances to the low neighbours and their complements.
  const scalar_t lh = h - h_low;
  const scalar_t lw = w - w_low;
  const scalar_t hh = 1 - lh, hw = 1 - lw;

  // Element strides of one column step and one row step.
  const int w_stride = nheads * channels;
  const int h_stride = width * w_stride;
  const int h_low_ptr_offset = h_low * h_stride;
  const int h_high_ptr_offset = h_low_ptr_offset + h_stride;
  const int w_low_ptr_offset = w_low * w_stride;
  const int w_high_ptr_offset = w_low_ptr_offset + w_stride;
  const int base_ptr = m * channels + c;

  const scalar_t w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw;
  const scalar_t top_grad_value = top_grad * attn_weight;
  // Derivatives of the interpolated value w.r.t. h and w (in pixel units).
  scalar_t grad_h_weight = 0, grad_w_weight = 0;

  scalar_t v1 = 0;
  if (h_low >= 0 && w_low >= 0) {
    const int ptr1 = h_low_ptr_offset + w_low_ptr_offset + base_ptr;
    v1 = bottom_data[ptr1];
    grad_h_weight -= hw * v1;
    grad_w_weight -= hh * v1;
    atomicAdd(grad_value + ptr1, w1 * top_grad_value);
  }
  scalar_t v2 = 0;
  if (h_low >= 0 && w_high <= width - 1) {
    const int ptr2 = h_low_ptr_offset + w_high_ptr_offset + base_ptr;
    v2 = bottom_data[ptr2];
    grad_h_weight -= lw * v2;
    grad_w_weight += hh * v2;
    atomicAdd(grad_value + ptr2, w2 * top_grad_value);
  }
  scalar_t v3 = 0;
  if (h_high <= height - 1 && w_low >= 0) {
    const int ptr3 = h_high_ptr_offset + w_low_ptr_offset + base_ptr;
    v3 = bottom_data[ptr3];
    grad_h_weight += hw * v3;
    grad_w_weight -= lh * v3;
    atomicAdd(grad_value + ptr3, w3 * top_grad_value);
  }
  scalar_t v4 = 0;
  if (h_high <= height - 1 && w_high <= width - 1) {
    const int ptr4 = h_high_ptr_offset + w_high_ptr_offset + base_ptr;
    v4 = bottom_data[ptr4];
    grad_h_weight += lw * v4;
    grad_w_weight += lh * v4;
    atomicAdd(grad_value + ptr4, w4 * top_grad_value);
  }

  const scalar_t val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
  atomicAdd(grad_attn_weight, top_grad * val);
  atomicAdd(grad_sampling_loc, width * grad_w_weight * top_grad_value);
  atomicAdd(grad_sampling_loc + 1, height * grad_h_weight * top_grad_value);
}

// Forward pass of multi-scale deformable attention.
//
// Every loop index addresses one output element, i.e. one
// (batch item, query, head, channel) tuple. The kernel walks over all
// num_levels * num_point sampling points of that (batch, query, head),
// samples the value tensor bilinearly at each of them and writes the
// attention-weighted sum to data_col[index].
template <typename scalar_t>
__global__ void ms_deformable_im2col_gpu_kernel(
    const int n, const scalar_t *data_value, const int64_t *data_spatial_shapes,
    const int64_t *data_level_start_index, const scalar_t *data_sampling_loc,
    const scalar_t *data_attn_weight, const int batch_size,
    const int spatial_size, const int num_heads, const int channels,
    const int num_levels, const int num_query, const int num_point,
    scalar_t *data_col) {
  CUDA_1D_KERNEL_LOOP(index, n) {
    // Unravel the flat index: channel is fastest, then head, query, batch.
    const int flat = index;
    const int c_col = flat % channels;
    const int sampling_index = flat / channels;  // (batch, query, head)
    const int m_col = sampling_index % num_heads;
    const int b_col = sampling_index / num_heads / num_query;

    // Stride between two spatial positions of the value tensor and the
    // element offset of this batch item inside it.
    const int qid_stride = num_heads * channels;
    const int batch_value_offset = b_col * spatial_size * qid_stride;

    // Running index of the current sampling point: attention weights hold one
    // entry per point, sampling locations an interleaved (w, h) pair.
    int sample = sampling_index * num_levels * num_point;
    scalar_t acc = 0;

    for (int level = 0; level < num_levels; ++level) {
      const int level_start_id = data_level_start_index[level];
      // Spatial shapes are stored as (h, w) pairs, one pair per level.
      const int shape_idx = level << 1;
      const int spatial_h = data_spatial_shapes[shape_idx];
      const int spatial_w = data_spatial_shapes[shape_idx + 1];
      const scalar_t *level_value =
          data_value + (batch_value_offset + level_start_id * qid_stride);

      for (int point = 0; point < num_point; ++point, ++sample) {
        const int loc_idx = 2 * sample;
        const scalar_t loc_w = data_sampling_loc[loc_idx];
        const scalar_t loc_h = data_sampling_loc[loc_idx + 1];
        const scalar_t weight = data_attn_weight[sample];

        // Scale the stored location by the level size and shift by half a
        // pixel to obtain the pixel-space sampling position.
        const scalar_t h_im = loc_h * spatial_h - 0.5;
        const scalar_t w_im = loc_w * spatial_w - 0.5;

        const bool inside =
            h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w;
        if (inside) {
          acc += ms_deform_attn_im2col_bilinear(level_value, spatial_h,
                                                spatial_w, num_heads, channels,
                                                h_im, w_im, m_col, c_col) *
                 weight;
        }
      }
    }

    data_col[index] = acc;
  }
}

// Backward pass of multi-scale deformable attention; variant with statically
// sized shared memory and a serial reduction performed by thread 0.
//
// Every loop index addresses one (batch item, query, head, channel) tuple of
// `grad_col`. For each sampling point the thread
//   1. atomically scatters its contribution into `grad_value`,
//   2. writes its partial gradients for the sampling location and the
//      attention weight into its own shared-memory slots,
// after which thread 0 sums the slots of all blockSize threads and stores the
// totals in `grad_sampling_loc` / `grad_attn_weight`.
//
// NOTE(review): thread 0 stores the block-wide sum at the output slot derived
// from its own sampling_index, so this presumably relies on
// blockDim.x == blockSize and on all threads of a block sharing one
// sampling_index (e.g. blockDim.x == channels) - confirm against the launcher.
// NOTE(review): __syncthreads() is executed inside CUDA_1D_KERNEL_LOOP, which
// requires all threads of a block to run the same number of loop iterations -
// confirm that the launch configuration guarantees this.
template <typename scalar_t, unsigned int blockSize>
__global__ void ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1(
    const int n, const scalar_t *grad_col, const scalar_t *data_value,
    const int64_t *data_spatial_shapes, const int64_t *data_level_start_index,
    const scalar_t *data_sampling_loc, const scalar_t *data_attn_weight,
    const int batch_size, const int spatial_size, const int num_heads,
    const int channels, const int num_levels, const int num_query,
    const int num_point, scalar_t *grad_value, scalar_t *grad_sampling_loc,
    scalar_t *grad_attn_weight) {
  CUDA_1D_KERNEL_LOOP(index, n) {
    // Per-thread scratch slots: an interleaved (w, h) pair and one weight.
    __shared__ scalar_t cache_grad_sampling_loc[blockSize * 2];
    __shared__ scalar_t cache_grad_attn_weight[blockSize];
    unsigned int tid = threadIdx.x;
    // Unravel the flat index: channel is fastest, then head, query, batch.
    int _temp = index;
    const int c_col = _temp % channels;
    _temp /= channels;
    const int sampling_index = _temp;
    const int m_col = _temp % num_heads;
    _temp /= num_heads;
    _temp /= num_query;
    const int b_col = _temp;

    const scalar_t top_grad = grad_col[index];

    int data_weight_ptr = sampling_index * num_levels * num_point;
    int data_loc_w_ptr = data_weight_ptr << 1;
    const int grad_sampling_ptr = data_weight_ptr;
    // Output cursors are local to this loop iteration. Advancing the kernel
    // parameters themselves would leave them offset for any further iteration
    // of the enclosing loop on the same thread and make it write to the wrong
    // location.
    scalar_t *grad_sampling_loc_out =
        grad_sampling_loc + (grad_sampling_ptr << 1);
    scalar_t *grad_attn_weight_out = grad_attn_weight + grad_sampling_ptr;
    const int grad_weight_stride = 1;
    const int grad_loc_stride = 2;
    const int qid_stride = num_heads * channels;
    const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride;

    for (int l_col = 0; l_col < num_levels; ++l_col) {
      const int level_start_id = data_level_start_index[l_col];
      // Spatial shapes are stored as (h, w) pairs, one pair per level.
      const int spatial_h_ptr = l_col << 1;
      const int spatial_h = data_spatial_shapes[spatial_h_ptr];
      const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1];
      const int value_ptr_offset =
          data_value_ptr_init_offset + level_start_id * qid_stride;
      const scalar_t *data_value_ptr = data_value + value_ptr_offset;
      scalar_t *grad_value_ptr = grad_value + value_ptr_offset;

      for (int p_col = 0; p_col < num_point; ++p_col) {
        const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr];
        const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1];
        const scalar_t weight = data_attn_weight[data_weight_ptr];

        const scalar_t h_im = loc_h * spatial_h - 0.5;
        const scalar_t w_im = loc_w * spatial_w - 0.5;
        // Clear this thread's slots so that out-of-range samples add zero.
        *(cache_grad_sampling_loc + (threadIdx.x << 1)) = 0;
        *(cache_grad_sampling_loc + ((threadIdx.x << 1) + 1)) = 0;
        *(cache_grad_attn_weight + threadIdx.x) = 0;
        if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w) {
          ms_deform_attn_col2im_bilinear(
              data_value_ptr, spatial_h, spatial_w, num_heads, channels, h_im,
              w_im, m_col, c_col, top_grad, weight, grad_value_ptr,
              cache_grad_sampling_loc + (threadIdx.x << 1),
              cache_grad_attn_weight + threadIdx.x);
        }

        // Make every thread's slots visible before thread 0 sums them.
        __syncthreads();
        if (tid == 0) {
          scalar_t _grad_w = cache_grad_sampling_loc[0],
                   _grad_h = cache_grad_sampling_loc[1],
                   _grad_a = cache_grad_attn_weight[0];
          int sid = 2;
          for (unsigned int t = 1; t < blockSize; ++t) {
            _grad_w += cache_grad_sampling_loc[sid];
            _grad_h += cache_grad_sampling_loc[sid + 1];
            _grad_a += cache_grad_attn_weight[t];
            sid += 2;
          }

          *grad_sampling_loc_out = _grad_w;
          *(grad_sampling_loc_out + 1) = _grad_h;
          *grad_attn_weight_out = _grad_a;
        }
        // Keep the slots intact until thread 0 has finished reading them.
        __syncthreads();

        data_weight_ptr += 1;
        data_loc_w_ptr += 2;
        grad_attn_weight_out += grad_weight_stride;
        grad_sampling_loc_out += grad_loc_stride;
      }
    }
  }
}

// Backward pass of multi-scale deformable attention; variant with statically
// sized shared memory and a pairwise (tree) reduction.
//
// Every loop index addresses one (batch item, query, head, channel) tuple of
// `grad_col`. For each sampling point the thread
//   1. atomically scatters its contribution into `grad_value`,
//   2. writes its partial gradients for the sampling location and the
//      attention weight into its own shared-memory slots,
// after which the slots are summed pairwise, halving the active range each
// step, and thread 0 stores the totals in `grad_sampling_loc` /
// `grad_attn_weight`. The halving scheme only covers every slot when
// blockSize is a power of two.
//
// NOTE(review): thread 0 stores the block-wide sum at the output slot derived
// from its own sampling_index, so this presumably relies on
// blockDim.x == blockSize and on all threads of a block sharing one
// sampling_index (e.g. blockDim.x == channels) - confirm against the launcher.
// NOTE(review): __syncthreads() is executed inside CUDA_1D_KERNEL_LOOP, which
// requires all threads of a block to run the same number of loop iterations -
// confirm that the launch configuration guarantees this.
template <typename scalar_t, unsigned int blockSize>
__global__ void ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v2(
    const int n, const scalar_t *grad_col, const scalar_t *data_value,
    const int64_t *data_spatial_shapes, const int64_t *data_level_start_index,
    const scalar_t *data_sampling_loc, const scalar_t *data_attn_weight,
    const int batch_size, const int spatial_size, const int num_heads,
    const int channels, const int num_levels, const int num_query,
    const int num_point, scalar_t *grad_value, scalar_t *grad_sampling_loc,
    scalar_t *grad_attn_weight) {
  CUDA_1D_KERNEL_LOOP(index, n) {
    // Per-thread scratch slots: an interleaved (w, h) pair and one weight.
    __shared__ scalar_t cache_grad_sampling_loc[blockSize * 2];
    __shared__ scalar_t cache_grad_attn_weight[blockSize];
    unsigned int tid = threadIdx.x;
    // Unravel the flat index: channel is fastest, then head, query, batch.
    int _temp = index;
    const int c_col = _temp % channels;
    _temp /= channels;
    const int sampling_index = _temp;
    const int m_col = _temp % num_heads;
    _temp /= num_heads;
    _temp /= num_query;
    const int b_col = _temp;

    const scalar_t top_grad = grad_col[index];

    int data_weight_ptr = sampling_index * num_levels * num_point;
    int data_loc_w_ptr = data_weight_ptr << 1;
    const int grad_sampling_ptr = data_weight_ptr;
    // Output cursors are local to this loop iteration. Advancing the kernel
    // parameters themselves would leave them offset for any further iteration
    // of the enclosing loop on the same thread and make it write to the wrong
    // location.
    scalar_t *grad_sampling_loc_out =
        grad_sampling_loc + (grad_sampling_ptr << 1);
    scalar_t *grad_attn_weight_out = grad_attn_weight + grad_sampling_ptr;
    const int grad_weight_stride = 1;
    const int grad_loc_stride = 2;
    const int qid_stride = num_heads * channels;
    const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride;

    for (int l_col = 0; l_col < num_levels; ++l_col) {
      const int level_start_id = data_level_start_index[l_col];
      // Spatial shapes are stored as (h, w) pairs, one pair per level.
      const int spatial_h_ptr = l_col << 1;
      const int spatial_h = data_spatial_shapes[spatial_h_ptr];
      const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1];
      const int value_ptr_offset =
          data_value_ptr_init_offset + level_start_id * qid_stride;
      const scalar_t *data_value_ptr = data_value + value_ptr_offset;
      scalar_t *grad_value_ptr = grad_value + value_ptr_offset;

      for (int p_col = 0; p_col < num_point; ++p_col) {
        const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr];
        const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1];
        const scalar_t weight = data_attn_weight[data_weight_ptr];

        const scalar_t h_im = loc_h * spatial_h - 0.5;
        const scalar_t w_im = loc_w * spatial_w - 0.5;
        // Clear this thread's slots so that out-of-range samples add zero.
        *(cache_grad_sampling_loc + (threadIdx.x << 1)) = 0;
        *(cache_grad_sampling_loc + ((threadIdx.x << 1) + 1)) = 0;
        *(cache_grad_attn_weight + threadIdx.x) = 0;
        if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w) {
          ms_deform_attn_col2im_bilinear(
              data_value_ptr, spatial_h, spatial_w, num_heads, channels, h_im,
              w_im, m_col, c_col, top_grad, weight, grad_value_ptr,
              cache_grad_sampling_loc + (threadIdx.x << 1),
              cache_grad_attn_weight + threadIdx.x);
        }

        // Make every thread's slots visible before the reduction starts.
        __syncthreads();

        // Pairwise reduction: in each step the lower half of the active range
        // absorbs the upper half.
        for (unsigned int s = blockSize / 2; s > 0; s >>= 1) {
          if (tid < s) {
            const unsigned int xid1 = tid << 1;
            const unsigned int xid2 = (tid + s) << 1;
            cache_grad_attn_weight[tid] += cache_grad_attn_weight[tid + s];
            cache_grad_sampling_loc[xid1] += cache_grad_sampling_loc[xid2];
            cache_grad_sampling_loc[xid1 + 1] +=
                cache_grad_sampling_loc[xid2 + 1];
          }
          __syncthreads();
        }

        if (tid == 0) {
          *grad_sampling_loc_out = cache_grad_sampling_loc[0];
          *(grad_sampling_loc_out + 1) = cache_grad_sampling_loc[1];
          *grad_attn_weight_out = cache_grad_attn_weight[0];
        }
        // Keep slot 0 intact until thread 0 has stored the totals.
        __syncthreads();

        data_weight_ptr += 1;
        data_loc_w_ptr += 2;
        grad_attn_weight_out += grad_weight_stride;
        grad_sampling_loc_out += grad_loc_stride;
      }
    }
  }
}

// Backward (col2im) kernel for multi-scale deformable attention that reduces
// the per-thread sampling-location / attention-weight gradients through
// dynamically sized shared memory, with thread 0 summing the block serially.
//
// Each loop index is decoded as
//   ((b * num_query + q) * num_heads + m) * channels + c
// and, for every (level, point) pair, the thread calls
// ms_deform_attn_col2im_bilinear, which receives grad_value and this thread's
// shared-memory slots for the sampling-location and attention-weight
// gradients. Thread 0 then sums all blockDim.x slots and stores the totals to
// grad_sampling_loc / grad_attn_weight.
//
// Requirements visible from the code:
//  * the dynamic shared memory must hold 3 * blockDim.x scalar_t values
//    (2 * blockDim.x for the location gradients, blockDim.x for the weights);
//  * __syncthreads() is executed inside the loop body, so every thread of a
//    block has to run the same number of iterations.
// NOTE(review): the reduction writes through thread 0's output pointer only,
// so presumably all threads of a block share one sampling_index (e.g.
// blockDim.x == channels) -- confirm against the launcher.
//
// Fix: the original advanced the kernel parameters grad_sampling_loc and
// grad_attn_weight in place inside CUDA_1D_KERNEL_LOOP. That macro is defined
// outside this block; if it runs the body more than once per thread (as a
// grid-stride loop does), every later iteration started from an already
// shifted pointer and wrote to the wrong location. Per-iteration local output
// pointers are used instead.
template <typename scalar_t>
__global__ void ms_deformable_col2im_gpu_kernel_shm_reduce_v1(
    const int n, const scalar_t *grad_col, const scalar_t *data_value,
    const int64_t *data_spatial_shapes, const int64_t *data_level_start_index,
    const scalar_t *data_sampling_loc, const scalar_t *data_attn_weight,
    const int batch_size, const int spatial_size, const int num_heads,
    const int channels, const int num_levels, const int num_query,
    const int num_point, scalar_t *grad_value, scalar_t *grad_sampling_loc,
    scalar_t *grad_attn_weight) {
  CUDA_1D_KERNEL_LOOP(index, n) {
    extern __shared__ int _s[];
    scalar_t *cache_grad_sampling_loc = reinterpret_cast<scalar_t *>(_s);
    scalar_t *cache_grad_attn_weight = cache_grad_sampling_loc + 2 * blockDim.x;
    unsigned int tid = threadIdx.x;

    // Decode the flat index into channel, head and batch coordinates.
    int _temp = index;
    const int c_col = _temp % channels;
    _temp /= channels;
    const int sampling_index = _temp;
    const int m_col = _temp % num_heads;
    _temp /= num_heads;
    _temp /= num_query;
    const int b_col = _temp;

    const scalar_t top_grad = grad_col[index];

    int data_weight_ptr = sampling_index * num_levels * num_point;
    int data_loc_w_ptr = data_weight_ptr << 1;
    const int grad_sampling_ptr = data_weight_ptr;
    // Local cursors: the kernel parameters themselves must stay untouched so
    // that a further loop iteration starts from the base pointers again.
    scalar_t *grad_sampling_loc_out =
        grad_sampling_loc + (grad_sampling_ptr << 1);
    scalar_t *grad_attn_weight_out = grad_attn_weight + grad_sampling_ptr;
    const int grad_weight_stride = 1;
    const int grad_loc_stride = 2;
    const int qid_stride = num_heads * channels;
    const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride;

    for (int l_col = 0; l_col < num_levels; ++l_col) {
      const int level_start_id = data_level_start_index[l_col];
      const int spatial_h_ptr = l_col << 1;
      const int spatial_h = data_spatial_shapes[spatial_h_ptr];
      const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1];
      const int value_ptr_offset =
          data_value_ptr_init_offset + level_start_id * qid_stride;
      const scalar_t *data_value_ptr = data_value + value_ptr_offset;
      scalar_t *grad_value_ptr = grad_value + value_ptr_offset;

      for (int p_col = 0; p_col < num_point; ++p_col) {
        // Sampling locations are stored as (w, h) pairs.
        const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr];
        const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1];
        const scalar_t weight = data_attn_weight[data_weight_ptr];

        const scalar_t h_im = loc_h * spatial_h - 0.5;
        const scalar_t w_im = loc_w * spatial_w - 0.5;
        // Clear this thread's slots so out-of-range samples contribute zero.
        *(cache_grad_sampling_loc + (threadIdx.x << 1)) = 0;
        *(cache_grad_sampling_loc + ((threadIdx.x << 1) + 1)) = 0;
        *(cache_grad_attn_weight + threadIdx.x) = 0;
        if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w) {
          ms_deform_attn_col2im_bilinear(
              data_value_ptr, spatial_h, spatial_w, num_heads, channels, h_im,
              w_im, m_col, c_col, top_grad, weight, grad_value_ptr,
              cache_grad_sampling_loc + (threadIdx.x << 1),
              cache_grad_attn_weight + threadIdx.x);
        }

        __syncthreads();
        if (tid == 0) {
          // Serial sum over all slots of the block.
          scalar_t _grad_w = cache_grad_sampling_loc[0],
                   _grad_h = cache_grad_sampling_loc[1],
                   _grad_a = cache_grad_attn_weight[0];
          int sid = 2;
          for (unsigned int t = 1; t < blockDim.x; ++t) {
            _grad_w += cache_grad_sampling_loc[sid];
            _grad_h += cache_grad_sampling_loc[sid + 1];
            _grad_a += cache_grad_attn_weight[t];
            sid += 2;
          }

          *grad_sampling_loc_out = _grad_w;
          *(grad_sampling_loc_out + 1) = _grad_h;
          *grad_attn_weight_out = _grad_a;
        }
        // Keep the slots intact until thread 0 has finished reading them.
        __syncthreads();

        data_weight_ptr += 1;
        data_loc_w_ptr += 2;
        grad_attn_weight_out += grad_weight_stride;
        grad_sampling_loc_out += grad_loc_stride;
      }
    }
  }
}

// Backward (col2im) kernel for multi-scale deformable attention that reduces
// the per-thread sampling-location / attention-weight gradients with a tree
// reduction in dynamically sized shared memory.
//
// Each loop index is decoded as
//   ((b * num_query + q) * num_heads + m) * channels + c
// and, for every (level, point) pair, the thread calls
// ms_deform_attn_col2im_bilinear, which receives grad_value and this thread's
// shared-memory slots. The slots are then halved repeatedly; when the current
// width `spre` is odd, thread 0 additionally folds in the leftover last slot,
// so blockDim.x does not have to be a power of two. Thread 0 finally stores
// the totals to grad_sampling_loc / grad_attn_weight.
//
// Requirements visible from the code:
//  * the dynamic shared memory must hold 3 * blockDim.x scalar_t values;
//  * __syncthreads() is executed inside the loop body, so every thread of a
//    block has to run the same number of iterations.
// NOTE(review): the result is written through thread 0's output pointer only,
// so presumably all threads of a block share one sampling_index -- confirm
// against the launcher.
//
// Fix: the original advanced the kernel parameters grad_sampling_loc and
// grad_attn_weight in place inside CUDA_1D_KERNEL_LOOP. That macro is defined
// outside this block; if it runs the body more than once per thread (as a
// grid-stride loop does), every later iteration started from an already
// shifted pointer. Per-iteration local output pointers are used instead.
template <typename scalar_t>
__global__ void ms_deformable_col2im_gpu_kernel_shm_reduce_v2(
    const int n, const scalar_t *grad_col, const scalar_t *data_value,
    const int64_t *data_spatial_shapes, const int64_t *data_level_start_index,
    const scalar_t *data_sampling_loc, const scalar_t *data_attn_weight,
    const int batch_size, const int spatial_size, const int num_heads,
    const int channels, const int num_levels, const int num_query,
    const int num_point, scalar_t *grad_value, scalar_t *grad_sampling_loc,
    scalar_t *grad_attn_weight) {
  CUDA_1D_KERNEL_LOOP(index, n) {
    extern __shared__ int _s[];
    scalar_t *cache_grad_sampling_loc = reinterpret_cast<scalar_t *>(_s);
    scalar_t *cache_grad_attn_weight = cache_grad_sampling_loc + 2 * blockDim.x;
    unsigned int tid = threadIdx.x;

    // Decode the flat index into channel, head and batch coordinates.
    int _temp = index;
    const int c_col = _temp % channels;
    _temp /= channels;
    const int sampling_index = _temp;
    const int m_col = _temp % num_heads;
    _temp /= num_heads;
    _temp /= num_query;
    const int b_col = _temp;

    const scalar_t top_grad = grad_col[index];

    int data_weight_ptr = sampling_index * num_levels * num_point;
    int data_loc_w_ptr = data_weight_ptr << 1;
    const int grad_sampling_ptr = data_weight_ptr;
    // Local cursors: the kernel parameters themselves must stay untouched so
    // that a further loop iteration starts from the base pointers again.
    scalar_t *grad_sampling_loc_out =
        grad_sampling_loc + (grad_sampling_ptr << 1);
    scalar_t *grad_attn_weight_out = grad_attn_weight + grad_sampling_ptr;
    const int grad_weight_stride = 1;
    const int grad_loc_stride = 2;
    const int qid_stride = num_heads * channels;
    const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride;

    for (int l_col = 0; l_col < num_levels; ++l_col) {
      const int level_start_id = data_level_start_index[l_col];
      const int spatial_h_ptr = l_col << 1;
      const int spatial_h = data_spatial_shapes[spatial_h_ptr];
      const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1];
      const int value_ptr_offset =
          data_value_ptr_init_offset + level_start_id * qid_stride;
      const scalar_t *data_value_ptr = data_value + value_ptr_offset;
      scalar_t *grad_value_ptr = grad_value + value_ptr_offset;

      for (int p_col = 0; p_col < num_point; ++p_col) {
        // Sampling locations are stored as (w, h) pairs.
        const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr];
        const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1];
        const scalar_t weight = data_attn_weight[data_weight_ptr];

        const scalar_t h_im = loc_h * spatial_h - 0.5;
        const scalar_t w_im = loc_w * spatial_w - 0.5;
        // Clear this thread's slots so out-of-range samples contribute zero.
        *(cache_grad_sampling_loc + (threadIdx.x << 1)) = 0;
        *(cache_grad_sampling_loc + ((threadIdx.x << 1) + 1)) = 0;
        *(cache_grad_attn_weight + threadIdx.x) = 0;
        if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w) {
          ms_deform_attn_col2im_bilinear(
              data_value_ptr, spatial_h, spatial_w, num_heads, channels, h_im,
              w_im, m_col, c_col, top_grad, weight, grad_value_ptr,
              cache_grad_sampling_loc + (threadIdx.x << 1),
              cache_grad_attn_weight + threadIdx.x);
        }

        __syncthreads();

        // Tree reduction; `spre` is the number of live slots before the step.
        for (unsigned int s = blockDim.x / 2, spre = blockDim.x; s > 0;
             s >>= 1, spre >>= 1) {
          if (tid < s) {
            const unsigned int xid1 = tid << 1;
            const unsigned int xid2 = (tid + s) << 1;
            cache_grad_attn_weight[tid] += cache_grad_attn_weight[tid + s];
            cache_grad_sampling_loc[xid1] += cache_grad_sampling_loc[xid2];
            cache_grad_sampling_loc[xid1 + 1] +=
                cache_grad_sampling_loc[xid2 + 1];
            // Only true for tid == 0 when spre is odd: fold in slot 2 * s.
            if (tid + (s << 1) < spre) {
              cache_grad_attn_weight[tid] +=
                  cache_grad_attn_weight[tid + (s << 1)];
              cache_grad_sampling_loc[xid1] +=
                  cache_grad_sampling_loc[xid2 + (s << 1)];
              cache_grad_sampling_loc[xid1 + 1] +=
                  cache_grad_sampling_loc[xid2 + 1 + (s << 1)];
            }
          }
          __syncthreads();
        }

        if (tid == 0) {
          *grad_sampling_loc_out = cache_grad_sampling_loc[0];
          *(grad_sampling_loc_out + 1) = cache_grad_sampling_loc[1];
          *grad_attn_weight_out = cache_grad_attn_weight[0];
        }
        // Keep slot 0 intact until thread 0 has stored it.
        __syncthreads();

        data_weight_ptr += 1;
        data_loc_w_ptr += 2;
        grad_attn_weight_out += grad_weight_stride;
        grad_sampling_loc_out += grad_loc_stride;
      }
    }
  }
}

// Backward (col2im) kernel for multi-scale deformable attention that reduces
// the per-thread gradients with a tree reduction in dynamically sized shared
// memory and then accumulates the block total into global memory with
// atomicAdd (instead of a plain store), so several blocks may contribute to
// the same output element.
//
// Each loop index is decoded as
//   ((b * num_query + q) * num_heads + m) * channels + c
// and, for every (level, point) pair, the thread calls
// ms_deform_attn_col2im_bilinear, which receives grad_value and this thread's
// shared-memory slots. When the current reduction width `spre` is odd,
// thread 0 additionally folds in the leftover last slot, so blockDim.x does
// not have to be a power of two.
//
// Requirements visible from the code:
//  * the dynamic shared memory must hold 3 * blockDim.x scalar_t values;
//  * __syncthreads() is executed inside the loop body, so every thread of a
//    block has to run the same number of iterations;
//  * results are only added, never stored, so grad_sampling_loc and
//    grad_attn_weight must already hold their initial values (presumably
//    zero -- confirm against the caller).
// NOTE(review): the block total is added through thread 0's output pointer
// only, so presumably all threads of a block share one sampling_index --
// confirm against the launcher.
//
// Fix: the original advanced the kernel parameters grad_sampling_loc and
// grad_attn_weight in place inside CUDA_1D_KERNEL_LOOP. That macro is defined
// outside this block; if it runs the body more than once per thread (as a
// grid-stride loop does), every later iteration started from an already
// shifted pointer. Per-iteration local output pointers are used instead.
template <typename scalar_t>
__global__ void ms_deformable_col2im_gpu_kernel_shm_reduce_v2_multi_blocks(
    const int n, const scalar_t *grad_col, const scalar_t *data_value,
    const int64_t *data_spatial_shapes, const int64_t *data_level_start_index,
    const scalar_t *data_sampling_loc, const scalar_t *data_attn_weight,
    const int batch_size, const int spatial_size, const int num_heads,
    const int channels, const int num_levels, const int num_query,
    const int num_point, scalar_t *grad_value, scalar_t *grad_sampling_loc,
    scalar_t *grad_attn_weight) {
  CUDA_1D_KERNEL_LOOP(index, n) {
    extern __shared__ int _s[];
    scalar_t *cache_grad_sampling_loc = reinterpret_cast<scalar_t *>(_s);
    scalar_t *cache_grad_attn_weight = cache_grad_sampling_loc + 2 * blockDim.x;
    unsigned int tid = threadIdx.x;

    // Decode the flat index into channel, head and batch coordinates.
    int _temp = index;
    const int c_col = _temp % channels;
    _temp /= channels;
    const int sampling_index = _temp;
    const int m_col = _temp % num_heads;
    _temp /= num_heads;
    _temp /= num_query;
    const int b_col = _temp;

    const scalar_t top_grad = grad_col[index];

    int data_weight_ptr = sampling_index * num_levels * num_point;
    int data_loc_w_ptr = data_weight_ptr << 1;
    const int grad_sampling_ptr = data_weight_ptr;
    // Local cursors: the kernel parameters themselves must stay untouched so
    // that a further loop iteration starts from the base pointers again.
    scalar_t *grad_sampling_loc_out =
        grad_sampling_loc + (grad_sampling_ptr << 1);
    scalar_t *grad_attn_weight_out = grad_attn_weight + grad_sampling_ptr;
    const int grad_weight_stride = 1;
    const int grad_loc_stride = 2;
    const int qid_stride = num_heads * channels;
    const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride;

    for (int l_col = 0; l_col < num_levels; ++l_col) {
      const int level_start_id = data_level_start_index[l_col];
      const int spatial_h_ptr = l_col << 1;
      const int spatial_h = data_spatial_shapes[spatial_h_ptr];
      const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1];
      const int value_ptr_offset =
          data_value_ptr_init_offset + level_start_id * qid_stride;
      const scalar_t *data_value_ptr = data_value + value_ptr_offset;
      scalar_t *grad_value_ptr = grad_value + value_ptr_offset;

      for (int p_col = 0; p_col < num_point; ++p_col) {
        // Sampling locations are stored as (w, h) pairs.
        const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr];
        const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1];
        const scalar_t weight = data_attn_weight[data_weight_ptr];

        const scalar_t h_im = loc_h * spatial_h - 0.5;
        const scalar_t w_im = loc_w * spatial_w - 0.5;
        // Clear this thread's slots so out-of-range samples contribute zero.
        *(cache_grad_sampling_loc + (threadIdx.x << 1)) = 0;
        *(cache_grad_sampling_loc + ((threadIdx.x << 1) + 1)) = 0;
        *(cache_grad_attn_weight + threadIdx.x) = 0;
        if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w) {
          ms_deform_attn_col2im_bilinear(
              data_value_ptr, spatial_h, spatial_w, num_heads, channels, h_im,
              w_im, m_col, c_col, top_grad, weight, grad_value_ptr,
              cache_grad_sampling_loc + (threadIdx.x << 1),
              cache_grad_attn_weight + threadIdx.x);
        }

        __syncthreads();

        // Tree reduction; `spre` is the number of live slots before the step.
        for (unsigned int s = blockDim.x / 2, spre = blockDim.x; s > 0;
             s >>= 1, spre >>= 1) {
          if (tid < s) {
            const unsigned int xid1 = tid << 1;
            const unsigned int xid2 = (tid + s) << 1;
            cache_grad_attn_weight[tid] += cache_grad_attn_weight[tid + s];
            cache_grad_sampling_loc[xid1] += cache_grad_sampling_loc[xid2];
            cache_grad_sampling_loc[xid1 + 1] +=
                cache_grad_sampling_loc[xid2 + 1];
            // Only true for tid == 0 when spre is odd: fold in slot 2 * s.
            if (tid + (s << 1) < spre) {
              cache_grad_attn_weight[tid] +=
                  cache_grad_attn_weight[tid + (s << 1)];
              cache_grad_sampling_loc[xid1] +=
                  cache_grad_sampling_loc[xid2 + (s << 1)];
              cache_grad_sampling_loc[xid1 + 1] +=
                  cache_grad_sampling_loc[xid2 + 1 + (s << 1)];
            }
          }
          __syncthreads();
        }

        if (tid == 0) {
          // Atomic accumulation: other blocks may target the same element.
          atomicAdd(grad_sampling_loc_out, cache_grad_sampling_loc[0]);
          atomicAdd(grad_sampling_loc_out + 1, cache_grad_sampling_loc[1]);
          atomicAdd(grad_attn_weight_out, cache_grad_attn_weight[0]);
        }
        // Keep slot 0 intact until thread 0 has consumed it.
        __syncthreads();

        data_weight_ptr += 1;
        data_loc_w_ptr += 2;
        grad_attn_weight_out += grad_weight_stride;
        grad_sampling_loc_out += grad_loc_stride;
      }
    }
  }
}

// Backward (col2im) kernel for multi-scale deformable attention that uses no
// shared memory: every thread hands the global grad_sampling_loc /
// grad_attn_weight pointers for its sample directly to
// ms_deform_attn_col2im_bilinear_gm.
// NOTE(review): since many threads (one per channel) target the same output
// element, ms_deform_attn_col2im_bilinear_gm presumably accumulates with
// atomics -- it is defined outside this block, confirm there.
//
// Each loop index is decoded as
//   ((b * num_query + q) * num_heads + m) * channels + c
// and the thread iterates over every (level, point) pair of that sample,
// skipping sampling positions that fall outside (-1, spatial_h) x
// (-1, spatial_w).
//
// Fix: the original advanced the kernel parameters grad_sampling_loc and
// grad_attn_weight in place inside CUDA_1D_KERNEL_LOOP. That macro is defined
// outside this block; if it runs the body more than once per thread (as a
// grid-stride loop does), every later iteration started from an already
// shifted pointer. Per-iteration local output pointers are used instead.
template <typename scalar_t>
__global__ void ms_deformable_col2im_gpu_kernel_gm(
    const int n, const scalar_t *grad_col, const scalar_t *data_value,
    const int64_t *data_spatial_shapes, const int64_t *data_level_start_index,
    const scalar_t *data_sampling_loc, const scalar_t *data_attn_weight,
    const int batch_size, const int spatial_size, const int num_heads,
    const int channels, const int num_levels, const int num_query,
    const int num_point, scalar_t *grad_value, scalar_t *grad_sampling_loc,
    scalar_t *grad_attn_weight) {
  CUDA_1D_KERNEL_LOOP(index, n) {
    // Decode the flat index into channel, head and batch coordinates.
    int _temp = index;
    const int c_col = _temp % channels;
    _temp /= channels;
    const int sampling_index = _temp;
    const int m_col = _temp % num_heads;
    _temp /= num_heads;
    _temp /= num_query;
    const int b_col = _temp;

    const scalar_t top_grad = grad_col[index];

    int data_weight_ptr = sampling_index * num_levels * num_point;
    int data_loc_w_ptr = data_weight_ptr << 1;
    const int grad_sampling_ptr = data_weight_ptr;
    // Local cursors: the kernel parameters themselves must stay untouched so
    // that a further loop iteration starts from the base pointers again.
    scalar_t *grad_sampling_loc_out =
        grad_sampling_loc + (grad_sampling_ptr << 1);
    scalar_t *grad_attn_weight_out = grad_attn_weight + grad_sampling_ptr;
    const int grad_weight_stride = 1;
    const int grad_loc_stride = 2;
    const int qid_stride = num_heads * channels;
    const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride;

    for (int l_col = 0; l_col < num_levels; ++l_col) {
      const int level_start_id = data_level_start_index[l_col];
      const int spatial_h_ptr = l_col << 1;
      const int spatial_h = data_spatial_shapes[spatial_h_ptr];
      const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1];
      const int value_ptr_offset =
          data_value_ptr_init_offset + level_start_id * qid_stride;
      const scalar_t *data_value_ptr = data_value + value_ptr_offset;
      scalar_t *grad_value_ptr = grad_value + value_ptr_offset;

      for (int p_col = 0; p_col < num_point; ++p_col) {
        // Sampling locations are stored as (w, h) pairs.
        const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr];
        const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1];
        const scalar_t weight = data_attn_weight[data_weight_ptr];

        const scalar_t h_im = loc_h * spatial_h - 0.5;
        const scalar_t w_im = loc_w * spatial_w - 0.5;
        if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w) {
          ms_deform_attn_col2im_bilinear_gm(
              data_value_ptr, spatial_h, spatial_w, num_heads, channels, h_im,
              w_im, m_col, c_col, top_grad, weight, grad_value_ptr,
              grad_sampling_loc_out, grad_attn_weight_out);
        }
        data_weight_ptr += 1;
        data_loc_w_ptr += 2;
        grad_attn_weight_out += grad_weight_stride;
        grad_sampling_loc_out += grad_loc_stride;
      }
    }
  }
}
#endif  // DEFORM_ATTN_CUDA_KERNEL


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/nms_cuda_kernel.cuh
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef NMS_CUDA_KERNEL_CUH
#define NMS_CUDA_KERNEL_CUH

#include <float.h>
#ifdef MMCV_WITH_TRT
#include "common_cuda_helper.hpp"
#else  // MMCV_WITH_TRT
#ifdef MMCV_USE_PARROTS
#include "parrots_cuda_helper.hpp"
#else  // MMCV_USE_PARROTS
#include "pytorch_cuda_helper.hpp"
#endif  // MMCV_USE_PARROTS
#endif  // MMCV_WITH_TRT

// Threads per block for the NMS kernel: the number of bits in one
// unsigned long long mask word.
const int threadsPerBlock = 8 * sizeof(unsigned long long int);

// Returns true when the IoU of boxes `a` and `b` exceeds `threshold`.
// Both boxes are read as four floats (x1, y1, x2, y2); `offset` is added to
// every width/height before areas are formed. The comparison is written as
// inter > threshold * union, so no division is performed.
__device__ inline bool devIoU(float const *const a, float const *const b,
                              const int offset, const float threshold) {
  // Corners of the intersection rectangle.
  const float x_lo = fmaxf(a[0], b[0]);
  const float x_hi = fminf(a[2], b[2]);
  const float y_lo = fmaxf(a[1], b[1]);
  const float y_hi = fminf(a[3], b[3]);

  // Extents are clamped at zero so disjoint boxes give an empty overlap.
  const float overlap_w = fmaxf(x_hi - x_lo + offset, 0.f);
  const float overlap_h = fmaxf(y_hi - y_lo + offset, 0.f);
  const float inter_area = overlap_w * overlap_h;

  const float area_a = (a[2] - a[0] + offset) * (a[3] - a[1] + offset);
  const float area_b = (b[2] - b[0] + offset) * (b[3] - b[1] + offset);
  const float union_area = area_a + area_b - inter_area;

  return inter_area > threshold * union_area;
}

// Builds the pairwise suppression bit mask used by NMS.
//
// The boxes are split into tiles of threadsPerBlock boxes. For every tile pair
// (row_start, col_start) with row_start <= col_start, thread `tid` compares
// row box `tid` against every box of the column tile and stores one
// unsigned long long whose bit i is set when devIoU reports that the row box
// and column box i overlap by more than iou_threshold. On the diagonal tile
// only boxes with a larger index than the row box are compared.
//
// dev_boxes: n_boxes * 4 floats (x1, y1, x2, y2 as read by devIoU).
// dev_mask:  written at [box_index * blocks + col_start], where
//            blocks = ceil(n_boxes / threadsPerBlock).
//            NOTE(review): assumes the caller allocates `blocks` words per
//            box -- confirm against the launcher.
//
// CUDA_2D_KERNEL_BLOCK_LOOP is defined outside this block. The changes below
// only matter when it runs the body more than once per block (i.e. the grid
// is smaller than blocks x blocks); with a full grid the behaviour is
// unchanged:
//  * `return` for a lower-triangle tile made the block abandon all its later
//    tiles; it is now `continue`.
//  * the shared tile is reused by the next iteration, so a barrier is added
//    at the end of the body to keep slow readers ahead of the next writers.
//  * the mask row stride was gridDim.y, which differs from the number of
//    column tiles that col_start ranges over when the grid is smaller;
//    `blocks` is used instead.
__global__ void nms_cuda(const int n_boxes, const float iou_threshold,
                         const int offset, const float *dev_boxes,
                         unsigned long long *dev_mask) {
  int blocks = (n_boxes + threadsPerBlock - 1) / threadsPerBlock;
  CUDA_2D_KERNEL_BLOCK_LOOP(col_start, blocks, row_start, blocks) {
    const int tid = threadIdx.x;

    // Tile indices are uniform across the block, so skipping here cannot
    // split the block around the barriers below.
    if (row_start > col_start) continue;

    // The last tile may be only partially filled.
    const int row_size =
        fminf(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
    const int col_size =
        fminf(n_boxes - col_start * threadsPerBlock, threadsPerBlock);

    // Stage the column tile in shared memory, one box per thread.
    __shared__ float block_boxes[threadsPerBlock * 4];
    if (tid < col_size) {
      block_boxes[tid * 4 + 0] =
          dev_boxes[(threadsPerBlock * col_start + tid) * 4 + 0];
      block_boxes[tid * 4 + 1] =
          dev_boxes[(threadsPerBlock * col_start + tid) * 4 + 1];
      block_boxes[tid * 4 + 2] =
          dev_boxes[(threadsPerBlock * col_start + tid) * 4 + 2];
      block_boxes[tid * 4 + 3] =
          dev_boxes[(threadsPerBlock * col_start + tid) * 4 + 3];
    }
    __syncthreads();

    if (tid < row_size) {
      const int cur_box_idx = threadsPerBlock * row_start + tid;
      const float *cur_box = dev_boxes + cur_box_idx * 4;
      int i = 0;
      unsigned long long int t = 0;
      int start = 0;
      if (row_start == col_start) {
        // Diagonal tile: only compare against boxes after this one.
        start = tid + 1;
      }
      for (i = start; i < col_size; i++) {
        if (devIoU(cur_box, block_boxes + i * 4, offset, iou_threshold)) {
          t |= 1ULL << i;
        }
      }
      dev_mask[cur_box_idx * blocks + col_start] = t;
    }
    // Do not let the next iteration overwrite block_boxes while other threads
    // of the block are still reading it.
    __syncthreads();
  }
}
#endif  // NMS_CUDA_KERNEL_CUH


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/nms_rotated_cuda.cuh
================================================
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
// modified from
// https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/csrc/nms_rotated/nms_rotated_cuda.cu
#ifndef NMS_ROTATED_CUDA_CUH
#define NMS_ROTATED_CUDA_CUH

#ifdef MMCV_USE_PARROTS
#include "parrots_cuda_helper.hpp"
#else
#include "pytorch_cuda_helper.hpp"
#endif
#include "box_iou_rotated_utils.hpp"

// Integer division of x by y, rounded up for positive operands.
__host__ __device__ inline int divideUP(const int x, const int y) {
  const int padded = x + y - 1;
  return padded / y;
}

namespace {
// Threads per block for the rotated NMS kernel: the number of bits in one
// unsigned long long mask word.
const int threadsPerBlock = 8 * sizeof(unsigned long long);
}  // namespace

// Builds the pairwise suppression bit mask for rotated-box NMS.
// Modified from torchvision's nms_cuda_kernel.
//
// Block (blockIdx.y, blockIdx.x) handles the tile pair (row_start, col_start);
// each tile holds threadsPerBlock boxes. Thread t compares row box t against
// every box of the column tile with single_box_iou_rotated and stores one
// unsigned long long whose bit i is set when the IoU with column box i exceeds
// iou_threshold. On the diagonal tile only boxes with a larger index than the
// row box are compared.
//
// dev_boxes: n_boxes records. With multi_label == 1 each record has 6 values,
//            otherwise 5 (x_center, y_center, width, height, angle_degrees).
//            NOTE(review): the 6th value is presumably a class label -- it is
//            only copied here; confirm how single_box_iou_rotated uses it.
// dev_mask:  written at [box_index * ceil(n_boxes / threadsPerBlock) +
//            col_start].
//
// Fix: the multi_label branch indexed its shared tile with a stride of 6 but
// declared it with threadsPerBlock * 5 elements, so threads near the end of
// the block wrote past the end of the array. The two formerly duplicated
// branches now share one code path and a tile sized for the larger record.
template <typename T>
__global__ void nms_rotated_cuda_kernel(const int n_boxes,
                                        const float iou_threshold,
                                        const T* dev_boxes,
                                        unsigned long long* dev_mask,
                                        const int multi_label) {
  // Number of values per box record in dev_boxes.
  const int box_dim = (multi_label == 1) ? 6 : 5;

  const int row_start = blockIdx.y;
  const int col_start = blockIdx.x;

  // The upper-triangle early-out of the horizontal kernel is intentionally
  // left disabled, as in the original:
  // if (row_start > col_start) return;

  // The last tile may be only partially filled.
  const int row_size =
      min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
  const int col_size =
      min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);

  // Stage the column tile in shared memory, one box record per thread. The
  // array is sized for 6-value records, the larger of the two layouts.
  __shared__ T block_boxes[threadsPerBlock * 6];
  if (threadIdx.x < col_size) {
    const T* src =
        dev_boxes + (threadsPerBlock * col_start + threadIdx.x) * box_dim;
    T* dst = block_boxes + threadIdx.x * box_dim;
    for (int k = 0; k < box_dim; ++k) {
      dst[k] = src[k];
    }
  }
  __syncthreads();

  if (threadIdx.x < row_size) {
    const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
    const T* cur_box = dev_boxes + cur_box_idx * box_dim;
    int i = 0;
    unsigned long long t = 0;
    int start = 0;
    if (row_start == col_start) {
      // Diagonal tile: only compare against boxes after this one.
      start = threadIdx.x + 1;
    }
    for (i = start; i < col_size; i++) {
      // Instead of devIoU used by original horizontal nms, here
      // we use the single_box_iou_rotated function from
      // box_iou_rotated_utils.h
      if (single_box_iou_rotated<T>(cur_box, block_boxes + i * box_dim, 0) >
          iou_threshold) {
        t |= 1ULL << i;
      }
    }
    const int col_blocks = divideUP(n_boxes, threadsPerBlock);
    dev_mask[cur_box_idx * col_blocks + col_start] = t;
  }
}

#endif


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/parrots_cudawarpfunction.cuh
================================================
/*
 * Copyright (c) 2019, SenseTime.
 */

#ifndef INCLUDE_PARROTS_DARRAY_CUDAWARPFUNCTION_CUH_
#define INCLUDE_PARROTS_DARRAY_CUDAWARPFUNCTION_CUH_

#ifndef __CUDACC__
#error cudawarpfunction.cuh should only be included by .cu files
#endif
#include <cuda.h>

#include <parrots/foundation/common.hpp>

#ifdef PARROTS_USE_HALF
#include <cuda_fp16.h>
#endif
#ifdef __CUDA_ARCH__
#define CUDA_INTRINSIC_FUNC(Expr) Expr
#else
#define CUDA_INTRINSIC_FUNC(Expr)
#endif

#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300

#ifdef PARROTS_USE_HALF

#if CUDA_VERSION < 9000

// float16 overload of __shfl (this branch is compiled only when
// CUDA_VERSION < 9000): forwards var.y, srcLane and width to __shfl and
// returns the result. CUDA_INTRINSIC_FUNC expands to nothing when
// __CUDA_ARCH__ is undefined, so the body is empty in that compilation pass.
__device__ inline float16 __shfl(float16 var, int srcLane, int width) {
  CUDA_INTRINSIC_FUNC(return __shfl(var.y, srcLane, width););
}

// float16 overload of __shfl_up (CUDA_VERSION < 9000 only): forwards var.y,
// delta and width to __shfl_up and returns the result. The body is empty when
// __CUDA_ARCH__ is undefined (see CUDA_INTRINSIC_FUNC).
__device__ inline float16 __shfl_up(float16 var, unsigned delta, int width) {
  CUDA_INTRINSIC_FUNC(return __shfl_up(var.y, delta, width););
}

// float16 overload of __shfl_down (CUDA_VERSION < 9000 only): forwards var.y,
// delta and width to __shfl_down and returns the result. The body is empty
// when __CUDA_ARCH__ is undefined (see CUDA_INTRINSIC_FUNC).
__device__ inline float16 __shfl_down(float16 var, unsigned delta, int width) {
  CUDA_INTRINSIC_FUNC(return __shfl_down(var.y, delta, width););
}

// float16 overload of __shfl_xor (CUDA_VERSION < 9000 only): forwards var.y,
// laneMask and width to __shfl_xor and returns the result. The body is empty
// when __CUDA_ARCH__ is undefined (see CUDA_INTRINSIC_FUNC).
__device__ inline float16 __shfl_xor(float16 var, int laneMask, int width) {
  CUDA_INTRINSIC_FUNC(return __shfl_xor(var.y, laneMask, width););
}

#else  // CUDA_VERSION >= 9000

// float16 overload of __shfl_sync (CUDA_VERSION >= 9000): shuffles var.y via
// __shfl_sync and returns a float16 whose y member holds the result (only y
// is assigned here). width defaults to warpSize. The body is empty when
// __CUDA_ARCH__ is undefined (see CUDA_INTRINSIC_FUNC).
__device__ inline float16 __shfl_sync(unsigned mask, float16 var, int srcLane,
                                      int width = warpSize) {
  CUDA_INTRINSIC_FUNC(float16 r; r.y = __shfl_sync(mask, var.y, srcLane, width);
                      return r;);
}

// float16 overload of __shfl_up_sync (CUDA_VERSION >= 9000): shuffles var.y
// via __shfl_up_sync and returns a float16 whose y member holds the result
// (only y is assigned here). width defaults to warpSize. The body is empty
// when __CUDA_ARCH__ is undefined (see CUDA_INTRINSIC_FUNC).
__device__ inline float16 __shfl_up_sync(unsigned mask, float16 var,
                                         unsigned delta, int width = warpSize) {
  CUDA_INTRINSIC_FUNC(
      float16 r; r.y = __shfl_up_sync(mask, var.y, delta, width); return r;);
}

// float16 overload of __shfl_down_sync (CUDA_VERSION >= 9000): shuffles var.y
// via __shfl_down_sync and returns a float16 whose y member holds the result
// (only y is assigned here). width defaults to warpSize. The body is empty
// when __CUDA_ARCH__ is undefined (see CUDA_INTRINSIC_FUNC).
__device__ inline float16 __shfl_down_sync(unsigned mask, float16 var,
                                           unsigned delta,
                                           int width = warpSize) {
  CUDA_INTRINSIC_FUNC(
      float16 r; r.y = __shfl_down_sync(mask, var.y, delta, width); return r;);
}

// float16 overload of __shfl_xor_sync (CUDA_VERSION >= 9000): shuffles var.y
// via __shfl_xor_sync and returns a float16 whose y member holds the result
// (only y is assigned here). The body is empty when __CUDA_ARCH__ is
// undefined (see CUDA_INTRINSIC_FUNC).
//
// width now defaults to warpSize, matching the sibling float16 overloads
// __shfl_sync, __shfl_up_sync and __shfl_down_sync; existing four-argument
// calls are unaffected.
__device__ inline float16 __shfl_xor_sync(unsigned mask, float16 var,
                                          int laneMask, int width = warpSize) {
  CUDA_INTRINSIC_FUNC(float16 r;
                      r.y = __shfl_xor_sync(mask, var.y, laneMask, width);
                      return r;);
}

#endif  // CUDA_VERSION < 9000

#endif  // PARROTS_USE_HALF

// warp shuffle interface with a dummy mask
#if CUDA_VERSION < 9000

// Compatibility shim for CUDA_VERSION < 9000: offers the __shfl_sync
// signature but forwards to __shfl; `mask` is accepted and ignored. The body
// is empty when __CUDA_ARCH__ is undefined (see CUDA_INTRINSIC_FUNC).
template <typename T>
__device__ inline T __shfl_sync(unsigned mask, T var, int srcLane,
                                int width = warpSize) {
  CUDA_INTRINSIC_FUNC(return __shfl(var, srcLane, width););
}

// Compatibility shim for CUDA_VERSION < 9000: offers the __shfl_up_sync
// signature but forwards to __shfl_up; `mask` is accepted and ignored. The
// body is empty when __CUDA_ARCH__ is undefined (see CUDA_INTRINSIC_FUNC).
template <typename T>
__device__ inline T __shfl_up_sync(unsigned mask, T var, unsigned delta,
                                   int width = warpSize) {
  CUDA_INTRINSIC_FUNC(return __shfl_up(var, delta, width););
}

// Compatibility shim for CUDA_VERSION < 9000: offers the __shfl_down_sync
// signature but forwards to __shfl_down; `mask` is accepted and ignored. The
// body is empty when __CUDA_ARCH__ is undefined (see CUDA_INTRINSIC_FUNC).
template <typename T>
__device__ inline T __shfl_down_sync(unsigned mask, T var, unsigned delta,
                                     int width = warpSize) {
  CUDA_INTRINSIC_FUNC(return __shfl_down(var, delta, width););
}

// Compatibility shim for CUDA_VERSION < 9000: offers the __shfl_xor_sync
// signature but forwards to __shfl_xor; `mask` is accepted and ignored. The
// body is empty when __CUDA_ARCH__ is undefined (see CUDA_INTRINSIC_FUNC).
template <typename T>
__device__ inline T __shfl_xor_sync(unsigned mask, T var, int laneMask,
                                    int width = warpSize) {
  CUDA_INTRINSIC_FUNC(return __shfl_xor(var, laneMask, width););
}

#endif  // CUDA_VERSION < 9000

#endif  // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300

#endif  // INCLUDE_PARROTS_DARRAY_CUDAWARPFUNCTION_CUH_


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/points_in_boxes_cuda_kernel.cuh
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef POINT_IN_BOXES_CUDA_KERNEL_CUH
#define POINT_IN_BOXES_CUDA_KERNEL_CUH

#ifdef MMCV_USE_PARROTS
#include "parrots_cuda_helper.hpp"
#else
#include "pytorch_cuda_helper.hpp"
#endif

// Rotates the offset (shift_x, shift_y) by the angle -rz and writes the
// rotated coordinates to local_x / local_y.
template <typename T>
__device__ inline void lidar_to_local_coords(T shift_x, T shift_y, T rz,
                                             T &local_x, T &local_y) {
  const T rot_cos = cos(-rz);
  const T rot_sin = sin(-rz);
  local_x = shift_x * rot_cos + shift_y * (-rot_sin);
  local_y = shift_x * rot_sin + shift_y * rot_cos;
}

// Tests whether a point lies strictly inside a rotated 3D box.
//   pt:    (x, y, z)
//   box3d: (cx, cy, cz, x_size, y_size, z_size, rz) in LiDAR coordinates,
//          where cz is the bottom centre of the box.
// Returns 1 when the point is inside and 0 otherwise. local_x / local_y
// receive the point's coordinates in the box frame; they are only written
// when the point passes the height test.
template <typename T>
__device__ inline int check_pt_in_box3d(const T *pt, const T *box3d, T &local_x,
                                        T &local_y) {
  const T px = pt[0];
  const T py = pt[1];
  const T pz = pt[2];

  const T cx = box3d[0];
  const T cy = box3d[1];
  T center_z = box3d[2];
  const T x_size = box3d[3];
  const T y_size = box3d[4];
  const T z_size = box3d[5];
  const T rz = box3d[6];

  // cz in box3d is the bottom centre; move it to the geometric centre.
  center_z += z_size / 2.0;

  // Reject points above or below the box before doing the rotation.
  if (fabsf(pz - center_z) > z_size / 2.0) return 0;

  lidar_to_local_coords(px - cx, py - cy, rz, local_x, local_y);

  // Strict inequalities: points exactly on a side face count as outside.
  const bool within_x = (local_x > -x_size / 2.0) & (local_x < x_size / 2.0);
  const bool within_y = (local_y > -y_size / 2.0) & (local_y < y_size / 2.0);
  return within_x & within_y;
}

// For every point, records the index of the first box that contains it.
//
// params boxes: (B, N, 7) [x, y, z, x_size, y_size, z_size, rz] in LiDAR
//   coordinate, z is the bottom center; boxes are expected not to overlap.
// params pts: (B, npoints, 3) [x, y, z] in LiDAR coordinate.
// params box_idx_of_points: (B, npoints), default -1. An entry is written
//   only when a containing box is found, so the caller must pre-fill it.
//
// The batch index comes from blockIdx.y; points are iterated with
// CUDA_1D_KERNEL_LOOP.
//
// Fix: the original advanced the kernel parameters boxes, pts and
// box_idx_of_points in place inside CUDA_1D_KERNEL_LOOP. That macro is
// defined outside this block; if it runs the body more than once per thread
// (as a grid-stride loop does), the offsets accumulated and later iterations
// read and wrote the wrong elements. Per-iteration local pointers are used
// instead.
template <typename T>
__global__ void points_in_boxes_part_forward_cuda_kernel(
    int batch_size, int boxes_num, int pts_num, const T *boxes, const T *pts,
    int *box_idx_of_points) {
  int bs_idx = blockIdx.y;
  CUDA_1D_KERNEL_LOOP(pt_idx, pts_num) {
    if (bs_idx >= batch_size) return;

    const T *cur_boxes = boxes + bs_idx * boxes_num * 7;
    const T *cur_pt = pts + bs_idx * pts_num * 3 + pt_idx * 3;
    int *cur_box_idx = box_idx_of_points + bs_idx * pts_num + pt_idx;

    T local_x = 0, local_y = 0;
    int cur_in_flag = 0;
    for (int k = 0; k < boxes_num; k++) {
      cur_in_flag =
          check_pt_in_box3d(cur_pt, cur_boxes + k * 7, local_x, local_y);
      if (cur_in_flag) {
        // Stop at the first containing box.
        cur_box_idx[0] = k;
        break;
      }
    }
  }
}

// For every (point, box) pair, sets a flag when the point lies in the box.
//
// params boxes: (B, N, 7) [x, y, z, x_size, y_size, z_size, rz] in LiDAR
//   coordinate, z is the bottom center.
// params pts: (B, npoints, 3) [x, y, z] in LiDAR coordinate.
// params box_idx_of_points: indexed here as (B, npoints, N); entry
//   [b, p, k] is set to 1 when point p lies inside box k. Other entries are
//   left untouched, so the caller must pre-fill the buffer.
//
// The batch index comes from blockIdx.y; points are iterated with
// CUDA_1D_KERNEL_LOOP.
//
// Fix: the original advanced the kernel parameters boxes, pts and
// box_idx_of_points in place inside CUDA_1D_KERNEL_LOOP. That macro is
// defined outside this block; if it runs the body more than once per thread
// (as a grid-stride loop does), the offsets accumulated and later iterations
// read and wrote the wrong elements. Per-iteration local pointers are used
// instead.
template <typename T>
__global__ void points_in_boxes_all_forward_cuda_kernel(
    int batch_size, int boxes_num, int pts_num, const T *boxes, const T *pts,
    int *box_idx_of_points) {
  int bs_idx = blockIdx.y;
  CUDA_1D_KERNEL_LOOP(pt_idx, pts_num) {
    if (bs_idx >= batch_size) return;

    const T *cur_boxes = boxes + bs_idx * boxes_num * 7;
    const T *cur_pt = pts + bs_idx * pts_num * 3 + pt_idx * 3;
    int *cur_flags =
        box_idx_of_points + bs_idx * pts_num * boxes_num + pt_idx * boxes_num;

    T local_x = 0, local_y = 0;
    for (int k = 0; k < boxes_num; k++) {
      const int cur_in_flag =
          check_pt_in_box3d(cur_pt, cur_boxes + k * 7, local_x, local_y);
      if (cur_in_flag) {
        cur_flags[k] = 1;
      }
    }
  }
}

#endif  // POINT_IN_BOXES_CUDA_KERNEL_CUH


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/points_in_polygons_cuda_kernel.cuh
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef POINTS_IN_POLYGONS_CUDA_KERNEL_CUH
#define POINTS_IN_POLYGONS_CUDA_KERNEL_CUH

#ifdef MMCV_USE_PARROTS
#include "parrots_cuda_helper.hpp"
#else
#include "pytorch_cuda_helper.hpp"
#endif

// 2D point with single-precision coordinates.
struct point {
  float x;  // horizontal coordinate
  float y;  // vertical coordinate
};

// Crossing-number test of every point against every quadrilateral.
//
// nthreads    : number of (point, polygon) pairs to evaluate.
// vertex1     : points, read as 2 values (x, y) per point.
// vertex2     : polygons, read as 8 values (x0, y0, ..., x3, y3) per polygon.
// rows        : not used by the kernel body.
// cols        : number of polygons; a flat index maps to
//               point = index / cols, polygon = index % cols.
// inside_flag : output, one value per pair; 1.0 when the number of counted
//               edge crossings to the right of the point is odd, else 0.0.
//
// Coordinates are copied into `float` fields before the test, whatever
// scalar_t is. When the point coincides with a polygon vertex or lies exactly
// on a crossing edge, the edge scan stops early and the flag is decided by the
// crossings counted up to that edge.
template <typename scalar_t>
__global__ void points_in_polygons_forward_cuda_kernel(
    const int nthreads, const scalar_t *vertex1, const scalar_t *vertex2,
    const int rows, const int cols, scalar_t *inside_flag) {
  CUDA_1D_KERNEL_LOOP(index, nthreads) {
    int row = index / cols;
    int col = index % cols;

    const scalar_t *offset_vertex1 = vertex1 + row * 2;
    const scalar_t *offset_vertex2 = vertex2 + col * 8;

    point point_[1];
    point polygon[4];

    point_[0].x = offset_vertex1[0];
    point_[0].y = offset_vertex1[1];

    polygon[0].x = offset_vertex2[0];
    polygon[0].y = offset_vertex2[1];
    polygon[1].x = offset_vertex2[2];
    polygon[1].y = offset_vertex2[3];
    polygon[2].x = offset_vertex2[4];
    polygon[2].y = offset_vertex2[5];
    polygon[3].x = offset_vertex2[6];
    polygon[3].y = offset_vertex2[7];

    // The query point does not change while the edges are scanned.
    const float px = point_[0].x;
    const float py = point_[0].y;

    int nCross = 0;
    int i, j;
    float sx, sy, tx, ty, x;
    // Walk the four edges (j -> i): (3,0), (0,1), (1,2), (2,3).
    for (i = 0, j = 3; i < 4; j = i, i++) {
      sx = polygon[i].x;
      sy = polygon[i].y;
      tx = polygon[j].x;
      ty = polygon[j].y;

      // Edge lies entirely above or below the horizontal line through py.
      if (py < min(sy, ty)) continue;
      if (py > max(sy, ty)) continue;

      if ((sx == px && sy == py) || (tx == px && ty == py)) {
        // Point coincides with an end point of this edge.
        break;
      } else {
        // The condition guarantees sy != ty, so the division below is safe.
        if ((sy < py && ty >= py) || (sy >= py && ty < py)) {
          // x coordinate where the edge meets the horizontal line y == py.
          x = sx + (py - sy) * (tx - sx) / (ty - sy);
          if (x == px) {
            // Point lies exactly on this edge.
            break;
          }
          if (x > px) {
            nCross++;
          }
        }
      }
    }
    if (nCross % 2 == 1) {
      inside_flag[index] = 1.0;
    } else {
      inside_flag[index] = 0.0;
    }
    // Deliberately no `return` here: returning would end the thread after its
    // first index and leave every further index assigned to it by
    // CUDA_1D_KERNEL_LOOP unwritten.
  }
}

#endif  // POINTS_IN_POLYGONS_CUDA_KERNEL_CUH


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/psamask_cuda_kernel.cuh
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef PSAMASK_CUDA_KERNEL_CUH
#define PSAMASK_CUDA_KERNEL_CUH

#ifdef MMCV_USE_PARROTS
#include "parrots_cuda_helper.hpp"
#else
#include "pytorch_cuda_helper.hpp"
#endif

// CUDA: grid stride looping
// Expands to a `for` header that declares the int loop variable `i`, starts
// it at blockIdx.x * blockDim.x + threadIdx.x and advances it by
// blockDim.x * gridDim.x while i < n. The statement or block that follows the
// macro becomes the loop body.
// The #ifndef guard keeps an existing definition of the macro untouched.
#ifndef CUDA_KERNEL_LOOP
#define CUDA_KERNEL_LOOP(i, n)                                 \
  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \
       i += blockDim.x * gridDim.x)
#endif

// PSA mask forward pass, "collect" variant.
//
// Every loop index addresses one feature-map location (n, h, w). For that
// location the mask window is clipped so that it stays inside the feature map,
// and each mask entry of the clipped window is copied into the buffer.
//
// Flat layouts implied by the index arithmetic:
//   mask_data   : [n][h_mask * w_mask][h_feature][w_feature]
//   buffer_data : [n][h_feature * w_feature][h_feature][w_feature]
// The buffer channel is the feature position the mask entry points to; the
// buffer spatial position is (h, w). Buffer elements outside the clipped
// window are not written by this kernel.
template <typename T>
__global__ void psamask_collect_forward_cuda(
    const int nthreads, const int h_feature, const int w_feature,
    const int h_mask, const int w_mask, const int half_h_mask,
    const int half_w_mask, const T* mask_data, T* buffer_data) {
  CUDA_KERNEL_LOOP(index, nthreads) {
    // Flat index -> (sample, row, column) of the feature map.
    const int col = index % w_feature;
    const int row = (index / w_feature) % h_feature;
    const int sample = index / w_feature / h_feature;

    // Clipped mask window [row_begin, row_end) x [col_begin, col_end), given
    // in mask coordinates.
    const int row_begin = max(0, half_h_mask - row);
    const int row_end = min(h_mask, h_feature + half_h_mask - row);
    const int col_begin = max(0, half_w_mask - col);
    const int col_end = min(w_mask, w_feature + half_w_mask - col);

    for (int mask_r = row_begin; mask_r < row_end; mask_r++) {
      // Feature row addressed by this mask row.
      const int feat_r = mask_r + row - half_h_mask;
      for (int mask_c = col_begin; mask_c < col_end; mask_c++) {
        // Feature column addressed by this mask column.
        const int feat_c = mask_c + col - half_w_mask;

        const int src =
            ((sample * h_mask * w_mask + mask_r * w_mask + mask_c) *
                 h_feature +
             row) *
                w_feature +
            col;
        const int dst =
            (sample * h_feature * w_feature + feat_r * w_feature + feat_c) *
                h_feature * w_feature +
            row * w_feature + col;

        buffer_data[dst] = mask_data[src];
      }
    }
  }
}

// PSA mask forward pass, "distribute" variant.
//
// Every loop index addresses one feature-map location (n, h, w). For that
// location the mask window is clipped so that it stays inside the feature map,
// and each mask entry of the clipped window is copied into the buffer.
//
// Flat layouts implied by the index arithmetic:
//   mask_data   : [n][h_mask * w_mask][h_feature][w_feature]
//   buffer_data : [n][h_feature * w_feature][h_feature][w_feature]
// Compared with the collect variant the two buffer roles are swapped: the
// buffer channel is (h, w) and the buffer spatial position is the feature
// position the mask entry points to. Buffer elements outside the clipped
// window are not written by this kernel.
template <typename T>
__global__ void psamask_distribute_forward_cuda(
    const int nthreads, const int h_feature, const int w_feature,
    const int h_mask, const int w_mask, const int half_h_mask,
    const int half_w_mask, const T* mask_data, T* buffer_data) {
  CUDA_KERNEL_LOOP(index, nthreads) {
    // Flat index -> (sample, row, column) of the feature map.
    const int col = index % w_feature;
    const int row = (index / w_feature) % h_feature;
    const int sample = index / w_feature / h_feature;

    // Clipped mask window [row_begin, row_end) x [col_begin, col_end), given
    // in mask coordinates.
    const int row_begin = max(0, half_h_mask - row);
    const int row_end = min(h_mask, h_feature + half_h_mask - row);
    const int col_begin = max(0, half_w_mask - col);
    const int col_end = min(w_mask, w_feature + half_w_mask - col);

    for (int mask_r = row_begin; mask_r < row_end; mask_r++) {
      // Feature row addressed by this mask row.
      const int feat_r = mask_r + row - half_h_mask;
      for (int mask_c = col_begin; mask_c < col_end; mask_c++) {
        // Feature column addressed by this mask column.
        const int feat_c = mask_c + col - half_w_mask;

        const int src =
            ((sample * h_mask * w_mask + mask_r * w_mask + mask_c) *
                 h_feature +
             row) *
                w_feature +
            col;
        const int dst =
            (sample * h_feature * w_feature + row * w_feature + col) *
                h_feature * w_feature +
            feat_r * w_feature + feat_c;

        buffer_data[dst] = mask_data[src];
      }
    }
  }
}

// PSA mask backward pass, "collect" variant.
//
// Reverse of the collect forward copy: for every feature-map location
// (n, h, w) and every mask entry of the clipped window, the buffer gradient at
// [n][target feature position][h][w] is copied into the mask gradient at
// [n][mask entry][h][w].
//
// Flat layouts implied by the index arithmetic:
//   buffer_diff : [n][h_feature * w_feature][h_feature][w_feature]
//   mask_diff   : [n][h_mask * w_mask][h_feature][w_feature]
// Mask-gradient elements outside the clipped window are not written here.
template <typename T>
__global__ void psamask_collect_backward_cuda(
    const int nthreads, const int h_feature, const int w_feature,
    const int h_mask, const int w_mask, const int half_h_mask,
    const int half_w_mask, const T* buffer_diff, T* mask_diff) {
  CUDA_KERNEL_LOOP(index, nthreads) {
    // Flat index -> (sample, row, column) of the feature map.
    const int col = index % w_feature;
    const int row = (index / w_feature) % h_feature;
    const int sample = index / w_feature / h_feature;

    // Clipped mask window [row_begin, row_end) x [col_begin, col_end), given
    // in mask coordinates.
    const int row_begin = max(0, half_h_mask - row);
    const int row_end = min(h_mask, h_feature + half_h_mask - row);
    const int col_begin = max(0, half_w_mask - col);
    const int col_end = min(w_mask, w_feature + half_w_mask - col);

    for (int mask_r = row_begin; mask_r < row_end; mask_r++) {
      // Feature row addressed by this mask row.
      const int feat_r = mask_r + row - half_h_mask;
      for (int mask_c = col_begin; mask_c < col_end; mask_c++) {
        // Feature column addressed by this mask column.
        const int feat_c = mask_c + col - half_w_mask;

        const int mask_pos =
            ((sample * h_mask * w_mask + mask_r * w_mask + mask_c) *
                 h_feature +
             row) *
                w_feature +
            col;
        const int buffer_pos =
            (sample * h_feature * w_feature + feat_r * w_feature + feat_c) *
                h_feature * w_feature +
            row * w_feature + col;

        mask_diff[mask_pos] = buffer_diff[buffer_pos];
      }
    }
  }
}

// PSA mask backward pass, "distribute" variant.
//
// Reverse of the distribute forward copy: for every feature-map location
// (n, h, w) and every mask entry of the clipped window, the buffer gradient at
// [n][h, w][target feature position] is copied into the mask gradient at
// [n][mask entry][h][w].
//
// Flat layouts implied by the index arithmetic:
//   buffer_diff : [n][h_feature * w_feature][h_feature][w_feature]
//   mask_diff   : [n][h_mask * w_mask][h_feature][w_feature]
// Mask-gradient elements outside the clipped window are not written here.
template <typename T>
__global__ void psamask_distribute_backward_cuda(
    const int nthreads, const int h_feature, const int w_feature,
    const int h_mask, const int w_mask, const int half_h_mask,
    const int half_w_mask, const T* buffer_diff, T* mask_diff) {
  CUDA_KERNEL_LOOP(index, nthreads) {
    // Flat index -> (sample, row, column) of the feature map.
    const int col = index % w_feature;
    const int row = (index / w_feature) % h_feature;
    const int sample = index / w_feature / h_feature;

    // Clipped mask window [row_begin, row_end) x [col_begin, col_end), given
    // in mask coordinates.
    const int row_begin = max(0, half_h_mask - row);
    const int row_end = min(h_mask, h_feature + half_h_mask - row);
    const int col_begin = max(0, half_w_mask - col);
    const int col_end = min(w_mask, w_feature + half_w_mask - col);

    for (int mask_r = row_begin; mask_r < row_end; mask_r++) {
      // Feature row addressed by this mask row.
      const int feat_r = mask_r + row - half_h_mask;
      for (int mask_c = col_begin; mask_c < col_end; mask_c++) {
        // Feature column addressed by this mask column.
        const int feat_c = mask_c + col - half_w_mask;

        const int mask_pos =
            ((sample * h_mask * w_mask + mask_r * w_mask + mask_c) *
                 h_feature +
             row) *
                w_feature +
            col;
        const int buffer_pos =
            (sample * h_feature * w_feature + row * w_feature + col) *
                h_feature * w_feature +
            feat_r * w_feature + feat_c;

        mask_diff[mask_pos] = buffer_diff[buffer_pos];
      }
    }
  }
}

#endif  // PSAMASK_CUDA_KERNEL_CUH


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/riroi_align_rotated_cuda_kernel.cuh
================================================
// Modified from
// https://github.com/csuhan/ReDet/blob/master/mmdet/ops/riroi_align/src/riroi_align_kernel.cu
#ifndef RIROI_ALIGN_ROTATED_CUDA_KERNEL_CUH
#define RIROI_ALIGN_ROTATED_CUDA_KERNEL_CUH

#include <float.h>
#ifdef MMCV_USE_PARROTS
#include "parrots_cuda_helper.hpp"
#else  // MMCV_USE_PARROTS
#include "pytorch_cuda_helper.hpp"
#endif  // MMCV_USE_PARROTS

/*** Forward ***/
// Rotation-invariant rotated RoI Align, forward pass.
//
// One loop index produces one output element; the flat index is unravelled as
// (n, c, o, ph, pw) with pw varying fastest, where o is the output orientation
// channel. Each RoI record holds 6 values:
// [batch index, center x, center y, width, height, angle]; the angle is used
// directly with cos/sin and divided by 2 * pi, i.e. it is in radians.
//
// The output is the average of bilinear samples taken on a rotated grid inside
// the bin. Every sample blends two neighbouring orientation planes of the
// input with weights r_var = 1 - l_var and l_var, where l_var is the
// fractional part of the angle expressed in orientation steps.
template <typename scalar_t>
__global__ void riroi_align_rotated_forward_cuda_kernel(
    const int nthreads, const scalar_t *bottom_data,
    const scalar_t *bottom_rois, const scalar_t spatial_scale,
    const int num_samples, const bool clockwise, const int channels,
    const int height, const int width, const int pooled_height,
    const int pooled_width, const int num_orientations, scalar_t *top_data) {
  CUDA_1D_KERNEL_LOOP(index, nthreads) {
    // Unravel the flat output index, fastest dimension first.
    int rest = index;
    const int out_x = rest % pooled_width;
    rest /= pooled_width;
    const int out_y = rest % pooled_height;
    rest /= pooled_height;
    const int orient = rest % num_orientations;
    rest /= num_orientations;
    const int chan = rest % channels;
    const int roi_id = rest / channels;

    const scalar_t *roi = bottom_rois + roi_id * 6;
    const int batch_id = roi[0];

    // Scale the RoI to feature-map coordinates without any rounding; this
    // implementation detail is critical.
    const scalar_t center_x = roi[1] * spatial_scale;
    const scalar_t center_y = roi[2] * spatial_scale;
    scalar_t roi_w = roi[3] * spatial_scale;
    scalar_t roi_h = roi[4] * spatial_scale;
    scalar_t angle = roi[5];
    // Malformed RoIs are forced to be at least 1x1.
    roi_w = max(roi_w, (scalar_t)1.);
    roi_h = max(roi_h, (scalar_t)1.);
    const scalar_t bin_h =
        static_cast<scalar_t>(roi_h) / static_cast<scalar_t>(pooled_height);
    const scalar_t bin_w =
        static_cast<scalar_t>(roi_w) / static_cast<scalar_t>(pooled_width);

    // Angle expressed in orientation steps: integer part selects the start
    // channel, fractional part gives the blending weights.
    const scalar_t orient_pos = angle * num_orientations / (2 * M_PI);
    int start_o = floorf(orient_pos);
    const scalar_t l_var = orient_pos - (scalar_t)start_o;
    const scalar_t r_var = 1.0 - l_var;
    // Wrap the start channel into range.
    start_o = (start_o + num_orientations) % num_orientations;
    // Source orientation channel and its successor.
    const int src_o = (orient - start_o + num_orientations) % num_orientations;
    const int src_o_next = (src_o + 1 + num_orientations) % num_orientations;

    const int plane_base =
        batch_id * channels * num_orientations + chan * num_orientations;
    const scalar_t *plane_cur =
        bottom_data + (plane_base + src_o) * height * width;
    const scalar_t *plane_next =
        bottom_data + (plane_base + src_o_next) * height * width;

    // Sampling grid per bin: fixed when num_samples > 0, otherwise adaptive.
    const int grid_h = (num_samples > 0)
                           ? num_samples
                           : ceilf(roi_h / pooled_height);  // e.g., = 2
    const int grid_w =
        (num_samples > 0) ? num_samples : ceilf(roi_w / pooled_width);

    // If clockwise, the angle needs to be reversed. This happens after the
    // orientation channels were chosen from the original angle.
    if (clockwise) {
      angle = -angle;
    }
    // Sample positions are first computed relative to the RoI center; the
    // rotation and translation are applied afterwards.
    const scalar_t start_y = -roi_h / 2.0;
    const scalar_t start_x = -roi_w / 2.0;
    const scalar_t cos_a = cos(angle);
    const scalar_t sin_a = sin(angle);

    // Number of samples averaged per bin, never less than 1.
    const scalar_t count = max(grid_h * grid_w, 1);  // e.g. = 4

    scalar_t acc = 0.;
    for (int sy = 0; sy < grid_h; sy++) {  // e.g., sy = 0, 1
      const scalar_t yy = start_y + out_y * bin_h +
                          static_cast<scalar_t>(sy + .5f) * bin_h /
                              static_cast<scalar_t>(grid_h);  // e.g., 0.5, 1.5
      for (int sx = 0; sx < grid_w; sx++) {
        const scalar_t xx = start_x + out_x * bin_w +
                            static_cast<scalar_t>(sx + .5f) * bin_w /
                                static_cast<scalar_t>(grid_w);

        // Rotate by the angle (counterclockwise) around the center and
        // translate.
        scalar_t y = yy * cos_a - xx * sin_a + center_y;
        scalar_t x = yy * sin_a + xx * cos_a + center_x;

        const scalar_t v_cur = bilinear_interpolate<scalar_t>(
            plane_cur, height, width, y, x, index);
        const scalar_t v_next = bilinear_interpolate<scalar_t>(
            plane_next, height, width, y, x, index);
        acc += r_var * v_cur + l_var * v_next;
      }
    }
    acc /= count;

    top_data[index] = acc;
  }
}

/*** Backward ***/
// Rotation-invariant rotated RoI Align, backward pass.
//
// One loop index handles one output-gradient element; the flat index is
// unravelled as (n, c, o, ph, pw) with pw varying fastest. The same sampling
// grid and orientation blending as in the forward kernel are rebuilt, and the
// output gradient is scattered with atomicAdd onto the four bilinear corners
// of every sample, in both blended orientation planes of bottom_diff.
template <typename scalar_t>
__global__ void riroi_align_rotated_backward_cuda_kernel(
    const int nthreads, const scalar_t *top_diff, const scalar_t *bottom_rois,
    const scalar_t spatial_scale, const int num_samples, const bool clockwise,
    const int channels, const int height, const int width,
    const int pooled_height, const int pooled_width, const int num_orientations,
    scalar_t *bottom_diff) {
  CUDA_1D_KERNEL_LOOP(index, nthreads) {
    // Unravel the flat output index, fastest dimension first.
    int rest = index;
    const int out_x = rest % pooled_width;
    rest /= pooled_width;
    const int out_y = rest % pooled_height;
    rest /= pooled_height;
    const int orient = rest % num_orientations;
    rest /= num_orientations;
    const int chan = rest % channels;
    const int roi_id = rest / channels;

    // RoI record: [batch index, center x, center y, width, height, angle].
    const scalar_t *roi = bottom_rois + roi_id * 6;
    const int batch_id = roi[0];

    // Scale to feature-map coordinates; do not round.
    const scalar_t center_x = roi[1] * spatial_scale;
    const scalar_t center_y = roi[2] * spatial_scale;
    scalar_t roi_w = roi[3] * spatial_scale;
    scalar_t roi_h = roi[4] * spatial_scale;
    scalar_t angle = roi[5];
    // Malformed RoIs are forced to be at least 1x1.
    roi_w = max(roi_w, (scalar_t)1.);
    roi_h = max(roi_h, (scalar_t)1.);

    const scalar_t bin_h =
        static_cast<scalar_t>(roi_h) / static_cast<scalar_t>(pooled_height);
    const scalar_t bin_w =
        static_cast<scalar_t>(roi_w) / static_cast<scalar_t>(pooled_width);

    // Angle expressed in orientation steps: integer part selects the start
    // channel, fractional part gives the blending weights.
    const scalar_t orient_pos = angle * num_orientations / (2 * M_PI);
    int start_o = floorf(orient_pos);
    const scalar_t l_var = orient_pos - (scalar_t)start_o;
    const scalar_t r_var = 1.0 - l_var;
    // Wrap the start channel into range.
    start_o = (start_o + num_orientations) % num_orientations;
    // Orientation channel that receives the gradient, and its successor.
    const int src_o = (orient - start_o + num_orientations) % num_orientations;
    const int src_o_next = (src_o + 1 + num_orientations) % num_orientations;

    const int plane_base =
        batch_id * channels * num_orientations + chan * num_orientations;
    scalar_t *grad_cur = bottom_diff + (plane_base + src_o) * height * width;
    scalar_t *grad_next =
        bottom_diff + (plane_base + src_o_next) * height * width;

    // Gradient of the output bin handled by this index.
    const int top_base = (roi_id * channels * num_orientations +
                          chan * num_orientations + orient) *
                         pooled_height * pooled_width;
    const scalar_t *top_plane = top_diff + top_base;
    const scalar_t grad_out = top_plane[out_y * pooled_width + out_x];

    // Sampling grid per bin: fixed when num_samples > 0, otherwise adaptive.
    const int grid_h = (num_samples > 0)
                           ? num_samples
                           : ceilf(roi_h / pooled_height);  // e.g., = 2
    const int grid_w =
        (num_samples > 0) ? num_samples : ceilf(roi_w / pooled_width);

    // If clockwise, the angle needs to be reversed. This happens after the
    // orientation channels were chosen from the original angle.
    if (clockwise) {
      angle = -angle;
    }
    // Sample positions are first computed relative to the RoI center; the
    // rotation and translation are applied afterwards.
    const scalar_t start_y = -roi_h / 2.0;
    const scalar_t start_x = -roi_w / 2.0;
    const scalar_t cos_a = cos(angle);
    const scalar_t sin_a = sin(angle);

    // Number of samples that were averaged per bin.
    const scalar_t count = grid_h * grid_w;  // e.g. = 4

    for (int sy = 0; sy < grid_h; sy++) {  // e.g., sy = 0, 1
      const scalar_t yy = start_y + out_y * bin_h +
                          static_cast<scalar_t>(sy + .5f) * bin_h /
                              static_cast<scalar_t>(grid_h);  // e.g., 0.5, 1.5
      for (int sx = 0; sx < grid_w; sx++) {
        const scalar_t xx = start_x + out_x * bin_w +
                            static_cast<scalar_t>(sx + .5f) * bin_w /
                                static_cast<scalar_t>(grid_w);

        // Rotate by the angle around the center and translate.
        scalar_t y = yy * cos_a - xx * sin_a + center_y;
        scalar_t x = yy * sin_a + xx * cos_a + center_x;

        // Bilinear weights and corner indices of this sample.
        scalar_t w1, w2, w3, w4;
        int x_low, x_high, y_low, y_high;
        bilinear_interpolate_gradient<scalar_t>(height, width, y, x, w1, w2, w3,
                                                w4, x_low, x_high, y_low,
                                                y_high, index);

        // Share of the bin gradient that belongs to each corner.
        scalar_t g1 = grad_out * w1 / count;
        scalar_t g2 = grad_out * w2 / count;
        scalar_t g3 = grad_out * w3 / count;
        scalar_t g4 = grad_out * w4 / count;

        // A negative corner index means there is nothing to accumulate.
        if (x_low < 0 || x_high < 0 || y_low < 0 || y_high < 0) {
          continue;
        }

        const int top_left = y_low * width + x_low;
        const int top_right = y_low * width + x_high;
        const int bottom_left = y_high * width + x_low;
        const int bottom_right = y_high * width + x_high;

        atomicAdd(grad_cur + top_left, g1 * r_var);
        atomicAdd(grad_cur + top_right, g2 * r_var);
        atomicAdd(grad_cur + bottom_left, g3 * r_var);
        atomicAdd(grad_cur + bottom_right, g4 * r_var);

        atomicAdd(grad_next + top_left, g1 * l_var);
        atomicAdd(grad_next + top_right, g2 * l_var);
        atomicAdd(grad_next + bottom_left, g3 * l_var);
        atomicAdd(grad_next + bottom_right, g4 * l_var);
      }  // sx
    }    // sy
  }      // CUDA_1D_KERNEL_LOOP
}  // RiRoIAlignBackward

#endif  // RIROI_ALIGN_ROTATED_CUDA_KERNEL_CUH


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/roi_align_cuda_kernel.cuh
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef ROI_ALIGN_CUDA_KERNEL_CUH
#define ROI_ALIGN_CUDA_KERNEL_CUH

#include <float.h>
#ifdef MMCV_WITH_TRT
#include "common_cuda_helper.hpp"
#else  // MMCV_WITH_TRT
#ifdef MMCV_USE_PARROTS
#include "parrots_cuda_helper.hpp"
#else  // MMCV_USE_PARROTS
#include "pytorch_cuda_helper.hpp"
#endif  // MMCV_USE_PARROTS
#endif  // MMCV_WITH_TRT

/*** Forward ***/
// RoI Align, forward pass.
//
// One loop index produces one output element; the flat index is unravelled as
// (n, c, ph, pw) with pw varying fastest. Each RoI record holds 5 values:
// [batch index, x1, y1, x2, y2].
//
// pool_mode == 1: output is the average of the bilinear samples of the bin.
// pool_mode == 0: output is the maximum sample; the sample coordinates of the
//                 maximum are stored in argmax_y / argmax_x (-1 when no sample
//                 exceeded the initial -FLT_MAX).
// Any other pool_mode writes nothing.
template <typename T>
__global__ void roi_align_forward_cuda_kernel(
    const int nthreads, const T* input, const T* rois, T* output, T* argmax_y,
    T* argmax_x, const int pooled_height, const int pooled_width,
    const T spatial_scale, const int sampling_ratio,
    const int pool_mode,  // 0 - max pool, 1 - avg pool
    const bool aligned, const int channels, const int height, const int width) {
  CUDA_1D_KERNEL_LOOP(index, nthreads) {
    // Unravel the flat output index, fastest dimension first.
    int rest = index;
    const int out_x = rest % pooled_width;
    rest /= pooled_width;
    const int out_y = rest % pooled_height;
    rest /= pooled_height;
    const int chan = rest % channels;
    const int roi_id = rest / channels;

    const T* roi = rois + roi_id * 5;
    const int batch_id = roi[0];

    // Scale the RoI to feature-map coordinates without any rounding; this
    // implementation detail is critical. `aligned` shifts the box by half a
    // pixel.
    const T shift = aligned ? (T)0.5 : (T)0.0;
    const T x_begin = roi[1] * spatial_scale - shift;
    const T y_begin = roi[2] * spatial_scale - shift;
    const T x_end = roi[3] * spatial_scale - shift;
    const T y_end = roi[4] * spatial_scale - shift;

    T roi_w = x_end - x_begin;
    T roi_h = y_end - y_begin;
    if (!aligned) {  // for backward-compatibility only
      roi_w = max(roi_w, (T)1.);
      roi_h = max(roi_h, (T)1.);
    }

    const T bin_h = static_cast<T>(roi_h) / static_cast<T>(pooled_height);
    const T bin_w = static_cast<T>(roi_w) / static_cast<T>(pooled_width);

    const T* plane = input + (batch_id * channels + chan) * height * width;

    // Sampling grid per bin: fixed when sampling_ratio > 0, otherwise
    // adaptive.
    const int grid_h =
        (sampling_ratio > 0)
            ? sampling_ratio
            : static_cast<int>(ceilf(roi_h / pooled_height));
    const int grid_w =
        (sampling_ratio > 0)
            ? sampling_ratio
            : static_cast<int>(ceilf(roi_w / pooled_width));

    if (pool_mode == 1) {
      // Average pooling inside the bin; the divisor is never less than 1.
      const T count = max(grid_h * grid_w, 1);
      T acc = 0.;
      for (int sy = 0; sy < grid_h; sy++) {
        const T y = y_begin + out_y * bin_h +
                    static_cast<T>(sy + .5f) * bin_h / static_cast<T>(grid_h);
        for (int sx = 0; sx < grid_w; sx++) {
          const T x = x_begin + out_x * bin_w +
                      static_cast<T>(sx + .5f) * bin_w / static_cast<T>(grid_w);
          const T sample =
              bilinear_interpolate(plane, height, width, y, x, index);
          acc += sample;
        }
      }
      output[index] = acc / count;
    } else if (pool_mode == 0) {
      // Max pooling inside the bin, remembering where the maximum was sampled.
      T best = -FLT_MAX;
      T best_y = -1.f, best_x = -1.f;
      for (int sy = 0; sy < grid_h; sy++) {
        const T y = y_begin + out_y * bin_h +
                    static_cast<T>(sy + .5f) * bin_h / static_cast<T>(grid_h);
        for (int sx = 0; sx < grid_w; sx++) {
          const T x = x_begin + out_x * bin_w +
                      static_cast<T>(sx + .5f) * bin_w / static_cast<T>(grid_w);
          const T sample =
              bilinear_interpolate(plane, height, width, y, x, index);
          if (sample > best) {
            best = sample;
            best_y = y;
            best_x = x;
          }
        }
      }
      output[index] = best;
      argmax_y[index] = best_y;
      argmax_x[index] = best_x;
    }
  }
}

/*** Backward ***/
// RoI Align, backward pass.
//
// One loop index handles one output-gradient element; the flat index is
// unravelled as (n, c, ph, pw) with pw varying fastest.
//
// pool_mode == 0: the gradient is scattered onto the four bilinear corners of
//                 the stored argmax sample; nothing happens when the stored y
//                 equals -1.
// pool_mode == 1: the sampling grid of the bin is rebuilt and the gradient,
//                 divided by the number of samples, is scattered onto the
//                 corners of every sample.
// All accumulation into grad_input uses atomicAdd.
template <typename T>
__global__ void roi_align_backward_cuda_kernel(
    const int nthreads, const T* grad_output, const T* rois, const T* argmax_y,
    const T* argmax_x, T* grad_input, const int pooled_height,
    const int pooled_width, const T spatial_scale, const int sampling_ratio,
    const int pool_mode,  // 0 - max pool, 1 - avg pool
    const bool aligned, const int channels, const int height, const int width) {
  CUDA_1D_KERNEL_LOOP(index, nthreads) {
    // Unravel the flat output index, fastest dimension first.
    int rest = index;
    const int out_x = rest % pooled_width;
    rest /= pooled_width;
    const int out_y = rest % pooled_height;
    rest /= pooled_height;
    const int chan = rest % channels;
    const int roi_id = rest / channels;

    const T grad_out = grad_output[index];

    // RoI record: [batch index, x1, y1, x2, y2].
    const T* roi = rois + roi_id * 5;
    const int batch_id = roi[0];
    T* grad_plane =
        grad_input + ((batch_id * channels + chan) * height * width);

    if (pool_mode == 0) {
      // Max pooling: only the argmax sample receives gradient.
      T y = argmax_y[index], x = argmax_x[index];
      if (y != -1.f) {
        T w1, w2, w3, w4;
        int x_low, x_high, y_low, y_high;
        bilinear_interpolate_gradient(height, width, y, x, w1, w2, w3, w4,
                                      x_low, x_high, y_low, y_high, index);

        // A negative corner index means there is nothing to accumulate.
        if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0) {
          atomicAdd(grad_plane + y_low * width + x_low, grad_out * w1);
          atomicAdd(grad_plane + y_low * width + x_high, grad_out * w2);
          atomicAdd(grad_plane + y_high * width + x_low, grad_out * w3);
          atomicAdd(grad_plane + y_high * width + x_high, grad_out * w4);
        }
      }
    } else if (pool_mode == 1) {
      // Scale the RoI to feature-map coordinates without any rounding; this
      // implementation detail is critical.
      const T shift = aligned ? (T)0.5 : (T)0.0;
      const T x_begin = roi[1] * spatial_scale - shift;
      const T y_begin = roi[2] * spatial_scale - shift;
      const T x_end = roi[3] * spatial_scale - shift;
      const T y_end = roi[4] * spatial_scale - shift;

      T roi_w = x_end - x_begin;
      T roi_h = y_end - y_begin;
      if (!aligned) {  // for backward-compatibility only
        roi_w = max(roi_w, (T)1.);
        roi_h = max(roi_h, (T)1.);
      }

      const T bin_h = static_cast<T>(roi_h) / static_cast<T>(pooled_height);
      const T bin_w = static_cast<T>(roi_w) / static_cast<T>(pooled_width);

      // Sampling grid per bin: fixed when sampling_ratio > 0, otherwise
      // adaptive.
      const int grid_h =
          (sampling_ratio > 0)
              ? sampling_ratio
              : static_cast<int>(ceilf(roi_h / pooled_height));
      const int grid_w =
          (sampling_ratio > 0)
              ? sampling_ratio
              : static_cast<int>(ceilf(roi_w / pooled_width));

      // Number of samples that were averaged per bin.
      const T count = grid_h * grid_w;  // e.g. = 4

      for (int sy = 0; sy < grid_h; sy++) {
        const T y = y_begin + out_y * bin_h +
                    static_cast<T>(sy + .5f) * bin_h / static_cast<T>(grid_h);
        for (int sx = 0; sx < grid_w; sx++) {
          const T x = x_begin + out_x * bin_w +
                      static_cast<T>(sx + .5f) * bin_w / static_cast<T>(grid_w);

          T w1, w2, w3, w4;
          int x_low, x_high, y_low, y_high;
          bilinear_interpolate_gradient(height, width, y, x, w1, w2, w3, w4,
                                        x_low, x_high, y_low, y_high, index);

          // A negative corner index means there is nothing to accumulate.
          if (x_low < 0 || x_high < 0 || y_low < 0 || y_high < 0) {
            continue;
          }
          atomicAdd(grad_plane + y_low * width + x_low, grad_out * w1 / count);
          atomicAdd(grad_plane + y_low * width + x_high,
                    grad_out * w2 / count);
          atomicAdd(grad_plane + y_high * width + x_low,
                    grad_out * w3 / count);
          atomicAdd(grad_plane + y_high * width + x_high,
                    grad_out * w4 / count);
        }
      }
    }
  }
}

#endif  // ROI_ALIGN_CUDA_KERNEL_CUH


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/roi_align_rotated_cuda_kernel.cuh
================================================
// Modified from
// https://github.com/facebookresearch/detectron2/tree/master/detectron2/layers/csrc/ROIAlignRotated
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
#ifndef ROI_ALIGN_ROTATED_CUDA_KERNEL_CUH
#define ROI_ALIGN_ROTATED_CUDA_KERNEL_CUH

#include <float.h>
#ifdef MMCV_WITH_TRT
#include "common_cuda_helper.hpp"
#else  // MMCV_WITH_TRT
#ifdef MMCV_USE_PARROTS
#include "parrots_cuda_helper.hpp"
#else  // MMCV_USE_PARROTS
#include "pytorch_cuda_helper.hpp"
#endif  // MMCV_USE_PARROTS
#endif  // MMCV_WITH_TRT

/*** Forward ***/
// Rotated RoI Align, forward pass.
//
// One loop index produces one output element; the flat index is unravelled as
// (n, c, ph, pw) with pw varying fastest. Each RoI record holds 6 values:
// [batch index, center x, center y, width, height, angle]; the angle is passed
// straight to cos/sin, i.e. it is in radians.
//
// The output is the average of bilinear samples taken on a grid that is laid
// out relative to the RoI center, rotated by the angle and then translated to
// the center.
template <typename scalar_t>
__global__ void roi_align_rotated_forward_cuda_kernel(
    const int nthreads, const scalar_t *bottom_data,
    const scalar_t *bottom_rois, const scalar_t spatial_scale,
    const int sample_num, const bool aligned, const bool clockwise,
    const int channels, const int height, const int width,
    const int pooled_height, const int pooled_width, scalar_t *top_data) {
  CUDA_1D_KERNEL_LOOP(index, nthreads) {
    // Unravel the flat output index, fastest dimension first.
    int rest = index;
    const int out_x = rest % pooled_width;
    rest /= pooled_width;
    const int out_y = rest % pooled_height;
    rest /= pooled_height;
    const int chan = rest % channels;
    const int roi_id = rest / channels;

    const scalar_t *roi = bottom_rois + roi_id * 6;
    const int batch_id = roi[0];

    // Scale the RoI to feature-map coordinates without any rounding; this
    // implementation detail is critical. `aligned` shifts the center by half
    // a pixel.
    const scalar_t shift = aligned ? (scalar_t)0.5 : (scalar_t)0.0;
    const scalar_t center_x = roi[1] * spatial_scale - shift;
    const scalar_t center_y = roi[2] * spatial_scale - shift;
    scalar_t roi_w = roi[3] * spatial_scale;
    scalar_t roi_h = roi[4] * spatial_scale;
    scalar_t angle = roi[5];
    if (clockwise) {
      angle = -angle;  // If clockwise, the angle needs to be reversed.
    }
    if (!aligned) {  // for backward-compatibility only
      // Malformed RoIs are forced to be at least 1x1.
      roi_w = max(roi_w, (scalar_t)1.);
      roi_h = max(roi_h, (scalar_t)1.);
    }
    const scalar_t bin_h =
        static_cast<scalar_t>(roi_h) / static_cast<scalar_t>(pooled_height);
    const scalar_t bin_w =
        static_cast<scalar_t>(roi_w) / static_cast<scalar_t>(pooled_width);

    const scalar_t *plane =
        bottom_data + (batch_id * channels + chan) * height * width;

    // Sampling grid per bin: fixed when sample_num > 0, otherwise adaptive.
    const int grid_h = (sample_num > 0)
                           ? sample_num
                           : ceilf(roi_h / pooled_height);  // e.g., = 2
    const int grid_w =
        (sample_num > 0) ? sample_num : ceilf(roi_w / pooled_width);

    // Sample positions are first computed relative to the RoI center; the
    // rotation and translation are applied afterwards.
    const scalar_t start_y = -roi_h / 2.0;
    const scalar_t start_x = -roi_w / 2.0;
    const scalar_t cos_a = cos(angle);
    const scalar_t sin_a = sin(angle);

    // Number of samples averaged per bin, never less than 1.
    const scalar_t count = max(grid_h * grid_w, 1);  // e.g. = 4

    scalar_t acc = 0.;
    for (int sy = 0; sy < grid_h; sy++) {  // e.g., sy = 0, 1
      const scalar_t yy = start_y + out_y * bin_h +
                          static_cast<scalar_t>(sy + .5f) * bin_h /
                              static_cast<scalar_t>(grid_h);  // e.g., 0.5, 1.5
      for (int sx = 0; sx < grid_w; sx++) {
        const scalar_t xx = start_x + out_x * bin_w +
                            static_cast<scalar_t>(sx + .5f) * bin_w /
                                static_cast<scalar_t>(grid_w);

        // Rotate by the angle (counterclockwise) around the center and
        // translate.
        scalar_t y = yy * cos_a - xx * sin_a + center_y;
        scalar_t x = yy * sin_a + xx * cos_a + center_x;

        const scalar_t sample = bilinear_interpolate<scalar_t>(
            plane, height, width, y, x, index);
        acc += sample;
      }
    }
    acc /= count;

    top_data[index] = acc;
  }
}

/*** Backward ***/
template <typename scalar_t>
__global__ void roi_align_rotated_backward_cuda_kernel(
    const int nthreads, const scalar_t *top_diff, const scalar_t *bottom_rois,
    const scalar_t spatial_scale, const int sample_num, const bool aligned,
    const bool clockwise, const int channels, const int height, const int width,
    const int pooled_height, const int pooled_width, scalar_t *bottom_diff) {
  // Backward pass of rotated RoIAlign. Every pooled output element
  // distributes its incoming gradient over the input pixels that its sampling
  // points interpolate from, using atomicAdd on bottom_diff.
  CUDA_1D_KERNEL_LOOP(index, nthreads) {
    // Unflatten index into the pooled-output coordinates (n, c, ph, pw).
    const int pw = index % pooled_width;
    const int ph = (index / pooled_width) % pooled_height;
    const int c = (index / pooled_width / pooled_height) % channels;
    const int n = index / pooled_width / pooled_height / channels;

    // One RoI record is 6 consecutive values:
    // [batch index, center x, center y, width, height, angle].
    const scalar_t *roi = bottom_rois + n * 6;
    const int batch_ind = roi[0];

    // Coordinates are not rounded; `aligned` subtracts half a pixel.
    const scalar_t half_pixel = aligned ? (scalar_t)0.5 : (scalar_t)0.0;
    const scalar_t center_w = roi[1] * spatial_scale - half_pixel;
    const scalar_t center_h = roi[2] * spatial_scale - half_pixel;
    scalar_t extent_w = roi[3] * spatial_scale;
    scalar_t extent_h = roi[4] * spatial_scale;
    // The angle is fed to cos/sin as-is (no degree conversion is applied).
    scalar_t angle = roi[5];
    if (clockwise) {
      angle = -angle;  // clockwise boxes rotate the opposite way
    }
    if (!aligned) {
      // Legacy behaviour: malformed RoIs are widened to at least 1x1.
      extent_w = max(extent_w, (scalar_t)1.);
      extent_h = max(extent_h, (scalar_t)1.);
    }
    const scalar_t bin_h = static_cast<scalar_t>(extent_h) /
                           static_cast<scalar_t>(pooled_height);
    const scalar_t bin_w =
        static_cast<scalar_t>(extent_w) / static_cast<scalar_t>(pooled_width);

    // Input-gradient plane for this (batch, channel).
    scalar_t *grad_plane =
        bottom_diff + (batch_ind * channels + c) * height * width;

    // Gradient arriving at this pooled bin.
    const int pooled_plane = (n * channels + c) * pooled_height * pooled_width;
    const scalar_t *pooled_grad = top_diff + pooled_plane;
    const scalar_t bin_grad = pooled_grad[ph * pooled_width + pw];

    // Number of sampling points per bin along each axis.
    const int grid_h = (sample_num > 0)
                           ? sample_num
                           : ceilf(extent_h / pooled_height);  // e.g., = 2
    const int grid_w =
        (sample_num > 0) ? sample_num : ceilf(extent_w / pooled_width);

    // Offsets are measured from the RoI center; the center translation is
    // applied after rotating.
    const scalar_t start_h = -extent_h / 2.0;
    const scalar_t start_w = -extent_w / 2.0;
    const scalar_t cos_t = cos(angle);
    const scalar_t sin_t = sin(angle);

    // Average pooling: every sample contributes 1 / samples_per_bin.
    const scalar_t samples_per_bin = grid_h * grid_w;  // e.g. = 4

    for (int iy = 0; iy < grid_h; iy++) {  // e.g., iy = 0, 1
      const scalar_t yy =
          start_h + ph * bin_h +
          static_cast<scalar_t>(iy + .5f) * bin_h /
              static_cast<scalar_t>(grid_h);  // e.g., 0.5, 1.5
      for (int ix = 0; ix < grid_w; ix++) {
        const scalar_t xx = start_w + pw * bin_w +
                            static_cast<scalar_t>(ix + .5f) * bin_w /
                                static_cast<scalar_t>(grid_w);

        // Rotate the offset by the angle, then translate to the RoI center.
        scalar_t y = yy * cos_t - xx * sin_t + center_h;
        scalar_t x = yy * sin_t + xx * cos_t + center_w;

        scalar_t w1, w2, w3, w4;
        int x_low, x_high, y_low, y_high;
        bilinear_interpolate_gradient<scalar_t>(height, width, y, x, w1, w2, w3,
                                                w4, x_low, x_high, y_low,
                                                y_high, index);

        // Negative neighbour indices mean there is nothing to scatter to.
        if (x_low < 0 || x_high < 0 || y_low < 0 || y_high < 0) continue;

        const scalar_t g1 = bin_grad * w1 / samples_per_bin;
        const scalar_t g2 = bin_grad * w2 / samples_per_bin;
        const scalar_t g3 = bin_grad * w3 / samples_per_bin;
        const scalar_t g4 = bin_grad * w4 / samples_per_bin;

        atomicAdd(grad_plane + y_low * width + x_low, g1);
        atomicAdd(grad_plane + y_low * width + x_high, g2);
        atomicAdd(grad_plane + y_high * width + x_low, g3);
        atomicAdd(grad_plane + y_high * width + x_high, g4);
      }  // ix
    }    // iy
  }      // CUDA_1D_KERNEL_LOOP
}  // RoIAlignBackward

#endif  // ROI_ALIGN_ROTATED_CUDA_KERNEL_CUH


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/roi_pool_cuda_kernel.cuh
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef ROI_POOL_CUDA_KERNEL_CUH
#define ROI_POOL_CUDA_KERNEL_CUH

#ifdef MMCV_USE_PARROTS
#include "parrots_cuda_helper.hpp"
#else
#include "pytorch_cuda_helper.hpp"
#endif

// Forward pass of max RoI pooling.
//
// For every pooled output element (n, c, ph, pw) the kernel takes the maximum
// of the input feature map over the corresponding bin of RoI n and records
// the flat (h * width + w) position of that maximum in `argmax`.
//
//   nthreads       number of pooled output elements to process
//   input          feature maps, indexed as (batch, channels, height, width)
//   rois           5 values per RoI: [batch index, x1, y1, x2, y2]
//   output         pooled maxima, one per processed element
//   argmax         optional (may be NULL) position of each maximum, -1 if the
//                  bin is empty or the RoI is degenerate
//   spatial_scale  factor applied to RoI coordinates before pooling
template <typename T>
__global__ void roi_pool_forward_cuda_kernel(
    const int nthreads, const T* input, const T* rois, T* output, int* argmax,
    const int pooled_height, const int pooled_width, const T spatial_scale,
    const int channels, const int height, const int width) {
  CUDA_1D_KERNEL_LOOP(index, nthreads) {
    // (n, c, ph, pw) is an element in the pooled output
    int pw = index % pooled_width;
    int ph = (index / pooled_width) % pooled_height;
    int c = (index / pooled_width / pooled_height) % channels;
    int n = index / pooled_width / pooled_height / channels;

    const T* offset_rois = rois + n * 5;
    int roi_batch_ind = offset_rois[0];
    // calculate the roi region on feature maps
    T roi_x1 = offset_rois[1] * spatial_scale;
    T roi_y1 = offset_rois[2] * spatial_scale;
    T roi_x2 = (offset_rois[3] + 1) * spatial_scale;
    T roi_y2 = (offset_rois[4] + 1) * spatial_scale;

    // Degenerate RoIs (non-positive width or height) pool nothing. Write the
    // same result an empty bin gets (value 0, argmax -1) instead of leaving
    // the slots untouched, so the backward kernel never reads a stale argmax
    // and back-propagates into an arbitrary pixel.
    T roi_w = roi_x2 - roi_x1;
    T roi_h = roi_y2 - roi_y1;
    if (roi_w <= 0 || roi_h <= 0) {
      output[index] = static_cast<T>(0);
      if (argmax != NULL) argmax[index] = -1;
      continue;
    }

    T bin_size_w = roi_w / static_cast<T>(pooled_width);
    T bin_size_h = roi_h / static_cast<T>(pooled_height);

    // the corresponding bin region
    int bin_x1 = floorf(static_cast<T>(pw) * bin_size_w + roi_x1);
    int bin_y1 = floorf(static_cast<T>(ph) * bin_size_h + roi_y1);
    int bin_x2 = ceilf(static_cast<T>(pw + 1) * bin_size_w + roi_x1);
    int bin_y2 = ceilf(static_cast<T>(ph + 1) * bin_size_h + roi_y1);

    // add roi offsets and clip to input boundaries
    bin_x1 = min(max(bin_x1, 0), width);
    bin_y1 = min(max(bin_y1, 0), height);
    bin_x2 = min(max(bin_x2, 0), width);
    bin_y2 = min(max(bin_y2, 0), height);
    bool is_empty = (bin_y2 <= bin_y1) || (bin_x2 <= bin_x1);

    const T* offset_input =
        input + (roi_batch_ind * channels + c) * height * width;
    // Define an empty pooling region to be zero
    // If nothing is pooled, argmax = -1 causes nothing to be backprop'd
    T max_val = is_empty ? 0 : -FLT_MAX;
    int max_idx = -1;
    for (int h = bin_y1; h < bin_y2; ++h) {
      for (int w = bin_x1; w < bin_x2; ++w) {
        int offset = h * width + w;
        if (offset_input[offset] > max_val) {
          max_val = offset_input[offset];
          max_idx = offset;
        }
      }
    }
    output[index] = max_val;
    if (argmax != NULL) argmax[index] = max_idx;
  }
}

// Backward pass of max RoI pooling: each pooled element routes its gradient
// to the single input position recorded in `argmax` (entries equal to -1
// receive nothing). Accumulation uses atomicAdd on grad_input.
template <typename T>
__global__ void roi_pool_backward_cuda_kernel(
    const int nthreads, const T* grad_output, const T* rois, const int* argmax,
    T* grad_input, const int pooled_height, const int pooled_width,
    const int channels, const int height, const int width) {
  CUDA_1D_KERNEL_LOOP(index, nthreads) {
    // Position of the forward maximum inside its (batch, channel) plane.
    const int winner = argmax[index];
    if (winner == -1) continue;

    // Only the channel and RoI indices of the pooled element are needed.
    const int c = (index / pooled_width / pooled_height) % channels;
    const int n = index / pooled_width / pooled_height / channels;

    // First value of each 5-element RoI record is the batch index.
    const int batch_ind = rois[n * 5];
    T* plane = grad_input + ((batch_ind * channels + c) * height * width);

    atomicAdd(plane + winner, grad_output[index]);
  }
}

#endif  // ROI_POOL_CUDA_KERNEL_CUH


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/roiaware_pool3d_cuda_kernel.cuh
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef ROIAWARE_POOL3D_CUDA_KERNEL_CUH
#define ROIAWARE_POOL3D_CUDA_KERNEL_CUH

#ifdef MMCV_USE_PARROTS
#include "parrots_cuda_helper.hpp"
#else
#include "pytorch_cuda_helper.hpp"
#endif

// Rotates the offset (shift_x, shift_y) by the angle -rz and stores the
// rotated coordinates in local_x / local_y.
template <typename T>
__device__ inline void lidar_to_local_coords(T shift_x, T shift_y, T rz,
                                             T &local_x, T &local_y) {
  const T cos_neg = cos(-rz);
  const T sin_neg = sin(-rz);
  local_x = shift_x * cos_neg + shift_y * (-sin_neg);
  local_y = shift_x * sin_neg + shift_y * cos_neg;
}

// Tests whether a point lies inside a rotated 3D box.
//   pt:     (x, y, z)
//   box3d:  (cx, cy, cz, x_size, y_size, z_size, rz) in LiDAR coordinates,
//           where cz is the bottom center of the box
// Returns 1 when the point is inside and 0 otherwise. local_x / local_y
// receive the point's box-local planar coordinates; they are only written
// when the point passes the height test.
template <typename T>
__device__ inline int check_pt_in_box3d(const T *pt, const T *box3d, T &local_x,
                                        T &local_y) {
  const T px = pt[0], py = pt[1], pz = pt[2];
  const T cx = box3d[0], cy = box3d[1];
  T cz = box3d[2];
  const T x_size = box3d[3], y_size = box3d[4], z_size = box3d[5];
  const T rz = box3d[6];

  // cz refers to the bottom face; move it to the geometric center.
  cz += z_size / 2.0;

  // Reject points above or below the box before doing the rotation.
  if (fabsf(pz - cz) > z_size / 2.0) return 0;

  lidar_to_local_coords(px - cx, py - cy, rz, local_x, local_y);

  // Strict inequalities: points exactly on a side face count as outside.
  const int inside = (local_x > -x_size / 2.0) & (local_x < x_size / 2.0) &
                     (local_y > -y_size / 2.0) & (local_y < y_size / 2.0);
  return inside;
}

// For every (box, point) pair, records whether the point lies inside the box
// and, if so, which voxel of the box's out_x * out_y * out_z grid it falls in.
//
//   rois:     (N, 7) [x, y, z, x_size, y_size, z_size, rz] in LiDAR coordinate
//   pts:      (npoints, 3) [x, y, z]
//   pts_mask: (N, npoints); -1 means the point is not in this box, otherwise
//             the voxel index packed as (x_idx << 16) + (y_idx << 8) + z_idx
//
// The box index comes from blockIdx.y; the point index from the 1D loop.
// NOTE(review): y_idx and z_idx get 8 bits each in the packed value, so the
// encoding presumably requires out_y, out_z <= 256 - confirm with callers.
template <typename T>
__global__ void generate_pts_mask_for_box3d(int boxes_num, int pts_num,
                                            int out_x, int out_y, int out_z,
                                            const T *rois, const T *pts,
                                            int *pts_mask) {
  int box_idx = blockIdx.y;
  if (box_idx >= boxes_num) return;

  // The box is the same for every point this thread handles.
  const T *cur_roi = rois + box_idx * 7;

  CUDA_1D_KERNEL_LOOP(pt_idx, pts_num) {
    // Derive per-iteration pointers from the untouched kernel arguments.
    // Advancing the arguments themselves with += would accumulate offsets if
    // a thread executed more than one loop iteration.
    const T *cur_pt = pts + pt_idx * 3;
    int *cur_mask = pts_mask + box_idx * pts_num + pt_idx;

    T local_x = 0, local_y = 0;
    int cur_in_flag = check_pt_in_box3d(cur_pt, cur_roi, local_x, local_y);

    cur_mask[0] = -1;
    if (cur_in_flag > 0) {
      T local_z = cur_pt[2] - cur_roi[2];
      T x_size = cur_roi[3], y_size = cur_roi[4], z_size = cur_roi[5];

      T x_res = x_size / out_x;
      T y_res = y_size / out_y;
      T z_res = z_size / out_z;

      // Clamp as signed integers so the lower bound of 0 is effective; on an
      // unsigned value max(v, 0) can never change anything.
      int x_idx = int((local_x + x_size / 2) / x_res);
      int y_idx = int((local_y + y_size / 2) / y_res);
      int z_idx = int(local_z / z_res);

      x_idx = min(max(x_idx, 0), out_x - 1);
      y_idx = min(max(y_idx, 0), out_y - 1);
      z_idx = min(max(z_idx, 0), out_z - 1);

      unsigned int idx_encoding = (static_cast<unsigned int>(x_idx) << 16) +
                                  (static_cast<unsigned int>(y_idx) << 8) +
                                  static_cast<unsigned int>(z_idx);

      cur_mask[0] = idx_encoding;
    }
  }
}

// Builds, for each box, the per-voxel lists of point indices from the mask
// written by generate_pts_mask_for_box3d. Each loop iteration handles one
// box and walks all of its points sequentially.
//
//   pts_mask:          (N, npoints); -1 for points outside the box, otherwise
//                      the packed voxel index
//                      (x_idx << 16) + (y_idx << 8) + z_idx
//   pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel); slot 0
//                      of each voxel is the point counter, slots 1.. hold
//                      point indices. Counters are read before being
//                      incremented, so the buffer is presumably zeroed by the
//                      caller - TODO confirm.
template <typename T>
__global__ void collect_inside_pts_for_box3d(int boxes_num, int pts_num,
                                             int max_pts_each_voxel, int out_x,
                                             int out_y, int out_z,
                                             const int *pts_mask,
                                             T *pts_idx_of_voxels) {
  CUDA_1D_KERNEL_LOOP(box_idx, boxes_num) {
    int max_num_pts = max_pts_each_voxel - 1;  // index 0 is the counter

    // Use per-iteration local pointers. Advancing the kernel argument with +=
    // would accumulate offsets if a thread processed more than one box.
    T *box_voxels =
        pts_idx_of_voxels + box_idx * out_x * out_y * out_z * max_pts_each_voxel;
    const int *box_mask = pts_mask + box_idx * pts_num;

    for (int k = 0; k < pts_num; k++) {
      if (box_mask[k] != -1) {
        // Unpack the voxel coordinates (8 bits each after masking).
        unsigned int idx_encoding = box_mask[k];
        unsigned int x_idx = (idx_encoding >> 16) & 0xFF;
        unsigned int y_idx = (idx_encoding >> 8) & 0xFF;
        unsigned int z_idx = idx_encoding & 0xFF;
        unsigned int base_offset = x_idx * out_y * out_z * max_pts_each_voxel +
                                   y_idx * out_z * max_pts_each_voxel +
                                   z_idx * max_pts_each_voxel;
        unsigned int cnt = box_voxels[base_offset];
        // Points beyond the voxel's capacity are silently dropped.
        if (cnt < max_num_pts) {
          box_voxels[base_offset + cnt + 1] = k;
          box_voxels[base_offset]++;
        }
      }
    }
  }
}

// Max-pools point features inside every voxel of every box.
//
//   pts_feature:       (npoints, C)
//   pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel), index 0
//                      is the counter
//   pooled_features:   (N, out_x, out_y, out_z, C); only written for voxels
//                      where a maximum was found
//   argmax:            (N, out_x, out_y, out_z, C); index of the point that
//                      produced the maximum, or -1
//
// Box and channel come from blockIdx.z / blockIdx.y; the voxel from the 1D
// loop.
template <typename T>
__global__ void roiaware_maxpool3d(int boxes_num, int pts_num, int channels,
                                   int max_pts_each_voxel, int out_x, int out_y,
                                   int out_z, const T *pts_feature,
                                   const int *pts_idx_of_voxels,
                                   T *pooled_features, int *argmax) {
  int box_idx = blockIdx.z;
  int channel_idx = blockIdx.y;
  CUDA_1D_KERNEL_LOOP(voxel_idx_flat, out_x * out_y * out_z) {
    int x_idx = voxel_idx_flat / (out_y * out_z);
    int y_idx = (voxel_idx_flat - x_idx * (out_y * out_z)) / out_z;
    int z_idx = voxel_idx_flat % out_z;
    if (box_idx >= boxes_num || channel_idx >= channels) return;

    int offset_base = x_idx * out_y * out_z + y_idx * out_z + z_idx;

    // Per-iteration local pointers: advancing the kernel arguments with +=
    // would accumulate offsets if a thread processed more than one voxel.
    const int *voxel_pts =
        pts_idx_of_voxels +
        (box_idx * out_x * out_y * out_z * max_pts_each_voxel +
         offset_base * max_pts_each_voxel);
    const int out_offset = box_idx * out_x * out_y * out_z * channels +
                           offset_base * channels + channel_idx;
    T *pooled_out = pooled_features + out_offset;
    int *argmax_out = argmax + out_offset;

    int argmax_idx = -1;
    // NOTE(review): the running maximum is kept in float regardless of T.
    float max_val = -1e50;

    int total_pts = voxel_pts[0];

    for (int k = 1; k <= total_pts; k++) {
      const T feat = pts_feature[voxel_pts[k] * channels + channel_idx];
      if (feat > max_val) {
        max_val = feat;
        argmax_idx = voxel_pts[k];
      }
    }

    if (argmax_idx != -1) {
      pooled_out[0] = max_val;
    }
    argmax_out[0] = argmax_idx;
  }
}

// Average-pools point features inside every voxel of every box.
//
//   pts_feature:       (npoints, C)
//   pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel), index 0
//                      is the counter
//   pooled_features:   (N, out_x, out_y, out_z, C); only written for voxels
//                      that contain at least one point
//
// Box and channel come from blockIdx.z / blockIdx.y; the voxel from the 1D
// loop.
template <typename T>
__global__ void roiaware_avgpool3d(int boxes_num, int pts_num, int channels,
                                   int max_pts_each_voxel, int out_x, int out_y,
                                   int out_z, const T *pts_feature,
                                   const int *pts_idx_of_voxels,
                                   T *pooled_features) {
  int box_idx = blockIdx.z;
  int channel_idx = blockIdx.y;
  CUDA_1D_KERNEL_LOOP(voxel_idx_flat, out_x * out_y * out_z) {
    int x_idx = voxel_idx_flat / (out_y * out_z);
    int y_idx = (voxel_idx_flat - x_idx * (out_y * out_z)) / out_z;
    int z_idx = voxel_idx_flat % out_z;
    if (box_idx >= boxes_num || channel_idx >= channels) return;

    int offset_base = x_idx * out_y * out_z + y_idx * out_z + z_idx;

    // Per-iteration local pointers: advancing the kernel arguments with +=
    // would accumulate offsets if a thread processed more than one voxel.
    const int *voxel_pts =
        pts_idx_of_voxels +
        (box_idx * out_x * out_y * out_z * max_pts_each_voxel +
         offset_base * max_pts_each_voxel);
    T *pooled_out = pooled_features +
                    (box_idx * out_x * out_y * out_z * channels +
                     offset_base * channels + channel_idx);

    // NOTE(review): the sum is accumulated in float regardless of T.
    float sum_val = 0;
    int total_pts = voxel_pts[0];

    for (int k = 1; k <= total_pts; k++) {
      sum_val += pts_feature[voxel_pts[k] * channels + channel_idx];
    }

    if (total_pts > 0) {
      pooled_out[0] = sum_val / total_pts;
    }
  }
}

// Backward pass of roiaware max pooling: each pooled (voxel, channel) element
// adds its gradient to the point recorded in argmax.
//
//   argmax:   (N, out_x, out_y, out_z, C); -1 marks elements with no maximum
//   grad_out: (N, out_x, out_y, out_z, C)
//   grad_in:  (npoints, C), return value, accumulated with atomicAdd
//
// Box and channel come from blockIdx.z / blockIdx.y; the voxel from the 1D
// loop.
template <typename T>
__global__ void roiaware_maxpool3d_backward(int boxes_num, int channels,
                                            int out_x, int out_y, int out_z,
                                            const int *argmax,
                                            const T *grad_out, T *grad_in) {
  int box_idx = blockIdx.z;
  int channel_idx = blockIdx.y;
  CUDA_1D_KERNEL_LOOP(voxel_idx_flat, out_x * out_y * out_z) {
    int x_idx = voxel_idx_flat / (out_y * out_z);
    int y_idx = (voxel_idx_flat - x_idx * (out_y * out_z)) / out_z;
    int z_idx = voxel_idx_flat % out_z;
    if (box_idx >= boxes_num || channel_idx >= channels) return;

    int offset_base = x_idx * out_y * out_z + y_idx * out_z + z_idx;

    // Per-iteration offset: advancing the kernel arguments with += would
    // accumulate offsets if a thread processed more than one voxel.
    const int elem_offset = box_idx * out_x * out_y * out_z * channels +
                            offset_base * channels + channel_idx;
    const int src_pt = argmax[elem_offset];

    // Skip only this voxel; a `return` here would also drop any further
    // voxels assigned to the same thread.
    if (src_pt == -1) continue;

    atomicAdd(grad_in + src_pt * channels + channel_idx,
              grad_out[elem_offset] * 1);
  }
}

// Backward pass of roiaware average pooling: each pooled (voxel, channel)
// element spreads its gradient equally over the points listed for the voxel.
//
//   pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel), index 0
//                      is the counter
//   grad_out:          (N, out_x, out_y, out_z, C)
//   grad_in:           (npoints, C), return value, accumulated with atomicAdd
//
// Box and channel come from blockIdx.z / blockIdx.y; the voxel from the 1D
// loop.
template <typename T>
__global__ void roiaware_avgpool3d_backward(int boxes_num, int channels,
                                            int out_x, int out_y, int out_z,
                                            int max_pts_each_voxel,
                                            const int *pts_idx_of_voxels,
                                            const T *grad_out, T *grad_in) {
  int box_idx = blockIdx.z;
  int channel_idx = blockIdx.y;
  CUDA_1D_KERNEL_LOOP(voxel_idx_flat, out_x * out_y * out_z) {
    int x_idx = voxel_idx_flat / (out_y * out_z);
    int y_idx = (voxel_idx_flat - x_idx * (out_y * out_z)) / out_z;
    int z_idx = voxel_idx_flat % out_z;
    if (box_idx >= boxes_num || channel_idx >= channels) return;

    int offset_base = x_idx * out_y * out_z + y_idx * out_z + z_idx;

    // Per-iteration local pointers: advancing the kernel arguments with +=
    // would accumulate offsets if a thread processed more than one voxel.
    const int *voxel_pts =
        pts_idx_of_voxels +
        (box_idx * out_x * out_y * out_z * max_pts_each_voxel +
         offset_base * max_pts_each_voxel);
    const T *voxel_grad = grad_out +
                          (box_idx * out_x * out_y * out_z * channels +
                           offset_base * channels + channel_idx);

    int total_pts = voxel_pts[0];
    // 1 / count, with the count floored at 1 to avoid dividing by zero.
    float cur_grad = 1 / fmaxf(float(total_pts), 1.0);
    for (int k = 1; k <= total_pts; k++) {
      atomicAdd(grad_in + voxel_pts[k] * channels + channel_idx,
                voxel_grad[0] * cur_grad);
    }
  }
}

#endif  // ROIAWARE_POOL3D_CUDA_KERNEL_CUH


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/roipoint_pool3d_cuda_kernel.cuh
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef ROIPOINT_POOL3D_CUDA_KERNEL_CUH
#define ROIPOINT_POOL3D_CUDA_KERNEL_CUH

#ifdef MMCV_USE_PARROTS
#include "parrots_cuda_helper.hpp"
#else
#include "pytorch_cuda_helper.hpp"
#endif

// Rotates the offset (shift_x, shift_y) by the angle -rz and stores the
// rotated coordinates in local_x / local_y.
template <typename T>
__device__ inline void lidar_to_local_coords(T shift_x, T shift_y, T rz,
                                             T &local_x, T &local_y) {
  const T cos_neg = cos(-rz);
  const T sin_neg = sin(-rz);
  local_x = shift_x * cos_neg + shift_y * (-sin_neg);
  local_y = shift_x * sin_neg + shift_y * cos_neg;
}

// Tests whether a point lies inside a rotated 3D box.
//   pt:     (x, y, z)
//   box3d:  (cx, cy, cz, dx, dy, dz, rz) in LiDAR coordinates, where cz is
//           the bottom center of the box
// Returns 1 when the point is inside and 0 otherwise. local_x / local_y
// receive the point's box-local planar coordinates; they are only written
// when the point passes the height test.
template <typename T>
__device__ inline int check_pt_in_box3d(const T *pt, const T *box3d, T &local_x,
                                        T &local_y) {
  const T px = pt[0], py = pt[1], pz = pt[2];
  const T cx = box3d[0], cy = box3d[1];
  T cz = box3d[2];
  const T dx = box3d[3], dy = box3d[4], dz = box3d[5];
  const T rz = box3d[6];

  // cz refers to the bottom face; move it to the geometric center.
  cz += dz / 2.0;

  // Reject points above or below the box before doing the rotation.
  if (fabsf(pz - cz) > dz / 2.0) return 0;

  lidar_to_local_coords(px - cx, py - cy, rz, local_x, local_y);

  // Strict inequalities: points exactly on a side face count as outside.
  const int inside = (local_x > -dx / 2.0) & (local_x < dx / 2.0) &
                     (local_y > -dy / 2.0) & (local_y < dy / 2.0);
  return inside;
}

// Writes, for every (batch, point, box) triple, whether the point lies inside
// the box.
//   xyz:        (B, N, 3)
//   boxes3d:    (B, M, 7)
//   pts_assign: (B, N, M); 1 if the point is inside the box, 0 otherwise
// Box and batch indices come from blockIdx.y / blockIdx.z; the point index
// from the 1D loop.
template <typename T>
__global__ void assign_pts_to_box3d(int batch_size, int pts_num, int boxes_num,
                                    const T *xyz, const T *boxes3d,
                                    int *pts_assign) {
  const int box_idx = blockIdx.y;
  const int bs_idx = blockIdx.z;
  CUDA_1D_KERNEL_LOOP(pt_idx, pts_num) {
    if (box_idx >= boxes_num || bs_idx >= batch_size) return;

    int *slot = pts_assign +
                (bs_idx * pts_num * boxes_num + pt_idx * boxes_num + box_idx);
    *slot = 0;

    const T *box = boxes3d + (bs_idx * boxes_num * 7 + box_idx * 7);
    const T *pt = xyz + (bs_idx * pts_num * 3 + pt_idx * 3);

    // The local coordinates are required by the helper but not used here.
    T local_x = 0, local_y = 0;
    const int inside = check_pt_in_box3d(pt, box, local_x, local_y);
    *slot = inside;
  }
}

// Selects, for each box, the indices of up to sampled_pts_num points that
// were assigned to it.
//   pts_assign:        indexed as (B, N, M); non-zero marks a point inside
//                      the box
//   pts_idx:           (B, M, sampled_pts_num); receives the chosen indices
//   pooled_empty_flag: (B, M); set to 1 for boxes without any point (never
//                      written otherwise)
// The batch index comes from blockIdx.y; the box index from the 1D loop.
__global__ void get_pooled_idx(int batch_size, int pts_num, int boxes_num,
                               int sampled_pts_num, const int *pts_assign,
                               int *pts_idx, int *pooled_empty_flag) {
  CUDA_1D_KERNEL_LOOP(boxes_idx, boxes_num) {
    const int bs_idx = blockIdx.y;

    // Start of this box's slice in pts_idx.
    const int out_base =
        bs_idx * boxes_num * sampled_pts_num + boxes_idx * sampled_pts_num;

    // Take assigned points in index order until the quota is filled.
    int cnt = 0;
    for (int k = 0; k < pts_num && cnt < sampled_pts_num; k++) {
      const int assigned =
          pts_assign[bs_idx * pts_num * boxes_num + k * boxes_num + boxes_idx];
      if (!assigned) continue;
      pts_idx[out_base + cnt] = k;
      cnt++;
    }

    if (cnt == 0) {
      pooled_empty_flag[bs_idx * boxes_num + boxes_idx] = 1;
      continue;
    }

    // Fewer points than requested: fill the rest by cycling through the
    // points already chosen.
    for (int k = cnt; k < sampled_pts_num; k++) {
      pts_idx[out_base + k] = pts_idx[out_base + (k % cnt)];
    }
  }
}

// Gathers the coordinates and features of the sampled points of each box.
//   xyz:               (B, N, 3)
//   pts_idx:           (B, M, sampled_pts_num)
//   pts_feature:       (B, N, C) with C = feature_in_len
//   pooled_features:   (B, M, sampled_pts_num, 3 + C); xyz first, then the
//                      point's features
//   pooled_empty_flag: (B, M); boxes flagged non-zero are skipped entirely
// Box and batch indices come from blockIdx.y / blockIdx.z; the sample index
// from the 1D loop.
template <typename T>
__global__ void roipoint_pool3d_forward(
    int batch_size, int pts_num, int boxes_num, int feature_in_len,
    int sampled_pts_num, const T *xyz, const int *pts_idx, const T *pts_feature,
    T *pooled_features, int *pooled_empty_flag) {
  const int box_idx = blockIdx.y;
  const int bs_idx = blockIdx.z;
  CUDA_1D_KERNEL_LOOP(sample_pt_idx, sampled_pts_num) {
    if (box_idx >= boxes_num || bs_idx >= batch_size) return;
    if (pooled_empty_flag[bs_idx * boxes_num + box_idx]) return;

    // Flat position of this sample in the (B, M, sampled_pts_num) layout.
    const int slot = bs_idx * boxes_num * sampled_pts_num +
                     box_idx * sampled_pts_num + sample_pt_idx;
    const int src_pt = pts_idx[slot];

    T *dst = pooled_features + slot * (3 + feature_in_len);

    // Coordinates go first ...
    const T *src_xyz = xyz + (bs_idx * pts_num * 3 + src_pt * 3);
    for (int axis = 0; axis < 3; axis++) {
      dst[axis] = src_xyz[axis];
    }

    // ... followed by the point's feature vector.
    const T *src_feat =
        pts_feature +
        (bs_idx * pts_num * feature_in_len + src_pt * feature_in_len);
    memcpy(dst + 3, src_feat, feature_in_len * sizeof(T));
  }
}

#endif  // ROIPOINT_POOL3D_CUDA_KERNEL_CUH


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/rotated_feature_align_cuda_kernel.cuh
================================================
// Copyright (c) OpenMMLab. All rights reserved.
// Modified from
// https://github.com/SJTU-Thinklab-Det/r3det-on-mmdetection/blob/master/mmdet/ops/fr/src/feature_refine_kernel.cu
#ifndef ROTATED_FEATURE_ALIGN_CUDA_KERNEL_CUH
#define ROTATED_FEATURE_ALIGN_CUDA_KERNEL_CUH

#ifdef MMCV_USE_PARROTS
#include "parrots_cuda_helper.hpp"
#else
#include "pytorch_cuda_helper.hpp"
#endif

// Forward pass of rotated feature alignment. Each output element equals the
// input element at the same position plus `points` bilinearly interpolated
// samples of the same (n, c) feature plane, taken at locations derived from
// the box stored for that spatial position.
//   best_bboxes: 5 values per (n, h, w): [y, x, w, h, angle]
//   points:      number of samples; sample 0 is the box center and, when
//                points > 1, samples 1..4 are the four box corners.
//                NOTE(review): the sample arrays hold 5 entries, so points is
//                presumably 1 or 5 - confirm with callers.
template <typename scalar_t>
__global__ void rotated_feature_align_forward_kernel(
    const int nthreads, const int points, const scalar_t* bottom_data,
    const scalar_t* best_bboxes, const scalar_t spatial_scale,
    const int channels, const int height, const int width, scalar_t* top_data) {
  CUDA_1D_KERNEL_LOOP(index, nthreads) {
    // Unflatten index into (n, c, h, w).
    const int w = index % width;
    const int h = (index / width) % height;
    const int c = (index / width / height) % channels;
    const int n = index / width / height / channels;

    const scalar_t* box = best_bboxes + ((n * height + h) * width + w) * 5;
    const scalar_t center_y = box[0] * spatial_scale;
    const scalar_t center_x = box[1] * spatial_scale;

    // Sampling locations; entry 0 is always the box center.
    scalar_t sample_x[5] = {center_x, 0, 0, 0, 0};
    scalar_t sample_y[5] = {center_y, 0, 0, 0, 0};

    if (points > 1) {
      const scalar_t box_w = box[2] * spatial_scale;
      const scalar_t box_h = box[3] * spatial_scale;
      const scalar_t box_a = box[4];

      const scalar_t half_w = box_w / 2, half_h = box_h / 2;
      const scalar_t cos_a = cosf(box_a), sin_a = sinf(box_a);
      // Half-extent vectors of the rotated box along its width and height.
      const scalar_t wx = cos_a * half_w, wy = sin_a * half_w;
      const scalar_t hx = -sin_a * half_h, hy = cos_a * half_h;

      sample_x[1] = center_x + wx + hx;
      sample_y[1] = center_y + wy + hy;
      sample_x[2] = center_x - wx + hx;
      sample_y[2] = center_y - wy + hy;
      sample_x[3] = center_x - wx - hx;
      sample_y[3] = center_y - wy - hy;
      sample_x[4] = center_x + wx - hx;
      sample_y[4] = center_y + wy - hy;
    }

    const scalar_t* plane = bottom_data + (n * channels + c) * height * width;

    // Start from the unmodified input value, then add every sample.
    scalar_t acc = bottom_data[index];
    for (int i = 0; i < points; i++) {
      acc += bilinear_interpolate<scalar_t>(plane, height, width, sample_y[i],
                                            sample_x[i], i);
    }
    top_data[index] = acc;
  }
}

// Backward pass of rotated feature alignment. The gradient of each output
// element is added to the input element at the same position and scattered,
// with bilinear weights, to the neighbours of every sampling location used
// in the forward pass. All accumulation uses atomicAdd on bottom_diff.
//   best_bboxes: 5 values per (n, h, w): [y, x, w, h, angle]
//   points:      number of samples; sample 0 is the box center and, when
//                points > 1, samples 1..4 are the four box corners.
//                NOTE(review): the sample arrays hold 5 entries, so points is
//                presumably 1 or 5 - confirm with callers.
template <typename scalar_t>
__global__ void rotated_feature_align_backward_kernel(
    const int nthreads, const int points, const scalar_t* top_diff,
    const scalar_t* best_bboxes, const scalar_t spatial_scale,
    const int channels, const int height, const int width,
    scalar_t* bottom_diff) {
  CUDA_1D_KERNEL_LOOP(index, nthreads) {
    // Unflatten index into (n, c, h, w).
    const int w = index % width;
    const int h = (index / width) % height;
    const int c = (index / width / height) % channels;
    const int n = index / width / height / channels;

    const scalar_t* box = best_bboxes + ((n * height + h) * width + w) * 5;
    const scalar_t center_y = box[0] * spatial_scale;
    const scalar_t center_x = box[1] * spatial_scale;

    // Sampling locations; entry 0 is always the box center.
    scalar_t sample_x[5] = {center_x, 0, 0, 0, 0};
    scalar_t sample_y[5] = {center_y, 0, 0, 0, 0};

    if (points > 1) {
      const scalar_t box_w = box[2] * spatial_scale;
      const scalar_t box_h = box[3] * spatial_scale;
      const scalar_t box_a = box[4];

      const scalar_t half_w = box_w / 2, half_h = box_h / 2;
      const scalar_t cos_a = cosf(box_a), sin_a = sinf(box_a);
      // Half-extent vectors of the rotated box along its width and height.
      const scalar_t wx = cos_a * half_w, wy = sin_a * half_w;
      const scalar_t hx = -sin_a * half_h, hy = cos_a * half_h;

      sample_x[1] = center_x + wx + hx;
      sample_y[1] = center_y + wy + hy;
      sample_x[2] = center_x - wx + hx;
      sample_y[2] = center_y - wy + hy;
      sample_x[3] = center_x - wx - hx;
      sample_y[3] = center_y - wy - hy;
      sample_x[4] = center_x + wx - hx;
      sample_y[4] = center_y + wy - hy;
    }

    scalar_t* grad_plane = bottom_diff + (n * channels + c) * height * width;
    const scalar_t grad = top_diff[index];

    // Identity term of the forward pass.
    atomicAdd(bottom_diff + index, grad);

    for (int i = 0; i < points; i++) {
      scalar_t w1, w2, w3, w4;
      int x_low, x_high, y_low, y_high;
      bilinear_interpolate_gradient<scalar_t>(height, width, sample_y[i],
                                              sample_x[i], w1, w2, w3, w4,
                                              x_low, x_high, y_low, y_high, i);

      // Negative neighbour indices mean there is nothing to scatter to.
      if (x_low < 0 || x_high < 0 || y_low < 0 || y_high < 0) continue;

      const scalar_t g1 = grad * w1;
      const scalar_t g2 = grad * w2;
      const scalar_t g3 = grad * w3;
      const scalar_t g4 = grad * w4;

      atomicAdd(grad_plane + y_low * width + x_low, g1);
      atomicAdd(grad_plane + y_low * width + x_high, g2);
      atomicAdd(grad_plane + y_high * width + x_low, g3);
      atomicAdd(grad_plane + y_high * width + x_high, g4);
    }
  }
}
#endif  // ROTATED_FEATURE_ALIGN_CUDA_KERNEL_CUH


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/scatter_points_cuda_kernel.cuh
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef SCATTER_POINTS_CUDA_KERNEL_CUH
#define SCATTER_POINTS_CUDA_KERNEL_CUH

#ifdef MMCV_USE_PARROTS
#include "parrots_cuda_helper.hpp"
#else
#include "pytorch_cuda_helper.hpp"
#endif

// Reduction mode selector passed to the scatter/reduce kernels in this file:
// SUM and MEAN accumulate with reduceAdd, MAX with reduceMax.
typedef enum { SUM = 0, MEAN = 1, MAX = 2 } reduce_t;
// NOTE(review): not referenced in the visible part of this header; presumably
// an upper bound on the launch grid size used by host code - confirm.
int const maxGridDim = 50000;

// Atomically updates *address to max(*address, val) for float, implemented
// with a compare-and-swap loop on the value's 32-bit integer representation.
__device__ __forceinline__ static void reduceMax(float *address, float val) {
  int *address_as_i = reinterpret_cast<int *>(address);
  int old = *address_as_i, assumed;
  do {
    assumed = old;
    // Try to replace the value we last observed with the larger of it and
    // val; atomicCAS returns what was actually stored at the address.
    old = atomicCAS(address_as_i, assumed,
                    __float_as_int(fmaxf(val, __int_as_float(assumed))));
    // Retry if another thread changed the value in between (CAS failed), or
    // if the value just observed is still smaller than val. The loop thus
    // ends only after a successful CAS on a value that is not below val.
  } while (assumed != old || __int_as_float(old) < val);
}

// Atomically updates *address to max(*address, val) for double, implemented
// with a compare-and-swap loop on the value's 64-bit integer representation.
__device__ __forceinline__ static void reduceMax(double *address, double val) {
  unsigned long long *address_as_ull =
      reinterpret_cast<unsigned long long *>(address);
  unsigned long long old = *address_as_ull, assumed;
  do {
    assumed = old;
    // Try to replace the value we last observed with the larger of it and
    // val; atomicCAS returns what was actually stored at the address.
    old = atomicCAS(
        address_as_ull, assumed,
        __double_as_longlong(fmax(val, __longlong_as_double(assumed))));
    // Retry if another thread changed the value in between (CAS failed), or
    // if the value just observed is still smaller than val.
  } while (assumed != old || __longlong_as_double(old) < val);
}

// Atomic accumulation helpers: reduceAdd(address, val) adds val to *address.
// Which definition is compiled depends on the platform macros below; when
// neither HIP_DIFF nor __CUDA_ARCH__ is defined, no reduceAdd is defined here.
// get rid of meaningless warnings when compiling host code
#ifdef HIP_DIFF
// HIP build: forward directly to atomicAdd for both precisions.
__device__ __forceinline__ static void reduceAdd(float *address, float val) {
  atomicAdd(address, val);
}
__device__ __forceinline__ static void reduceAdd(double *address, double val) {
  atomicAdd(address, val);
}
#else
#ifdef __CUDA_ARCH__
// float32 version: uses atomicAdd unless __CUDA_ARCH__ < 200, in which case a
// compare-and-swap loop is used and a compile-time diagnostic is emitted.
__device__ __forceinline__ static void reduceAdd(float *address, float val) {
#if (__CUDA_ARCH__ < 200)
#ifdef _MSC_VER
#pragma message( \
    "compute capability lower than 2.x. fall back to use CAS version of atomicAdd for float32")
#else
#warning \
    "compute capability lower than 2.x. fall back to use CAS version of atomicAdd for float32"
#endif
  // CAS fallback: reinterpret the float as int, retry until no other thread
  // modified the value between the read and the swap.
  int *address_as_i = reinterpret_cast<int *>(address);
  int old = *address_as_i, assumed;
  do {
    assumed = old;
    old = atomicCAS(address_as_i, assumed,
                    __float_as_int(val + __int_as_float(assumed)));
  } while (assumed != old);
#else
  atomicAdd(address, val);
#endif
}

// float64 version: uses atomicAdd unless __CUDA_ARCH__ < 600, in which case a
// compare-and-swap loop is used and a compile-time diagnostic is emitted.
__device__ __forceinline__ static void reduceAdd(double *address, double val) {
#if (__CUDA_ARCH__ < 600)
#ifdef _MSC_VER
#pragma message( \
    "compute capability lower than 6.x. fall back to use CAS version of atomicAdd for float64")
#else
#warning \
    "compute capability lower than 6.x. fall back to use CAS version of atomicAdd for float64"
#endif
  // CAS fallback: reinterpret the double as unsigned long long, retry until
  // no other thread modified the value between the read and the swap.
  unsigned long long *address_as_ull =
      reinterpret_cast<unsigned long long *>(address);
  unsigned long long old = *address_as_ull, assumed;
  do {
    assumed = old;
    old = atomicCAS(address_as_ull, assumed,
                    __double_as_longlong(val + __longlong_as_double(assumed)));
  } while (assumed != old);
#else
  atomicAdd(address, val);
#endif
}
#endif  // __CUDA_ARCH__
#endif  // HIP_DIFF

// Scatter-reduces per-input feature rows into their target rows.
//
// For every input row x with coors_map[x] != -1, each of its num_feats values
// is combined into row coors_map[x] of reduced_feats: via reduceMax when
// reduce_type is MAX, via reduceAdd for every other reduce_type.
// Rows whose map entry is -1 are skipped.
template <typename T>
__global__ void feats_reduce_kernel(
    const T *feats, const int32_t *coors_map,
    T *reduced_feats,  // shall be 0 at initialization
    const int num_input, const int num_feats, const reduce_t reduce_type) {
  const bool use_max = (reduce_type == reduce_t::MAX);
  CUDA_1D_KERNEL_LOOP(x, num_input) {
    const int32_t dst_row = coors_map[x];
    if (dst_row != -1) {
      const T *src = feats + x * num_feats;
      T *dst = reduced_feats + dst_row * num_feats;
      for (int f = 0; f < num_feats; ++f) {
        if (use_max) {
          reduceMax(dst + f, src[f]);
        } else {
          reduceAdd(dst + f, src[f]);
        }
      }
    }
  }
}

// Propagates the gradient of a SUM or MEAN reduction back to the inputs.
//
// For every input row x with coors_map[x] != -1:
//   SUM  : grad_feats[x, :] = grad_reduced_feats[coors_map[x], :]
//   MEAN : same, divided by reduce_count[coors_map[x]]
// Any other reduce_type leaves grad_feats untouched.
template <typename T>
__global__ void add_reduce_traceback_grad_kernel(
    T *grad_feats, const T *grad_reduced_feats, const int32_t *coors_map,
    const int32_t *reduce_count, const int num_input, const int num_feats,
    const reduce_t reduce_type) {
  CUDA_1D_KERNEL_LOOP(x, num_input) {
    const int32_t src_row = coors_map[x];
    if (src_row == -1) continue;  // this input was not reduced anywhere

    T *dst = grad_feats + x * num_feats;
    const T *src = grad_reduced_feats + src_row * num_feats;

    switch (reduce_type) {
      case reduce_t::SUM:
        for (int f = 0; f < num_feats; ++f) {
          dst[f] = src[f];
        }
        break;
      case reduce_t::MEAN:
        for (int f = 0; f < num_feats; ++f) {
          dst[f] = src[f] / static_cast<T>(reduce_count[src_row]);
        }
        break;
      default:
        break;
    }
  }
}

// Records, for each reduced element, the smallest input index whose value
// equals the reduced value.
//
// For every input row x with coors_map[x] != -1 and every feature f, if
// feats[x, f] == reduced_feats[coors_map[x], f] then
// reduce_from[coors_map[x], f] is lowered to x through atomicMin.
template <typename T>
__global__ void max_reduce_traceback_scatter_idx_kernel(
    const T *feats, const T *reduced_feats, int32_t *reduce_from,
    const int32_t *coors_map, const int num_input, const int num_feats) {
  CUDA_1D_KERNEL_LOOP(x, num_input) {
    const int32_t dst_row = coors_map[x];
    if (dst_row == -1) continue;

    const T *src = feats + x * num_feats;
    const int dst_base = dst_row * num_feats;

    for (int f = 0; f < num_feats; ++f) {
      if (src[f] == reduced_feats[dst_base + f]) {
        atomicMin(reduce_from + dst_base + f, static_cast<int32_t>(x));
      }
    }
  }
}

// Scatters the gradient of each reduced element to the input element recorded
// in reduce_from:
//   grad_feats[reduce_from[x, f], f] = grad_reduced_feats[x, f]
// for every reduced row x in [0, num_reduced) and feature f.
template <typename T>
__global__ void max_reduce_scatter_grad_kernel(T *grad_feats,
                                               const T *grad_reduced_feats,
                                               const int32_t *reduce_from,
                                               const int num_reduced,
                                               const int num_feats) {
  CUDA_1D_KERNEL_LOOP(x, num_reduced) {
    const int row_base = x * num_feats;
    for (int f = 0; f < num_feats; ++f) {
      const int32_t src_row = reduce_from[row_base + f];
      grad_feats[src_row * num_feats + f] = grad_reduced_feats[row_base + f];
    }
  }
}

#endif  // SCATTER_POINTS_CUDA_KERNEL_CUH


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/sigmoid_focal_loss_cuda_kernel.cuh
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef SIGMOID_FOCAL_LOSS_CUDA_KERNEL_CUH
#define SIGMOID_FOCAL_LOSS_CUDA_KERNEL_CUH

#ifdef MMCV_USE_PARROTS
#include "parrots_cuda_helper.hpp"
#else
#include "pytorch_cuda_helper.hpp"
#endif

// Forward pass of the sigmoid focal loss.
//
// One output element per (sample n, class c) pair, with index = n * num_classes
// + c. With p = sigmoid(input[index]) and t = target[n]:
//   c == t : output = -alpha       * (1 - p)^gamma * log(p)
//   c != t : output = -(1 - alpha) * p^gamma       * log(1 - p)
// The log arguments are clamped from below by FLT_MIN. When weight is not
// NULL the result is additionally multiplied by weight[t].
template <typename T>
__global__ void sigmoid_focal_loss_forward_cuda_kernel(
    const int nthreads, const T* input, const int64_t* target, const T* weight,
    T* output, const T gamma, const T alpha, const int num_classes) {
  CUDA_1D_KERNEL_LOOP(index, nthreads) {
    int n = index / num_classes;
    int c = index % num_classes;

    int64_t t = target[n];
    T flag_p = (t == c);
    T flag_n = (t != c);

    // p = sigmoid(x) = 1. / (1. + exp(-x))
    // Use the overloaded exp() (as the backward kernel does) rather than the
    // float-only expf(), so that T = double is not truncated to single
    // precision and forward/backward evaluate the same sigmoid.
    T p = (T)1. / ((T)1. + exp(-input[index]));

    // (1 - p)**gamma * log(p)
    T term_p = pow(((T)1. - p), gamma) * log(max(p, (T)FLT_MIN));
    // p**gamma * log(1 - p)
    T term_n = pow(p, gamma) * log(max((T)1. - p, (T)FLT_MIN));

    output[index] = (T)0.;
    output[index] += -flag_p * alpha * term_p;
    output[index] += -flag_n * ((T)1. - alpha) * term_n;
    if (weight != NULL) {
      output[index] *= weight[t];
    }
  }
}

// Backward pass of the sigmoid focal loss: gradient w.r.t. the logits.
//
// One element per (sample, class) pair, index = sample * num_classes + cls.
// With p = sigmoid(input[index]) and label = target[sample]:
//   cls == label : grad = -alpha * (1 - p)^gamma * (1 - p - gamma*p*log(p))
//   cls != label : grad = -(1 - alpha) * p^gamma *
//                         (gamma*(1 - p)*log(1 - p) - p)
// The log arguments are clamped from below by FLT_MIN. When weight is not
// NULL the result is multiplied by weight[label].
template <typename T>
__global__ void sigmoid_focal_loss_backward_cuda_kernel(
    const int nthreads, const T* input, const int64_t* target, const T* weight,
    T* grad_input, const T gamma, const T alpha, const int num_classes) {
  CUDA_1D_KERNEL_LOOP(index, nthreads) {
    const int sample = index / num_classes;
    const int cls = index % num_classes;

    const int64_t label = target[sample];
    const T is_pos = (label == cls);
    const T is_neg = (label != cls);

    // p = sigmoid(x) = 1. / (1. + exp(-x))
    const T p = (T)1. / ((T)1. + exp(-input[index]));

    // (1 - p)**gamma * (1 - p - gamma*p*log(p))
    const T pos_term = pow(((T)1. - p), gamma) *
                       ((T)1. - p - (gamma * p * log(max(p, (T)FLT_MIN))));
    // p**gamma * (gamma * (1 - p) * log(1 - p) - p)
    const T neg_term =
        pow(p, gamma) *
        (gamma * ((T)1. - p) * log(max((T)1. - p, (T)FLT_MIN)) - p);

    // Accumulate locally, then store once.
    T grad = (T)0.;
    grad += -is_pos * alpha * pos_term;
    grad += -is_neg * ((T)1. - alpha) * neg_term;
    if (weight != NULL) {
      grad *= weight[label];
    }
    grad_input[index] = grad;
  }
}

#endif  // SIGMOID_FOCAL_LOSS_CUDA_KERNEL_CUH


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/softmax_focal_loss_cuda_kernel.cuh
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef SOFTMAX_FOCAL_LOSS_CUDA_KERNEL_CUH
#define SOFTMAX_FOCAL_LOSS_CUDA_KERNEL_CUH

#ifdef MMCV_USE_PARROTS
#include "parrots_cuda_helper.hpp"
#else
#include "pytorch_cuda_helper.hpp"
#endif

// Forward pass of the softmax focal loss.
//
// One output element per sample (index). With label = target[index] and
// pred = softmax[index, label]:
//   label >= 0 : output = -alpha * (1 - pred)^gamma * log(max(pred, FLT_MIN)),
//                multiplied by weight[label] when weight is not NULL
//   label <  0 : output = 0
//
// softmax and weight are only dereferenced for non-negative labels; indexing
// them with a negative label would read out of bounds.
template <typename T>
__global__ void softmax_focal_loss_forward_cuda_kernel(
    const int nthreads, const T* softmax, const int64_t* target,
    const T* weight, T* output, const T gamma, const T alpha,
    const int num_classes) {
  CUDA_1D_KERNEL_LOOP(index, nthreads) {
    int64_t label = target[index];

    if (label < 0) {
      // Negative label: contributes no loss and must not index any array.
      output[index] = 0;
      continue;
    }

    T pred = softmax[index * num_classes + label];
    output[index] =
        -alpha * pow((T)1. - pred, gamma) * log(max(pred, (T)FLT_MIN));
    if (weight != NULL) {
      output[index] *= weight[label];
    }
  }
}

// First stage of the softmax focal loss backward pass: computes one scalar
// per sample into buff.
//
// With label = target[index] and pred = softmax[index, label]:
//   label >= 0 : buff = alpha * (-(1 - pred)^gamma +
//                        gamma * (1 - pred)^(gamma - 1) * pred *
//                        log(max(pred, FLT_MIN))),
//                multiplied by weight[label] when weight is not NULL
//   label <  0 : buff = 0
//
// softmax and weight are only dereferenced for non-negative labels; indexing
// them with a negative label would read out of bounds.
template <typename T>
__global__ void softmax_focal_loss_backward_cuda1_kernel(
    const int nthreads, const T* softmax, const int64_t* target,
    const T* weight, T* buff, const T gamma, const T alpha,
    const int num_classes) {
  CUDA_1D_KERNEL_LOOP(index, nthreads) {
    int64_t label = target[index];

    if (label < 0) {
      // Negative label: no gradient and must not index any array.
      buff[index] = 0;
      continue;
    }

    T pred = softmax[index * num_classes + label];
    buff[index] = alpha * (-pow((T)1. - pred, gamma) +
                           gamma * pow((T)1. - pred, gamma - 1) * pred *
                               log(max(pred, (T)FLT_MIN)));
    if (weight != NULL) {
      buff[index] *= weight[label];
    }
  }
}

// Second stage of the softmax focal loss backward pass: expands the per-sample
// scalar in buff into a gradient for every (sample, class) element.
//
// index = sample * num_classes + cls. With label = target[sample]:
//   label >= 0 : grad_input = buff[sample] * ((cls == label) - softmax[index])
//   label <  0 : grad_input = 0
template <typename T>
__global__ void softmax_focal_loss_backward_cuda2_kernel(
    const int nthreads, const T* softmax, const int64_t* target, const T* buff,
    T* grad_input, const int num_classes) {
  CUDA_1D_KERNEL_LOOP(index, nthreads) {
    const int sample = index / num_classes;
    const int64_t label = target[sample];

    if (label < 0) {
      grad_input[index] = 0;
      continue;
    }

    const int cls = index % num_classes;
    const T one_hot = (label == cls ? (T)1. : (T)0.);
    grad_input[index] = buff[sample] * (one_hot - softmax[index]);
  }
}

#endif  // SOFTMAX_FOCAL_LOSS_CUDA_KERNEL_CUH


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/sync_bn_cuda_kernel.cuh
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef SYNCBN_CUDA_KERNEL_CUH
#define SYNCBN_CUDA_KERNEL_CUH

#ifdef MMCV_USE_PARROTS
#include "parrots_cuda_helper.hpp"
#else
#include "pytorch_cuda_helper.hpp"
#endif

// Computes the mean of one channel of `input` into mean[blockIdx.x].
//
// The channel is selected by blockIdx.x. Each thread sums the elements
// i = threadIdx.x, threadIdx.x + blockDim.x, ... of the num * spatial values
// of that channel, addressed as
//   (i / spatial) * channels * spatial + c * spatial + i % spatial.
// The per-thread sums are combined by a halving reduction in shared memory
// (which covers every slot only when blockDim.x is a power of two), and
// thread 0 writes sum / (num * spatial).
template <typename T>
__global__ void sync_bn_forward_mean_cuda_kernel(const T *input, float *mean,
                                                 int num, int channels,
                                                 int spatial) {
  __shared__ float buffer[THREADS_PER_BLOCK];
  const int lane = threadIdx.x;
  const int ch = blockIdx.x;
  const int total = num * spatial;

  // Per-thread partial sum, accumulated in float.
  float partial = 0;
  for (int i = lane; i < total; i += blockDim.x) {
    const int sample = i / spatial;
    const int pos = i % spatial;
    partial += input[sample * channels * spatial + ch * spatial + pos];
  }
  buffer[lane] = partial;
  __syncthreads();

  // Halving reduction of the per-thread sums.
  for (int stride = blockDim.x / 2; stride > 0; stride >>= 1) {
    if (lane < stride) {
      buffer[lane] += buffer[lane + stride];
    }
    __syncthreads();
  }

  if (lane == 0) {
    mean[ch] = buffer[0] / total;
  }
}

// phalf specialization of the per-channel mean kernel: identical to the
// generic version except that every input element is converted to float
// before being accumulated.
template <>
__global__ void sync_bn_forward_mean_cuda_kernel(const phalf *input,
                                                 float *mean, int num,
                                                 int channels, int spatial) {
  __shared__ float buffer[THREADS_PER_BLOCK];
  const int lane = threadIdx.x;
  const int ch = blockIdx.x;
  const int total = num * spatial;

  // Per-thread partial sum, accumulated in float.
  float partial = 0;
  for (int i = lane; i < total; i += blockDim.x) {
    const int sample = i / spatial;
    const int pos = i % spatial;
    partial += static_cast<float>(
        input[sample * channels * spatial + ch * spatial + pos]);
  }
  buffer[lane] = partial;
  __syncthreads();

  // Halving reduction of the per-thread sums.
  for (int stride = blockDim.x / 2; stride > 0; stride >>= 1) {
    if (lane < stride) {
      buffer[lane] += buffer[lane + stride];
    }
    __syncthreads();
  }

  if (lane == 0) {
    mean[ch] = buffer[0] / total;
  }
}

// Computes the (biased) variance of one channel of `input` into
// var[blockIdx.x], given the channel mean in mean[blockIdx.x].
//
// Each thread accumulates (input - mean)^2 over its strided share of the
// num * spatial values of the channel; the partial sums are combined by a
// halving reduction in shared memory and thread 0 writes
// sum / (num * spatial).
template <typename T>
__global__ void sync_bn_forward_var_cuda_kernel(const T *input,
                                                const float *mean, float *var,
                                                int num, int channels,
                                                int spatial) {
  __shared__ float buffer[THREADS_PER_BLOCK];
  const int lane = threadIdx.x;
  const int ch = blockIdx.x;
  const int total = num * spatial;
  const float mu = mean[ch];

  // Per-thread sum of squared deviations.
  float partial = 0;
  for (int i = lane; i < total; i += blockDim.x) {
    const int sample = i / spatial;
    const int pos = i % spatial;
    const float dev =
        input[sample * channels * spatial + ch * spatial + pos] - mu;
    partial += dev * dev;
  }
  buffer[lane] = partial;
  __syncthreads();

  // Halving reduction of the per-thread sums.
  for (int stride = blockDim.x / 2; stride > 0; stride >>= 1) {
    if (lane < stride) {
      buffer[lane] += buffer[lane + stride];
    }
    __syncthreads();
  }

  if (lane == 0) {
    var[ch] = buffer[0] / total;
  }
}

// phalf specialization of the per-channel variance kernel: identical to the
// generic version except that every input element is converted to float
// before the mean is subtracted.
template <>
__global__ void sync_bn_forward_var_cuda_kernel(const phalf *input,
                                                const float *mean, float *var,
                                                int num, int channels,
                                                int spatial) {
  __shared__ float buffer[THREADS_PER_BLOCK];
  const int lane = threadIdx.x;
  const int ch = blockIdx.x;
  const int total = num * spatial;
  const float mu = mean[ch];

  // Per-thread sum of squared deviations.
  float partial = 0;
  for (int i = lane; i < total; i += blockDim.x) {
    const int sample = i / spatial;
    const int pos = i % spatial;
    const float dev =
        static_cast<float>(
            input[sample * channels * spatial + ch * spatial + pos]) -
        mu;
    partial += dev * dev;
  }
  buffer[lane] = partial;
  __syncthreads();

  // Halving reduction of the per-thread sums.
  for (int stride = blockDim.x / 2; stride > 0; stride >>= 1) {
    if (lane < stride) {
      buffer[lane] += buffer[lane + stride];
    }
    __syncthreads();
  }

  if (lane == 0) {
    var[ch] = buffer[0] / total;
  }
}

// Normalizes one channel (blockIdx.x) of `input` and updates the running
// statistics.
//
// For every element of the channel:
//   normalized = (input - mean[c]) / sqrt(var[c] + eps)
//   output     = normalized * weight[c] + bias[c]   (weight != nullptr)
//   output     = normalized                         (weight == nullptr)
// When norm != nullptr the normalized value is also stored there (as float)
// and the output is derived from that stored value.
//
// Thread 0 additionally writes std[c] (when std != nullptr) and, when
// running_mean != nullptr, blends mean / unbiased variance into
// running_mean / running_var with factor `momentum`. The unbiased variance
// uses count = num * spatial * group_size.
template <typename T>
__global__ void sync_bn_forward_output_cuda_kernel(
    const T *input, const float *mean, const float *var, float *running_mean,
    float *running_var, const float *weight, const float *bias, float *norm,
    float *std, T *output, int num, int channels, int spatial, float eps,
    float momentum, int group_size) {
  const int lane = threadIdx.x;
  const int ch = blockIdx.x;
  const int total = num * spatial;
  const float mu = mean[ch];
  const float sigma = sqrt(var[ch] + eps);
  const bool affine = (weight != nullptr);
  const bool keep_norm = (norm != nullptr);

  // The four variants are kept as separate loops on purpose: whether the
  // normalized value passes through the float `norm` buffer affects rounding.
  if (affine && keep_norm) {
    const float gain = weight[ch];
    const float shift = bias[ch];
    for (int i = lane; i < total; i += blockDim.x) {
      const int idx =
          (i / spatial) * channels * spatial + ch * spatial + i % spatial;
      norm[idx] = (input[idx] - mu) / sigma;
      output[idx] = norm[idx] * gain + shift;
    }
  } else if (affine) {
    const float gain = weight[ch];
    const float shift = bias[ch];
    for (int i = lane; i < total; i += blockDim.x) {
      const int idx =
          (i / spatial) * channels * spatial + ch * spatial + i % spatial;
      output[idx] = (input[idx] - mu) / sigma * gain + shift;
    }
  } else if (keep_norm) {
    for (int i = lane; i < total; i += blockDim.x) {
      const int idx =
          (i / spatial) * channels * spatial + ch * spatial + i % spatial;
      output[idx] = norm[idx] = (input[idx] - mu) / sigma;
    }
  } else {
    for (int i = lane; i < total; i += blockDim.x) {
      const int idx =
          (i / spatial) * channels * spatial + ch * spatial + i % spatial;
      output[idx] = (input[idx] - mu) / sigma;
    }
  }

  // Per-channel bookkeeping is done by a single thread.
  if (lane != 0) return;

  if (std != nullptr) std[ch] = sigma;
  if (running_mean != nullptr) {
    running_mean[ch] = momentum * mu + (1 - momentum) * running_mean[ch];
    const int count = num * spatial * group_size;
    const float var_unbias =
        count > 1 ? var[ch] * count / (count - 1) : var[ch];
    running_var[ch] = momentum * var_unbias + (1 - momentum) * running_var[ch];
  }
}

// phalf specialization of the normalization kernel. Same contract as the
// generic version; inputs are converted to float before the arithmetic and
// results are converted back to phalf when stored to `output`.
template <>
__global__ void sync_bn_forward_output_cuda_kernel(
    const phalf *input, const float *mean, const float *var,
    float *running_mean, float *running_var, const float *weight,
    const float *bias, float *norm, float *std, phalf *output, int num,
    int channels, int spatial, float eps, float momentum, int group_size) {
  const int lane = threadIdx.x;
  const int ch = blockIdx.x;
  const int total = num * spatial;
  const float mu = mean[ch];
  const float sigma = sqrt(var[ch] + eps);
  const bool affine = (weight != nullptr);
  const bool keep_norm = (norm != nullptr);

  if (affine && keep_norm) {
    const float gain = weight[ch];
    const float shift = bias[ch];
    for (int i = lane; i < total; i += blockDim.x) {
      const int idx =
          (i / spatial) * channels * spatial + ch * spatial + i % spatial;
      norm[idx] = (static_cast<float>(input[idx]) - mu) / sigma;
      output[idx] = static_cast<phalf>(norm[idx] * gain + shift);
    }
  } else if (affine) {
    const float gain = weight[ch];
    const float shift = bias[ch];
    for (int i = lane; i < total; i += blockDim.x) {
      const int idx =
          (i / spatial) * channels * spatial + ch * spatial + i % spatial;
      output[idx] = static_cast<phalf>(
          (static_cast<float>(input[idx]) - mu) / sigma * gain + shift);
    }
  } else if (keep_norm) {
    for (int i = lane; i < total; i += blockDim.x) {
      const int idx =
          (i / spatial) * channels * spatial + ch * spatial + i % spatial;
      norm[idx] = (static_cast<float>(input[idx]) - mu) / sigma;
      output[idx] = static_cast<phalf>(norm[idx]);
    }
  } else {
    for (int i = lane; i < total; i += blockDim.x) {
      const int idx =
          (i / spatial) * channels * spatial + ch * spatial + i % spatial;
      output[idx] =
          static_cast<phalf>((static_cast<float>(input[idx]) - mu) / sigma);
    }
  }

  // Per-channel bookkeeping is done by a single thread.
  if (lane != 0) return;

  if (std != nullptr) std[ch] = sigma;
  if (running_mean != nullptr) {
    running_mean[ch] = momentum * mu + (1 - momentum) * running_mean[ch];
    const int count = num * spatial * group_size;
    const float var_unbias =
        count > 1 ? var[ch] * count / (count - 1) : var[ch];
    running_var[ch] = momentum * var_unbias + (1 - momentum) * running_var[ch];
  }
}

// Computes the per-channel parameter gradients for channel blockIdx.x:
//   grad_weight[c] = sum(grad_output * norm)
//   grad_bias[c]   = sum(grad_output)
// taken over the num * spatial elements of the channel. Each thread
// accumulates a strided share; the partial sums are combined by a halving
// reduction in shared memory and thread 0 writes the results.
template <typename T>
__global__ void sync_bn_backward_param_cuda_kernel(const T *grad_output,
                                                   const float *norm,
                                                   float *grad_weight,
                                                   float *grad_bias, int num,
                                                   int channels, int spatial) {
  __shared__ float buffer1[THREADS_PER_BLOCK];
  __shared__ float buffer2[THREADS_PER_BLOCK];

  const int lane = threadIdx.x;
  const int ch = blockIdx.x;
  const int total = num * spatial;

  float dot_partial = 0;  // running sum of grad_output * norm
  float sum_partial = 0;  // running sum of grad_output
  for (int i = lane; i < total; i += blockDim.x) {
    const int idx =
        (i / spatial) * channels * spatial + ch * spatial + i % spatial;
    const T g = grad_output[idx];
    dot_partial += g * norm[idx];
    sum_partial += g;
  }
  buffer1[lane] = dot_partial;
  buffer2[lane] = sum_partial;
  __syncthreads();

  // Halving reduction of both accumulators at once.
  for (int stride = blockDim.x / 2; stride > 0; stride >>= 1) {
    if (lane < stride) {
      buffer1[lane] += buffer1[lane + stride];
      buffer2[lane] += buffer2[lane + stride];
    }
    __syncthreads();
  }

  if (lane == 0) {
    grad_weight[ch] = buffer1[0];
    grad_bias[ch] = buffer2[0];
  }
}

// phalf specialization of the parameter-gradient kernel: identical to the
// generic version except that grad_output is converted to float before use.
template <>
__global__ void sync_bn_backward_param_cuda_kernel(const phalf *grad_output,
                                                   const float *norm,
                                                   float *grad_weight,
                                                   float *grad_bias, int num,
                                                   int channels, int spatial) {
  __shared__ float buffer1[THREADS_PER_BLOCK];
  __shared__ float buffer2[THREADS_PER_BLOCK];

  const int lane = threadIdx.x;
  const int ch = blockIdx.x;
  const int total = num * spatial;

  float dot_partial = 0;  // running sum of grad_output * norm
  float sum_partial = 0;  // running sum of grad_output
  for (int i = lane; i < total; i += blockDim.x) {
    const int idx =
        (i / spatial) * channels * spatial + ch * spatial + i % spatial;
    const float g = static_cast<float>(grad_output[idx]);
    dot_partial += g * norm[idx];
    sum_partial += g;
  }
  buffer1[lane] = dot_partial;
  buffer2[lane] = sum_partial;
  __syncthreads();

  // Halving reduction of both accumulators at once.
  for (int stride = blockDim.x / 2; stride > 0; stride >>= 1) {
    if (lane < stride) {
      buffer1[lane] += buffer1[lane + stride];
      buffer2[lane] += buffer2[lane + stride];
    }
    __syncthreads();
  }

  if (lane == 0) {
    grad_weight[ch] = buffer1[0];
    grad_bias[ch] = buffer2[0];
  }
}

// Computes the input gradient of the normalization, element by element:
//   grad_input = weight[c] *
//                (grad_output - (grad_weight[c] * norm + grad_bias[c]) /
//                               (num * spatial)) /
//                std[c]
// where c = (index / spatial) % channels is the channel of the element.
template <typename T>
__global__ void sync_bn_backward_data_cuda_kernel(
    int output_size, const T *grad_output, const float *weight,
    const float *grad_weight, const float *grad_bias, const float *norm,
    const float *std, T *grad_input, int num, int channels, int spatial) {
  const int factor = num * spatial;
  CUDA_1D_KERNEL_LOOP(index, output_size) {
    const int ch = (index / spatial) % channels;
    // Mean contribution removed from the incoming gradient (always float).
    const float correction =
        (grad_weight[ch] * norm[index] + grad_bias[ch]) / factor;
    // `auto` keeps the promoted type of T - float, as in a single expression.
    const auto centered = grad_output[index] - correction;
    grad_input[index] = weight[ch] * centered / std[ch];
  }
}

// phalf specialization of the input-gradient kernel: grad_output is converted
// to float, the formula of the generic kernel is evaluated in float, and the
// result is converted back to phalf.
template <>
__global__ void sync_bn_backward_data_cuda_kernel(
    int output_size, const phalf *grad_output, const float *weight,
    const float *grad_weight, const float *grad_bias, const float *norm,
    const float *std, phalf *grad_input, int num, int channels, int spatial) {
  const int factor = num * spatial;
  CUDA_1D_KERNEL_LOOP(index, output_size) {
    const int ch = (index / spatial) % channels;
    // Mean contribution removed from the incoming gradient.
    const float correction =
        (grad_weight[ch] * norm[index] + grad_bias[ch]) / factor;
    const float centered = static_cast<float>(grad_output[index]) - correction;
    grad_input[index] = static_cast<phalf>(weight[ch] * centered / std[ch]);
  }
}

#endif  // SYNCBN_CUDA_KERNEL_CUH


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/three_interpolate_cuda_kernel.cuh
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef THREE_INTERPOLATE_CUDA_KERNEL_CUH
#define THREE_INTERPOLATE_CUDA_KERNEL_CUH

#ifdef MMCV_USE_PARROTS
#include "parrots_cuda_helper.hpp"
#else
#include "pytorch_cuda_helper.hpp"
#endif

// Weighted interpolation of point features from three source points.
//
// points: (B, C, M)
// idx: (B, N, 3)
// weight: (B, N, 3)
// output:
//      out: (B, C, N)
//
// blockIdx.z selects the batch entry and blockIdx.y the channel; the loop
// variable pt_idx selects the target point. For each target point
//   out[b, c, n] = sum_k weight[b, n, k] * points[b, c, idx[b, n, k]].
template <typename T>
__global__ void three_interpolate_forward_cuda_kernel(
    int b, int c, int m, int n, const T *points, const int *__restrict__ idx,
    const T *weight, T *out) {
  const int bs_idx = blockIdx.z;
  const int c_idx = blockIdx.y;
  if (bs_idx >= b || c_idx >= c) return;

  // Row bases depend only on (batch, channel), so compute them once.
  const T *points_row = points + bs_idx * c * m + c_idx * m;
  T *out_row = out + bs_idx * c * n + c_idx * n;

  CUDA_1D_KERNEL_LOOP(pt_idx, n) {
    // Derive per-point pointers from the untouched kernel arguments on every
    // iteration. Advancing the arguments themselves with `+=` would stack the
    // offsets whenever a thread runs the loop body more than once.
    const int nbr_base = bs_idx * n * 3 + pt_idx * 3;
    const T *w = weight + nbr_base;
    const int *nbr = idx + nbr_base;

    out_row[pt_idx] = w[0] * points_row[nbr[0]] + w[1] * points_row[nbr[1]] +
                      w[2] * points_row[nbr[2]];
  }
}

// Backward pass of the three-point interpolation.
//
// grad_out: (B, C, N)
// weight: (B, N, 3)
// output:
//      grad_points: (B, C, M)
//
// blockIdx.z selects the batch entry and blockIdx.y the channel; the loop
// variable pt_idx selects the target point. Each target point atomically adds
// grad_out[b, c, n] * weight[b, n, k] to grad_points[b, c, idx[b, n, k]] for
// k = 0, 1, 2.
template <typename T>
__global__ void three_interpolate_backward_cuda_kernel(
    int b, int c, int n, int m, const T *grad_out, const int *__restrict__ idx,
    const T *weight, T *grad_points) {
  const int bs_idx = blockIdx.z;
  const int c_idx = blockIdx.y;
  if (bs_idx >= b || c_idx >= c) return;

  // Row bases depend only on (batch, channel), so compute them once.
  const T *grad_out_row = grad_out + bs_idx * c * n + c_idx * n;
  T *grad_points_row = grad_points + bs_idx * c * m + c_idx * m;

  CUDA_1D_KERNEL_LOOP(pt_idx, n) {
    // Derive per-point pointers from the untouched kernel arguments on every
    // iteration. Advancing the arguments themselves with `+=` would stack the
    // offsets whenever a thread runs the loop body more than once.
    const int nbr_base = bs_idx * n * 3 + pt_idx * 3;
    const T *w = weight + nbr_base;
    const int *nbr = idx + nbr_base;
    const T g = grad_out_row[pt_idx];

    atomicAdd(grad_points_row + nbr[0], g * w[0]);
    atomicAdd(grad_points_row + nbr[1], g * w[1]);
    atomicAdd(grad_points_row + nbr[2], g * w[2]);
  }
}

#endif  // THREE_INTERPOLATE_CUDA_KERNEL_CUH


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/three_nn_cuda_kernel.cuh
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef THREE_NN_CUDA_KERNEL_CUH
#define THREE_NN_CUDA_KERNEL_CUH

#ifdef MMCV_USE_PARROTS
#include "parrots_cuda_helper.hpp"
#else
#include "pytorch_cuda_helper.hpp"
#endif

// Finds, for every query point, the three closest reference points.
//
// unknown: (B, N, 3)
// known: (B, M, 3)
// output:
//      dist2: (B, N, 3)
//      idx: (B, N, 3)
//
// blockIdx.y selects the batch entry; the loop variable pt_idx selects the
// query point. All m reference points of the batch are scanned linearly and
// the three smallest squared Euclidean distances (ascending) are written to
// dist2 together with the matching reference indices in idx. Slots that are
// never filled (m < 3) keep the initial distance 1e40 and index 0.
template <typename T>
__global__ void three_nn_forward_cuda_kernel(int b, int n, int m,
                                             const T *unknown, const T *known,
                                             T *dist2, int *__restrict__ idx) {
  const int bs_idx = blockIdx.y;
  if (bs_idx >= b) return;

  // Reference points of this batch entry; independent of the query point.
  const T *known_batch = known + bs_idx * m * 3;

  CUDA_1D_KERNEL_LOOP(pt_idx, n) {
    // Derive per-point pointers from the untouched kernel arguments on every
    // iteration. Advancing the arguments themselves with `+=` would stack the
    // offsets whenever a thread runs the loop body more than once.
    const int pt_base = bs_idx * n * 3 + pt_idx * 3;
    const T *query = unknown + pt_base;
    T *dist2_out = dist2 + pt_base;
    int *idx_out = idx + pt_base;

    T ux = query[0];
    T uy = query[1];
    T uz = query[2];

    double best1 = 1e40, best2 = 1e40, best3 = 1e40;
    int besti1 = 0, besti2 = 0, besti3 = 0;
    for (int k = 0; k < m; ++k) {
      T x = known_batch[k * 3 + 0];
      T y = known_batch[k * 3 + 1];
      T z = known_batch[k * 3 + 2];
      T d = (ux - x) * (ux - x) + (uy - y) * (uy - y) + (uz - z) * (uz - z);
      // Insert d into the sorted triple (best1 <= best2 <= best3).
      if (d < best1) {
        best3 = best2;
        besti3 = besti2;
        best2 = best1;
        besti2 = besti1;
        best1 = d;
        besti1 = k;
      } else if (d < best2) {
        best3 = best2;
        besti3 = besti2;
        best2 = d;
        besti2 = k;
      } else if (d < best3) {
        best3 = d;
        besti3 = k;
      }
    }
    dist2_out[0] = best1;
    dist2_out[1] = best2;
    dist2_out[2] = best3;
    idx_out[0] = besti1;
    idx_out[1] = besti2;
    idx_out[2] = besti3;
  }
}

#endif  // THREE_NN_CUDA_KERNEL_CUH


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/tin_shift_cuda_kernel.cuh
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef TIN_SHIFT_CUDA_KERNEL_CUH
#define TIN_SHIFT_CUDA_KERNEL_CUH

#ifdef MMCV_USE_PARROTS
#include "parrots_cuda_helper.hpp"
#else
#include "pytorch_cuda_helper.hpp"
#endif

// Shifts data along the temporal axis by a per-(batch, group) offset.
//
// Each loop index addresses one (batch, channel, hw) position, decoded as
//   hw_index = index % hw_size
//   channel  = (index / hw_size) % channels
//   n_index  = (index / hw_size / channels) % batch_size
// The shift is read from shift[n_index * group_size + channel / group_channel].
// For every source time step t, the value is copied to time step t + shift of
// `output` when that lies inside [0, t_size); other destinations are skipped,
// so output elements that receive no value are left untouched.
template <typename T>
__global__ void tin_shift_forward_cuda_kernel(
    const int nthreads, const T* input, const int* shift, T* output,
    const int batch_size, const int channels, const int t_size,
    const int hw_size, const int group_size, const int group_channel) {
  CUDA_1D_KERNEL_LOOP(index, nthreads) {
    const int hw_index = index % hw_size;
    const int ch = (index / hw_size) % channels;
    const int n_index = (index / hw_size / channels) % batch_size;

    const int t_shift = shift[n_index * group_size + ch / group_channel];
    // Offset of this (batch, channel, hw) position at time step 0.
    const int base =
        n_index * t_size * hw_size * channels + hw_size * ch + hw_index;

    for (int src_t = 0; src_t < t_size; ++src_t) {
      const int dst_t = src_t + t_shift;
      if (dst_t >= 0 && dst_t < t_size) {
        output[dst_t * hw_size * channels + base] =
            input[src_t * hw_size * channels + base];
      }
    }
  }
}

// Backward counterpart of the temporal shift kernel.
//
// Each loop index addresses one (batch, channel, hw) position, decoded as
//   hw_index = index % hw_size
//   channel  = (index / hw_size) % channels
//   n_index  = (index / hw_size / channels) % batch_size
// For every time step t, input[t] is copied to output[t + shift] when that
// destination lies inside [0, t_size).
//
// NOTE(review): the body is identical to tin_shift_forward_cuda_kernel, i.e.
// it shifts in the same direction. Presumably `input`/`output` are the output
// and input gradients; whether the direction is intended depends on the
// caller - confirm.
template <typename T>
__global__ void tin_shift_backward_cuda_kernel(
    const int nthreads, const T* input, const int* shift, T* output,
    const int batch_size, const int channels, const int t_size,
    const int hw_size, const int group_size, const int group_channel) {
  CUDA_1D_KERNEL_LOOP(index, nthreads) {
    const int hw_index = index % hw_size;
    const int ch = (index / hw_size) % channels;
    const int n_index = (index / hw_size / channels) % batch_size;

    const int t_shift = shift[n_index * group_size + ch / group_channel];
    // Offset of this (batch, channel, hw) position at time step 0.
    const int base =
        n_index * t_size * hw_size * channels + hw_size * ch + hw_index;

    for (int src_t = 0; src_t < t_size; ++src_t) {
      const int dst_t = src_t + t_shift;
      if (dst_t >= 0 && dst_t < t_size) {
        output[dst_t * hw_size * channels + base] =
            input[src_t * hw_size * channels + base];
      }
    }
  }
}

#endif  // TIN_SHIFT_CUDA_KERNEL_CUH


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/cuda/voxelization_cuda_kernel.cuh
================================================
// Copyright (c) OpenMMLab. All rights reserved.
#ifndef VOXELIZATION_CUDA_KERNEL_CUH
#define VOXELIZATION_CUDA_KERNEL_CUH

#ifdef MMCV_USE_PARROTS
#include "parrots_cuda_helper.hpp"
#else
#include "pytorch_cuda_helper.hpp"
#endif

// Reduction mode selector: SUM (0), MEAN (1) or MAX (2).
typedef enum { SUM = 0, MEAN = 1, MAX = 2 } reduce_t;

// Converts point coordinates into integer voxel grid coordinates.
//
// For point `index` the first three values of its feature row are mapped to
//   c = floorf((value - coors_min) / voxel_size)
// along x, y and z in that order. The check stops at the first axis that
// falls outside [0, grid): the coors entries 0..k are then set to -1, where k
// is the position of the failing axis (0 for x, 1 for y, 2 for z), and the
// remaining entries are not written. If all three axes are in range the
// result is stored as (c_z, c_y, c_x) in coors[0..2].
template <typename T, typename T_int>
__global__ void dynamic_voxelize_kernel(
    const T* points, T_int* coors, const float voxel_x, const float voxel_y,
    const float voxel_z, const float coors_x_min, const float coors_y_min,
    const float coors_z_min, const float coors_x_max, const float coors_y_max,
    const float coors_z_max, const int grid_x, const int grid_y,
    const int grid_z, const int num_points, const int num_features,
    const int NDim) {
  CUDA_1D_KERNEL_LOOP(index, num_points) {
    const T* pt = points + index * num_features;
    T_int* cell = coors + index * NDim;

    const int c_x = floorf((pt[0] - coors_x_min) / voxel_x);
    if (!(c_x >= 0 && c_x < grid_x)) {
      cell[0] = -1;
      continue;
    }

    const int c_y = floorf((pt[1] - coors_y_min) / voxel_y);
    if (!(c_y >= 0 && c_y < grid_y)) {
      cell[0] = -1;
      cell[1] = -1;
      continue;
    }

    const int c_z = floorf((pt[2] - coors_z_min) / voxel_z);
    const bool z_inside = (c_z >= 0 && c_z < grid_z);
    // Stored in (z, y, x) order; all three are -1 when z is out of range.
    cell[0] = z_inside ? c_z : -1;
    cell[1] = z_inside ? c_y : -1;
    cell[2] = z_inside ? c_x : -1;
  }
}

// Copies point features into their slot of the dense voxel buffer.
//
// Each loop index addresses one feature value: point = thread_idx /
// num_features, feature = thread_idx % num_features. When both
// point_to_voxelidx[point] (slot inside the voxel) and
// coor_to_voxelidx[point] (voxel index) are non-negative, the value is
// written to voxels[voxel, slot, feature], with the buffer laid out as
// (voxel, max_points, num_features).
template <typename T, typename T_int>
__global__ void assign_point_to_voxel(const int nthreads, const T* points,
                                      T_int* point_to_voxelidx,
                                      T_int* coor_to_voxelidx, T* voxels,
                                      const int max_points,
                                      const int num_features,
                                      const int num_points, const int NDim) {
  CUDA_1D_KERNEL_LOOP(thread_idx, nthreads) {
    const int point = thread_idx / num_features;

    const int slot = point_to_voxelidx[point];
    const int voxel = coor_to_voxelidx[point];
    if (slot < 0 || voxel < 0) continue;  // point not assigned to any voxel

    const int feature = thread_idx % num_features;
    T* dst = voxels + voxel * max_points * num_features + slot * num_features;
    dst[feature] = points[thread_idx];
  }
}

// Writes the grid coordinates of every voxel, taken from the point that
// occupies slot 0 of that voxel.
//
// Each loop index addresses one coordinate component: point = thread_idx /
// NDim, component = thread_idx % NDim. When point_to_voxelidx[point] == 0 and
// coor_to_voxelidx[point] is non-negative, coor[thread_idx] is copied to
// voxel_coors[voxel, component].
template <typename T, typename T_int>
__global__ void assign_voxel_coors(const int nthreads, T_int* coor,
                                   T_int* point_to_voxelidx,
                                   T_int* coor_to_voxelidx, T_int* voxel_coors,
                                   const int num_points, const int NDim) {
  CUDA_1D_KERNEL_LOOP(thread_idx, nthreads) {
    const int point = thread_idx / NDim;

    const int slot = point_to_voxelidx[point];
    const int voxel = coor_to_voxelidx[point];
    // Only the first point of a valid voxel defines its coordinates.
    if (slot != 0 || voxel < 0) continue;

    const int component = thread_idx % NDim;
    voxel_coors[voxel * NDim + component] = coor[thread_idx];
  }
}

// For every valid point, counts how many earlier points share its voxel
// coordinates and records the first such point.
//
// coor holds NDim integers per point; a point whose first coordinate is -1 is
// invalid and is skipped. For a valid point `index`:
//   * num = number of earlier valid points with identical coor[0..2];
//   * point_to_pointidx[index] = index of the first such earlier point, or
//     `index` itself when there is none;
//   * point_to_voxelidx[index] = num, written only when num < max_points.
// Counting stops as soon as num reaches max_points.
//
// Skipping a point uses `continue`/`break` rather than `return`, so a thread
// that handles several indices in the loop still processes the later ones.
template <typename T_int>
__global__ void point_to_voxelidx_kernel(const T_int* coor,
                                         T_int* point_to_voxelidx,
                                         T_int* point_to_pointidx,
                                         const int max_points,
                                         const int max_voxels,
                                         const int num_points, const int NDim) {
  CUDA_1D_KERNEL_LOOP(index, num_points) {
    auto coor_offset = coor + index * NDim;
    // skip invalid points
    if (coor_offset[0] == -1) continue;

    int num = 0;
    int coor_x = coor_offset[0];
    int coor_y = coor_offset[1];
    int coor_z = coor_offset[2];
    // only calculate the coors before this coor[index]
    for (int i = 0; i < index; ++i) {
      auto prev_coor = coor + i * NDim;
      if (prev_coor[0] == -1) continue;

      // Find all previous points that have the same coors
      // if find the same coor, record it
      if ((prev_coor[0] == coor_x) && (prev_coor[1] == coor_y) &&
          (prev_coor[2] == coor_z)) {
        num++;
        if (num == 1) {
          // point to the same coor that first show up
          point_to_pointidx[index] = i;
        } else if (num >= max_points) {
          // out of boundary: this voxel is already full, stop counting.
          // num >= max_points here, so neither write below is executed.
          break;
        }
      }
    }
    if (num == 0) {
      point_to_pointidx[index] = index;
    }
    if (num < max_points) {
      point_to_voxelidx[index] = num;
    }
  }
}

// Assigns a voxel index to every point and counts the points per voxel.
//
// The kernel body is a plain sequential loop over all points and does not use
// any thread or block index. voxel_num[0] serves as the running count of
// voxels created so far.
//
// For each point i, depending on point_to_voxelidx[i]:
//   * -1 : the point is skipped;
//   *  0 : the point opens a new voxel, unless voxel_num[0] has already
//          reached max_voxels; the new voxel gets index voxel_num[0] and a
//          point count of 1;
//   * otherwise : the point joins the voxel of point_to_pointidx[i], provided
//          that point has a voxel (coor_to_voxelidx != -1), and the voxel's
//          point count is incremented.
template <typename T_int>
__global__ void determin_voxel_num(
    // const T_int* coor,
    T_int* num_points_per_voxel, T_int* point_to_voxelidx,
    T_int* point_to_pointidx, T_int* coor_to_voxelidx, T_int* voxel_num,
    const int max_points, const int max_voxels, const int num_points) {
  // only calculate the coors before this coor[index]
  for (int i = 0; i < num_points; ++i) {
    int point_pos_in_voxel = point_to_voxelidx[i];
    // record voxel
    if (point_pos_in_voxel == -1) {
      // out of max_points or invalid point
      continue;
    } else if (point_pos_in_voxel == 0) {
      // record new voxel
      int voxelidx = voxel_num[0];
      // voxel budget exhausted: leave this point without a voxel
      if (voxel_num[0] >= max_voxels) continue;
      voxel_num[0] += 1;
      coor_to_voxelidx[i] = voxelidx;
      num_points_per_voxel[voxelidx] = 1;
    } else {
      // follow the link to the first point with the same coordinates and
      // inherit its voxel, if it has one
      int point_idx = point_to_pointidx[i];
      int voxelidx = coor_to_voxelidx[point_idx];
      if (voxelidx != -1) {
        coor_to_voxelidx[i] = voxelidx;
        num_points_per_voxel[voxelidx] += 1;
      }
    }
  }
}

#endif  // VOXELIZATION_CUDA_KERNEL_CUH


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/parrots_cpp_helper.hpp
================================================
#ifndef PARROTS_CPP_HELPER
#define PARROTS_CPP_HELPER
#include <parrots/darray/darraymath.hpp>
#include <parrots/extension.hpp>
#include <parrots/foundation/darraylite.hpp>
#include <parrots/foundation/ssattrs.hpp>
#include <vector>

using namespace parrots;

// Emits one `case` of a dtype switch: inside the case, `scalar_t` is an alias
// for `type`, and the callable passed through __VA_ARGS__ is invoked and its
// result returned from the enclosing function/lambda.
#define PARROTS_PRIVATE_CASE_TYPE(prim_type, type, ...) \
  case prim_type: {                                     \
    using scalar_t = type;                              \
    return __VA_ARGS__();                               \
  }

// Immediately-invoked lambda that switches on TYPE and runs the callable in
// __VA_ARGS__ with `scalar_t` bound to double (Prim::Float64) or float
// (Prim::Float32). Any other value expands PARROTS_NOTSUPPORTED, which is
// defined outside this header (presumably it reports an error - confirm).
#define PARROTS_DISPATCH_FLOATING_TYPES(TYPE, ...)                  \
  [&] {                                                             \
    const auto& the_type = TYPE;                                    \
    switch (the_type) {                                             \
      PARROTS_PRIVATE_CASE_TYPE(Prim::Float64, double, __VA_ARGS__) \
      PARROTS_PRIVATE_CASE_TYPE(Prim::Float32, float, __VA_ARGS__)  \
      default:                                                      \
        PARROTS_NOTSUPPORTED;                                       \
    }                                                               \
  }()

// Same as PARROTS_DISPATCH_FLOATING_TYPES, with an additional case that binds
// `scalar_t` to float16 for Prim::Float16.
#define PARROTS_DISPATCH_FLOATING_TYPES_AND_HALF(TYPE, ...)          \
  [&] {                                                              \
    const auto& the_type = TYPE;                                     \
    switch (the_type) {                                              \
      PARROTS_PRIVATE_CASE_TYPE(Prim::Float64, double, __VA_ARGS__)  \
      PARROTS_PRIVATE_CASE_TYPE(Prim::Float32, float, __VA_ARGS__)   \
      PARROTS_PRIVATE_CASE_TYPE(Prim::Float16, float16, __VA_ARGS__) \
      default:                                                       \
        PARROTS_NOTSUPPORTED;                                        \
    }                                                                \
  }()

#endif  // PARROTS_CPP_HELPER


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/parrots_cuda_helper.hpp
================================================
#ifndef PARROTS_CUDA_HELPER
#define PARROTS_CUDA_HELPER

#include <cuda.h>
#include <float.h>

#include <parrots/darray/darraymath.hpp>
#include <parrots/darray/mathfunctions.hpp>
#include <parrots/extension.hpp>
#include <parrots/foundation/darrayutil.hpp>
#include <parrots/foundation/exceptions.hpp>
#include <parrots/foundation/float16.hpp>
#include <parrots/foundation/mathfunction.hpp>

#include "common_cuda_helper.hpp"
#include "parrots_cudawarpfunction.cuh"

using namespace parrots;
using phalf = float16;

#define __PHALF(x) (x.y)

// Evaluates `exp` once as a cudaError_t. On anything other than cudaSuccess it
// prints the CUDA error string to stderr and terminates the process with
// exit(-1). Wrapped in do/while(0) so it behaves as a single statement.
#define PARROTS_CUDA_CHECK(exp)                         \
  do {                                                  \
    cudaError_t err = exp;                              \
    if (err != cudaSuccess) {                           \
      fprintf(stderr, "cudaCheckError() failed : %s\n", \
              cudaGetErrorString(err));                 \
      exit(-1);                                         \
    }                                                   \
  } while (0)

// Emits one `case` of a dtype switch: inside the case, `scalar_t` is an alias
// for `type`, and the callable passed through __VA_ARGS__ is invoked and its
// result returned from the enclosing function/lambda.
#define PARROTS_PRIVATE_CASE_TYPE(prim_type, type, ...) \
  case prim_type: {                                     \
    using scalar_t = type;                              \
    return __VA_ARGS__();                               \
  }

// Immediately-invoked lambda that switches on TYPE and runs the callable in
// __VA_ARGS__ with `scalar_t` bound to double (Prim::Float64) or float
// (Prim::Float32). Any other value expands PARROTS_NOTSUPPORTED, which is
// defined outside this header (presumably it reports an error - confirm).
#define PARROTS_DISPATCH_FLOATING_TYPES(TYPE, ...)                  \
  [&] {                                                             \
    const auto& the_type = TYPE;                                    \
    switch (the_type) {                                             \
      PARROTS_PRIVATE_CASE_TYPE(Prim::Float64, double, __VA_ARGS__) \
      PARROTS_PRIVATE_CASE_TYPE(Prim::Float32, float, __VA_ARGS__)  \
      default:                                                      \
        PARROTS_NOTSUPPORTED;                                       \
    }                                                               \
  }()

// Same as PARROTS_DISPATCH_FLOATING_TYPES, with an additional case that binds
// `scalar_t` to float16 for Prim::Float16.
#define PARROTS_DISPATCH_FLOATING_TYPES_AND_HALF(TYPE, ...)          \
  [&] {                                                              \
    const auto& the_type = TYPE;                                     \
    switch (the_type) {                                              \
      PARROTS_PRIVATE_CASE_TYPE(Prim::Float64, double, __VA_ARGS__)  \
      PARROTS_PRIVATE_CASE_TYPE(Prim::Float32, float, __VA_ARGS__)   \
      PARROTS_PRIVATE_CASE_TYPE(Prim::Float16, float16, __VA_ARGS__) \
      default:                                                       \
        PARROTS_NOTSUPPORTED;                                        \
    }                                                                \
  }()

/** atomicAdd **/
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 600

// Software atomicAdd for double, compiled only when __CUDA_ARCH__ < 600 (see
// the surrounding #if). Adds `val` to `*address` with a compare-and-swap retry
// loop on the 64-bit pattern and returns the value that was stored before the
// addition.
static __inline__ __device__ double atomicAdd(double* address, double val) {
  // View the double as a 64-bit integer so atomicCAS can operate on it.
  unsigned long long int* address_as_ull = (unsigned long long int*)address;
  unsigned long long int old = *address_as_ull, assumed;
  // Adding zero: return the value just read without touching memory.
  if (val == 0.0) return __longlong_as_double(old);
  do {
    assumed = old;
    // Store (assumed + val) only if memory still holds `assumed`; atomicCAS
    // returns what it actually found there.
    old = atomicCAS(address_as_ull, assumed,
                    __double_as_longlong(val + __longlong_as_double(assumed)));
    // A mismatch means the word changed between read and swap: retry.
  } while (assumed != old);
  return __longlong_as_double(old);
}

#endif

// atomicAdd for float16, built on a 32-bit atomicCAS: the 16-bit value lives in
// either the low or the high half of an aligned 32-bit word, and the whole
// word is swapped while the neighbouring half is carried over unchanged.
// Returns the 16-bit pattern observed before the successful swap,
// reinterpreted as float16.
static __inline__ __device__ float16 atomicAdd(float16* address, float16 val) {
  // Round the address down to the enclosing 4-byte word (clears bit 1).
  unsigned int* aligned =
      (unsigned int*)((size_t)address - ((size_t)address & 2));
  unsigned int old = *aligned;
  unsigned int assumed;
  unsigned short old_as_us;
  do {
    assumed = old;
    // Pick the half of the word that `address` points at: the upper 16 bits
    // when bit 1 of the address is set, the lower 16 bits otherwise.
    old_as_us =
        (unsigned short)((size_t)address & 2 ? old >> 16 : old & 0xffff);

#if __CUDACC_VER_MAJOR__ >= 9
    // NOTE(review): assumes float16::x holds the raw 16-bit pattern - confirm
    // against the parrots float16 definition.
    float16 tmp;
    tmp.x = old_as_us;
    float16 sum = tmp + val;
    unsigned short sum_as_us = sum.x;
//         half sum = __float2half_rn(__half2float(__ushort_as_half(old_as_us))
//         + (float)(val)); unsigned short sum_as_us = __half_as_ushort(sum);
#else
    unsigned short sum_as_us =
        __float2half_rn(__half2float(old_as_us) + (float)(val));
#endif

    // Rebuild the 32-bit word: new sum in the selected half, the other half
    // taken from the value read in this iteration.
    unsigned int sum_as_ui = (size_t)address & 2
                                 ? (sum_as_us << 16) | (old & 0xffff)
                                 : (old & 0xffff0000) | sum_as_us;
    old = atomicCAS(aligned, assumed, sum_as_ui);
    // Retry while another writer changed the word in between.
  } while (assumed != old);
  //__half_raw raw = {old_as_us};
  // return float16(raw);
  return *reinterpret_cast<float16*>(&old_as_us);
}
#endif  // PARROTS_CUDA_HELPER


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/pytorch_cpp_helper.hpp
================================================
#ifndef PYTORCH_CPP_HELPER
#define PYTORCH_CPP_HELPER
#include <torch/extension.h>

#include <vector>

using namespace at;

// Argument validation helpers built on TORCH_CHECK.
//
// CHECK_CUDA / CHECK_CPU verify the device of `x` through
// `x.device().is_cuda()`; CHECK_CONTIGUOUS verifies `x.is_contiguous()`.
// The failure message is the stringified argument followed by a fixed text.
#define CHECK_CUDA(x) \
  TORCH_CHECK((x).device().is_cuda(), #x " must be a CUDA tensor")
#define CHECK_CPU(x) \
  TORCH_CHECK(!(x).device().is_cuda(), #x " must be a CPU tensor")
#define CHECK_CONTIGUOUS(x) \
  TORCH_CHECK((x).is_contiguous(), #x " must be contiguous")

// Combined device + contiguity checks. They are wrapped in do/while(0) so the
// pair acts as ONE statement: with the plain `A; B` expansion, writing
// `if (cond) CHECK_CUDA_INPUT(t);` would run the contiguity check even when
// `cond` is false.
#define CHECK_CUDA_INPUT(x) \
  do {                      \
    CHECK_CUDA(x);          \
    CHECK_CONTIGUOUS(x);    \
  } while (0)
#define CHECK_CPU_INPUT(x) \
  do {                     \
    CHECK_CPU(x);          \
    CHECK_CONTIGUOUS(x);   \
  } while (0)

#endif  // PYTORCH_CPP_HELPER


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/pytorch_cuda_helper.hpp
================================================
#ifndef PYTORCH_CUDA_HELPER
#define PYTORCH_CUDA_HELPER

#include <ATen/ATen.h>
#include <ATen/cuda/CUDAContext.h>
#include <c10/cuda/CUDAGuard.h>

#include <ATen/cuda/CUDAApplyUtils.cuh>
#include <THC/THCAtomics.cuh>

#include "common_cuda_helper.hpp"

using at::Half;
using at::Tensor;
using phalf = at::Half;

#define __PHALF(x) (x)

#endif  // PYTORCH_CUDA_HELPER


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/common/pytorch_device_registry.hpp
================================================
#ifndef PYTORCH_DEVICE_REGISTRY_H
#define PYTORCH_DEVICE_REGISTRY_H

// Using <torch/extension.h> is recommended in the official documentation in
// https://pytorch.org/tutorials/advanced/cpp_extension.html#writing-the-c-op.
// However, we use <torch/types.h> for compatibility with CUDA 9.0
// Read https://github.com/pytorch/extension-cpp/issues/35 for more details.
#include <torch/types.h>

#include <cassert>
#include <functional>
#include <map>
#include <type_traits>

// Renders a device as text: the name returned by DeviceTypeName(type, true),
// followed by ":<index>" only when the device carries an explicit index.
inline std::string GetDeviceStr(const at::Device& device) {
  std::string text = DeviceTypeName(device.type(), true);
  if (!device.has_index()) {
    return text;
  }
  text += ':';
  text += std::to_string(device.index());
  return text;
}

// Registry
// Registry
//
// One singleton table per (function type, function) pair, mapping an
// at::DeviceType to the implementation registered for that device. The primary
// template is only declared; the partial specialization below handles plain
// function pointers.
template <typename F, F f>
class DeviceRegistry;

template <typename Ret, typename... Args, Ret (*f)(Args...)>
class DeviceRegistry<Ret (*)(Args...), f> {
 public:
  using FunctionType = Ret (*)(Args...);
  // Number of table slots, taken from the framework's compile-time maximum.
  static const int MAX_DEVICE_TYPES =
      int8_t(at::DeviceType::COMPILE_TIME_MAX_DEVICE_TYPES);

  // Returns the process-wide instance (function-local static).
  static DeviceRegistry& instance() {
    static DeviceRegistry registry;
    return registry;
  }

  // Stores `function` as the implementation for `device`, replacing any
  // previous entry. The device value is used directly as the slot index.
  void Register(at::DeviceType device, FunctionType function) {
    funcs_[int8_t(device)] = function;
  }

  // Returns the implementation registered for `device`, or nullptr when
  // nothing was registered.
  FunctionType Find(at::DeviceType device) const {
    return funcs_[int8_t(device)];
  }

 private:
  // Private: instances are only created through instance(). Every slot starts
  // out empty.
  DeviceRegistry() {
    for (auto& slot : funcs_) {
      slot = nullptr;
    }
  }

  FunctionType funcs_[MAX_DEVICE_TYPES];
};

// get device of first tensor param

// Overload selected when the first argument is an at::Tensor (after decay):
// returns that tensor's device and ignores the remaining arguments.
template <typename T, typename... Args,
          std::enable_if_t<std::is_same<std::decay_t<T>, at::Tensor>::value,
                           bool> = true>
at::Device GetFirstTensorDevice(T&& t, Args&&... args) {
  return std::forward<T>(t).device();
}
// Overload selected when the first argument is NOT an at::Tensor: drops it and
// recurses on the rest. There is no zero-argument overload, so a call without
// any tensor argument fails to compile.
template <typename T, typename... Args,
          std::enable_if_t<!std::is_same<std::decay_t<T>, at::Tensor>::value,
                           bool> = true>
at::Device GetFirstTensorDevice(T&& t, Args&&... args) {
  return GetFirstTensorDevice(std::forward<Args>(args)...);
}

// check device consistency

// Recursion terminator: no arguments left to inspect. Returns the running
// index (which by now equals the number of inspected arguments) together with
// the reference device, signalling "no mismatch found".
inline std::pair<int, at::Device> CheckDeviceConsistency(
    const at::Device& device, int index) {
  return {index, device};
}

// Forward declaration of the non-tensor overload so that the tensor overload
// below can recurse into it. The default template argument is given here and
// therefore omitted on the definition further down.
template <typename T, typename... Args,
          std::enable_if_t<!std::is_same<std::decay_t<T>, at::Tensor>::value,
                           bool> = true>
std::pair<int, at::Device> CheckDeviceConsistency(const at::Device& device,
                                                  int index, T&& t,
                                                  Args&&... args);

// Tensor argument: compare its device type and index with the reference
// `device`. On the first mismatch return {position of the argument, its
// device}; otherwise continue with the next argument.
template <typename T, typename... Args,
          std::enable_if_t<std::is_same<std::decay_t<T>, at::Tensor>::value,
                           bool> = true>
std::pair<int, at::Device> CheckDeviceConsistency(const at::Device& device,
                                                  int index, T&& t,
                                                  Args&&... args) {
  auto new_device = std::forward<T>(t).device();
  if (new_device.type() != device.type() ||
      new_device.index() != device.index()) {
    return {index, new_device};
  }
  return CheckDeviceConsistency(device, index + 1, std::forward<Args>(args)...);
}

// Non-tensor argument: nothing to compare, just advance the position counter
// and continue with the next argument.
template <
    typename T, typename... Args,
    std::enable_if_t<!std::is_same<std::decay_t<T>, at::Tensor>::value, bool>>
std::pair<int, at::Device> CheckDeviceConsistency(const at::Device& device,
                                                  int index, T&& t,
                                                  Args&&... args) {
  return CheckDeviceConsistency(device, index + 1, std::forward<Args>(args)...);
}

// dispatch

// Looks up and calls the device-specific implementation of an op.
//
// The target device is the device of the first tensor among `args`. All other
// tensor arguments must live on the same device (type and index); otherwise a
// TORCH_CHECK failure names the offending parameter position. A second
// TORCH_CHECK fires when `registry` holds no implementation for the target
// device type. On success the arguments are forwarded to the implementation
// and its result is returned.
template <typename R, typename... Args>
auto Dispatch(const R& registry, const char* name, Args&&... args) {
  const auto target = GetFirstTensorDevice(std::forward<Args>(args)...);

  // `first` equals the argument count when every tensor matched `target`.
  const auto mismatch =
      CheckDeviceConsistency(target, 0, std::forward<Args>(args)...);
  TORCH_CHECK(mismatch.first >= int(sizeof...(Args)), name, ": at param ",
              mismatch.first,
              ", inconsistent device: ", GetDeviceStr(mismatch.second).c_str(),
              " vs ", GetDeviceStr(target).c_str(), "\n");

  auto impl = registry.Find(target.type());
  TORCH_CHECK(impl != nullptr, name, ": implementation for device ",
              GetDeviceStr(target).c_str(), " not found.\n");

  return impl(std::forward<Args>(args)...);
}

// helper macro

// Singleton registry associated with the function `key` (keyed on both the
// function's pointer type and the function itself).
#define DEVICE_REGISTRY(key) DeviceRegistry<decltype(&(key)), key>::instance()

// Defines a helper struct whose constructor registers `value` as the
// implementation of `key` for device `at::k<device>`, and a static instance of
// that struct so the registration runs during static initialization of the
// translation unit that expands the macro.
#define REGISTER_DEVICE_IMPL(key, device, value)           \
  struct key##_##device##_registerer {                     \
    key##_##device##_registerer() {                        \
      DEVICE_REGISTRY(key).Register(at::k##device, value); \
    }                                                      \
  };                                                       \
  static key##_##device##_registerer _##key##_##device##_registerer;

// Calls Dispatch() on the registry of `key`, passing the stringified key as
// the op name used in error messages, followed by the call arguments.
#define DISPATCH_DEVICE_IMPL(key, ...) \
  Dispatch(DEVICE_REGISTRY(key), #key, __VA_ARGS__)

#endif  // PYTORCH_DEVICE_REGISTRY_H


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/onnxruntime/corner_pool.h
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef ONNXRUNTIME_CORNER_POOL_H
#define ONNXRUNTIME_CORNER_POOL_H

#include <assert.h>
#include <onnxruntime_cxx_api.h>

// Kernel state for the MMCVCornerPool custom op: keeps the ORT custom-op API
// handle and the integer "mode" attribute read from the node at construction.
struct MMCVCornerPoolKernel {
 public:
  // Reads the "mode" attribute once; `ort_` is declared before `mode_`, so it
  // is already initialized when used in the initializer below.
  MMCVCornerPoolKernel(Ort::CustomOpApi ort, const OrtKernelInfo* info)
      : ort_(ort), mode_(ort_.KernelInfoGetAttribute<int64_t>(info, "mode")) {}

  // Runs the pooling for one invocation (defined in the .cpp file).
  void Compute(OrtKernelContext* context);

 private:
  Ort::CustomOpApi ort_;

  // Pooling direction selector taken from the "mode" attribute.
  int64_t mode_;
};

// Custom-op descriptor for "MMCVCornerPool": one float tensor in, one float
// tensor out, pinned to the CPU execution provider.
struct MMCVCornerPoolCustomOp
    : Ort::CustomOpBase<MMCVCornerPoolCustomOp, MMCVCornerPoolKernel> {
  // Creates the kernel object; the returned pointer is a heap-allocated
  // MMCVCornerPoolKernel.
  void* CreateKernel(Ort::CustomOpApi api, const OrtKernelInfo* info) const {
    auto* kernel = new MMCVCornerPoolKernel(api, info);
    return kernel;
  }

  // Operator name.
  const char* GetName() const {
    return "MMCVCornerPool";
  }

  // Single input, always float.
  size_t GetInputTypeCount() const {
    return 1;
  }
  ONNXTensorElementDataType GetInputType(size_t /*index*/) const {
    return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
  }

  // Single output, always float.
  size_t GetOutputTypeCount() const {
    return 1;
  }
  ONNXTensorElementDataType GetOutputType(size_t /*index*/) const {
    return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
  }

  // force cpu
  const char* GetExecutionProviderType() const {
    return "CPUExecutionProvider";
  }
};
#endif  // ONNXRUNTIME_CORNER_POOL_H


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/onnxruntime/cpu/corner_pool.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "corner_pool.h"

#include "../ort_mmcv_utils.h"

// Top pooling over a contiguous (batch, channels, height, width) float buffer:
// output[h][w] is the maximum of input[h..height-1][w], i.e. a running maximum
// accumulated from the bottom row upwards in every column.
//
// `input` and `output` must each hold batch_size * channels * height * width
// elements. Planes with no rows or no columns are left untouched.
void TopPoolForwardCPU(const float *input, float *output, const int batch_size,
                       const int channels, const int height, const int width) {
  // Nothing to pool on an empty plane. Without this guard the seed-row copy
  // below would address row (height - 1) == -1 when height is 0.
  if (height <= 0 || width <= 0) return;

  const int plane_size = height * width;
  const int last_row = (height - 1) * width;
  for (int n = 0; n < batch_size; n++) {
    for (int c = 0; c < channels; c++) {
      const int plane = (n * channels + c) * plane_size;
      for (int w = 0; w < width; w++) {
        // seed the running maximum with the bottom-most value of the column
        output[plane + last_row + w] = input[plane + last_row + w];
        // walk upwards: each cell is max(cell below in output, own input)
        for (int h = height - 2; h >= 0; h--) {
          const int idx = plane + h * width + w;
          output[idx] = std::max(output[idx + width], input[idx]);
        }  // for h
      }    // for w
    }      // for c
  }        // for n
}

// Bottom pooling over a contiguous (batch, channels, height, width) float
// buffer: output[h][w] is the maximum of input[0..h][w], i.e. a running
// maximum accumulated from the top row downwards in every column.
//
// `input` and `output` must each hold batch_size * channels * height * width
// elements. Planes with no rows or no columns are left untouched.
void BottomPoolForwardCPU(const float *input, float *output,
                          const int batch_size, const int channels,
                          const int height, const int width) {
  // Nothing to pool on an empty plane. Without this guard the seed-row copy
  // below would write row 0 of a tensor that has no rows when height is 0.
  if (height <= 0 || width <= 0) return;

  const int plane_size = height * width;
  for (int n = 0; n < batch_size; n++) {
    for (int c = 0; c < channels; c++) {
      const int plane = (n * channels + c) * plane_size;
      for (int w = 0; w < width; w++) {
        // seed the running maximum with the top-most value of the column
        output[plane + w] = input[plane + w];
        // do bottom_pool: each cell is max(cell above in output, own input)
        for (int h = 1; h < height; h++) {
          const int idx = plane + h * width + w;
          output[idx] = std::max(output[idx - width], input[idx]);
        }  // for h
      }    // for w
    }      // for c
  }        // for n
}

// Left pooling over a contiguous (batch, channels, height, width) float
// buffer: output[h][w] is the maximum of input[h][w..width-1], i.e. a running
// maximum accumulated from the right-most column towards the left in every
// row.
//
// `input` and `output` must each hold batch_size * channels * height * width
// elements. Planes with no rows or no columns are left untouched.
void LeftPoolForwardCPU(const float *input, float *output, const int batch_size,
                        const int channels, const int height, const int width) {
  // Nothing to pool on an empty plane. Without this guard the seed-column copy
  // below would address column (width - 1) == -1 when width is 0.
  if (height <= 0 || width <= 0) return;

  const int plane_size = height * width;
  for (int n = 0; n < batch_size; n++) {
    for (int c = 0; c < channels; c++) {
      const int plane = (n * channels + c) * plane_size;
      for (int h = 0; h < height; h++) {
        const int row = plane + h * width;
        // seed the running maximum with the right-most value of the row
        output[row + width - 1] = input[row + width - 1];
        // do left_pool: each cell is max(right neighbour in output, own input)
        for (int w = width - 2; w >= 0; w--) {
          output[row + w] = std::max(output[row + w + 1], input[row + w]);
        }  // for w
      }    // for h
    }      // for c
  }        // for n
}

// Right pooling over a contiguous (batch, channels, height, width) float
// buffer: output[h][w] is the maximum of input[h][0..w], i.e. a running
// maximum accumulated from the left-most column towards the right in every
// row.
//
// `input` and `output` must each hold batch_size * channels * height * width
// elements. Planes with no rows or no columns are left untouched.
void RightPoolForwardCPU(const float *input, float *output,
                         const int batch_size, const int channels,
                         const int height, const int width) {
  // Nothing to pool on an empty plane. Without this guard the seed-column copy
  // below would write column 0 of a tensor that has no columns when width is 0.
  if (height <= 0 || width <= 0) return;

  const int plane_size = height * width;
  for (int n = 0; n < batch_size; n++) {
    for (int c = 0; c < channels; c++) {
      const int plane = (n * channels + c) * plane_size;
      for (int h = 0; h < height; h++) {
        const int row = plane + h * width;
        // seed the running maximum with the left-most value of the row
        output[row] = input[row];
        // do right_pool: each cell is max(left neighbour in output, own input)
        for (int w = 1; w < width; w++) {
          output[row + w] = std::max(output[row + w - 1], input[row + w]);
        }  // for w
      }    // for h
    }      // for c
  }        // for n
}

// Runs corner pooling on input 0 and writes an output of identical shape.
// The first four dimensions of the input are read as
// (batch, channels, height, width). `mode_` selects the direction:
// 0 = top, 1 = bottom, 2 = left, 3 = right.
void MMCVCornerPoolKernel::Compute(OrtKernelContext *context) {
  typedef float T;
  const int mode = int(mode_);

  // fetch the input tensor
  const OrtValue *input = ort_.KernelContext_GetInput(context, 0);
  const T *input_data =
      reinterpret_cast<const float *>(ort_.GetTensorData<T>(input));

  // get output memory (same dimensions as the input)
  OrtTensorDimensions out_dimensions(ort_, input);
  OrtValue *output = ort_.KernelContext_GetOutput(
      context, 0, out_dimensions.data(), out_dimensions.size());
  T *output_data = ort_.GetTensorMutableData<T>(output);

  // 'top': 0, 'bottom': 1, 'left': 2, 'right':3
  assert(mode == 0 || mode == 1 || mode == 2 || mode == 3);

  // do corner_pool
  auto dims = out_dimensions.data();
  int batch_size = dims[0];
  int input_channels = dims[1];
  int input_height = dims[2];
  int input_width = dims[3];

  switch (mode) {
    case 0:
      TopPoolForwardCPU(input_data, output_data, batch_size, input_channels,
                        input_height, input_width);
      break;
    case 1:
      BottomPoolForwardCPU(input_data, output_data, batch_size, input_channels,
                           input_height, input_width);
      break;
    case 2:
      LeftPoolForwardCPU(input_data, output_data, batch_size, input_channels,
                         input_height, input_width);
      break;
    default:
      // mode 3; also the fallback for any other value when asserts are
      // compiled out
      RightPoolForwardCPU(input_data, output_data, batch_size, input_channels,
                          input_height, input_width);
      break;
  }
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/onnxruntime/cpu/deform_conv.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "deform_conv.h"

#include <cmath>
#include <vector>

#include "../ort_mmcv_utils.h"

// Reference single-precision GEMM:
//   Y[m][n] = alpha * sum_k op(A)[m][k] * op(B)[k][n]
//             + beta * V[n]      (only if V is non-null)
//             + beta * H[m][n]   (only if H is non-null)
// op(A) is M x K and op(B) is K x N. When trans_A is non-zero A is stored as
// K x M, otherwise as M x K; when trans_B is non-zero B is stored as N x K,
// otherwise as K x N. Y and H are M x N, V has N entries. H[m][n] is read
// before Y[m][n] is written, so H may point at the same buffer as Y.
void gemm_ref_fp32_deform(const float *A, const float *B, const float *V,
                          const float *H, const int32_t trans_A,
                          const int32_t trans_B, const int32_t M,
                          const int32_t N, const int32_t K, const float alpha,
                          const float beta, float *Y) {
  // Element (m, k) of op(A) lives at A[m * a_m_step + k * a_k_step];
  // element (k, n) of op(B) lives at B[k * b_k_step + n * b_n_step].
  const int64_t a_m_step = trans_A ? 1 : K;
  const int64_t a_k_step = trans_A ? M : 1;
  const int64_t b_k_step = trans_B ? 1 : N;
  const int64_t b_n_step = trans_B ? K : 1;

  for (int64_t m = 0; m < M; ++m) {
    for (int64_t n = 0; n < N; ++n) {
      float acc = 0.0f;
      for (int64_t k = 0; k < K; ++k) {
        acc += A[m * a_m_step + k * a_k_step] * B[k * b_k_step + n * b_n_step];
      }
      acc *= alpha;
      if (V) acc += beta * V[n];
      if (H) acc += beta * H[m * N + n];
      Y[m * N + n] = acc;
    }
  }
}

// Bilinearly samples the row-major src_h x src_w image `src` at the fractional
// position (h, w). Positions at or beyond one pixel outside the image
// (h <= -1, h >= src_h, w <= -1, w >= src_w) yield 0. Neighbouring pixels that
// fall outside the image contribute 0 to the weighted sum.
float bilinear_interpolate(const float *src, const int64_t src_h,
                           const int64_t src_w, const float h, const float w) {
  const bool outside = h <= -1 || src_h <= h || w <= -1 || src_w <= w;
  if (outside) {
    return 0;
  }

  // integer corners surrounding (h, w)
  const int64_t top = floor(h);
  const int64_t left = floor(w);
  const int64_t bottom = top + 1;
  const int64_t right = left + 1;

  // fractional distances to the top/left corner and their complements
  const float frac_h = h - top;
  const float frac_w = w - left;
  const float inv_h = 1 - frac_h;
  const float inv_w = 1 - frac_w;

  const bool top_ok = top >= 0;
  const bool left_ok = left >= 0;
  const bool bottom_ok = bottom <= src_h - 1;
  const bool right_ok = right <= src_w - 1;

  float top_left = 0;
  if (top_ok && left_ok) top_left = src[top * src_w + left];
  float top_right = 0;
  if (top_ok && right_ok) top_right = src[top * src_w + right];
  float bottom_left = 0;
  if (bottom_ok && left_ok) bottom_left = src[bottom * src_w + left];
  float bottom_right = 0;
  if (bottom_ok && right_ok) bottom_right = src[bottom * src_w + right];

  const float wt_tl = inv_h * inv_w;
  const float wt_tr = inv_h * frac_w;
  const float wt_bl = frac_h * inv_w;
  const float wt_br = frac_h * frac_w;

  float val = (wt_tl * top_left + wt_tr * top_right + wt_bl * bottom_left +
               wt_br * bottom_right);
  return val;
}

// Builds the im2col matrix for a deformable convolution.
//
// For every (channel, output row, output column) and every kernel tap
// (kh, kw), the regular sampling position is shifted by the learned
// (offset_h, offset_w) read from `offset`, the input plane of that channel is
// sampled bilinearly there, and the value is stored in `columns`.
//
// Layouts as indexed below:
//   input   : channels planes of src_h x src_w
//   offset  : per offset group, 2 * kernel_h * kernel_w planes of
//             dst_h x dst_w, with the h-offset plane of a tap directly
//             followed by its w-offset plane
//   columns : (channels * kernel_h * kernel_w) rows of dst_h * dst_w entries
// Channels are split evenly over `offset_groups`; each channel uses the
// offsets of the group it falls into.
void deformable_im2col(const float *input, const float *offset,
                       const int64_t src_h, const int64_t src_w,
                       const int64_t kernel_h, const int64_t kernel_w,
                       const int64_t pad_h, const int64_t pad_w,
                       const int64_t stride_h, const int64_t stride_w,
                       const int64_t dilation_h, const int64_t dilation_w,
                       const int64_t channels, const int64_t offset_groups,
                       const int64_t dst_h, const int64_t dst_w,
                       float *columns) {
  const int64_t indices = channels * dst_h * dst_w;
  for (int64_t index = 0; index != indices; ++index) {
    // decompose the flat index into (channel, output row, output column)
    const int64_t w_col = index % dst_w;
    const int64_t h_col = (index / dst_w) % dst_h;
    const int64_t c_im = index / (dst_w * dst_h);
    const int64_t c_col = c_im * kernel_h * kernel_w;

    const int64_t c_per_offset_grp = channels / offset_groups;
    const int64_t grp_idx = c_im / c_per_offset_grp;
    auto columns_ptr =
        columns + (c_col * (dst_h * dst_w) + h_col * dst_w + w_col);
    auto input_ptr = input + c_im * (src_h * src_w);
    auto offset_ptr =
        offset + grp_idx * 2 * kernel_h * kernel_w * dst_h * dst_w;

    for (int64_t kh = 0; kh < kernel_h; ++kh) {
      for (int64_t kw = 0; kw < kernel_w; ++kw) {
        // Keep the offset indices 64-bit: they are products of int64_t
        // extents and would be truncated by a plain `int` on large maps.
        const int64_t data_offset_h_ptr =
            ((2 * (kh * kernel_w + kw)) * dst_h + h_col) * dst_w + w_col;
        const int64_t data_offset_w_ptr =
            ((2 * (kh * kernel_w + kw) + 1) * dst_h + h_col) * dst_w + w_col;

        const float offset_h = offset_ptr[data_offset_h_ptr];
        const float offset_w = offset_ptr[data_offset_w_ptr];
        // regular convolution sampling position plus the learned offset
        const float ih =
            (h_col * stride_h - pad_h) + kh * dilation_h + offset_h;
        const float iw =
            (w_col * stride_w - pad_w) + kw * dilation_w + offset_w;
        *columns_ptr = bilinear_interpolate(input_ptr, src_h, src_w, ih, iw);
        // next kernel tap is one full output plane further in `columns`
        columns_ptr += dst_h * dst_w;
      }
    }
  }
}

// Forward pass of a grouped deformable convolution on the CPU.
//
// For each batch element and each convolution group, the group's input
// channels are expanded into `columns` by deformable_im2col, the matching
// slice of `dst` is cleared, and the group's filter matrix is multiplied with
// `columns` into that slice. `columns` is caller-provided scratch space that
// is overwritten for every (batch, group) pair.
void deformable_conv_forward(
    const float *src, const float *offset, const float *filter,
    const int64_t batch, const int64_t src_c, const int64_t src_h,
    const int64_t src_w, const int64_t dst_c, const int64_t dst_h,
    const int64_t dst_w, const int64_t group, const int64_t offset_group,
    const int64_t channels, const int64_t num_output, const int64_t kernel_h,
    const int64_t kernel_w, const int64_t stride_h, const int64_t stride_w,
    const int64_t pad_h, const int64_t pad_w, const int64_t dilation_h,
    const int64_t dilation_w, float *columns, float *dst) {
  // channels handled by one convolution group, on the input and output side
  const int64_t ic_per_gp = channels / group;
  const int64_t oc_per_gp = num_output / group;

  for (int64_t b = 0; b < batch; ++b) {
    // offsets are shared by all convolution groups of one batch element
    const float *batch_offset =
        offset + b * offset_group * 2 * kernel_h * kernel_w * dst_h * dst_w;

    for (int64_t g = 0; g < group; ++g) {
      const float *group_src =
          src + b * src_c * src_h * src_w + g * ic_per_gp * src_h * src_w;
      const float *group_filter =
          filter + g * oc_per_gp * ic_per_gp * kernel_h * kernel_w;
      float *group_dst =
          dst + b * dst_c * dst_h * dst_w + g * oc_per_gp * dst_h * dst_w;

      deformable_im2col(group_src, batch_offset, src_h, src_w, kernel_h,
                        kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h,
                        dilation_w, ic_per_gp, offset_group, dst_h, dst_w,
                        columns);

      // clear the slice first: the GEMM below adds the existing contents of
      // group_dst (passed as H with beta = 1) to the product
      memset(group_dst, 0, sizeof(float) * oc_per_gp * dst_h * dst_w);

      gemm_ref_fp32_deform(group_filter, columns, nullptr, group_dst, 0, 0,
                           oc_per_gp, dst_h * dst_w,
                           ic_per_gp * kernel_h * kernel_w, 1.0f, 1.0f,
                           group_dst);
    }
  }
}

// Reads the op attributes once at kernel creation: "stride", "padding" and
// "dilation" are integer lists whose first two entries are taken as
// (height, width); "deform_groups" and "groups" are single integers.
MMCVDeformConvKernel::MMCVDeformConvKernel(OrtApi api,
                                           const OrtKernelInfo *info)
    : api_(api), ort_(api_), info_(info) {
  using Int64List = std::vector<int64_t>;

  const Int64List stride =
      ort_.KernelInfoGetAttribute<Int64List>(info, "stride");
  stride_height_ = stride[0];
  stride_width_ = stride[1];

  const Int64List padding =
      ort_.KernelInfoGetAttribute<Int64List>(info, "padding");
  padding_height_ = padding[0];
  padding_width_ = padding[1];

  const Int64List dilation =
      ort_.KernelInfoGetAttribute<Int64List>(info, "dilation");
  dilation_height_ = dilation[0];
  dilation_width_ = dilation[1];

  deformable_group_ =
      ort_.KernelInfoGetAttribute<int64_t>(info, "deform_groups");
  group_ = ort_.KernelInfoGetAttribute<int64_t>(info, "groups");

  // create allocator
  allocator_ = Ort::AllocatorWithDefaultOptions();
}

// Runs the deformable convolution for one invocation.
//
// Inputs: 0 = feature map, 1 = sampling offsets, 2 = filter weights (all
// float). The feature map dimensions are read as (batch, channels, height,
// width); the filter dimensions 0, 2 and 3 are read as output channels, kernel
// height and kernel width. Output 0 has shape
// (batch, out_channels, out_height, out_width).
void MMCVDeformConvKernel::Compute(OrtKernelContext *context) {
  const int64_t stride_height = stride_height_;
  const int64_t stride_width = stride_width_;
  const int64_t padding_height = padding_height_;
  const int64_t padding_width = padding_width_;
  const int64_t dilation_height = dilation_height_;
  const int64_t dilation_width = dilation_width_;
  const int64_t deformable_group = deformable_group_;
  const int64_t group = group_;

  const OrtValue *input = ort_.KernelContext_GetInput(context, 0);
  const float *input_data =
      reinterpret_cast<const float *>(ort_.GetTensorData<float>(input));

  const OrtValue *offset = ort_.KernelContext_GetInput(context, 1);
  const float *offset_data =
      reinterpret_cast<const float *>(ort_.GetTensorData<float>(offset));

  const OrtValue *filter = ort_.KernelContext_GetInput(context, 2);
  const float *filter_data =
      reinterpret_cast<const float *>(ort_.GetTensorData<float>(filter));

  OrtTensorDimensions input_dims(ort_, input);
  OrtTensorDimensions filter_dims(ort_, filter);

  int64_t batch_size = input_dims[0];
  int64_t in_channels = input_dims[1];
  int64_t in_height = input_dims[2];
  int64_t in_width = input_dims[3];
  int64_t out_channels = filter_dims[0];
  int64_t kernel_height = filter_dims[2];
  int64_t kernel_width = filter_dims[3];

  // get output memory
  // Standard convolution output size. The whole expression is integer
  // arithmetic, so no floor() call is needed around it.
  const int64_t out_height = (in_height + 2 * padding_height -
                              dilation_height * (kernel_height - 1) - 1) /
                                 stride_height +
                             1;
  const int64_t out_width = (in_width + 2 * padding_width -
                             dilation_width * (kernel_width - 1) - 1) /
                                stride_width +
                            1;

  std::vector<int64_t> output_dims = {batch_size, out_channels, out_height,
                                      out_width};

  OrtValue *output = ort_.KernelContext_GetOutput(
      context, 0, output_dims.data(), output_dims.size());
  float *out_ptr = ort_.GetTensorMutableData<float>(output);

  // allocate tmp memory: im2col scratch for one (batch, group) pair
  int64_t column_len = (in_channels / group) * kernel_height * kernel_width *
                       out_height * out_width;
  float *columns = (float *)allocator_.Alloc(sizeof(float) * column_len);
  deformable_conv_forward(
      input_data, offset_data, filter_data, batch_size, in_channels, in_height,
      in_width, out_channels, out_height, out_width, group, deformable_group,
      in_channels, out_channels, kernel_height, kernel_width, stride_height,
      stride_width, padding_height, padding_width, dilation_height,
      dilation_width, columns, out_ptr);

  // Release the scratch buffer; without this every call leaks column_len
  // floats.
  allocator_.Free(columns);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/onnxruntime/cpu/gridSample.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include <cmath>

#include "../ort_mmcv_utils.h"
#include "grid_sample.h"

// Function-like min/max. Each argument is evaluated more than once, so do not
// pass expressions with side effects.
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
#define MAX(a, b) (((a) < (b)) ? (b) : (a))
// Assigns to `out` the value of `in` clamped to [0, clip_limit - 1].
// `out` and `in` are used unparenthesized, so pass plain variables.
#define CLIP_COORDINATES(in, out, clip_limit) \
  out = MIN((clip_limit - 1), MAX(in, 0))

// modified from
// https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/native/GridSampler.cpp

// Reads the three integer attributes of the op ("align_corners",
// "interpolation_mode", "padding_mode") once at kernel creation and sets up
// the default allocator.
GridSampleKernel::GridSampleKernel(OrtApi api, const OrtKernelInfo *info)
    : api_(api), ort_(api_), info_(info) {
  // small helper: fetch one int64 attribute by name
  const auto read_int = [this, info](const char *attr_name) {
    return ort_.KernelInfoGetAttribute<int64_t>(info, attr_name);
  };

  align_corners_ = read_int("align_corners");
  interpolation_mode_ = read_int("interpolation_mode");
  padding_mode_ = read_int("padding_mode");

  allocator_ = Ort::AllocatorWithDefaultOptions();
}

// Integer codes for the interpolation mode.
// NOTE(review): presumably matched against the "interpolation_mode" attribute
// - confirm at the use site.
enum GridSamplerInterpolation { Bilinear = 0, Nearest = 1, Bicubic = 2 };
// Integer codes for the padding mode; compute_coordinates() compares its
// `padding_mode` argument against Border and Reflection.
enum GridSamplerPadding { Zeros = 0, Border = 1, Reflection = 2 };

// Maps a normalized grid coordinate to a pixel coordinate.
//   align_corners == true : -1 -> 0      and +1 -> size - 1
//   align_corners == false: -1 -> -0.5   and +1 -> size - 0.5
template <typename scalar_t>
static inline scalar_t grid_sampler_unnormalize(scalar_t coord, int64_t size,
                                                bool align_corners) {
  return align_corners ? ((coord + 1) / 2) * (size - 1)
                       : ((coord + 1) * size - 1) / 2;
}

// Clamps `in` to the closed range [0, clip_limit - 1].
template <typename scalar_t>
static inline scalar_t clip_coordinates(scalar_t in, int64_t clip_limit) {
  const scalar_t lower = static_cast<scalar_t>(0);
  const scalar_t upper = static_cast<scalar_t>(clip_limit - 1);
  const scalar_t raised = std::max(in, lower);
  return std::min(upper, raised);
}

// Mirrors `in` back and forth until it lies inside [low, high] (inclusive).
// Both bounds are supplied doubled (`twice_low`, `twice_high`) so that
// half-integer bounds can be passed as integers. A zero-width range yields 0.
template <typename scalar_t>
static inline scalar_t reflect_coordinates(scalar_t in, int64_t twice_low,
                                           int64_t twice_high) {
  if (twice_low == twice_high) {
    return static_cast<scalar_t>(0);
  }

  const scalar_t low = static_cast<scalar_t>(twice_low) / 2;
  const scalar_t width = static_cast<scalar_t>(twice_high - twice_low) / 2;

  // distance from the lower bound, made non-negative
  const scalar_t dist = std::fabs(in - low);

  // `fmod` keeps the sign of its first argument, which is non-negative here.
  const scalar_t remainder = std::fmod(dist, width);
  const int bounces = static_cast<int>(std::floor(dist / width));

  // an odd number of bounces means we are travelling back from the upper bound
  const bool reversed = (bounces % 2 != 0);
  if (reversed) {
    return width - remainder + low;
  }
  return remainder + low;
}

// Applies the padding rule to an already unnormalized coordinate:
//   Border     -> clamp into [0, size - 1]
//   Reflection -> reflect about the image borders, then clamp
//   otherwise  -> return the coordinate unchanged
template <typename scalar_t>
static inline scalar_t compute_coordinates(scalar_t coord, int64_t size,
                                           int64_t padding_mode,
                                           bool align_corners) {
  if (padding_mode == GridSamplerPadding::Border) {
    return clip_coordinates(coord, size);
  }
  if (padding_mode != GridSamplerPadding::Reflection) {
    return coord;
  }

  // Reflection bounds are passed doubled: pixel centres when align_corners is
  // set, pixel edges (half-integers) otherwise.
  const int64_t twice_low = align_corners ? 0 : -1;
  const int64_t twice_high = align_corners ? 2 * (size - 1) : 2 * size - 1;
  const scalar_t reflected = reflect_coordinates(coord, twice_low, twice_high);
  return clip_coordinates(reflected, size);
}

// Maps a normalized grid coordinate to a source pixel index: unnormalize it
// first, then apply the padding rule.
template <typename scalar_t>
static inline scalar_t grid_sampler_compute_source_index(scalar_t coord,
                                                         int64_t size,
                                                         int64_t padding_mode,
                                                         bool align_corners) {
  const scalar_t pixel_coord =
      grid_sampler_unnormalize(coord, size, align_corners);
  return compute_coordinates(pixel_coord, size, padding_mode, align_corners);
}

// Returns true when (h, w) addresses a valid element of an H x W plane.
static inline bool within_bounds_2d(int64_t h, int64_t w, int64_t H,
                                    int64_t W) {
  const bool row_outside = (h < 0) || (h >= H);
  const bool col_outside = (w < 0) || (w >= W);
  return !(row_outside || col_outside);
}

// Reads one sample from a 2-D plane after applying the padding rule to
// (x, y). The padded coordinates are truncated to integers; positions that
// still fall outside the W x H plane yield 0.
// `sW` / `sH` are the element strides along width / height.
template <typename scalar_t>
static inline scalar_t get_value_bounded(const scalar_t *data, scalar_t x,
                                         scalar_t y, int64_t W, int64_t H,
                                         int64_t sW, int64_t sH,
                                         int64_t padding_mode,
                                         bool align_corners) {
  const int64_t col = static_cast<int64_t>(
      compute_coordinates(x, W, padding_mode, align_corners));
  const int64_t row = static_cast<int64_t>(
      compute_coordinates(y, H, padding_mode, align_corners));

  return within_bounds_2d(row, col, H, W) ? data[row * sH + col * sW]
                                          : static_cast<scalar_t>(0);
}

// Evaluates the cubic polynomial (A + 2) x^3 - (A + 3) x^2 + 1 in Horner
// form; used for the two taps nearest to the sample position.
template <typename scalar_t>
static inline scalar_t cubic_convolution1(scalar_t x, scalar_t A) {
  const scalar_t inner = (A + 2) * x - (A + 3);
  return inner * x * x + 1;
}

// Evaluates the cubic polynomial A x^3 - 5A x^2 + 8A x - 4A in Horner form;
// used for the two outer taps.
template <typename scalar_t>
static inline scalar_t cubic_convolution2(scalar_t x, scalar_t A) {
  const scalar_t first = A * x - 5 * A;
  const scalar_t second = first * x + 8 * A;
  return second * x - 4 * A;
}

// Fills `coeffs` with the four cubic-convolution weights (A = -0.75) for a
// sample located a fraction `t` past the second of four consecutive taps.
template <typename scalar_t>
static inline void get_cubic_upsample_coefficients(scalar_t coeffs[4],
                                                   scalar_t t) {
  const scalar_t A = -0.75;
  // Distance measured from the opposite side, for the mirrored pair of taps.
  const scalar_t mirrored = 1.0 - t;

  coeffs[0] = cubic_convolution2<scalar_t>(t + 1.0, A);
  coeffs[1] = cubic_convolution1<scalar_t>(t, A);
  coeffs[2] = cubic_convolution1<scalar_t>(mirrored, A);
  coeffs[3] = cubic_convolution2<scalar_t>(mirrored + 1.0, A);
}

// 1-D cubic interpolation of four consecutive samples x0..x3 at fractional
// position `t` (measured from x1).
template <typename scalar_t>
static inline scalar_t cubic_interp1d(scalar_t x0, scalar_t x1, scalar_t x2,
                                      scalar_t x3, scalar_t t) {
  scalar_t weights[4];
  get_cubic_upsample_coefficients<scalar_t>(weights, t);

  const scalar_t term0 = x0 * weights[0];
  const scalar_t term1 = x1 * weights[1];
  const scalar_t term2 = x2 * weights[2];
  const scalar_t term3 = x3 * weights[3];
  return term0 + term1 + term2 + term3;
}

// Samples `input` (kernel input 0) at the locations given by `grid` (kernel
// input 1) and writes the result to kernel output 0.
//
// The code indexes `input` as a 4-D contiguous tensor whose dimensions are
// read as (N, C, inp_H, inp_W), and `grid` as a 4-D contiguous tensor whose
// dimensions 1 and 2 give the output height and width; two consecutive values
// along the last grid dimension are read as (x, y).
// NOTE(review): presumably grid is (N, out_H, out_W, 2) — nothing here
// validates ranks or sizes; confirm against the op's schema.
//
// The interpolation mode selects bilinear, nearest or bicubic sampling; for
// any other value no output element is written by this function.
void GridSampleKernel::Compute(OrtKernelContext *context) {
  // Snapshot the attributes read in the constructor.
  const bool align_corners = align_corners_;
  const int64_t padding_mode = padding_mode_;
  const int64_t interpolation_mode = interpolation_mode_;

  const OrtValue *input = ort_.KernelContext_GetInput(context, 0);
  const float *input_data =
      reinterpret_cast<const float *>(ort_.GetTensorData<float>(input));

  const OrtValue *grid = ort_.KernelContext_GetInput(context, 1);
  const float *grid_data =
      reinterpret_cast<const float *>(ort_.GetTensorData<float>(grid));

  OrtTensorDimensions input_dims(ort_, input);
  OrtTensorDimensions grid_dims(ort_, grid);
  int64_t N = input_dims[0];
  int64_t C = input_dims[1];
  int64_t inp_H = input_dims[2];
  int64_t inp_W = input_dims[3];
  int64_t out_H = grid_dims[1];
  int64_t out_W = grid_dims[2];

  // Output keeps batch and channel counts of the input and takes its spatial
  // size from the grid.
  std::vector<int64_t> output_dims = {N, C, out_H, out_W};
  OrtValue *output = ort_.KernelContext_GetOutput(
      context, 0, output_dims.data(), output_dims.size());
  float *out_ptr = ort_.GetTensorMutableData<float>(output);

  // Element strides, computed as for densely packed row-major tensors.
  int64_t inp_sN = input_dims[1] * input_dims[2] * input_dims[3];
  int64_t inp_sC = input_dims[2] * input_dims[3];
  int64_t inp_sH = input_dims[3];
  int64_t inp_sW = 1;
  int64_t grid_sN = grid_dims[1] * grid_dims[2] * grid_dims[3];
  int64_t grid_sH = grid_dims[2] * grid_dims[3];
  int64_t grid_sW = grid_dims[3];
  int64_t grid_sCoor = 1;
  int64_t out_sN = output_dims[1] * output_dims[2] * output_dims[3];
  int64_t out_sC = output_dims[2] * output_dims[3];
  int64_t out_sH = output_dims[3];
  int64_t out_sW = 1;

  // loop over each output pixel
  for (int64_t n = 0; n < N; ++n) {
    const float *grid_ptr_N = grid_data + n * grid_sN;
    const float *inp_ptr_N = input_data + n * inp_sN;
    for (int64_t h = 0; h < out_H; ++h) {
      for (int64_t w = 0; w < out_W; ++w) {
        // Normalized sampling location for this output pixel: x first, then y.
        const float *grid_ptr_NHW = grid_ptr_N + h * grid_sH + w * grid_sW;
        float x = *grid_ptr_NHW;
        float y = grid_ptr_NHW[grid_sCoor];

        // Pixel-space source coordinates with the padding rule applied.
        // (The bicubic branch below recomputes them without padding.)
        float ix = grid_sampler_compute_source_index(x, inp_W, padding_mode,
                                                     align_corners);
        float iy = grid_sampler_compute_source_index(y, inp_H, padding_mode,
                                                     align_corners);

        if (interpolation_mode == GridSamplerInterpolation::Bilinear) {
          // get corner pixel values from (x, y)
          // for 4d, we use north-east-south-west
          int64_t ix_nw = static_cast<int64_t>(std::floor(ix));
          int64_t iy_nw = static_cast<int64_t>(std::floor(iy));

          int64_t ix_ne = ix_nw + 1;
          int64_t iy_ne = iy_nw;

          int64_t ix_sw = ix_nw;
          int64_t iy_sw = iy_nw + 1;

          int64_t ix_se = ix_nw + 1;
          int64_t iy_se = iy_nw + 1;

          // get surfaces to each neighbor:
          // each corner is weighted by the area of the rectangle spanned by
          // the sample point and the diagonally opposite corner.
          float nw = (ix_se - ix) * (iy_se - iy);
          float ne = (ix - ix_sw) * (iy_sw - iy);
          float sw = (ix_ne - ix) * (iy - iy_ne);
          float se = (ix - ix_nw) * (iy - iy_nw);

          // calculate bilinear weighted pixel value and set output pixel
          const float *inp_ptr_NC = inp_ptr_N;
          float *out_ptr_NCHW = out_ptr + n * out_sN + h * out_sH + w * out_sW;
          // Same weights for every channel; both pointers advance one channel
          // plane per iteration. Corners outside the image contribute nothing.
          for (int64_t c = 0; c < C;
               ++c, out_ptr_NCHW += out_sC, inp_ptr_NC += inp_sC) {
            auto res = static_cast<float>(0);
            if (within_bounds_2d(iy_nw, ix_nw, inp_H, inp_W)) {
              res += inp_ptr_NC[iy_nw * inp_sH + ix_nw * inp_sW] * nw;
            }
            if (within_bounds_2d(iy_ne, ix_ne, inp_H, inp_W)) {
              res += inp_ptr_NC[iy_ne * inp_sH + ix_ne * inp_sW] * ne;
            }
            if (within_bounds_2d(iy_sw, ix_sw, inp_H, inp_W)) {
              res += inp_ptr_NC[iy_sw * inp_sH + ix_sw * inp_sW] * sw;
            }
            if (within_bounds_2d(iy_se, ix_se, inp_H, inp_W)) {
              res += inp_ptr_NC[iy_se * inp_sH + ix_se * inp_sW] * se;
            }
            *out_ptr_NCHW = res;
          }
        } else if (interpolation_mode == GridSamplerInterpolation::Nearest) {
          // Round to the nearest integer pixel using the current rounding
          // mode (std::nearbyint).
          int64_t ix_nearest = static_cast<int64_t>(std::nearbyint(ix));
          int64_t iy_nearest = static_cast<int64_t>(std::nearbyint(iy));

          // assign nearest neighbor pixel value to output pixel
          float *out_ptr_NCHW = out_ptr + n * out_sN + h * out_sH + w * out_sW;
          const float *inp_ptr_NC = inp_ptr_N;
          for (int64_t c = 0; c < C;
               ++c, out_ptr_NCHW += out_sC, inp_ptr_NC += inp_sC) {
            if (within_bounds_2d(iy_nearest, ix_nearest, inp_H, inp_W)) {
              *out_ptr_NCHW =
                  inp_ptr_NC[iy_nearest * inp_sH + ix_nearest * inp_sW];
            } else {
              // Out-of-image samples read as zero.
              *out_ptr_NCHW = static_cast<float>(0);
            }
          }
        } else if (interpolation_mode == GridSamplerInterpolation::Bicubic) {
          // grid_sampler_compute_source_index will "clip the value" of idx
          // depending on the padding, which would make the calculation wrong.
          // For example x = -0.1 -> ix = 0 for zero padding, but in bicubic ix
          // = floor(x) = -1.
          // Reflection padding would be even more problematic, since the -1
          // and +1 directions are not fixed at the boundary.
          // So only unnormalize here; padding is applied per tap inside
          // get_value_bounded.
          ix = grid_sampler_unnormalize(x, inp_W, align_corners);
          iy = grid_sampler_unnormalize(y, inp_H, align_corners);

          float ix_nw = std::floor(ix);
          float iy_nw = std::floor(iy);

          // Fractional position inside the central cell of the 4x4 window.
          const float tx = ix - ix_nw;
          const float ty = iy - iy_nw;

          const float *inp_ptr_NC = inp_ptr_N;
          float *out_ptr_NCHW = out_ptr + n * out_sN + h * out_sH + w * out_sW;
          for (int64_t c = 0; c < C;
               ++c, out_ptr_NCHW += out_sC, inp_ptr_NC += inp_sC) {
            // One horizontally interpolated value per row of the 4x4 window.
            float coefficients[4];

            // Interpolate 4 values in the x direction
            for (int64_t i = 0; i < 4; ++i) {
              coefficients[i] = cubic_interp1d<float>(
                  get_value_bounded<float>(inp_ptr_NC, ix_nw - 1, iy_nw - 1 + i,
                                           inp_W, inp_H, inp_sW, inp_sH,
                                           padding_mode, align_corners),
                  get_value_bounded<float>(inp_ptr_NC, ix_nw + 0, iy_nw - 1 + i,
                                           inp_W, inp_H, inp_sW, inp_sH,
                                           padding_mode, align_corners),
                  get_value_bounded<float>(inp_ptr_NC, ix_nw + 1, iy_nw - 1 + i,
                                           inp_W, inp_H, inp_sW, inp_sH,
                                           padding_mode, align_corners),
                  get_value_bounded<float>(inp_ptr_NC, ix_nw + 2, iy_nw - 1 + i,
                                           inp_W, inp_H, inp_sW, inp_sH,
                                           padding_mode, align_corners),
                  tx);
            }

            // Interpolate in the y direction
            *out_ptr_NCHW =
                cubic_interp1d<float>(coefficients[0], coefficients[1],
                                      coefficients[2], coefficients[3], ty);
          }
        }
      }
    }
  }
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/onnxruntime/cpu/modulated_deform_conv.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "modulated_deform_conv.h"

#include <cmath>
#include <vector>

#include "../ort_mmcv_utils.h"

// Bilinearly samples the row-major `src_h` x `src_w` plane `src` at the
// fractional position (h, w).
//
// Positions at or beyond one pixel outside the plane (h <= -1, h >= src_h,
// w <= -1, w >= src_w) return 0. Inside that margin, neighbours that fall
// outside the plane contribute 0.
float bilinear_interpolate_2d(const float *src, const int64_t src_h,
                              const int64_t src_w, const float h,
                              const float w) {
  const bool outside = h <= -1 || src_h <= h || w <= -1 || src_w <= w;
  if (outside) return 0;

  // Integer corners surrounding the sample point.
  const int64_t top = floor(h);
  const int64_t left = floor(w);
  const int64_t bottom = top + 1;
  const int64_t right = left + 1;

  // Fractional offsets from the top-left corner and their complements.
  const float frac_h = h - top;
  const float frac_w = w - left;
  const float inv_h = 1 - frac_h;
  const float inv_w = 1 - frac_w;

  // Only one side of each corner needs checking; the early return above
  // already bounds the other side.
  const bool top_ok = top >= 0;
  const bool left_ok = left >= 0;
  const bool bottom_ok = bottom <= src_h - 1;
  const bool right_ok = right <= src_w - 1;

  const float top_left = (top_ok && left_ok) ? src[top * src_w + left] : 0;
  const float top_right = (top_ok && right_ok) ? src[top * src_w + right] : 0;
  const float bottom_left =
      (bottom_ok && left_ok) ? src[bottom * src_w + left] : 0;
  const float bottom_right =
      (bottom_ok && right_ok) ? src[bottom * src_w + right] : 0;

  return (inv_h * inv_w * top_left + inv_h * frac_w * top_right +
          frac_h * inv_w * bottom_left + frac_h * frac_w * bottom_right);
}

// Deformable im2col for one image.
//
// For every (channel, output row, output column) triple and every kernel tap,
// samples `input` bilinearly at the regular convolution position shifted by
// the learned offset, multiplies by the modulation mask when `use_mask` is
// set, and stores the result in `columns`.
//
// output: (channels * kernel_h * kernel_w, dst_h * dst_w)
// `offset` is indexed as (offset_groups, 2 * kernel_h * kernel_w, dst_h,
// dst_w) with the h-offset preceding the w-offset for each tap; `mask` is
// indexed as (offset_groups, kernel_h * kernel_w, dst_h, dst_w).
void deformable_im2col_2d(const float *input, const float *offset,
                          const float *mask, const int64_t src_h,
                          const int64_t src_w, const int64_t kernel_h,
                          const int64_t kernel_w, const int64_t pad_h,
                          const int64_t pad_w, const int64_t stride_h,
                          const int64_t stride_w, const int64_t dilation_h,
                          const int64_t dilation_w, const int64_t channels,
                          const int64_t offset_groups, const int64_t dst_h,
                          const int64_t dst_w, const bool use_mask,
                          float *columns) {
  const int64_t out_plane = dst_h * dst_w;
  const int64_t total = channels * dst_h * dst_w;

  // One flat work item per (input channel, output pixel).
  for (int64_t item = 0; item != total; ++item) {
    const int64_t out_x = item % dst_w;
    const int64_t out_y = (item / dst_w) % dst_h;
    const int64_t chan = item / (dst_w * dst_h);
    const int64_t pixel = out_y * dst_w + out_x;

    // Offset group that this channel belongs to.
    const int64_t chans_per_group = channels / offset_groups;
    const int64_t group_id = chan / chans_per_group;

    // First output row written for this channel is chan * kernel_h * kernel_w.
    float *col = columns + (chan * kernel_h * kernel_w * out_plane + pixel);
    const float *plane = input + chan * (src_h * src_w);
    const float *group_offset =
        offset + group_id * 2 * kernel_h * kernel_w * dst_h * dst_w;
    const float *group_mask = mask;
    if (use_mask) {
      group_mask += group_id * kernel_h * kernel_w * dst_h * dst_w;
    }

    for (int64_t kh = 0; kh < kernel_h; ++kh) {
      // Undeformed source row for this tap row.
      const int64_t base_h = (out_y * stride_h - pad_h) + kh * dilation_h;
      for (int64_t kw = 0; kw < kernel_w; ++kw) {
        const int64_t tap = kh * kernel_w + kw;
        const int64_t base_w = (out_x * stride_w - pad_w) + kw * dilation_w;

        const float modulation =
            use_mask ? group_mask[tap * out_plane + pixel] : 1;
        const float delta_h = group_offset[(2 * tap) * out_plane + pixel];
        const float delta_w = group_offset[(2 * tap + 1) * out_plane + pixel];

        const float sample_h = base_h + delta_h;
        const float sample_w = base_w + delta_w;
        *col = modulation *
               bilinear_interpolate_2d(plane, src_h, src_w, sample_h, sample_w);
        // Next kernel tap lives one output plane further in `columns`.
        col += out_plane;
      }
    }
  }
}

// Reference single-precision GEMM:
//   Y = alpha * op(A) * op(B) + beta * V (broadcast per column) + beta * H
// where op(X) is X or its transpose depending on `trans_A` / `trans_B`.
//
// A is stored row-major as M x K (or K x M when transposed), B as K x N (or
// N x K when transposed). `V` (length N) and `H` (M x N) are optional and may
// be null. Y is M x N, row-major. Each H element is read before the matching
// Y element is written, so H may alias Y.
void gemm_ref_fp32(const float *A, const float *B, const float *V,
                   const float *H, const int32_t trans_A, const int32_t trans_B,
                   const int32_t M, const int32_t N, const int32_t K,
                   const float alpha, const float beta, float *Y) {
  const bool a_transposed = trans_A != 0;
  const bool b_transposed = trans_B != 0;

  for (int64_t row = 0; row < M; ++row) {
    for (int64_t col = 0; col < N; ++col) {
      float acc = 0.0f;
      for (int64_t k = 0; k < K; ++k) {
        // Pick the element according to each operand's storage layout.
        const float a = a_transposed ? A[k * M + row] : A[row * K + k];
        const float b = b_transposed ? B[col * K + k] : B[k * N + col];
        acc += a * b;
      }
      acc *= alpha;
      if (V) acc += beta * V[col];
      if (H) acc += beta * H[row * N + col];
      Y[row * N + col] = acc;
    }
  }
}

// Reference (modulated) deformable 2-D convolution.
//
// For every batch element and convolution group:
//   1. expand the group's input channels into `columns` with
//      deformable_im2col_2d,
//   2. initialise the group's output slice with the bias (or zeros),
//   3. accumulate filter * columns into that slice with gemm_ref_fp32.
//
// `mask` and `bias` may be null; a null mask disables modulation.
// `columns` is caller-provided scratch holding at least
// (channels / group) * kernel_h * kernel_w * dst_h * dst_w floats.
//
// NOTE(review): the offset / mask base pointers depend only on the batch
// index, not on `g`, so every convolution group indexes offset groups starting
// from 0 — confirm this is the intended mapping when group > 1.
void deformable_conv2d_ref_fp32(
    const float *src, const float *offset, const float *mask,
    const float *filter, const float *bias, const int64_t batch,
    const int64_t src_c, const int64_t src_h, const int64_t src_w,
    const int64_t dst_c, const int64_t dst_h, const int64_t dst_w,
    const int64_t group, const int64_t offset_group, const int64_t channels,
    const int64_t num_output, const int64_t kernel_h, const int64_t kernel_w,
    const int64_t stride_h, const int64_t stride_w, const int64_t pad_h,
    const int64_t pad_w, const int64_t dilation_h, const int64_t dilation_w,
    float *columns, float *dst) {
  const int64_t in_per_group = channels / group;
  const int64_t out_per_group = num_output / group;
  const bool has_mask = (mask != nullptr);

  for (int64_t b = 0; b < batch; ++b) {
    for (int64_t g = 0; g < group; ++g) {
      const float *group_src =
          src + b * src_c * src_h * src_w + g * in_per_group * src_h * src_w;
      const float *batch_offset =
          offset + b * offset_group * 2 * kernel_h * kernel_w * dst_h * dst_w;
      const float *batch_mask =
          mask + b * offset_group * kernel_h * kernel_w * dst_h * dst_w;

      deformable_im2col_2d(group_src, batch_offset, batch_mask, src_h, src_w,
                           kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w,
                           dilation_h, dilation_w, in_per_group, offset_group,
                           dst_h, dst_w, has_mask, columns);

      float *group_dst =
          dst + b * dst_c * dst_h * dst_w + g * out_per_group * dst_h * dst_w;

      // Seed the output with the bias (or zero); the GEMM below adds to it.
      if (bias == nullptr) {
        memset(group_dst, 0, sizeof(float) * out_per_group * dst_h * dst_w);
      } else {
        const float *group_bias = bias + g * out_per_group;
        for (int64_t oc = 0; oc < out_per_group; ++oc) {
          float *out_row = group_dst + oc * dst_h * dst_w;
          for (int64_t px = 0; px < dst_h * dst_w; ++px) {
            out_row[px] = group_bias[oc];
          }
        }
      }

      // group_dst = filter_g (out_per_group x K) * columns (K x dst_h*dst_w)
      //             + group_dst, with K = in_per_group * kernel_h * kernel_w.
      const float *group_filter =
          filter + g * out_per_group * in_per_group * kernel_h * kernel_w;
      gemm_ref_fp32(group_filter, columns, nullptr, group_dst, 0, 0,
                    out_per_group, dst_h * dst_w,
                    in_per_group * kernel_h * kernel_w, 1.0f, 1.0f, group_dst);
    }
  }
}

// Builds the kernel from the node attributes "stride", "padding", "dilation"
// (integer lists whose first two entries are used as height and width),
// "deform_groups" and "groups".
MMCVModulatedDeformConvKernel::MMCVModulatedDeformConvKernel(
    OrtApi api, const OrtKernelInfo *info)
    : api_(api), ort_(api_), info_(info) {
  using IntList = std::vector<int64_t>;

  const IntList stride = ort_.KernelInfoGetAttribute<IntList>(info, "stride");
  stride_height_ = stride[0];
  stride_width_ = stride[1];

  const IntList padding = ort_.KernelInfoGetAttribute<IntList>(info, "padding");
  padding_height_ = padding[0];
  padding_width_ = padding[1];

  const IntList dilation =
      ort_.KernelInfoGetAttribute<IntList>(info, "dilation");
  dilation_height_ = dilation[0];
  dilation_width_ = dilation[1];

  deformable_group_ =
      ort_.KernelInfoGetAttribute<int64_t>(info, "deform_groups");
  group_ = ort_.KernelInfoGetAttribute<int64_t>(info, "groups");

  // Default ORT allocator, kept as a member.
  allocator_ = Ort::AllocatorWithDefaultOptions();
}

// Runs modulated deformable convolution on the CPU.
//
// Kernel inputs: 0 = input feature map, 1 = offsets, 2 = modulation mask,
// 3 = filter weights, 4 = bias (treated as absent when the returned value is
// null). The input is indexed as (batch, channels, height, width) and the
// filter as (num_output, *, kernel_height, kernel_width).
// Kernel output 0 has shape (batch, num_output, out_height, out_width).
//
// A temporary im2col buffer is taken from `allocator_` and released again
// before returning.
void MMCVModulatedDeformConvKernel::Compute(OrtKernelContext *context) {
  const int64_t stride_height = stride_height_;
  const int64_t stride_width = stride_width_;
  const int64_t padding_height = padding_height_;
  const int64_t padding_width = padding_width_;
  const int64_t dilation_height = dilation_height_;
  const int64_t dilation_width = dilation_width_;
  const int64_t deformable_group = deformable_group_;
  const int64_t group = group_;

  const OrtValue *input = ort_.KernelContext_GetInput(context, 0);
  const float *input_data =
      reinterpret_cast<const float *>(ort_.GetTensorData<float>(input));

  const OrtValue *offset = ort_.KernelContext_GetInput(context, 1);
  const float *offset_data =
      reinterpret_cast<const float *>(ort_.GetTensorData<float>(offset));

  const OrtValue *mask = ort_.KernelContext_GetInput(context, 2);
  const float *mask_data =
      reinterpret_cast<const float *>(ort_.GetTensorData<float>(mask));

  const OrtValue *filter = ort_.KernelContext_GetInput(context, 3);
  const float *filter_data =
      reinterpret_cast<const float *>(ort_.GetTensorData<float>(filter));

  // Bias is optional: a null input means "no bias".
  const OrtValue *bias = ort_.KernelContext_GetInput(context, 4);
  const float *bias_data =
      (bias != nullptr)
          ? reinterpret_cast<const float *>(ort_.GetTensorData<float>(bias))
          : nullptr;

  OrtTensorDimensions input_dims(ort_, input);
  OrtTensorDimensions filter_dims(ort_, filter);

  int64_t batch = input_dims[0];
  int64_t channels = input_dims[1];
  int64_t in_height = input_dims[2];
  int64_t in_width = input_dims[3];
  int64_t num_output = filter_dims[0];
  int64_t kernel_height = filter_dims[2];
  int64_t kernel_width = filter_dims[3];

  // Standard convolution output size:
  //   (in + 2 * pad - dilation * (kernel - 1) - 1) / stride + 1
  int64_t out_height = floor((in_height + 2 * padding_height -
                              dilation_height * (kernel_height - 1) - 1) /
                                 stride_height +
                             1);
  int64_t out_width = floor(
      (in_width + 2 * padding_width - dilation_width * (kernel_width - 1) - 1) /
          stride_width +
      1);

  // get output memory
  std::vector<int64_t> output_dims = {batch, num_output, out_height, out_width};
  OrtValue *output = ort_.KernelContext_GetOutput(
      context, 0, output_dims.data(), output_dims.size());
  float *out_ptr = ort_.GetTensorMutableData<float>(output);

  // allocate tmp memory: im2col buffer for one convolution group
  int64_t column_len = (channels / group) * kernel_height * kernel_width *
                       out_height * out_width;
  float *columns = (float *)allocator_.Alloc(sizeof(float) * column_len);

  deformable_conv2d_ref_fp32(
      input_data, offset_data, mask_data, filter_data, bias_data, batch,
      channels, in_height, in_width, num_output, out_height, out_width, group,
      deformable_group, channels, num_output, kernel_height, kernel_width,
      stride_height, stride_width, padding_height, padding_width,
      dilation_height, dilation_width, columns, out_ptr);

  // Release the scratch buffer; without this every call leaks `column_len`
  // floats.
  allocator_.Free(columns);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/onnxruntime/cpu/nms.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "nms.h"

#include <assert.h>

#include <algorithm>
#include <cmath>
#include <iostream>
#include <iterator>
#include <numeric>  // std::iota
#include <vector>

#include "../ort_mmcv_utils.h"

// Builds the kernel: stores the ORT API handles and reads the node attributes
// "iou_threshold" (float) and "offset" (integer).
NmsKernel::NmsKernel(OrtApi api, const OrtKernelInfo *info)
    : api_(api), ort_(api_), info_(info) {
  // Default ORT allocator, kept as a member.
  allocator_ = Ort::AllocatorWithDefaultOptions();

  // Suppression threshold and the per-side pixel offset used in box sizes.
  iou_threshold_ = ort_.KernelInfoGetAttribute<float>(info, "iou_threshold");
  offset_ = ort_.KernelInfoGetAttribute<int64_t>(info, "offset");
}

// Greedy non-maximum suppression.
//
// Kernel input 0 holds the boxes, indexed as nboxes rows of
// (x1, y1, x2, y2); kernel input 1 holds one score per box. Boxes are visited
// in order of decreasing score; a box is dropped when its IoU with an
// already-kept, higher-scoring box exceeds `iou_threshold_`. `offset_` is
// added to every width/height before areas are computed.
//
// Kernel output 0 is a 1-D int64 tensor with the indices of the kept boxes in
// decreasing-score order.
//
// All scratch storage lives in std::vector, so nothing is leaked between
// invocations.
void NmsKernel::Compute(OrtKernelContext *context) {
  const float iou_threshold = iou_threshold_;
  const int64_t offset = offset_;

  const OrtValue *boxes = ort_.KernelContext_GetInput(context, 0);
  const float *boxes_data =
      reinterpret_cast<const float *>(ort_.GetTensorData<float>(boxes));
  const OrtValue *scores = ort_.KernelContext_GetInput(context, 1);
  const float *scores_data =
      reinterpret_cast<const float *>(ort_.GetTensorData<float>(scores));

  OrtTensorDimensions boxes_dim(ort_, boxes);

  const int64_t nboxes = boxes_dim[0];
  assert(boxes_dim[1] == 4);
  const size_t box_count = static_cast<size_t>(nboxes);

  // Box indices sorted by decreasing score. The inputs are only read, so no
  // private copies are needed.
  std::vector<int64_t> order(box_count);
  std::iota(order.begin(), order.end(), 0);
  std::sort(order.begin(), order.end(),
            [scores_data](int64_t id1, int64_t id2) {
              return scores_data[id1] > scores_data[id2];
            });

  // area = (x2 - x1 + offset) * (y2 - y1 + offset)
  std::vector<float> areas(box_count);
  for (int64_t i = 0; i < nboxes; i++) {
    const float *box = boxes_data + i * 4;
    areas[i] = (box[2] - box[0] + offset) * (box[3] - box[1] + offset);
  }

  // keep[r] refers to the box at *rank* r in `order`, not to box index r.
  std::vector<char> keep(box_count, 1);

  for (int64_t _i = 0; _i < nboxes; _i++) {
    if (!keep[_i]) continue;
    const int64_t i = order[_i];
    const float ix1 = boxes_data[i * 4 + 0];
    const float iy1 = boxes_data[i * 4 + 1];
    const float ix2 = boxes_data[i * 4 + 2];
    const float iy2 = boxes_data[i * 4 + 3];
    const float iarea = areas[i];

    // Suppress every lower-ranked box that overlaps box i too much.
    for (int64_t _j = _i + 1; _j < nboxes; _j++) {
      if (!keep[_j]) continue;
      const int64_t j = order[_j];
      const float xx1 = std::max(ix1, boxes_data[j * 4 + 0]);
      const float yy1 = std::max(iy1, boxes_data[j * 4 + 1]);
      const float xx2 = std::min(ix2, boxes_data[j * 4 + 2]);
      const float yy2 = std::min(iy2, boxes_data[j * 4 + 3]);

      const float w = std::max(0.f, xx2 - xx1 + offset);
      const float h = std::max(0.f, yy2 - yy1 + offset);
      const float inter = w * h;
      const float ovr = inter / (iarea + areas[j] - inter);
      if (ovr > iou_threshold) keep[_j] = 0;
    }
  }

  // Collect surviving box indices in rank order.
  std::vector<int64_t> res_order;
  for (int64_t rank = 0; rank < nboxes; rank++) {
    if (keep[rank]) {
      res_order.push_back(order[rank]);
    }
  }

  std::vector<int64_t> inds_dims = {static_cast<int64_t>(res_order.size())};

  OrtValue *res = ort_.KernelContext_GetOutput(context, 0, inds_dims.data(),
                                               inds_dims.size());
  int64_t *res_data = ort_.GetTensorMutableData<int64_t>(res);

  // std::copy is well defined for an empty result, unlike memcpy from a
  // possibly null source pointer.
  std::copy(res_order.begin(), res_order.end(), res_data);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/onnxruntime/cpu/onnxruntime_register.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "onnxruntime_register.h"

#include "corner_pool.h"
#include "deform_conv.h"
#include "grid_sample.h"
#include "modulated_deform_conv.h"
#include "nms.h"
#include "ort_mmcv_utils.h"
#include "reduce_ops.h"
#include "roi_align.h"
#include "roi_align_rotated.h"
#include "soft_nms.h"

// Name of the custom-op domain created by RegisterCustomOps.
const char *c_MMCVOpDomain = "mmcv";
// One instance per custom operator. RegisterCustomOps passes the address of
// each of these objects to CustomOpDomain_Add.
SoftNmsOp c_SoftNmsOp;
NmsOp c_NmsOp;
MMCVRoiAlignCustomOp c_MMCVRoiAlignCustomOp;
MMCVRoIAlignRotatedCustomOp c_MMCVRoIAlignRotatedCustomOp;
GridSampleOp c_GridSampleOp;
MMCVCumMaxCustomOp c_MMCVCumMaxCustomOp;
MMCVCumMinCustomOp c_MMCVCumMinCustomOp;
MMCVCornerPoolCustomOp c_MMCVCornerPoolCustomOp;
MMCVModulatedDeformConvOp c_MMCVModulatedDeformConvOp;
MMCVDeformConvOp c_MMCVDeformConvOp;

// Creates the "mmcv" custom-op domain, adds every MMCV custom operator to it
// and attaches the domain to `options`.
//
// Returns null on success. On the first failing ORT call, its status object is
// returned immediately and the remaining operators are not registered.
OrtStatus *ORT_API_CALL RegisterCustomOps(OrtSessionOptions *options,
                                          const OrtApiBase *api) {
  const OrtApi *ortApi = api->GetApi(ORT_API_VERSION);

  OrtCustomOpDomain *domain = nullptr;
  OrtStatus *status = ortApi->CreateCustomOpDomain(c_MMCVOpDomain, &domain);
  if (status != nullptr) {
    return status;
  }

  // Operators in registration order.
  OrtCustomOp *const custom_ops[] = {
      &c_SoftNmsOp,
      &c_NmsOp,
      &c_MMCVRoiAlignCustomOp,
      &c_MMCVRoIAlignRotatedCustomOp,
      &c_GridSampleOp,
      &c_MMCVCornerPoolCustomOp,
      &c_MMCVCumMaxCustomOp,
      &c_MMCVCumMinCustomOp,
      &c_MMCVModulatedDeformConvOp,
      &c_MMCVDeformConvOp,
  };

  for (OrtCustomOp *op : custom_ops) {
    status = ortApi->CustomOpDomain_Add(domain, op);
    if (status != nullptr) {
      return status;
    }
  }

  return ortApi->AddCustomOpDomain(options, domain);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/onnxruntime/cpu/reduce_ops.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "reduce_ops.h"

#include <assert.h>

#include <vector>

#include "../ort_mmcv_utils.h"

// modified from
// https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/native/ReduceOps.cpp

// Normalizes a possibly negative dimension index into [0, ndims).
// `dim` must lie in [-ndims, ndims - 1]; this is checked with assert only.
static inline int64_t maybe_wrap_dim(int64_t dim, int64_t ndims) {
  assert(-ndims <= dim && dim <= ndims - 1);
  return (dim < 0) ? dim + ndims : dim;
}

// Element stride of dimension `dim` in a contiguous tensor.
// `reversed_dim_cumprod[i]` holds the product of the sizes of dimensions
// i .. ndims - 1, so the stride of `dim` is the entry that follows it; the
// innermost dimension has stride 1.
static inline int64_t get_dim_stride(const int64_t dim, const int64_t ndims,
                                     const int64_t *reversed_dim_cumprod) {
  const bool innermost = (dim == ndims - 1);
  if (innermost) {
    return 1;
  }
  return reversed_dim_cumprod[dim + 1];
}

// Size of dimension `dim`, recovered from the reversed cumulative product of
// the dimension sizes: the innermost entry is the size itself, every other
// size is the ratio of two neighbouring entries.
static inline int64_t get_dim_size(const int64_t dim, const int64_t ndims,
                                   const int64_t *reversed_dim_cumprod) {
  const int64_t here = reversed_dim_cumprod[dim];
  if (dim == ndims - 1) {
    return here;
  }
  return here / reversed_dim_cumprod[dim + 1];
}

// Running extremum along one strided 1-D slice.
//
// Visits `input_dim_size` elements spaced `stride` apart. `Operation` is a
// binary predicate; whenever op(current, best) holds, `current` becomes the
// new best. For every visited position the best value so far is written to
// `output` and the position at which it occurred to `indices`.
// `input[0]` is read unconditionally, so the slice must be non-empty.
template <typename T1, typename T2, typename Operation>
void cummax_cummin_helper(const T1 *input, T1 *output, T2 *indices,
                          const int64_t input_dim_size, const int64_t stride) {
  Operation replaces_best;
  T1 best = input[0];
  int64_t best_pos = 0;

  int64_t offset = 0;
  for (int64_t pos = 0; pos < input_dim_size; ++pos, offset += stride) {
    const T1 candidate = input[offset];
    if (replaces_best(candidate, best)) {
      best = candidate;
      best_pos = pos;
    }
    output[offset] = best;
    indices[offset] = best_pos;
  }
}

// modified `tensor_dim_apply3` from
// https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/native/TensorDimApply.h.
// the differences are: (1) `reversed_dim_cumprod` is used for fast computation
// of tensor `size` and `stride`. (2) the same `stride` is used for input,
// output, and indices, since it's unnecessary to use separate values.
// currently `tensor_dim_apply3` is only used for `cummax` and `cummin`,
// according to the official pytorch project: https://github.com/pytorch/pytorch.
//
// Calls `func(input, output, indices, size_of_dim, stride_of_dim)` once for
// every 1-D slice taken along `dim`. The three base pointers are moved in
// lock-step from slice to slice using a per-dimension counter that works like
// an odometer over all dimensions other than `dim`, dimension 0 advancing
// first.
//
// `reversed_dim_cumprod[i]` must hold the product of the sizes of dimensions
// i .. ndims - 1 (see get_dim_size / get_dim_stride).
template <typename T1, typename T2, typename Function>
void tensor_dim_apply3(const T1 *input, T1 *output, T2 *indices,
                       const int64_t dim, const int64_t ndims,
                       const int64_t *reversed_dim_cumprod, Function func) {
  int dim_apply_finished = 0;
  int64_t input_dim_size = get_dim_size(dim, ndims, reversed_dim_cumprod);
  // the same stride is used for input, output and indices
  int64_t stride = get_dim_stride(dim, ndims, reversed_dim_cumprod);
  // current position in every dimension (the entry for `dim` stays 0)
  std::vector<int64_t> counter(ndims, 0);

  while (!dim_apply_finished) {
    // call `func` once to update output and indices
    func(input, output, indices, input_dim_size, stride);
    // a 1-D tensor has exactly one slice
    if (ndims == 1) break;
    // advance to the next slice
    for (int64_t dim_i = 0; dim_i < ndims; dim_i++) {
      if (dim_i == dim) {
        // never step along the reduced dimension; reaching it as the last
        // dimension means every lower dimension has just wrapped around
        if (dim_i == (ndims - 1)) {
          dim_apply_finished = 1;
          break;
        }
        continue;
      }
      counter[dim_i]++;

      // the same stride is used for input, output, and indices
      int64_t stride_dim_i = get_dim_stride(dim_i, ndims, reversed_dim_cumprod);
      input += stride_dim_i;
      output += stride_dim_i;
      indices += stride_dim_i;

      if (counter[dim_i] == get_dim_size(dim_i, ndims, reversed_dim_cumprod)) {
        if (dim_i == ndims - 1) {
          // the last dimension overflowed: all slices have been processed
          dim_apply_finished = 1;
          break;
        } else {
          // wrap this dimension back to 0 and carry into the next one
          input -= counter[dim_i] * stride_dim_i;
          output -= counter[dim_i] * stride_dim_i;
          indices -= counter[dim_i] * stride_dim_i;
          counter[dim_i] = 0;
        }
      } else {
        // no carry needed: the pointers now address the next slice
        break;
      }  // if
    }    // for
  }      // while
}

template <typename T1, typename T2, typename Operation>
void CumMax_CumMin_CPU(const T1 *input, T1 *output, T2 *indices,
                       int64_t *reversed_dim_cumprod, const int64_t dim,
                       const OrtTensorDimensions &out_dimensions) {
  // calculate numel
  const int64_t ndims = out_dimensions.size();
  int64_t numel = 1;
  for (int64_t dim_i = 0; dim_i < ndims; dim_i++) {
    numel *= out_dimensions.data()[dim_i];
  }

  // cummax is only applied to input which is non-zero dim and non-empty
  if (numel) {
    // compute the cumulative production on dimension size,
    // which is then used for computing the stride or size of a specific `dim`.
    reversed_dim_cumprod[ndims - 1] = out_dimensions.data()[ndims - 1];
    for (int64_t dim_i = ndims - 2; dim_i >= 0; dim_i--) {
      reversed_dim_cumprod[dim_i] =
          reversed_dim_cumprod[dim_i + 1] * out_dimensions.data()[dim_i];
    }

    // do cummax or cummin based on `Operation` type
    tensor_dim_apply3<float, int64_t>(
        input, output, indices, dim, ndims, reversed_dim_cumprod,
        cummax_cummin_helper<float, int64_t, Operation>);
  }
}

// Computes cummax for the node: output 0 receives the running maximum of
// input 0 along `dim_`, output 1 the matching indices. Both outputs take the
// input's shape.
void MMCVCumMaxKernel::Compute(OrtKernelContext *context) {
  // get input
  const OrtValue *input = ort_.KernelContext_GetInput(context, 0);
  const float *input_data = ort_.GetTensorData<float>(input);

  // get output
  OrtTensorDimensions out_dimensions(ort_, input);
  OrtValue *output = ort_.KernelContext_GetOutput(
      context, 0, out_dimensions.data(), out_dimensions.size());
  float *output_data = ort_.GetTensorMutableData<float>(output);
  OrtValue *indices = ort_.KernelContext_GetOutput(
      context, 1, out_dimensions.data(), out_dimensions.size());
  int64_t *indices_data = ort_.GetTensorMutableData<int64_t>(indices);

  // tmp memory for computing the cumulative product of the dimension sizes.
  // Owned by a std::vector so it is released when Compute returns; the
  // previous raw allocator_.Alloc() buffer was never freed and leaked on
  // every invocation.
  const int64_t ndims = out_dimensions.size();
  assert(ndims > 0);
  std::vector<int64_t> reversed_dim_cumprod(ndims);

  // dim should be wrapped if it's negative (e.g. -1)
  const int64_t dim = maybe_wrap_dim(dim_, ndims);
  CumMax_CumMin_CPU<float, int64_t, std::greater_equal<float>>(
      input_data, output_data, indices_data, reversed_dim_cumprod.data(), dim,
      out_dimensions);
}

// Computes cummin for the node: output 0 receives the running minimum of
// input 0 along `dim_`, output 1 the matching indices. Both outputs take the
// input's shape.
void MMCVCumMinKernel::Compute(OrtKernelContext *context) {
  // get input
  const OrtValue *input = ort_.KernelContext_GetInput(context, 0);
  const float *input_data = ort_.GetTensorData<float>(input);

  // get output
  OrtTensorDimensions out_dimensions(ort_, input);
  OrtValue *output = ort_.KernelContext_GetOutput(
      context, 0, out_dimensions.data(), out_dimensions.size());
  float *output_data = ort_.GetTensorMutableData<float>(output);
  OrtValue *indices = ort_.KernelContext_GetOutput(
      context, 1, out_dimensions.data(), out_dimensions.size());
  int64_t *indices_data = ort_.GetTensorMutableData<int64_t>(indices);

  // tmp memory for computing the cumulative product of the dimension sizes.
  // Owned by a std::vector so it is released when Compute returns; the
  // previous raw allocator_.Alloc() buffer was never freed and leaked on
  // every invocation.
  const int64_t ndims = out_dimensions.size();
  assert(ndims > 0);
  std::vector<int64_t> reversed_dim_cumprod(ndims);

  // dim should be wrapped if it's negative (e.g. -1)
  const int64_t dim = maybe_wrap_dim(dim_, ndims);
  CumMax_CumMin_CPU<float, int64_t, std::less_equal<float>>(
      input_data, output_data, indices_data, reversed_dim_cumprod.data(), dim,
      out_dimensions);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/onnxruntime/cpu/roi_align.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "roi_align.h"

#include "../ort_mmcv_utils.h"

// implementation taken from Caffe2
// One precomputed bilinear sample: the flat offsets (row * width + col) of the
// four neighbouring pixels inside a single feature-map plane, and the weight
// applied to each of them.
struct PreCalc {
  // offsets of the (low,low), (low,high), (high,low), (high,high) neighbours
  int pos1, pos2, pos3, pos4;
  // interpolation weights paired with pos1..pos4
  float w1, w2, w3, w4;
};

void pre_calc_for_bilinear_interpolate(
    const int height, const int width, const int pooled_height,
    const int pooled_width, const int iy_upper, const int ix_upper,
    float roi_start_h, float roi_start_w, float bin_size_h, float bin_size_w,
    int roi_bin_grid_h, int roi_bin_grid_w, std::vector<PreCalc> &pre_calc) {
  int pre_calc_index = 0;
  for (int ph = 0; ph < pooled_height; ph++) {
    for (int pw = 0; pw < pooled_width; pw++) {
      for (int iy = 0; iy < iy_upper; iy++) {
        const float yy =
            roi_start_h + ph * bin_size_h +
            static_cast<float>(iy + .5f) * bin_size_h /
                static_cast<float>(roi_bin_grid_h);  // e.g., 0.5, 1.5
        for (int ix = 0; ix < ix_upper; ix++) {
          const float xx = roi_start_w + pw * bin_size_w +
                           static_cast<float>(ix + .5f) * bin_size_w /
                               static_cast<float>(roi_bin_grid_w);

          float x = xx;
          float y = yy;
          // deal with: inverse elements are out of feature map boundary
          if (y < -1.0 || y > height || x < -1.0 || x > width) {
            // empty
            PreCalc pc;
            pc.pos1 = 0;
            pc.pos2 = 0;
            pc.pos3 = 0;
            pc.pos4 = 0;
            pc.w1 = 0;
            pc.w2 = 0;
            pc.w3 = 0;
            pc.w4 = 0;
            pre_calc[pre_calc_index] = pc;
            pre_calc_index += 1;
            continue;
          }

          if (y <= 0) {
            y = 0;
          }
          if (x <= 0) {
            x = 0;
          }

          int y_low = (int)y;
          int x_low = (int)x;
          int y_high;
          int x_high;

          if (y_low >= height - 1) {
            y_high = y_low = height - 1;
            y = (float)y_low;
          } else {
            y_high = y_low + 1;
          }

          if (x_low >= width - 1) {
            x_high = x_low = width - 1;
            x = (float)x_low;
          } else {
            x_high = x_low + 1;
          }

          float ly = y - y_low;
          float lx = x - x_low;
          float hy = 1. - ly, hx = 1. - lx;
          float w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx;

          // save weights and indices
          PreCalc pc;
          pc.pos1 = y_low * width + x_low;
          pc.pos2 = y_low * width + x_high;
          pc.pos3 = y_high * width + x_low;
          pc.pos4 = y_high * width + x_high;
          pc.w1 = w1;
          pc.w2 = w2;
          pc.w3 = w3;
          pc.w4 = w4;
          pre_calc[pre_calc_index] = pc;

          pre_calc_index += 1;
        }
      }
    }
  }
}

void ROIAlignForwardCPU(const int nthreads, const float *input,
                        const float *rois, float *output, float *argmax_y,
                        float *argmax_x, const int pooled_height,
                        const int pooled_width, const float spatial_scale,
                        const int sampling_ratio,
                        const int pool_mode,  // 0 - max pool, 1 - avg pool
                        const bool aligned, const int channels,
                        const int height, const int width) {
  int n_rois = nthreads / channels / pooled_width / pooled_height;
  // (n, c, ph, pw) is an element in the pooled output
  // can be parallelized using omp
  // #pragma omp parallel for num_threads(32)
  for (int n = 0; n < n_rois; n++) {
    int index_n = n * channels * pooled_width * pooled_height;

    const float *offset_rois = rois + n * 5;
    int roi_batch_ind = offset_rois[0];

    // Do not use rounding; this implementation detail is critical
    float offset = aligned ? (float)0.5 : (float)0.0;
    float roi_start_w = offset_rois[1] * spatial_scale - offset;
    float roi_start_h = offset_rois[2] * spatial_scale - offset;
    float roi_end_w = offset_rois[3] * spatial_scale - offset;
    float roi_end_h = offset_rois[4] * spatial_scale - offset;

    float roi_width = roi_end_w - roi_start_w;
    float roi_height = roi_end_h - roi_start_h;
    if (aligned) {
      /*AT_ASSERTM(roi_width >= 0 && roi_height >= 0,
                 "ROIs in ROIAlign cannot have non-negative size!");*/
      assert(roi_width >= 0 && roi_height >= 0);
    } else {  // for backward-compatibility only
      roi_width = std::max(roi_width, (float)1.);
      roi_height = std::max(roi_height, (float)1.);
    }
    float bin_size_h =
        static_cast<float>(roi_height) / static_cast<float>(pooled_height);
    float bin_size_w =
        static_cast<float>(roi_width) / static_cast<float>(pooled_width);

    // We use roi_bin_grid to sample the grid and mimic integral
    int roi_bin_grid_h = (sampling_ratio > 0)
                             ? sampling_ratio
                             : ceil(roi_height / pooled_height);  // e.g., = 2
    int roi_bin_grid_w =
        (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width);

    // When the grid is empty, output zeros == 0/1, instead of NaN.
    const float count =
        std::max(roi_bin_grid_h * roi_bin_grid_w, 1);  // e.g. = 4

    // we want to precalculate indices and weights shared by all channels,
    // this is the key point of optimization
    std::vector<PreCalc> pre_calc(roi_bin_grid_h * roi_bin_grid_w *
                                  pooled_width * pooled_height);
    pre_calc_for_bilinear_interpolate(
        height, width, pooled_height, pooled_width, roi_bin_grid_h,
        roi_bin_grid_w, roi_start_h, roi_start_w, bin_size_h, bin_size_w,
        roi_bin_grid_h, roi_bin_grid_w, pre_calc);

    for (int c = 0; c < channels; c++) {
      int index_n_c = index_n + c * pooled_width * pooled_height;
      const float *offset_input =
          input + (roi_batch_ind * channels + c) * height * width;
      int pre_calc_index = 0;

      for (int ph = 0; ph < pooled_height; ph++) {
        for (int pw = 0; pw < pooled_width; pw++) {
          int index = index_n_c + ph * pooled_width + pw;

          float output_val = 0.;
          float maxval = -10000;
          float maxidx_y = -1.f, maxidx_x = -1.f;
          for (int iy = 0; iy < roi_bin_grid_h; iy++) {
            const float y = roi_start_h + ph * bin_size_h +
                            static_cast<float>(iy + .5f) * bin_size_h /
                                static_cast<float>(roi_bin_grid_h);
            for (int ix = 0; ix < roi_bin_grid_w; ix++) {
              const float x = roi_start_w + pw * bin_size_w +
                              static_cast<float>(ix + .5f) * bin_size_w /
                                  static_cast<float>(roi_bin_grid_w);
              PreCalc pc = pre_calc[pre_calc_index];
              float val = pc.w1 * offset_input[pc.pos1] +
                          pc.w2 * offset_input[pc.pos2] +
                          pc.w3 * offset_input[pc.pos3] +
                          pc.w4 * offset_input[pc.pos4];
              if (val > maxval) {
                maxval = val;
                maxidx_y = y;
                maxidx_x = x;
              }
              output_val += val;
              pre_calc_index += 1;
            }
          }
          if (pool_mode == 0) {
            // We do max pooling inside a bin
            output[index] = maxval;
            argmax_y[index] = maxidx_y;
            argmax_x[index] = maxidx_x;
          } else if (pool_mode == 1) {
            // We do average (integral) pooling inside a bin
            output[index] = output_val / count;
          }  // if
        }    // for pw
      }      // for ph
    }        // for c
  }          // for n
}

// Runs RoIAlign for the node.
//
// Input 0 is the feature map (indexed below as a 4-D tensor), input 1 the RoI
// list. Output 0 has shape
// (roi_dimensions[0], input_channels, aligned_height_, aligned_width_).
// In "max" mode two scratch buffers receive the argmax coordinates; they are
// local to this call and are not exposed as outputs.
void MMCVRoiAlignKernel::Compute(OrtKernelContext *context) {
  // Setup inputs
  const OrtValue *input_X = ort_.KernelContext_GetInput(context, 0);
  const float *X_data = ort_.GetTensorData<float>(input_X);
  const OrtValue *input_rois = ort_.KernelContext_GetInput(context, 1);
  // element type is float (the previous `GetTensorData<const float *>` only
  // worked because its result was reinterpret_cast back to `const float *`)
  const float *rois = ort_.GetTensorData<float>(input_rois);

  // Setup output
  OrtTensorDimensions out_dimensions(ort_, input_X);
  OrtTensorDimensions roi_dimensions(ort_, input_rois);

  int input_channels = out_dimensions.data()[1];
  int input_height = out_dimensions.data()[2];
  int input_width = out_dimensions.data()[3];

  // output keeps the channel count; the other three dims are replaced
  out_dimensions.data()[0] = roi_dimensions.data()[0];
  out_dimensions.data()[2] = aligned_height_;
  out_dimensions.data()[3] = aligned_width_;

  OrtValue *output = ort_.KernelContext_GetOutput(
      context, 0, out_dimensions.data(), out_dimensions.size());
  float *out = ort_.GetTensorMutableData<float>(output);

  // total number of output elements
  int output_size = out_dimensions.data()[0];
  for (size_t i = 1; i < out_dimensions.size(); ++i) {
    output_size *= out_dimensions.data()[i];
  }

  // 0 - max pool, 1 - avg pool
  const bool max_pool = (pool_mode_ == "max");
  const int poolMod = max_pool ? 0 : 1;

  // argmax scratch is only needed (and only written) for max pooling.
  // std::vector releases it correctly; the previous code paired `new[]` with
  // scalar `delete`, which is undefined behaviour.
  std::vector<float> argmax_y;
  std::vector<float> argmax_x;
  if (max_pool) {
    argmax_y.resize(output_size);
    argmax_x.resize(output_size);
  }

  ROIAlignForwardCPU(output_size, X_data, rois, out,
                     max_pool ? argmax_y.data() : nullptr,
                     max_pool ? argmax_x.data() : nullptr, aligned_height_,
                     aligned_width_, spatial_scale_, sampling_ratio_, poolMod,
                     aligned_, input_channels, input_height, input_width);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/onnxruntime/cpu/roi_align_rotated.cpp
================================================
// Modified from
// https://github.com/facebookresearch/detectron2/tree/master/detectron2/layers/csrc/ROIAlignRotated
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
#include "roi_align_rotated.h"

#include "../ort_mmcv_utils.h"

// One precomputed bilinear sample: the flat offsets (row * width + col) of the
// four neighbouring pixels inside a single feature-map plane, and the weight
// applied to each of them.
struct PreCalc {
  // offsets of the (low,low), (low,high), (high,low), (high,high) neighbours
  int pos1, pos2, pos3, pos4;
  // interpolation weights paired with pos1..pos4
  float w1, w2, w3, w4;
};

void pre_calc_for_bilinear_interpolate(
    const int height, const int width, const int pooled_height,
    const int pooled_width, const int iy_upper, const int ix_upper,
    float roi_start_h, float roi_start_w, float bin_size_h, float bin_size_w,
    int roi_bin_grid_h, int roi_bin_grid_w, float roi_center_h,
    float roi_center_w, float cos_theta, float sin_theta,
    std::vector<PreCalc> &pre_calc) {
  int pre_calc_index = 0;
  for (int ph = 0; ph < pooled_height; ph++) {
    for (int pw = 0; pw < pooled_width; pw++) {
      for (int iy = 0; iy < iy_upper; iy++) {
        const float yy =
            roi_start_h + ph * bin_size_h +
            static_cast<float>(iy + .5f) * bin_size_h /
                static_cast<float>(roi_bin_grid_h);  // e.g., 0.5, 1.5
        for (int ix = 0; ix < ix_upper; ix++) {
          const float xx = roi_start_w + pw * bin_size_w +
                           static_cast<float>(ix + .5f) * bin_size_w /
                               static_cast<float>(roi_bin_grid_w);

          // Rotate by theta around the center and translate
          // In image space, (y, x) is the order for Right Handed System,
          // and this is essentially multiplying the point by a rotation matrix
          // to rotate it counterclockwise through angle theta.
          float y = yy * cos_theta - xx * sin_theta + roi_center_h;
          float x = yy * sin_theta + xx * cos_theta + roi_center_w;
          // deal with: inverse elements are out of feature map boundary
          if (y < -1.0 || y > height || x < -1.0 || x > width) {
            // empty
            PreCalc pc;
            pc.pos1 = 0;
            pc.pos2 = 0;
            pc.pos3 = 0;
            pc.pos4 = 0;
            pc.w1 = 0;
            pc.w2 = 0;
            pc.w3 = 0;
            pc.w4 = 0;
            pre_calc[pre_calc_index] = pc;
            pre_calc_index += 1;
            continue;
          }

          if (y < 0) {
            y = 0;
          }
          if (x < 0) {
            x = 0;
          }

          int y_low = (int)y;
          int x_low = (int)x;
          int y_high;
          int x_high;

          if (y_low >= height - 1) {
            y_high = y_low = height - 1;
            y = (float)y_low;
          } else {
            y_high = y_low + 1;
          }

          if (x_low >= width - 1) {
            x_high = x_low = width - 1;
            x = (float)x_low;
          } else {
            x_high = x_low + 1;
          }

          float ly = y - y_low;
          float lx = x - x_low;
          float hy = 1. - ly, hx = 1. - lx;
          float w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx;

          // save weights and indices
          PreCalc pc;
          pc.pos1 = y_low * width + x_low;
          pc.pos2 = y_low * width + x_high;
          pc.pos3 = y_high * width + x_low;
          pc.pos4 = y_high * width + x_high;
          pc.w1 = w1;
          pc.w2 = w2;
          pc.w3 = w3;
          pc.w4 = w4;
          pre_calc[pre_calc_index] = pc;

          pre_calc_index += 1;
        }
      }
    }
  }
}

void ROIAlignRotatedForwardCPU(const int nthreads, const float *input,
                               const float *rois, float *output,
                               const float &spatial_scale, const int aligned,
                               const int clockwise, const int channels,
                               const int height, const int width,
                               const int pooled_height, const int pooled_width,
                               const int sampling_ratio) {
  int n_rois = nthreads / channels / pooled_width / pooled_height;
  // (n, c, ph, pw) is an element in the pooled output
  // can be parallelized using omp
  // #pragma omp parallel for num_threads(32)
  for (int n = 0; n < n_rois; n++) {
    int index_n = n * channels * pooled_width * pooled_height;

    const float *current_roi = rois + n * 6;
    int roi_batch_ind = current_roi[0];

    // Do not use rounding; this implementation detail is critical
    float offset = aligned ? (float)0.5 : (float)0.0;
    float roi_center_w = current_roi[1] * spatial_scale - offset;
    float roi_center_h = current_roi[2] * spatial_scale - offset;
    float roi_width = current_roi[3] * spatial_scale;
    float roi_height = current_roi[4] * spatial_scale;
    // float theta = current_roi[5] * M_PI / 180.0;
    float theta = current_roi[5];  // Radian angle by default
    if (clockwise) {
      theta = -theta;
    }
    float cos_theta = cos(theta);
    float sin_theta = sin(theta);
    if (!aligned) {  // for backward-compatibility only
      roi_width = std::max(roi_width, (float)1.);
      roi_height = std::max(roi_height, (float)1.);
    }

    float bin_size_h =
        static_cast<float>(roi_height) / static_cast<float>(pooled_height);
    float bin_size_w =
        static_cast<float>(roi_width) / static_cast<float>(pooled_width);

    // We use roi_bin_grid to sample the grid and mimic integral
    int roi_bin_grid_h = (sampling_ratio > 0)
                             ? sampling_ratio
                             : ceil(roi_height / pooled_height);  // e.g., = 2
    int roi_bin_grid_w =
        (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width);

    // We do average (integral) pooling inside a bin
    const float count =
        std::max(roi_bin_grid_h * roi_bin_grid_w, 1);  // e.g. = 4

    // we want to precalculate indices and weights shared by all channels,
    // this is the key point of optimization
    std::vector<PreCalc> pre_calc(roi_bin_grid_h * roi_bin_grid_w *
                                  pooled_width * pooled_height);

    // roi_start_h and roi_start_w are computed wrt the center of RoI (x, y).
    // Appropriate translation needs to be applied after.
    float roi_start_h = -roi_height / 2.0;
    float roi_start_w = -roi_width / 2.0;

    pre_calc_for_bilinear_interpolate(
        height, width, pooled_height, pooled_width, roi_bin_grid_h,
        roi_bin_grid_w, roi_start_h, roi_start_w, bin_size_h, bin_size_w,
        roi_bin_grid_h, roi_bin_grid_w, roi_center_h, roi_center_w, cos_theta,
        sin_theta, pre_calc);

    for (int c = 0; c < channels; c++) {
      int index_n_c = index_n + c * pooled_width * pooled_height;
      const float *offset_input =
          input + (roi_batch_ind * channels + c) * height * width;
      int pre_calc_index = 0;

      for (int ph = 0; ph < pooled_height; ph++) {
        for (int pw = 0; pw < pooled_width; pw++) {
          int index = index_n_c + ph * pooled_width + pw;

          float output_val = 0.;
          for (int iy = 0; iy < roi_bin_grid_h; iy++) {
            for (int ix = 0; ix < roi_bin_grid_w; ix++) {
              PreCalc pc = pre_calc[pre_calc_index];
              output_val += pc.w1 * offset_input[pc.pos1] +
                            pc.w2 * offset_input[pc.pos2] +
                            pc.w3 * offset_input[pc.pos3] +
                            pc.w4 * offset_input[pc.pos4];

              pre_calc_index += 1;
            }
          }
          output_val /= count;

          output[index] = output_val;
        }  // for pw
      }    // for ph
    }      // for c
  }        // for n
}

// Runs rotated RoIAlign for the node.
//
// Input 0 is the feature map (indexed below as a 4-D tensor), input 1 the RoI
// list. Output 0 has shape
// (roi_dimensions[0], input_channels, aligned_height_, aligned_width_).
void MMCVRoIAlignRotatedKernel::Compute(OrtKernelContext *context) {
  // Setup inputs
  const OrtValue *input_X = ort_.KernelContext_GetInput(context, 0);
  const float *X_data = ort_.GetTensorData<float>(input_X);
  const OrtValue *input_rois = ort_.KernelContext_GetInput(context, 1);
  // element type is float (the previous `GetTensorData<const float *>` only
  // worked because its result was reinterpret_cast back to `const float *`)
  const float *rois = ort_.GetTensorData<float>(input_rois);

  // Setup output
  OrtTensorDimensions out_dimensions(ort_, input_X);
  OrtTensorDimensions roi_dimensions(ort_, input_rois);

  int input_channels = out_dimensions.data()[1];
  int input_height = out_dimensions.data()[2];
  int input_width = out_dimensions.data()[3];

  // output keeps the channel count; the other three dims are replaced
  out_dimensions.data()[0] = roi_dimensions.data()[0];
  out_dimensions.data()[2] = aligned_height_;
  out_dimensions.data()[3] = aligned_width_;

  OrtValue *output = ort_.KernelContext_GetOutput(
      context, 0, out_dimensions.data(), out_dimensions.size());
  float *out = ort_.GetTensorMutableData<float>(output);

  // total number of output elements
  int output_size = out_dimensions.data()[0];
  for (size_t i = 1; i < out_dimensions.size(); ++i) {
    output_size *= out_dimensions.data()[i];
  }
  ROIAlignRotatedForwardCPU(output_size, X_data, rois, out, spatial_scale_,
                            aligned_, clockwise_, input_channels, input_height,
                            input_width, aligned_height_, aligned_width_,
                            sampling_ratio_);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/onnxruntime/cpu/soft_nms.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "soft_nms.h"

#include <assert.h>

#include <algorithm>
#include <cmath>

#include "../ort_mmcv_utils.h"

// Builds the kernel: stores the API handles, reads the five soft-NMS
// attributes from the node, and creates a default allocator.
SoftNmsKernel::SoftNmsKernel(OrtApi api, const OrtKernelInfo *info)
    : api_(api), ort_(api_), info_(info) {
  // float-valued attributes
  this->iou_threshold_ =
      ort_.KernelInfoGetAttribute<float>(info_, "iou_threshold");
  this->sigma_ = ort_.KernelInfoGetAttribute<float>(info_, "sigma");
  this->min_score_ = ort_.KernelInfoGetAttribute<float>(info_, "min_score");

  // integer-valued attributes
  this->method_ = ort_.KernelInfoGetAttribute<int64_t>(info_, "method");
  this->offset_ = ort_.KernelInfoGetAttribute<int64_t>(info_, "offset");

  // create allocator
  this->allocator_ = Ort::AllocatorWithDefaultOptions();
}

// Runs soft-NMS for the node.
//
// Input 0 holds the boxes as rows of (x1, y1, x2, y2); input 1 holds one score
// per box. Output 0 is an (N, 5) tensor of kept boxes with their (possibly
// decayed) score; output 1 is an (N) tensor with the original index of each
// kept box. N is the number of boxes that survive.
//
// `method_` selects the decay applied to boxes overlapping the current best:
//   0 - score set to 0 when IoU >= iou_threshold
//   1 - score scaled by (1 - IoU) when IoU >= iou_threshold
//   2 - score scaled by exp(-IoU^2 / sigma)
// A box whose score drops below `min_score_` is discarded.
//
// All scratch storage lives in std::vector so it is released on return; the
// previous raw allocator_.Alloc() buffers were never freed.
void SoftNmsKernel::Compute(OrtKernelContext *context) {
  typedef float T;

  const T iou_threshold = T(iou_threshold_);
  const T sigma = T(sigma_);
  const T min_score = T(min_score_);
  const int method = int(method_);
  const T offset = T(offset_);

  const OrtValue *boxes = ort_.KernelContext_GetInput(context, 0);
  const T *boxes_data = ort_.GetTensorData<T>(boxes);
  const OrtValue *scores = ort_.KernelContext_GetInput(context, 1);
  const T *scores_data = ort_.GetTensorData<T>(scores);

  OrtTensorDimensions boxes_dim(ort_, boxes);

  int64_t nboxes = boxes_dim[0];
  assert(boxes_dim[1] == 4);

  // working copies: boxes and scores are permuted and overwritten in place
  std::vector<T> tmp_boxes(boxes_data, boxes_data + nboxes * 4);
  std::vector<T> sc(scores_data, scores_data + nboxes);
  std::vector<T> areas(nboxes);
  std::vector<T> de(nboxes * 5);
  std::vector<int64_t> inds(nboxes);

  for (int64_t i = 0; i < nboxes; i++) {
    // init inds as arange(nboxes)
    inds[i] = i;
    // area = (x2-x1+offset)*(y2-y1+offset)
    const T *b = &tmp_boxes[i * 4];
    areas[i] = (b[2] - b[0] + offset) * (b[3] - b[1] + offset);
  }

  // `nboxes` shrinks inside the loop whenever a box is discarded
  for (int64_t i = 0; i < nboxes; i++) {
    // find the highest-scoring box among the not-yet-selected ones
    T max_score = sc[i];
    int64_t max_pos = i;
    for (int64_t pos = i + 1; pos < nboxes; pos++) {
      if (max_score < sc[pos]) {
        max_score = sc[pos];
        max_pos = pos;
      }
    }

    // move it to slot i
    for (int k = 0; k < 4; k++) {
      std::swap(tmp_boxes[i * 4 + k], tmp_boxes[max_pos * 4 + k]);
    }
    std::swap(sc[i], sc[max_pos]);
    std::swap(areas[i], areas[max_pos]);
    std::swap(inds[i], inds[max_pos]);

    // record it as the i-th detection
    const T ix1 = de[i * 5 + 0] = tmp_boxes[i * 4 + 0];
    const T iy1 = de[i * 5 + 1] = tmp_boxes[i * 4 + 1];
    const T ix2 = de[i * 5 + 2] = tmp_boxes[i * 4 + 2];
    const T iy2 = de[i * 5 + 3] = tmp_boxes[i * 4 + 3];
    de[i * 5 + 4] = sc[i];
    const T iarea = areas[i];

    // decay the score of every remaining box according to its overlap
    int64_t pos = i + 1;
    while (pos < nboxes) {
      T *cur = &tmp_boxes[pos * 4];
      const T xx1 = std::max(ix1, cur[0]);
      const T yy1 = std::max(iy1, cur[1]);
      const T xx2 = std::min(ix2, cur[2]);
      const T yy2 = std::min(iy2, cur[3]);

      const T w = std::max(0.f, xx2 - xx1 + offset);
      const T h = std::max(0.f, yy2 - yy1 + offset);
      const T inter = w * h;
      const T ovr = inter / (iarea + areas[pos] - inter);

      float weight = 1.;
      if (method == 0) {
        if (ovr >= iou_threshold) weight = 0;
      } else if (method == 1) {
        if (ovr >= iou_threshold) weight = 1 - ovr;
      } else if (method == 2) {
        weight = std::exp(-(ovr * ovr) / sigma);
      }
      sc[pos] *= weight;

      if (sc[pos] < min_score) {
        // discard: overwrite with the last live box and shrink the range.
        // `pos` is not advanced so the moved-in box is examined next.
        const int64_t last = nboxes - 1;
        for (int k = 0; k < 4; k++) {
          cur[k] = tmp_boxes[last * 4 + k];
        }
        sc[pos] = sc[last];
        areas[pos] = areas[last];
        inds[pos] = inds[last];
        nboxes = last;
      } else {
        pos = pos + 1;
      }
    }
  }

  std::vector<int64_t> dets_dim({nboxes, 5});
  OrtValue *dets = ort_.KernelContext_GetOutput(context, 0, dets_dim.data(),
                                                dets_dim.size());
  T *dets_data = ort_.GetTensorMutableData<T>(dets);

  std::vector<int64_t> inds_dim({nboxes});
  OrtValue *inds_ov = ort_.KernelContext_GetOutput(context, 1, inds_dim.data(),
                                                   inds_dim.size());
  int64_t *inds_data = ort_.GetTensorMutableData<int64_t>(inds_ov);

  // only the first `nboxes` entries are live after suppression
  std::copy(de.begin(), de.begin() + nboxes * 5, dets_data);
  std::copy(inds.begin(), inds.begin() + nboxes, inds_data);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/onnxruntime/deform_conv.h
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef ONNXRUNTIME_DEFORM_CONV_H
#define ONNXRUNTIME_DEFORM_CONV_H

#include <onnxruntime_cxx_api.h>

// Kernel object created by MMCVDeformConvOp::CreateKernel for each node of
// the "MMCVDeformConv2d" custom op. The constructor and Compute are defined
// elsewhere.
struct MMCVDeformConvKernel {
  MMCVDeformConvKernel(OrtApi api, const OrtKernelInfo *info);

  // Entry point invoked by ONNX Runtime to execute the node.
  void Compute(OrtKernelContext *context);

 protected:
  // ONNX Runtime handles. NOTE(review): declaration order is the
  // initialisation order; sibling kernels build `ort_` from `api_`, so keep
  // `api_` first -- confirm against this kernel's constructor.
  OrtApi api_;
  Ort::CustomOpApi ort_;
  const OrtKernelInfo *info_;
  Ort::AllocatorWithDefaultOptions allocator_;

  // Operator hyper-parameters; presumably read from the node attributes in
  // the constructor (not visible here -- confirm).
  int64_t stride_height_;
  int64_t stride_width_;
  int64_t padding_height_;
  int64_t padding_width_;
  int64_t dilation_height_;
  int64_t dilation_width_;
  int64_t deformable_group_;
  int64_t group_;
  int64_t im2col_step_;
};

// ONNX Runtime custom-op descriptor for "MMCVDeformConv2d": three required
// float inputs, one float output, pinned to the CPU execution provider.
struct MMCVDeformConvOp
    : Ort::CustomOpBase<MMCVDeformConvOp, MMCVDeformConvKernel> {
  // Kernel factory; the returned object is heap-allocated with `new`.
  void *CreateKernel(OrtApi api, const OrtKernelInfo *info) const {
    return new MMCVDeformConvKernel(api, info);
  }

  // Name under which the op is registered.
  const char *GetName() const { return "MMCVDeformConv2d"; }

  // Inputs: three tensors, all float.
  size_t GetInputTypeCount() const { return 3; }
  ONNXTensorElementDataType GetInputType(size_t /*index*/) const {
    return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
  }

  // Every input is mandatory.
  OrtCustomOpInputOutputCharacteristic GetInputCharacteristic(
      size_t /*index*/) const {
    return OrtCustomOpInputOutputCharacteristic::INPUT_OUTPUT_REQUIRED;
  }

  // Outputs: a single float tensor.
  size_t GetOutputTypeCount() const { return 1; }
  ONNXTensorElementDataType GetOutputType(size_t /*index*/) const {
    return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
  }

  // force cpu
  const char *GetExecutionProviderType() const {
    return "CPUExecutionProvider";
  }
};
#endif


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/onnxruntime/grid_sample.h
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef ONNXRUNTIME_GRIDSAMPLE_H
#define ONNXRUNTIME_GRIDSAMPLE_H

#include <onnxruntime_cxx_api.h>

// Kernel object created by GridSampleOp::CreateKernel for each node of the
// "grid_sampler" custom op. The constructor and Compute are defined elsewhere.
struct GridSampleKernel {
  GridSampleKernel(OrtApi api, const OrtKernelInfo *info);

  // Entry point invoked by ONNX Runtime to execute the node.
  void Compute(OrtKernelContext *context);

 protected:
  // ONNX Runtime handles. NOTE(review): declaration order is the
  // initialisation order; sibling kernels build `ort_` from `api_`, so keep
  // `api_` first -- confirm against this kernel's constructor.
  OrtApi api_;
  Ort::CustomOpApi ort_;
  const OrtKernelInfo *info_;
  Ort::AllocatorWithDefaultOptions allocator_;

  // Operator settings stored as integers; presumably read from the node
  // attributes in the constructor, and the meaning of each integer value is
  // defined by the implementation file (not visible here -- confirm).
  int64_t align_corners_;
  int64_t interpolation_mode_;
  int64_t padding_mode_;
};

// ONNX Runtime custom-op descriptor for "grid_sampler": two float inputs, one
// float output, pinned to the CPU execution provider.
struct GridSampleOp : Ort::CustomOpBase<GridSampleOp, GridSampleKernel> {
  // Kernel factory; the returned object is heap-allocated with `new`.
  void *CreateKernel(OrtApi api, const OrtKernelInfo *info) const {
    return new GridSampleKernel(api, info);
  }

  // Name under which the op is registered.
  const char *GetName() const { return "grid_sampler"; }

  // Inputs: two tensors, both float.
  size_t GetInputTypeCount() const { return 2; }
  ONNXTensorElementDataType GetInputType(size_t /*index*/) const {
    return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
  }

  // Outputs: a single float tensor.
  size_t GetOutputTypeCount() const { return 1; }
  ONNXTensorElementDataType GetOutputType(size_t /*index*/) const {
    return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
  }

  // Execution provider this op is bound to.
  const char *GetExecutionProviderType() const {
    return "CPUExecutionProvider";
  }
};
#endif


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/onnxruntime/modulated_deform_conv.h
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef ONNXRUNTIME_MODULATED_DEFORM_CONV_H
#define ONNXRUNTIME_MODULATED_DEFORM_CONV_H

#include <onnxruntime_cxx_api.h>

// Kernel object created by MMCVModulatedDeformConvOp::CreateKernel for each
// node of the "MMCVModulatedDeformConv2d" custom op. The constructor and
// Compute are defined elsewhere.
struct MMCVModulatedDeformConvKernel {
  MMCVModulatedDeformConvKernel(OrtApi api, const OrtKernelInfo *info);

  // Entry point invoked by ONNX Runtime to execute the node.
  void Compute(OrtKernelContext *context);

 protected:
  // ONNX Runtime handles. NOTE(review): declaration order is the
  // initialisation order; sibling kernels build `ort_` from `api_`, so keep
  // `api_` first -- confirm against this kernel's constructor.
  OrtApi api_;
  Ort::CustomOpApi ort_;
  const OrtKernelInfo *info_;
  Ort::AllocatorWithDefaultOptions allocator_;

  // Operator hyper-parameters; presumably read from the node attributes in
  // the constructor (not visible here -- confirm).
  int64_t stride_height_;
  int64_t stride_width_;
  int64_t padding_height_;
  int64_t padding_width_;
  int64_t dilation_height_;
  int64_t dilation_width_;
  int64_t deformable_group_;
  int64_t group_;
};

// Custom-op descriptor that exposes MMCVModulatedDeformConvKernel to ONNX
// Runtime under the name "MMCVModulatedDeformConv2d".
struct MMCVModulatedDeformConvOp
    : Ort::CustomOpBase<MMCVModulatedDeformConvOp,
                        MMCVModulatedDeformConvKernel> {
  // Heap-allocates a kernel instance and hands it back as an opaque pointer.
  void* CreateKernel(OrtApi api, const OrtKernelInfo* info) const {
    return new MMCVModulatedDeformConvKernel(api, info);
  }

  const char* GetName() const {
    return "MMCVModulatedDeformConv2d";
  }

  // Five inputs, each reported as a float tensor.
  size_t GetInputTypeCount() const {
    return 5;
  }
  ONNXTensorElementDataType GetInputType(size_t) const {
    return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
  }

  // Only the last input (index 4, the bias) may be omitted; every other
  // input is required.
  OrtCustomOpInputOutputCharacteristic GetInputCharacteristic(
      size_t index) const {
    const bool is_bias_slot = (index == 4);
    return is_bias_slot
               ? OrtCustomOpInputOutputCharacteristic::INPUT_OUTPUT_OPTIONAL
               : OrtCustomOpInputOutputCharacteristic::INPUT_OUTPUT_REQUIRED;
  }

  // One float output.
  size_t GetOutputTypeCount() const {
    return 1;
  }
  ONNXTensorElementDataType GetOutputType(size_t) const {
    return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
  }

  // force cpu
  const char* GetExecutionProviderType() const {
    return "CPUExecutionProvider";
  }
};
#endif


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/onnxruntime/nms.h
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef ONNXRUNTIME_NMS_H
#define ONNXRUNTIME_NMS_H

#include <onnxruntime_cxx_api.h>

// Kernel state for the ONNX Runtime custom op registered as
// "NonMaxSuppression". Only declarations appear here; the constructor and
// Compute() are defined outside this header.
struct NmsKernel {
  // Builds the kernel from the ORT API table and the node's kernel info.
  NmsKernel(OrtApi api, const OrtKernelInfo *info);

  // Executes the op for the given kernel context.
  void Compute(OrtKernelContext *context);

 protected:
  // By-value copy of the ORT C API table.
  OrtApi api_;
  Ort::CustomOpApi ort_;
  const OrtKernelInfo *info_;
  Ort::AllocatorWithDefaultOptions allocator_;

  // NMS parameters.
  // NOTE(review): presumably populated from node attributes in the
  // constructor -- confirm against the definition.
  float iou_threshold_;
  int64_t offset_;
};

// Custom-op descriptor that exposes NmsKernel to ONNX Runtime under the name
// "NonMaxSuppression".
struct NmsOp : Ort::CustomOpBase<NmsOp, NmsKernel> {
  // Heap-allocates a kernel instance and hands it back as an opaque pointer.
  void *CreateKernel(OrtApi api, const OrtKernelInfo *info) const {
    return new NmsKernel(api, info);
  }

  const char *GetName() const { return "NonMaxSuppression"; }

  // Two inputs, each reported as a float tensor.
  size_t GetInputTypeCount() const { return 2; }
  ONNXTensorElementDataType GetInputType(size_t /*index*/) const {
    return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
  }

  // One int64 output. The index is intentionally unnamed: there is a single
  // output, and leaving it unnamed avoids an unused-parameter warning.
  size_t GetOutputTypeCount() const { return 1; }
  ONNXTensorElementDataType GetOutputType(size_t /*index*/) const {
    return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64;
  }

  // force cpu
  const char *GetExecutionProviderType() const {
    return "CPUExecutionProvider";
  }
};

#endif


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/onnxruntime/onnxruntime_register.h
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef ONNXRUNTIME_REGISTER_H
#define ONNXRUNTIME_REGISTER_H
#include <onnxruntime_c_api.h>

#ifdef __cplusplus
extern "C" {
#endif

// C-linkage entry point that registers this library's custom ops.
//   options: session options object that receives the registrations
//   api:     ORT API base handle
// Returns an OrtStatus pointer.
// NOTE(review): presumably null on success, and presumably the symbol ONNX
// Runtime looks up when loading a custom-op library -- confirm against the
// ORT documentation and the definition.
OrtStatus *ORT_API_CALL RegisterCustomOps(OrtSessionOptions *options,
                                          const OrtApiBase *api);

#ifdef __cplusplus
}
#endif
#endif  // ONNXRUNTIME_REGISTER_H


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/onnxruntime/onnxruntime_session_options_config_keys.h
================================================
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#ifndef ONNXRUNTIME_SESSION_OPTIONS_CONFIG_KEYS_H
#define ONNXRUNTIME_SESSION_OPTIONS_CONFIG_KEYS_H

/*
 * This file defines SessionOptions Config Keys and format of the Config Values.
 *
 * The Naming Convention for a SessionOptions Config Key,
 * "[Area][.[SubArea1].[SubArea2]...].[Keyname]"
 * Such as "ep.cuda.use_arena"
 * The Config Key cannot be empty
 * The maximum length of the Config Key is 128
 *
 * The string format of a SessionOptions Config Value is defined individually
 * for each Config. The maximum length of the Config Value is 1024
 */

// Key for disabling pre-packing.
// If the config value is set to "1", pre-packing is disabled; otherwise
// pre-packing is enabled (the default).
static const char* const kOrtSessionOptionsConfigDisablePrepacking =
    "session.disable_prepacking";

// A value of "1" means allocators registered in the env will be used. "0" means
// the allocators created in the session will be used. Use this to override the
// usage of env allocators on a per-session level.
static const char* const kOrtSessionOptionsConfigUseEnvAllocators =
    "session.use_env_allocators";

// Set to 'ORT' (case sensitive) to load an ORT format model.
// If unset, the model type defaults to ONNX unless it is inferred to be ORT
// format from the filename ('.ort' == ORT format) or from the model bytes.
static const char* const kOrtSessionOptionsConfigLoadModelFormat =
    "session.load_model_format";

// Set to 'ORT' (case sensitive) to save the optimized model in ORT format when
// SessionOptions.optimized_model_path is set. If unset, the format defaults to
// ONNX unless optimized_model_filepath ends in '.ort'.
static const char* const kOrtSessionOptionsConfigSaveModelFormat =
    "session.save_model_format";

#endif  // ONNXRUNTIME_SESSION_OPTIONS_CONFIG_KEYS_H


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/onnxruntime/ort_mmcv_utils.h
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef ORT_MMCV_UTILS_H
#define ORT_MMCV_UTILS_H
#include <onnxruntime_cxx_api.h>

#include <vector>

// Shape of an OrtValue tensor, exposed as a std::vector<int64_t> of
// dimension sizes.
struct OrtTensorDimensions : std::vector<int64_t> {
  // Queries `value`'s type-and-shape info through `ort` and copies the
  // dimensions into this vector. The info handle is released on every path,
  // including when the shape query or the copy throws.
  OrtTensorDimensions(Ort::CustomOpApi ort, const OrtValue* value) {
    OrtTensorTypeAndShapeInfo* info = ort.GetTensorTypeAndShape(value);
    try {
      std::vector<int64_t>::operator=(ort.GetTensorShape(info));
    } catch (...) {
      // Do not leak the handle when fetching/copying the shape fails.
      ort.ReleaseTensorTypeAndShapeInfo(info);
      throw;
    }
    ort.ReleaseTensorTypeAndShapeInfo(info);
  }
};
#endif  // ORT_MMCV_UTILS_H


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/onnxruntime/reduce_ops.h
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef ONNXRUNTIME_REDUCE_OPS_H
#define ONNXRUNTIME_REDUCE_OPS_H

#include <onnxruntime_cxx_api.h>

// Kernel state for the ONNX Runtime custom op registered as "cummax".
// Compute() is defined outside this header.
struct MMCVCumMaxKernel {
 public:
  // Captures the API wrapper, reads the node's "dim" attribute and installs a
  // default-options allocator.
  MMCVCumMaxKernel(Ort::CustomOpApi ort, const OrtKernelInfo* info)
      : ort_(ort) {
    const int64_t dim_attr = ort_.KernelInfoGetAttribute<int64_t>(info, "dim");
    dim_ = dim_attr;

    // create allocator
    allocator_ = Ort::AllocatorWithDefaultOptions();
  }

  // Executes the op for the given kernel context.
  void Compute(OrtKernelContext* context);

 private:
  Ort::CustomOpApi ort_;
  Ort::AllocatorWithDefaultOptions allocator_;

  // Value of the "dim" node attribute.
  int64_t dim_;
};

// Kernel state for the ONNX Runtime custom op registered as "cummin".
// Compute() is defined outside this header.
struct MMCVCumMinKernel {
 public:
  // Captures the API wrapper, reads the node's "dim" attribute and installs a
  // default-options allocator.
  MMCVCumMinKernel(Ort::CustomOpApi ort, const OrtKernelInfo* info)
      : ort_(ort) {
    const int64_t dim_attr = ort_.KernelInfoGetAttribute<int64_t>(info, "dim");
    dim_ = dim_attr;

    // create allocator
    allocator_ = Ort::AllocatorWithDefaultOptions();
  }

  // Executes the op for the given kernel context.
  void Compute(OrtKernelContext* context);

 private:
  Ort::CustomOpApi ort_;
  Ort::AllocatorWithDefaultOptions allocator_;

  // Value of the "dim" node attribute.
  int64_t dim_;
};

// Custom-op descriptor that exposes MMCVCumMaxKernel to ONNX Runtime under
// the name "cummax".
struct MMCVCumMaxCustomOp
    : Ort::CustomOpBase<MMCVCumMaxCustomOp, MMCVCumMaxKernel> {
  // Heap-allocates a kernel instance and hands it back as an opaque pointer.
  void* CreateKernel(Ort::CustomOpApi api, const OrtKernelInfo* info) const {
    return new MMCVCumMaxKernel(api, info);
  }

  const char* GetName() const {
    return "cummax";
  }

  // A single float input.
  size_t GetInputTypeCount() const {
    return 1;
  }
  ONNXTensorElementDataType GetInputType(size_t) const {
    return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
  }

  // Two outputs: output 1 is int64, every other index is float.
  size_t GetOutputTypeCount() const {
    return 2;
  }
  ONNXTensorElementDataType GetOutputType(size_t index) const {
    return index == 1 ? ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64
                      : ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
  }

  // force cpu
  const char* GetExecutionProviderType() const {
    return "CPUExecutionProvider";
  }
};

// Custom-op descriptor that exposes MMCVCumMinKernel to ONNX Runtime under
// the name "cummin".
struct MMCVCumMinCustomOp
    : Ort::CustomOpBase<MMCVCumMinCustomOp, MMCVCumMinKernel> {
  // Heap-allocates a kernel instance and hands it back as an opaque pointer.
  void* CreateKernel(Ort::CustomOpApi api, const OrtKernelInfo* info) const {
    return new MMCVCumMinKernel(api, info);
  }

  const char* GetName() const {
    return "cummin";
  }

  // A single float input.
  size_t GetInputTypeCount() const {
    return 1;
  }
  ONNXTensorElementDataType GetInputType(size_t) const {
    return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
  }

  // Two outputs: output 1 is int64, every other index is float.
  size_t GetOutputTypeCount() const {
    return 2;
  }
  ONNXTensorElementDataType GetOutputType(size_t index) const {
    return index == 1 ? ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64
                      : ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
  }

  // force cpu
  const char* GetExecutionProviderType() const {
    return "CPUExecutionProvider";
  }
};

#endif  // ONNXRUNTIME_REDUCE_OPS_H


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/onnxruntime/roi_align.h
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef ONNXRUNTIME_ROI_ALIGN_H
#define ONNXRUNTIME_ROI_ALIGN_H

#include <assert.h>
#include <onnxruntime_cxx_api.h>

#include <cmath>
#include <mutex>
#include <string>
#include <vector>

// Kernel state for the ONNX Runtime custom op registered as "MMCVRoiAlign".
// Compute() is defined outside this header.
struct MMCVRoiAlignKernel {
 public:
  // Captures the API wrapper and reads the node attributes. Integer
  // attributes are fetched as int64_t and narrowed to the int members.
  MMCVRoiAlignKernel(Ort::CustomOpApi ort, const OrtKernelInfo* info)
      : ort_(ort) {
    aligned_ = static_cast<int>(
        ort_.KernelInfoGetAttribute<int64_t>(info, "aligned"));
    aligned_height_ = static_cast<int>(
        ort_.KernelInfoGetAttribute<int64_t>(info, "output_height"));
    aligned_width_ = static_cast<int>(
        ort_.KernelInfoGetAttribute<int64_t>(info, "output_width"));
    pool_mode_ = ort_.KernelInfoGetAttribute<std::string>(info, "mode");
    sampling_ratio_ = static_cast<int>(
        ort_.KernelInfoGetAttribute<int64_t>(info, "sampling_ratio"));
    spatial_scale_ = ort_.KernelInfoGetAttribute<float>(info, "spatial_scale");
  }

  // Executes the op for the given kernel context.
  void Compute(OrtKernelContext* context);

 private:
  Ort::CustomOpApi ort_;

  int aligned_height_;    // "output_height" attribute
  int aligned_width_;     // "output_width" attribute
  float spatial_scale_;   // "spatial_scale" attribute
  int sampling_ratio_;    // "sampling_ratio" attribute
  std::string pool_mode_; // "mode" attribute
  int aligned_;           // "aligned" attribute
};

// Custom-op descriptor that exposes MMCVRoiAlignKernel to ONNX Runtime under
// the name "MMCVRoiAlign".
struct MMCVRoiAlignCustomOp
    : Ort::CustomOpBase<MMCVRoiAlignCustomOp, MMCVRoiAlignKernel> {
  // Heap-allocates a kernel instance and hands it back as an opaque pointer.
  void* CreateKernel(Ort::CustomOpApi api, const OrtKernelInfo* info) const {
    return new MMCVRoiAlignKernel(api, info);
  }

  const char* GetName() const {
    return "MMCVRoiAlign";
  }

  // Two inputs, each reported as a float tensor.
  size_t GetInputTypeCount() const {
    return 2;
  }
  ONNXTensorElementDataType GetInputType(size_t) const {
    return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
  }

  // One float output.
  size_t GetOutputTypeCount() const {
    return 1;
  }
  ONNXTensorElementDataType GetOutputType(size_t) const {
    return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
  }

  // force cpu
  const char* GetExecutionProviderType() const {
    return "CPUExecutionProvider";
  }
};
#endif  // ONNXRUNTIME_ROI_ALIGN_H


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/onnxruntime/roi_align_rotated.h
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef ONNXRUNTIME_ROI_ALIGN_ROTATED_H
#define ONNXRUNTIME_ROI_ALIGN_ROTATED_H

#include <assert.h>
#include <onnxruntime_cxx_api.h>

#include <cmath>
#include <mutex>
#include <string>
#include <vector>

// Kernel state for the ONNX Runtime custom op registered as
// "MMCVRoIAlignRotated". Compute() is defined outside this header.
struct MMCVRoIAlignRotatedKernel {
 public:
  // Captures the API wrapper and reads the node attributes. Integer
  // attributes are fetched as int64_t and narrowed to the int members.
  MMCVRoIAlignRotatedKernel(Ort::CustomOpApi ort, const OrtKernelInfo* info)
      : ort_(ort) {
    aligned_height_ = static_cast<int>(
        ort_.KernelInfoGetAttribute<int64_t>(info, "output_height"));
    aligned_width_ = static_cast<int>(
        ort_.KernelInfoGetAttribute<int64_t>(info, "output_width"));
    sampling_ratio_ = static_cast<int>(
        ort_.KernelInfoGetAttribute<int64_t>(info, "sampling_ratio"));
    spatial_scale_ = ort_.KernelInfoGetAttribute<float>(info, "spatial_scale");
    aligned_ = static_cast<int>(
        ort_.KernelInfoGetAttribute<int64_t>(info, "aligned"));
    clockwise_ = static_cast<int>(
        ort_.KernelInfoGetAttribute<int64_t>(info, "clockwise"));
  }

  // Executes the op for the given kernel context.
  void Compute(OrtKernelContext* context);

 private:
  Ort::CustomOpApi ort_;
  int aligned_height_;   // "output_height" attribute
  int aligned_width_;    // "output_width" attribute
  float spatial_scale_;  // "spatial_scale" attribute
  int sampling_ratio_;   // "sampling_ratio" attribute
  int aligned_;          // "aligned" attribute
  int clockwise_;        // "clockwise" attribute
};

// Custom-op descriptor that exposes MMCVRoIAlignRotatedKernel to ONNX Runtime
// under the name "MMCVRoIAlignRotated".
struct MMCVRoIAlignRotatedCustomOp
    : Ort::CustomOpBase<MMCVRoIAlignRotatedCustomOp,
                        MMCVRoIAlignRotatedKernel> {
  // Heap-allocates a kernel instance and hands it back as an opaque pointer.
  void* CreateKernel(Ort::CustomOpApi api, const OrtKernelInfo* info) const {
    return new MMCVRoIAlignRotatedKernel(api, info);
  }

  const char* GetName() const {
    return "MMCVRoIAlignRotated";
  }

  // Two inputs, each reported as a float tensor.
  size_t GetInputTypeCount() const {
    return 2;
  }
  ONNXTensorElementDataType GetInputType(size_t) const {
    return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
  }

  // One float output.
  size_t GetOutputTypeCount() const {
    return 1;
  }
  ONNXTensorElementDataType GetOutputType(size_t) const {
    return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
  }

  // force cpu
  const char* GetExecutionProviderType() const {
    return "CPUExecutionProvider";
  }
};
#endif  // ONNXRUNTIME_ROI_ALIGN_ROTATED_H


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/onnxruntime/soft_nms.h
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef ONNXRUNTIME_SOFT_NMS_H
#define ONNXRUNTIME_SOFT_NMS_H
#include <onnxruntime_cxx_api.h>

// Kernel state for the ONNX Runtime custom op registered as
// "SoftNonMaxSuppression". Only declarations appear here; the constructor and
// Compute() are defined outside this header.
struct SoftNmsKernel {
  // Builds the kernel from the ORT API table and the node's kernel info.
  SoftNmsKernel(OrtApi api, const OrtKernelInfo *info);

  // Executes the op for the given kernel context.
  void Compute(OrtKernelContext *context);

 protected:
  // By-value copy of the ORT C API table.
  OrtApi api_;
  Ort::CustomOpApi ort_;
  const OrtKernelInfo *info_;
  Ort::AllocatorWithDefaultOptions allocator_;

  // Soft-NMS parameters.
  // NOTE(review): presumably populated from node attributes in the
  // constructor -- confirm against the definition.
  float iou_threshold_;
  float sigma_;
  float min_score_;
  int64_t method_;
  int64_t offset_;
};

// Custom-op descriptor that exposes SoftNmsKernel to ONNX Runtime under the
// name "SoftNonMaxSuppression".
struct SoftNmsOp : Ort::CustomOpBase<SoftNmsOp, SoftNmsKernel> {
  // Heap-allocates a kernel instance and hands it back as an opaque pointer.
  void* CreateKernel(OrtApi api, const OrtKernelInfo* info) const {
    return new SoftNmsKernel(api, info);
  }

  const char* GetName() const {
    return "SoftNonMaxSuppression";
  }

  // Two inputs, each reported as a float tensor.
  size_t GetInputTypeCount() const {
    return 2;
  }
  ONNXTensorElementDataType GetInputType(size_t) const {
    return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
  }

  // Two outputs: output 1 is int64, every other index is float.
  size_t GetOutputTypeCount() const {
    return 2;
  }
  ONNXTensorElementDataType GetOutputType(size_t index) const {
    return index == 1 ? ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64
                      : ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
  }

  // force cpu
  const char* GetExecutionProviderType() const {
    return "CPUExecutionProvider";
  }
};
#endif  // ONNXRUNTIME_SOFT_NMS_H


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/active_rotated_filter.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved.
// Modified from
// https://github.com/csuhan/s2anet/blob/master/mmdet/ops/orn/src/ActiveRotatingFilter.h

#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Forward implementation hook: passes (input, indices, output) to
// DISPATCH_DEVICE_IMPL under this function's own name.
// NOTE(review): the macro presumably selects a backend registered for the
// tensors' device -- confirm in pytorch_device_registry.hpp.
void active_rotated_filter_forward_impl(const Tensor input,
                                        const Tensor indices, Tensor output) {
  DISPATCH_DEVICE_IMPL(active_rotated_filter_forward_impl, input, indices,
                       output);
}

// Backward implementation hook: passes (grad_out, indices, grad_in) to
// DISPATCH_DEVICE_IMPL under this function's own name.
// NOTE(review): the macro presumably selects a backend registered for the
// tensors' device -- confirm in pytorch_device_registry.hpp.
void active_rotated_filter_backward_impl(const Tensor grad_out,
                                         const Tensor indices, Tensor grad_in) {
  DISPATCH_DEVICE_IMPL(active_rotated_filter_backward_impl, grad_out, indices,
                       grad_in);
}

// Public forward entry point: a thin pass-through to
// active_rotated_filter_forward_impl with the arguments unchanged.
void active_rotated_filter_forward(
    const Tensor input,
    const Tensor indices,
    Tensor output) {
  active_rotated_filter_forward_impl(input, indices, output);
}

// Public backward entry point: a thin pass-through to
// active_rotated_filter_backward_impl with the arguments unchanged.
void active_rotated_filter_backward(
    const Tensor grad_out,
    const Tensor indices,
    Tensor grad_in) {
  active_rotated_filter_backward_impl(grad_out, indices, grad_in);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/active_rotated_filter_parrots.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include <parrots/compute/aten.hpp>
#include <parrots/extension.hpp>
#include <parrots/foundation/ssattrs.hpp>

#include "active_rotated_filter_pytorch.h"
using namespace parrots;

#ifdef MMCV_WITH_CUDA
// Parrots CUDA adapter for the forward op: converts ins[0], ins[1] and
// outs[0] with buildATensor and calls active_rotated_filter_forward.
// `attr` is not read.
void active_rotated_filter_forward_cuda_parrots(
    CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins,
    OperatorBase::out_list_t& outs) {
  auto src = buildATensor(ctx, ins[0]);
  auto index_map = buildATensor(ctx, ins[1]);
  auto dst = buildATensor(ctx, outs[0]);

  active_rotated_filter_forward(src, index_map, dst);
}

// Parrots CUDA adapter for the backward op: converts ins[0], ins[1] and
// outs[0] with buildATensor and calls active_rotated_filter_backward.
// `attr` is not read.
void active_rotated_filter_backward_cuda_parrots(
    CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins,
    OperatorBase::out_list_t& outs) {
  auto upstream_grad = buildATensor(ctx, ins[0]);
  auto index_map = buildATensor(ctx, ins[1]);
  auto input_grad = buildATensor(ctx, outs[0]);

  active_rotated_filter_backward(upstream_grad, index_map, input_grad);
}
#endif

// Parrots host adapter for the forward op: converts ins[0], ins[1] and
// outs[0] with buildATensor and calls active_rotated_filter_forward.
// `attr` is not read.
void active_rotated_filter_forward_cpu_parrots(
    HostContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins,
    OperatorBase::out_list_t& outs) {
  auto src = buildATensor(ctx, ins[0]);
  auto index_map = buildATensor(ctx, ins[1]);
  auto dst = buildATensor(ctx, outs[0]);

  active_rotated_filter_forward(src, index_map, dst);
}

// Parrots host adapter for the backward op: converts ins[0], ins[1] and
// outs[0] with buildATensor and calls active_rotated_filter_backward.
// `attr` is not read.
void active_rotated_filter_backward_cpu_parrots(
    HostContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins,
    OperatorBase::out_list_t& outs) {
  auto upstream_grad = buildATensor(ctx, ins[0]);
  auto index_map = buildATensor(ctx, ins[1]);
  auto input_grad = buildATensor(ctx, outs[0]);

  active_rotated_filter_backward(upstream_grad, index_map, input_grad);
}

// Registers the forward op with parrots: 2 inputs, 1 output. The host adapter
// is always attached; the CUDA adapter only when MMCV_WITH_CUDA is defined.
PARROTS_EXTENSION_REGISTER(active_rotated_filter_forward)
    .input(2)
    .output(1)
    .apply(active_rotated_filter_forward_cpu_parrots)
#ifdef MMCV_WITH_CUDA
    .apply(active_rotated_filter_forward_cuda_parrots)
#endif
    .done();

// Registers the backward op with parrots: 2 inputs, 1 output. The host
// adapter is always attached; the CUDA adapter only when MMCV_WITH_CUDA is
// defined.
PARROTS_EXTENSION_REGISTER(active_rotated_filter_backward)
    .input(2)
    .output(1)
    .apply(active_rotated_filter_backward_cpu_parrots)
#ifdef MMCV_WITH_CUDA
    .apply(active_rotated_filter_backward_cuda_parrots)
#endif
    .done();


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/active_rotated_filter_pytorch.h
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef ACTIVE_ROTATED_FILTER_PYTORCH_H
#define ACTIVE_ROTATED_FILTER_PYTORCH_H
#include <torch/extension.h>
using namespace at;

// Forward pass entry point; `output` is the tensor handle passed as the
// destination.
// NOTE(review): presumably written in place by the implementation -- confirm.
void active_rotated_filter_forward(const Tensor input, const Tensor indices,
                                   Tensor output);

// Backward pass entry point; `grad_in` is the tensor handle passed as the
// destination.
// NOTE(review): presumably written in place by the implementation -- confirm.
void active_rotated_filter_backward(const Tensor grad_out, const Tensor indices,
                                    Tensor grad_in);

#endif  // ACTIVE_ROTATED_FILTER_PYTORCH_H


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/assign_score_withk.cpp
================================================
// Modified from
// https://github.com/CVMI-Lab/PAConv/tree/main/scene_seg/lib/paconv_lib/src/gpu
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Forward implementation hook: passes the size arguments, the four input
// tensors and `output` to DISPATCH_DEVICE_IMPL under this function's own
// name.
// NOTE(review): the macro presumably selects a backend registered for the
// tensors' device -- confirm in pytorch_device_registry.hpp.
void assign_score_withk_forward_impl(int B, int N0, int N1, int M, int K, int O,
                                     int aggregate, const Tensor& points,
                                     const Tensor& centers,
                                     const Tensor& scores,
                                     const Tensor& knn_idx, Tensor& output) {
  DISPATCH_DEVICE_IMPL(assign_score_withk_forward_impl, B, N0, N1, M, K, O,
                       aggregate, points, centers, scores, knn_idx, output);
}

// Backward implementation hook: passes the size arguments, the incoming
// gradient, the forward inputs and the three gradient tensors to
// DISPATCH_DEVICE_IMPL under this function's own name.
// NOTE(review): the macro presumably selects a backend registered for the
// tensors' device -- confirm in pytorch_device_registry.hpp.
void assign_score_withk_backward_impl(
    int B, int N0, int N1, int M, int K, int O, int aggregate,
    const Tensor& grad_out, const Tensor& points, const Tensor& centers,
    const Tensor& scores, const Tensor& knn_idx, Tensor& grad_points,
    Tensor& grad_centers, Tensor& grad_scores) {
  DISPATCH_DEVICE_IMPL(assign_score_withk_backward_impl, B, N0, N1, M, K, O,
                       aggregate, grad_out, points, centers, scores, knn_idx,
                       grad_points, grad_centers, grad_scores);
}

// Public forward entry point. Takes tensors first and sizes last, and
// forwards to assign_score_withk_forward_impl, which expects the sizes first.
void assign_score_withk_forward(
    const Tensor& points, const Tensor& centers, const Tensor& scores,
    const Tensor& knn_idx, Tensor& output,
    int B, int N0, int N1, int M, int K, int O, int aggregate) {
  assign_score_withk_forward_impl(
      B, N0, N1, M, K, O, aggregate,
      points, centers, scores, knn_idx, output);
}

// Public backward entry point. Takes tensors first and sizes last, and
// forwards to assign_score_withk_backward_impl, which expects the sizes
// first.
void assign_score_withk_backward(
    const Tensor& grad_out, const Tensor& points, const Tensor& centers,
    const Tensor& scores, const Tensor& knn_idx,
    Tensor& grad_points, Tensor& grad_centers, Tensor& grad_scores,
    int B, int N0, int N1, int M, int K, int O, int aggregate) {
  assign_score_withk_backward_impl(
      B, N0, N1, M, K, O, aggregate,
      grad_out, points, centers, scores, knn_idx,
      grad_points, grad_centers, grad_scores);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/assign_score_withk_parrots.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include <parrots/compute/aten.hpp>
#include <parrots/extension.hpp>
#include <parrots/foundation/ssattrs.hpp>

#include "assign_score_withk_pytorch.h"

using namespace parrots;

#ifdef MMCV_WITH_CUDA
// Parrots CUDA adapter for the forward op: reads the seven integer
// attributes, converts ins[0..3] and outs[0] with buildATensor, then calls
// assign_score_withk_forward.
void assign_score_withk_forward_cuda_parrots(CudaContext& ctx,
                                             const SSElement& attr,
                                             const OperatorBase::in_list_t& ins,
                                             OperatorBase::out_list_t& outs) {
  int B;
  int N0;
  int N1;
  int M;
  int K;
  int O;
  int aggregate;
  SSAttrs(attr)
      .get<int>("B", B)
      .get<int>("N0", N0)
      .get<int>("N1", N1)
      .get<int>("M", M)
      .get<int>("K", K)
      .get<int>("O", O)
      .get<int>("aggregate", aggregate)
      .done();

  // Inputs, in registration order.
  const auto& points_t = buildATensor(ctx, ins[0]);
  const auto& centers_t = buildATensor(ctx, ins[1]);
  const auto& scores_t = buildATensor(ctx, ins[2]);
  const auto& knn_idx_t = buildATensor(ctx, ins[3]);

  // Single output.
  auto result = buildATensor(ctx, outs[0]);

  assign_score_withk_forward(points_t, centers_t, scores_t, knn_idx_t, result,
                             B, N0, N1, M, K, O, aggregate);
}

// Parrots CUDA adapter for the backward op: reads the seven integer
// attributes, converts ins[0..4] and outs[0..2] with buildATensor, then calls
// assign_score_withk_backward.
void assign_score_withk_backward_cuda_parrots(
    CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins,
    OperatorBase::out_list_t& outs) {
  int B;
  int N0;
  int N1;
  int M;
  int K;
  int O;
  int aggregate;
  SSAttrs(attr)
      .get<int>("B", B)
      .get<int>("N0", N0)
      .get<int>("N1", N1)
      .get<int>("M", M)
      .get<int>("K", K)
      .get<int>("O", O)
      .get<int>("aggregate", aggregate)
      .done();

  // Inputs, in registration order.
  const auto& upstream_grad = buildATensor(ctx, ins[0]);
  const auto& points_t = buildATensor(ctx, ins[1]);
  const auto& centers_t = buildATensor(ctx, ins[2]);
  const auto& scores_t = buildATensor(ctx, ins[3]);
  const auto& knn_idx_t = buildATensor(ctx, ins[4]);

  // Outputs, in registration order.
  auto d_points = buildATensor(ctx, outs[0]);
  auto d_centers = buildATensor(ctx, outs[1]);
  auto d_scores = buildATensor(ctx, outs[2]);

  assign_score_withk_backward(upstream_grad, points_t, centers_t, scores_t,
                              knn_idx_t, d_points, d_centers, d_scores, B, N0,
                              N1, M, K, O, aggregate);
}

// Registers the forward op with parrots: seven attributes, 4 inputs, 1
// output, handled by the CUDA adapter.
PARROTS_EXTENSION_REGISTER(assign_score_withk_forward)
    .attr("B")
    .attr("N0")
    .attr("N1")
    .attr("M")
    .attr("K")
    .attr("O")
    .attr("aggregate")
    .input(4)
    .output(1)
    .apply(assign_score_withk_forward_cuda_parrots)
    .done();

// Registers the backward op with parrots: seven attributes, 5 inputs, 3
// outputs, handled by the CUDA adapter.
PARROTS_EXTENSION_REGISTER(assign_score_withk_backward)
    .attr("B")
    .attr("N0")
    .attr("N1")
    .attr("M")
    .attr("K")
    .attr("O")
    .attr("aggregate")
    .input(5)
    .output(3)
    .apply(assign_score_withk_backward_cuda_parrots)
    .done();
#endif


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/assign_score_withk_pytorch.h
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef ASSIGN_SCORE_WITHK_PYTORCH_H
#define ASSIGN_SCORE_WITHK_PYTORCH_H
#include <torch/extension.h>
using namespace at;

// Forward pass entry point. The four const tensors are inputs; `output` is
// taken by mutable reference. The trailing ints are size/mode arguments.
// NOTE(review): the exact meaning of B/N0/N1/M/K/O/aggregate is not visible
// from this header -- see the implementation.
void assign_score_withk_forward(const Tensor& points, const Tensor& centers,
                                const Tensor& scores, const Tensor& knn_idx,
                                Tensor& output, int B, int N0, int N1, int M,
                                int K, int O, int aggregate);

// Backward pass entry point. `grad_out` and the four forward tensors are
// inputs; the three `grad_*` tensors are taken by mutable reference.
void assign_score_withk_backward(const Tensor& grad_out, const Tensor& points,
                                 const Tensor& centers, const Tensor& scores,
                                 const Tensor& knn_idx, Tensor& grad_points,
                                 Tensor& grad_centers, Tensor& grad_scores,
                                 int B, int N0, int N1, int M, int K, int O,
                                 int aggregate);

#endif  // ASSIGN_SCORE_WITHK_PYTORCH_H


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/ball_query._parrots.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include <parrots/compute/aten.hpp>
#include <parrots/extension.hpp>
#include <parrots/foundation/ssattrs.hpp>

#include "ball_query_pytorch.h"

using namespace parrots;

#ifdef MMCV_WITH_CUDA
// Parrots CUDA adapter for ball_query_forward: reads the four integer and two
// float attributes, converts ins[0], ins[1] and outs[0] with buildATensor,
// then calls ball_query_forward.
void ball_query_parrots(CudaContext& ctx, const SSElement& attr,
                        const OperatorBase::in_list_t& ins,
                        OperatorBase::out_list_t& outs) {
  int b;
  int n;
  int m;
  int nsample;
  float min_radius;
  float max_radius;
  SSAttrs(attr)
      .get<int>("b", b)
      .get<int>("n", n)
      .get<int>("m", m)
      .get<int>("nsample", nsample)
      .get<float>("min_radius", min_radius)
      .get<float>("max_radius", max_radius)
      .done();

  const auto& query_centers = buildATensor(ctx, ins[0]);
  const auto& all_points = buildATensor(ctx, ins[1]);
  auto neighbor_idx = buildATensor(ctx, outs[0]);

  // The callee takes the radii before nsample.
  ball_query_forward(query_centers, all_points, neighbor_idx, b, n, m,
                     min_radius, max_radius, nsample);
}

// Registers ball_query_forward with parrots: six attributes, 2 inputs, 1
// output, handled by ball_query_parrots.
PARROTS_EXTENSION_REGISTER(ball_query_forward)
    .attr("b")
    .attr("n")
    .attr("m")
    .attr("nsample")
    .attr("min_radius")
    .attr("max_radius")
    .input(2)
    .output(1)
    .apply(ball_query_parrots)
    .done();
#endif


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/ball_query.cpp
================================================
// Modified from
// https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/ball_query.cpp

#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Implementation hook: passes the size/radius arguments and the three tensors
// to DISPATCH_DEVICE_IMPL under this function's own name.
// NOTE(review): the macro presumably selects a backend registered for the
// tensors' device -- confirm in pytorch_device_registry.hpp.
void ball_query_forward_impl(int b, int n, int m, float min_radius,
                             float max_radius, int nsample,
                             const Tensor new_xyz, const Tensor xyz,
                             Tensor idx) {
  DISPATCH_DEVICE_IMPL(ball_query_forward_impl, b, n, m, min_radius, max_radius,
                       nsample, new_xyz, xyz, idx);
}

// Public entry point. Takes tensors first and scalars last, and forwards to
// ball_query_forward_impl, which expects the scalars first.
void ball_query_forward(
    Tensor new_xyz_tensor, Tensor xyz_tensor, Tensor idx_tensor,
    int b, int n, int m,
    float min_radius, float max_radius, int nsample) {
  ball_query_forward_impl(
      b, n, m, min_radius, max_radius, nsample,
      new_xyz_tensor, xyz_tensor, idx_tensor);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/ball_query_pytorch.h
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef BALL_QUERY_PYTORCH_H
#define BALL_QUERY_PYTORCH_H
#include <torch/extension.h>
using namespace at;

// Ball-query entry point. `new_xyz` and `xyz` are inputs; `idx` is the tensor
// handle passed as the destination.
// NOTE(review): presumably `idx` is written in place by the implementation --
// confirm.
void ball_query_forward(const Tensor new_xyz, const Tensor xyz, Tensor idx,
                        int b, int n, int m, float min_radius, float max_radius,
                        int nsample);

#endif  // BALL_QUERY_PYTORCH_H


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/bbox_overlaps.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Implementation hook: passes the two box tensors, `ious` and the three
// options to DISPATCH_DEVICE_IMPL under this function's own name.
// NOTE(review): the macro presumably selects a backend registered for the
// tensors' device -- confirm in pytorch_device_registry.hpp.
void bbox_overlaps_impl(const Tensor bboxes1, const Tensor bboxes2, Tensor ious,
                        const int mode, const bool aligned, const int offset) {
  DISPATCH_DEVICE_IMPL(bbox_overlaps_impl, bboxes1, bboxes2, ious, mode,
                       aligned, offset);
}

// Public entry point: a thin pass-through to bbox_overlaps_impl with the
// arguments unchanged.
void bbox_overlaps(
    const Tensor bboxes1,
    const Tensor bboxes2,
    Tensor ious,
    const int mode,
    const bool aligned,
    const int offset) {
  bbox_overlaps_impl(bboxes1, bboxes2, ious, mode, aligned, offset);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/bbox_overlaps_parrots.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include <parrots/compute/aten.hpp>
#include <parrots/extension.hpp>
#include <parrots/foundation/ssattrs.hpp>

#include "bbox_overlaps_pytorch.h"

using namespace parrots;

#ifdef MMCV_WITH_CUDA
/*
 * void bbox_overlaps_cuda(const Tensor bboxes1, const Tensor bboxes2, Tensor
 * ious, const int mode, const bool aligned, const int offset);
 */
// Parrots CUDA adapter for bbox_overlaps: reads the "mode", "aligned" and
// "offset" attributes, converts ins[0], ins[1] and outs[0] with buildATensor,
// then calls bbox_overlaps_cuda.
void bbox_overlaps_parrots(CudaContext& ctx, const SSElement& attr,
                           const OperatorBase::in_list_t& ins,
                           OperatorBase::out_list_t& outs) {
  int mode;
  int offset;
  bool aligned;
  SSAttrs(attr)
      .get<int>("mode", mode)
      .get<bool>("aligned", aligned)
      .get<int>("offset", offset)
      .done();

  const auto& boxes_a = buildATensor(ctx, ins[0]);
  const auto& boxes_b = buildATensor(ctx, ins[1]);
  auto overlap_out = buildATensor(ctx, outs[0]);

  bbox_overlaps_cuda(boxes_a, boxes_b, overlap_out, mode, aligned, offset);
}

// Registers bbox_overlaps with parrots: three attributes, 2 inputs, 1 output,
// handled by bbox_overlaps_parrots.
PARROTS_EXTENSION_REGISTER(bbox_overlaps)
    .attr("mode")
    .attr("aligned")
    .attr("offset")
    .input(2)
    .output(1)
    .apply(bbox_overlaps_parrots)
    .done();
#endif


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/bbox_overlaps_pytorch.h
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef BBOX_OVERLAPS_PYTORCH_H
#define BBOX_OVERLAPS_PYTORCH_H
#include <torch/extension.h>
using namespace at;

// CUDA entry point for box-overlap computation. `bboxes1` and `bboxes2` are
// inputs; `ious` is the tensor handle passed as the destination.
// NOTE(review): the definition is not in this header, and the meaning of
// `mode`, `aligned` and `offset` should be confirmed against it.
void bbox_overlaps_cuda(const Tensor bboxes1, const Tensor bboxes2, Tensor ious,
                        const int mode, const bool aligned, const int offset);

#endif  // BBOX_OVERLAPS_PYTORCH_H


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/border_align.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Forward implementation hook: passes (input, boxes, output, argmax_idx,
// pool_size) to DISPATCH_DEVICE_IMPL under this function's own name.
// NOTE(review): the macro presumably selects a backend registered for the
// tensors' device -- confirm in pytorch_device_registry.hpp.
void border_align_forward_impl(const Tensor &input, const Tensor &boxes,
                               Tensor output, Tensor argmax_idx,
                               const int pool_size) {
  DISPATCH_DEVICE_IMPL(border_align_forward_impl, input, boxes, output,
                       argmax_idx, pool_size);
}

// Backward implementation hook: passes (grad_output, boxes, argmax_idx,
// grad_input, pool_size) to DISPATCH_DEVICE_IMPL under this function's own
// name.
// NOTE(review): the macro presumably selects a backend registered for the
// tensors' device -- confirm in pytorch_device_registry.hpp.
void border_align_backward_impl(const Tensor &grad_output, const Tensor &boxes,
                                const Tensor &argmax_idx, Tensor grad_input,
                                const int pool_size) {
  DISPATCH_DEVICE_IMPL(border_align_backward_impl, grad_output, boxes,
                       argmax_idx, grad_input, pool_size);
}

// Public forward entry point: a thin pass-through to
// border_align_forward_impl with the arguments unchanged.
void border_align_forward(
    const Tensor& input,
    const Tensor& boxes,
    Tensor output,
    Tensor argmax_idx,
    const int pool_size) {
  border_align_forward_impl(input, boxes, output, argmax_idx, pool_size);
}

// Public backward entry point: forwards its arguments unchanged to
// border_align_backward_impl.
void border_align_backward(const Tensor &grad_output, const Tensor &boxes,
                           const Tensor &argmax_idx, Tensor grad_input,
                           const int pool_size) {
  border_align_backward_impl(grad_output, boxes, argmax_idx, grad_input,
                             pool_size);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/border_align_parrots.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include <parrots/compute/aten.hpp>
#include <parrots/extension.hpp>
#include <parrots/foundation/ssattrs.hpp>

#include "border_align_pytorch.h"

using namespace parrots;

// The wrappers below call border_align_forward_cuda / border_align_backward_cuda,
// which border_align_pytorch.h only declares when MMCV_WITH_CUDA is defined.
// The wrappers and their registrations are therefore compiled under the same
// guard so that non-CUDA builds do not reference undeclared functions.
#ifdef MMCV_WITH_CUDA
// Parrots CUDA adapter for border_align_forward_cuda.
// Attribute: "pool_size" (int).
// Inputs:  ins[0] = input, ins[1] = boxes.
// Outputs: outs[0] = output, outs[1] = argmax_idx.
void border_align_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr,
                                       const OperatorBase::in_list_t& ins,
                                       OperatorBase::out_list_t& outs) {
  int pool_size;
  SSAttrs(attr).get<int>("pool_size", pool_size).done();

  const auto& input = buildATensor(ctx, ins[0]);
  const auto& boxes = buildATensor(ctx, ins[1]);

  auto output = buildATensor(ctx, outs[0]);
  auto argmax_idx = buildATensor(ctx, outs[1]);
  border_align_forward_cuda(input, boxes, output, argmax_idx, pool_size);
}

// Parrots CUDA adapter for border_align_backward_cuda.
// Attribute: "pool_size" (int).
// Inputs:  ins[0] = top_grad, ins[1] = boxes, ins[2] = argmax_idx.
// Output:  outs[0] = bottom_grad.
void border_align_backward_cuda_parrots(CudaContext& ctx, const SSElement& attr,
                                        const OperatorBase::in_list_t& ins,
                                        OperatorBase::out_list_t& outs) {
  int pool_size;
  SSAttrs(attr).get<int>("pool_size", pool_size).done();

  const auto& top_grad = buildATensor(ctx, ins[0]);
  const auto& boxes = buildATensor(ctx, ins[1]);
  const auto& argmax_idx = buildATensor(ctx, ins[2]);

  auto bottom_grad = buildATensor(ctx, outs[0]);
  border_align_backward_cuda(top_grad, boxes, argmax_idx, bottom_grad,
                             pool_size);
}

// Operator registrations: forward takes 2 inputs / 2 outputs, backward takes
// 3 inputs / 1 output; both carry the "pool_size" attribute.
PARROTS_EXTENSION_REGISTER(border_align_forward)
    .attr("pool_size")
    .input(2)
    .output(2)
    .apply(border_align_forward_cuda_parrots)
    .done();

PARROTS_EXTENSION_REGISTER(border_align_backward)
    .attr("pool_size")
    .input(3)
    .output(1)
    .apply(border_align_backward_cuda_parrots)
    .done();
#endif  // MMCV_WITH_CUDA


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/border_align_pytorch.h
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef BORDER_ALIGN_PYTORCH_H
#define BORDER_ALIGN_PYTORCH_H
#include <torch/extension.h>
using namespace at;

#ifdef MMCV_WITH_CUDA
// CUDA forward entry point for border_align (defined elsewhere). Only
// declared when MMCV_WITH_CUDA is set.
void border_align_forward_cuda(const Tensor &input, const Tensor &boxes,
                               Tensor output, Tensor argmax_idx,
                               const int pool_size);

// CUDA backward entry point for border_align (defined elsewhere). Only
// declared when MMCV_WITH_CUDA is set.
void border_align_backward_cuda(const Tensor &grad_output, const Tensor &boxes,
                                const Tensor &argmax_idx, Tensor grad_input,
                                const int pool_size);
#endif

#endif  // BORDER_ALIGN_PYTORCH_H


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/box_iou_rotated.cpp
================================================
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
// modified from
// https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated.h
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Dispatcher for box_iou_rotated.
// Hands all arguments to DISPATCH_DEVICE_IMPL (from
// pytorch_device_registry.hpp, not visible here), which presumably selects
// the implementation registered for the tensors' device.
void box_iou_rotated_impl(const Tensor boxes1, const Tensor boxes2, Tensor ious,
                          const int mode_flag, const bool aligned) {
  DISPATCH_DEVICE_IMPL(box_iou_rotated_impl, boxes1, boxes2, ious, mode_flag,
                       aligned);
}

// Interface for Python.
// Public entry point that forwards its arguments unchanged to
// box_iou_rotated_impl. This is an ordinary (non-inline) definition in a .cpp
// file.
void box_iou_rotated(const Tensor boxes1, const Tensor boxes2, Tensor ious,
                     const int mode_flag, const bool aligned) {
  box_iou_rotated_impl(boxes1, boxes2, ious, mode_flag, aligned);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/box_iou_rotated_parrots.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include <parrots/compute/aten.hpp>
#include <parrots/extension.hpp>
#include <parrots/foundation/ssattrs.hpp>

#include "box_iou_rotated_pytorch.h"

using namespace parrots;

// Parrots host adapter for
//   void box_iou_rotated_cpu(const Tensor boxes1, const Tensor boxes2,
//                            Tensor ious, const int mode_flag,
//                            const bool aligned);
// Attributes: "aligned" (bool), "mode_flag" (int).
// Inputs: ins[0] = boxes1, ins[1] = boxes2.  Output: outs[0] = ious.
void box_iou_rotated_cpu_parrots(HostContext& ctx, const SSElement& attr,
                                 const OperatorBase::in_list_t& ins,
                                 OperatorBase::out_list_t& outs) {
  // Attribute values are looked up by name and written into these locals.
  bool is_aligned;
  int iou_mode;
  SSAttrs(attr)
      .get<bool>("aligned", is_aligned)
      .get<int>("mode_flag", iou_mode)
      .done();

  // Wrap the parrots arrays as ATen tensors.
  const auto first_boxes = buildATensor(ctx, ins[0]);
  const auto second_boxes = buildATensor(ctx, ins[1]);
  auto iou_out = buildATensor(ctx, outs[0]);

  box_iou_rotated_cpu(first_boxes, second_boxes, iou_out, iou_mode,
                      is_aligned);
}

#ifdef MMCV_WITH_CUDA
// Parrots CUDA adapter for
//   void box_iou_rotated_cuda(const Tensor boxes1, const Tensor boxes2,
//                             Tensor ious, const int mode_flag,
//                             const bool aligned);
// Attributes: "aligned" (bool), "mode_flag" (int).
// Inputs: ins[0] = boxes1, ins[1] = boxes2.  Output: outs[0] = ious.
void box_iou_rotated_cuda_parrots(CudaContext& ctx, const SSElement& attr,
                                  const OperatorBase::in_list_t& ins,
                                  OperatorBase::out_list_t& outs) {
  // Attribute values are looked up by name and written into these locals.
  bool is_aligned;
  int iou_mode;
  SSAttrs(attr)
      .get<bool>("aligned", is_aligned)
      .get<int>("mode_flag", iou_mode)
      .done();

  // Wrap the parrots arrays as ATen tensors.
  const auto first_boxes = buildATensor(ctx, ins[0]);
  const auto second_boxes = buildATensor(ctx, ins[1]);
  auto iou_out = buildATensor(ctx, outs[0]);

  box_iou_rotated_cuda(first_boxes, second_boxes, iou_out, iou_mode,
                       is_aligned);
}
#endif

// Registers the parrots operator `box_iou_rotated` (attributes "aligned" and
// "mode_flag", two inputs, one output). The host implementation is always
// attached; the CUDA implementation is attached only when MMCV_WITH_CUDA is
// defined.
PARROTS_EXTENSION_REGISTER(box_iou_rotated)
    .attr("aligned")
    .attr("mode_flag")
    .input(2)
    .output(1)
    .apply(box_iou_rotated_cpu_parrots)
#ifdef MMCV_WITH_CUDA
    .apply(box_iou_rotated_cuda_parrots)
#endif
    .done();


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/box_iou_rotated_pytorch.h
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef BOX_IOU_ROTATED_PYTORCH_H
#define BOX_IOU_ROTATED_PYTORCH_H
#include <torch/extension.h>
using namespace at;

// CPU entry point for box_iou_rotated (defined elsewhere).
void box_iou_rotated_cpu(const Tensor boxes1, const Tensor boxes2, Tensor ious,
                         const int mode_flag, const bool aligned);

#ifdef MMCV_WITH_CUDA
// CUDA entry point for box_iou_rotated (defined elsewhere). Only declared
// when MMCV_WITH_CUDA is set.
void box_iou_rotated_cuda(const Tensor boxes1, const Tensor boxes2, Tensor ious,
                          const int mode_flag, const bool aligned);
#endif

#endif  // BOX_IOU_ROTATED_PYTORCH_H


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/carafe.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Forward pass dispatcher for CARAFE.
// Hands all arguments to DISPATCH_DEVICE_IMPL (from
// pytorch_device_registry.hpp, not visible here), which presumably selects
// the implementation registered for the tensors' device.
void carafe_forward_impl(Tensor features, Tensor masks, Tensor rfeatures,
                         Tensor routput, Tensor rmasks, Tensor output,
                         int kernel_size, int group_size, int scale_factor) {
  DISPATCH_DEVICE_IMPL(carafe_forward_impl, features, masks, rfeatures, routput,
                       rmasks, output, kernel_size, group_size, scale_factor);
}

// Backward pass dispatcher for CARAFE.
// Hands all arguments to DISPATCH_DEVICE_IMPL (from
// pytorch_device_registry.hpp, not visible here), which presumably selects
// the implementation registered for the tensors' device.
void carafe_backward_impl(Tensor top_grad, Tensor rfeatures, Tensor masks,
                          Tensor rtop_grad, Tensor rbottom_grad_hs,
                          Tensor rbottom_grad, Tensor rmask_grad,
                          Tensor bottom_grad, Tensor mask_grad, int kernel_size,
                          int group_size, int scale_factor) {
  DISPATCH_DEVICE_IMPL(carafe_backward_impl, top_grad, rfeatures, masks,
                       rtop_grad, rbottom_grad_hs, rbottom_grad, rmask_grad,
                       bottom_grad, mask_grad, kernel_size, group_size,
                       scale_factor);
}

// Public forward entry point: forwards its arguments unchanged to
// carafe_forward_impl.
void carafe_forward(Tensor features, Tensor masks, Tensor rfeatures,
                    Tensor routput, Tensor rmasks, Tensor output,
                    int kernel_size, int group_size, int scale_factor) {
  carafe_forward_impl(features, masks, rfeatures, routput, rmasks, output,
                      kernel_size, group_size, scale_factor);
}

// Public backward entry point: forwards its arguments unchanged to
// carafe_backward_impl.
void carafe_backward(Tensor top_grad, Tensor rfeatures, Tensor masks,
                     Tensor rtop_grad, Tensor rbottom_grad_hs,
                     Tensor rbottom_grad, Tensor rmask_grad, Tensor bottom_grad,
                     Tensor mask_grad, int kernel_size, int group_size,
                     int scale_factor) {
  carafe_backward_impl(top_grad, rfeatures, masks, rtop_grad, rbottom_grad_hs,
                       rbottom_grad, rmask_grad, bottom_grad, mask_grad,
                       kernel_size, group_size, scale_factor);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/carafe_naive.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Forward pass dispatcher for the naive CARAFE variant.
// Hands all arguments to DISPATCH_DEVICE_IMPL (from
// pytorch_device_registry.hpp, not visible here), which presumably selects
// the implementation registered for the tensors' device.
void carafe_naive_forward_impl(Tensor features, Tensor masks, Tensor output,
                               int kernel_size, int group_size,
                               int scale_factor) {
  DISPATCH_DEVICE_IMPL(carafe_naive_forward_impl, features, masks, output,
                       kernel_size, group_size, scale_factor);
}

// Backward pass dispatcher for the naive CARAFE variant.
// Hands all arguments to DISPATCH_DEVICE_IMPL (from
// pytorch_device_registry.hpp, not visible here), which presumably selects
// the implementation registered for the tensors' device.
void carafe_naive_backward_impl(Tensor top_grad, Tensor features, Tensor masks,
                                Tensor bottom_grad, Tensor mask_grad,
                                int kernel_size, int group_size,
                                int scale_factor) {
  DISPATCH_DEVICE_IMPL(carafe_naive_backward_impl, top_grad, features, masks,
                       bottom_grad, mask_grad, kernel_size, group_size,
                       scale_factor);
}

// Public forward entry point: forwards its arguments unchanged to
// carafe_naive_forward_impl.
void carafe_naive_forward(Tensor features, Tensor masks, Tensor output,
                          int kernel_size, int group_size, int scale_factor) {
  carafe_naive_forward_impl(features, masks, output, kernel_size, group_size,
                            scale_factor);
}

// Public backward entry point: forwards its arguments unchanged to
// carafe_naive_backward_impl.
void carafe_naive_backward(Tensor top_grad, Tensor features, Tensor masks,
                           Tensor bottom_grad, Tensor mask_grad,
                           int kernel_size, int group_size, int scale_factor) {
  carafe_naive_backward_impl(top_grad, features, masks, bottom_grad, mask_grad,
                             kernel_size, group_size, scale_factor);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/carafe_naive_parrots.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include <parrots/compute/aten.hpp>
#include <parrots/extension.hpp>
#include <parrots/foundation/ssattrs.hpp>

#include "carafe_naive_pytorch.h"

using namespace parrots;

#ifdef MMCV_WITH_CUDA
// Parrots CUDA adapter for
//   void carafe_naive_forward_cuda(Tensor features, Tensor masks,
//                                  Tensor output, int kernel_size,
//                                  int group_size, int scale_factor);
// Attributes: "kernel_size", "group_size", "scale_factor" (all int).
// Inputs: ins[0] = features, ins[1] = masks.  Output: outs[0] = output.
void carafe_naive_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr,
                                       const OperatorBase::in_list_t& ins,
                                       OperatorBase::out_list_t& outs) {
  // Attribute values are looked up by name and written into these locals.
  int k_size;
  int g_size;
  int up_factor;
  SSAttrs(attr)
      .get<int>("kernel_size", k_size)
      .get<int>("group_size", g_size)
      .get<int>("scale_factor", up_factor)
      .done();

  // Wrap the parrots arrays as ATen tensors.
  const auto feat = buildATensor(ctx, ins[0]);
  const auto mask = buildATensor(ctx, ins[1]);
  auto result = buildATensor(ctx, outs[0]);

  carafe_naive_forward_cuda(feat, mask, result, k_size, g_size, up_factor);
}

// Parrots CUDA adapter for
//   void carafe_naive_backward_cuda(Tensor top_grad, Tensor features,
//                                   Tensor masks, Tensor bottom_grad,
//                                   Tensor mask_grad, int kernel_size,
//                                   int group_size, int scale_factor);
// Attributes: "kernel_size", "group_size", "scale_factor" (all int).
// Inputs:  ins[0] = top_grad, ins[1] = features, ins[2] = masks.
// Outputs: outs[0] = bottom_grad, outs[1] = mask_grad.
void carafe_naive_backward_cuda_parrots(CudaContext& ctx, const SSElement& attr,
                                        const OperatorBase::in_list_t& ins,
                                        OperatorBase::out_list_t& outs) {
  // Attribute values are looked up by name and written into these locals.
  int k_size;
  int g_size;
  int up_factor;
  SSAttrs(attr)
      .get<int>("kernel_size", k_size)
      .get<int>("group_size", g_size)
      .get<int>("scale_factor", up_factor)
      .done();

  // Wrap the parrots arrays as ATen tensors.
  const auto grad_top = buildATensor(ctx, ins[0]);
  const auto feat = buildATensor(ctx, ins[1]);
  const auto mask = buildATensor(ctx, ins[2]);
  auto grad_bottom = buildATensor(ctx, outs[0]);
  auto grad_mask = buildATensor(ctx, outs[1]);

  carafe_naive_backward_cuda(grad_top, feat, mask, grad_bottom, grad_mask,
                             k_size, g_size, up_factor);
}

// Registers the parrots operator `carafe_naive_forward`: three int
// attributes, two inputs, one output, CUDA implementation only.
PARROTS_EXTENSION_REGISTER(carafe_naive_forward)
    .attr("kernel_size")
    .attr("group_size")
    .attr("scale_factor")
    .input(2)
    .output(1)
    .apply(carafe_naive_forward_cuda_parrots)
    .done();

// Registers the parrots operator `carafe_naive_backward`: three int
// attributes, three inputs, two outputs, CUDA implementation only.
PARROTS_EXTENSION_REGISTER(carafe_naive_backward)
    .attr("kernel_size")
    .attr("group_size")
    .attr("scale_factor")
    .input(3)
    .output(2)
    .apply(carafe_naive_backward_cuda_parrots)
    .done();
#endif


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/carafe_naive_pytorch.h
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef CARAFE_NAIVE_PYTORCH_H
#define CARAFE_NAIVE_PYTORCH_H
#include <torch/extension.h>
using namespace at;

// CUDA forward entry point for the naive CARAFE op (defined elsewhere).
void carafe_naive_forward_cuda(Tensor features, Tensor masks, Tensor output,
                               int kernel_size, int group_size,
                               int scale_factor);

// CUDA backward entry point for the naive CARAFE op (defined elsewhere).
void carafe_naive_backward_cuda(Tensor top_grad, Tensor features, Tensor masks,
                                Tensor bottom_grad, Tensor mask_grad,
                                int kernel_size, int group_size,
                                int scale_factor);
#endif  // CARAFE_NAIVE_PYTORCH_H


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/carafe_parrots.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include <parrots/compute/aten.hpp>
#include <parrots/extension.hpp>
#include <parrots/foundation/ssattrs.hpp>

#include "carafe_pytorch.h"

using namespace parrots;

#ifdef MMCV_WITH_CUDA
// Parrots CUDA adapter for
//   void carafe_forward_cuda(Tensor features, Tensor masks, Tensor rfeatures,
//                            Tensor routput, Tensor rmasks, Tensor output,
//                            int kernel_size, int group_size,
//                            int scale_factor);
// Attributes: "kernel_size", "group_size", "scale_factor" (all int).
// Inputs:  ins[0] = features, ins[1] = masks.
// Outputs: outs[0] = rfeatures, outs[1] = routput, outs[2] = rmasks,
//          outs[3] = output.
void carafe_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr,
                                 const OperatorBase::in_list_t& ins,
                                 OperatorBase::out_list_t& outs) {
  // Attribute values are looked up by name and written into these locals.
  int k_size;
  int g_size;
  int up_factor;
  SSAttrs(attr)
      .get<int>("kernel_size", k_size)
      .get<int>("group_size", g_size)
      .get<int>("scale_factor", up_factor)
      .done();

  // Wrap the parrots arrays as ATen tensors.
  const auto feat = buildATensor(ctx, ins[0]);
  const auto mask = buildATensor(ctx, ins[1]);
  auto r_feat = buildATensor(ctx, outs[0]);
  auto r_out = buildATensor(ctx, outs[1]);
  auto r_mask = buildATensor(ctx, outs[2]);
  auto result = buildATensor(ctx, outs[3]);

  carafe_forward_cuda(feat, mask, r_feat, r_out, r_mask, result, k_size,
                      g_size, up_factor);
}

// Parrots CUDA adapter for
//   void carafe_backward_cuda(Tensor top_grad, Tensor rfeatures, Tensor masks,
//                             Tensor rtop_grad, Tensor rbottom_grad_hs,
//                             Tensor rbottom_grad, Tensor rmask_grad,
//                             Tensor bottom_grad, Tensor mask_grad,
//                             int kernel_size, int group_size,
//                             int scale_factor);
// Attributes: "kernel_size", "group_size", "scale_factor" (all int).
// Inputs:  ins[0] = top_grad, ins[1] = rfeatures, ins[2] = masks.
// Outputs: outs[0] = rtop_grad, outs[1] = rbottom_grad_hs,
//          outs[2] = rbottom_grad, outs[3] = rmask_grad,
//          outs[4] = bottom_grad, outs[5] = mask_grad.
void carafe_backward_cuda_parrots(CudaContext& ctx, const SSElement& attr,
                                  const OperatorBase::in_list_t& ins,
                                  OperatorBase::out_list_t& outs) {
  // Attribute values are looked up by name and written into these locals.
  int k_size;
  int g_size;
  int up_factor;
  SSAttrs(attr)
      .get<int>("kernel_size", k_size)
      .get<int>("group_size", g_size)
      .get<int>("scale_factor", up_factor)
      .done();

  // Wrap the parrots arrays as ATen tensors.
  const auto grad_top = buildATensor(ctx, ins[0]);
  const auto r_feat = buildATensor(ctx, ins[1]);
  const auto mask = buildATensor(ctx, ins[2]);
  auto r_grad_top = buildATensor(ctx, outs[0]);
  auto r_grad_bottom_hs = buildATensor(ctx, outs[1]);
  auto r_grad_bottom = buildATensor(ctx, outs[2]);
  auto r_grad_mask = buildATensor(ctx, outs[3]);
  auto grad_bottom = buildATensor(ctx, outs[4]);
  auto grad_mask = buildATensor(ctx, outs[5]);

  carafe_backward_cuda(grad_top, r_feat, mask, r_grad_top, r_grad_bottom_hs,
                       r_grad_bottom, r_grad_mask, grad_bottom, grad_mask,
                       k_size, g_size, up_factor);
}

// Registers the parrots operator `carafe_forward`: three int attributes, two
// inputs, four outputs, CUDA implementation only.
PARROTS_EXTENSION_REGISTER(carafe_forward)
    .attr("kernel_size")
    .attr("group_size")
    .attr("scale_factor")
    .input(2)
    .output(4)
    .apply(carafe_forward_cuda_parrots)
    .done();

// Registers the parrots operator `carafe_backward`: three int attributes,
// three inputs, six outputs, CUDA implementation only.
PARROTS_EXTENSION_REGISTER(carafe_backward)
    .attr("kernel_size")
    .attr("group_size")
    .attr("scale_factor")
    .input(3)
    .output(6)
    .apply(carafe_backward_cuda_parrots)
    .done();
#endif


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/carafe_pytorch.h
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef CARAFE_PYTORCH_H
#define CARAFE_PYTORCH_H
#include <torch/extension.h>
using namespace at;

// CUDA forward entry point for CARAFE (defined elsewhere).
void carafe_forward_cuda(Tensor features, Tensor masks, Tensor rfeatures,
                         Tensor routput, Tensor rmasks, Tensor output,
                         int kernel_size, int group_size, int scale_factor);

// CUDA backward entry point for CARAFE (defined elsewhere).
void carafe_backward_cuda(Tensor top_grad, Tensor rfeatures, Tensor masks,
                          Tensor rtop_grad, Tensor rbottom_grad_hs,
                          Tensor rbottom_grad, Tensor rmask_grad,
                          Tensor bottom_grad, Tensor mask_grad, int kernel_size,
                          int group_size, int scale_factor);
#endif  // CARAFE_PYTORCH_H


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/contour_expand.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
// It is modified from https://github.com/whai362/PSENet
#include <iostream>
#include <queue>

#include "pytorch_cpp_helper.hpp"

using namespace std;

// Plain pair of integer coordinates. A default-constructed point is (0, 0).
class Point2d {
 public:
  int x = 0;
  int y = 0;

  Point2d() = default;
  Point2d(int px, int py) : x(px), y(py) {}
};

void kernel_dilate(const uint8_t *data, IntArrayRef data_shape,
                   const int *label_map, int &label_num, int &min_area,
                   vector<vector<int>> &text_line) {
  std::vector<int> area(label_num + 1);
  int kernel_num = data_shape[0];
  int height = data_shape[1];
  int width = data_shape[2];

  for (int x = 0; x < height; ++x) {
    for (int y = 0; y < width; ++y) {
      int label = label_map[x * width + y];
      if (label == 0) continue;
      area[label] += 1;
    }
  }

  queue<Point2d> queue, next_queue;
  for (int x = 0; x < height; ++x) {
    vector<int> row(width);
    for (int y = 0; y < width; ++y) {
      int label = label_map[x * width + y];
      if (label == 0) continue;
      if (area[label] < min_area) continue;

      Point2d point(x, y);
      queue.push(point);
      row[y] = label;
    }
    text_line.emplace_back(row);
  }

  int dx[] = {-1, 1, 0, 0};
  int dy[] = {0, 0, -1, 1};
  vector<int> kernel_step(kernel_num);
  std::for_each(kernel_step.begin(), kernel_step.end(),
                [=](int &k) { return k * height * width; });

  for (int kernel_id = kernel_num - 2; kernel_id >= 0; --kernel_id) {
    while (!queue.empty()) {
      Point2d point = queue.front();
      queue.pop();
      int x = point.x;
      int y = point.y;
      int label = text_line[x][y];

      bool is_edge = true;
      for (int d = 0; d < 4; ++d) {
        int tmp_x = x + dx[d];
        int tmp_y = y + dy[d];

        if (tmp_x < 0 || tmp_x >= height) continue;
        if (tmp_y < 0 || tmp_y >= width) continue;
        int kernel_value = data[kernel_step[kernel_id] + tmp_x * width + tmp_y];
        if (kernel_value == 0) continue;
        if (text_line[tmp_x][tmp_y] > 0) continue;

        Point2d point(tmp_x, tmp_y);
        queue.push(point);
        text_line[tmp_x][tmp_y] = label;
        is_edge = false;
      }

      if (is_edge) {
        next_queue.push(point);
      }
    }
    swap(queue, next_queue);
  }
}

// Expands labelled kernels through the kernel stack and returns the
// resulting label image as one vector per row.
//
// Parameters:
//   kernel_mask            3-D CPU tensor read as uint8; dims 1 and 2 must
//                          match internal_kernel_label.
//   internal_kernel_label  2-D CPU tensor read as int32.
//   min_kernel_area        passed to kernel_dilate as the minimum seed area.
//   kernel_num             passed to kernel_dilate as the label count bound;
//                          must be non-negative.
//
// Returns the label rows filled in by kernel_dilate.
//
// Throws c10::Error (via TORCH_CHECK / CHECK_CPU_INPUT / data_ptr) when the
// shape, device or dtype requirements are not met. These checks used to be
// plain assert()s, which are compiled out under NDEBUG and would let a
// mismatched shape reach the raw pointer indexing in kernel_dilate.
std::vector<std::vector<int>> contour_expand(Tensor kernel_mask,
                                             Tensor internal_kernel_label,
                                             int min_kernel_area,
                                             int kernel_num) {
  kernel_mask = kernel_mask.contiguous();
  internal_kernel_label = internal_kernel_label.contiguous();
  TORCH_CHECK(kernel_mask.dim() == 3, "kernel_mask must be 3-D, but got ",
              kernel_mask.dim(), "-D");
  TORCH_CHECK(internal_kernel_label.dim() == 2,
              "internal_kernel_label must be 2-D, but got ",
              internal_kernel_label.dim(), "-D");
  TORCH_CHECK(kernel_mask.size(1) == internal_kernel_label.size(0),
              "height mismatch between kernel_mask (", kernel_mask.size(1),
              ") and internal_kernel_label (", internal_kernel_label.size(0),
              ")");
  TORCH_CHECK(kernel_mask.size(2) == internal_kernel_label.size(1),
              "width mismatch between kernel_mask (", kernel_mask.size(2),
              ") and internal_kernel_label (", internal_kernel_label.size(1),
              ")");
  TORCH_CHECK(kernel_num >= 0, "kernel_num must be non-negative, but got ",
              kernel_num);
  CHECK_CPU_INPUT(kernel_mask);
  CHECK_CPU_INPUT(internal_kernel_label);

  auto ptr_data = kernel_mask.data_ptr<uint8_t>();
  IntArrayRef data_shape = kernel_mask.sizes();
  auto data_label_map = internal_kernel_label.data_ptr<int32_t>();

  vector<vector<int>> text_line;
  kernel_dilate(ptr_data, data_shape, data_label_map, kernel_num,
                min_kernel_area, text_line);

  return text_line;
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/contour_expand_parrots.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include <parrots/compute/aten.hpp>
#include <parrots/extension.hpp>
#include <parrots/foundation/ssattrs.hpp>

#include "contour_expand_pytorch.h"

using namespace parrots;
using namespace std;

// Parrots adapter for contour_expand, templated on the context type.
// Attributes: "min_kernel_area" and "kernel_num" (both int).
// Inputs: ins[0] = kernel_mask, ins[1] = internal_kernel_label.
// Output: outs[0] receives an int tensor of shape {n, m}, where n is the
// number of rows returned by contour_expand and m the longest row length.
template <typename T>
void contour_expand_parrots(T& ctx, const SSElement& attr,
                            const OperatorBase::in_list_t& ins,
                            OperatorBase::out_list_t& outs) {
  int min_kernel_area, kernel_num;
  SSAttrs(attr)
      .get<int>("min_kernel_area", min_kernel_area)
      .get<int>("kernel_num", kernel_num)
      .done();
  at::Tensor kernel_mask;
  at::Tensor internal_kernel_label;
  kernel_mask = buildATensor(ctx, ins[0]);
  internal_kernel_label = buildATensor(ctx, ins[1]);
  auto out = contour_expand(kernel_mask, internal_kernel_label, min_kernel_area,
                            kernel_num);
  // n = row count, m = maximum row length across all rows.
  int n = out.size(), m = 0;
  for (int i = 0; i < n; ++i)
    if (m < out[i].size()) m = out[i].size();
  auto options = torch::TensorOptions().dtype(at::kInt);
  auto tensor = torch::zeros({n, m}, options);
  // Copy each row into the matching slice of the dense tensor. from_blob
  // wraps the vector's storage without owning it, so `out` must stay alive
  // until the assignment has completed.
  // NOTE(review): assigning to the temporary returned by slice() is assumed
  // to write through into `tensor`, and each row is assumed to have length m
  // -- confirm both.
  for (int i = 0; i < n; i++)
    tensor.slice(0, i, i + 1) =
        torch::from_blob(out[i].data(), {out[i].size()}, options);
  updateDArray(ctx, tensor, outs[0]);
}

// Registers the parrots operator `contour_expand`: two int attributes, two
// inputs, one output, host implementation only.
PARROTS_EXTENSION_REGISTER(contour_expand)
    .attr("min_kernel_area")
    .attr("kernel_num")
    .input(2)
    .output(1)
    .apply(contour_expand_parrots<HostContext>)
    .done();


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/contour_expand_pytorch.h
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef CONTOUR_EXPAND_PYTORCH_H
#define CONTOUR_EXPAND_PYTORCH_H
#include <torch/extension.h>
using namespace at;

// Expands labelled kernels and returns the label image as one vector per row
// (defined in contour_expand.cpp).
std::vector<std::vector<int>> contour_expand(Tensor kernel_mask,
                                             Tensor internal_kernel_label,
                                             int min_kernel_area,
                                             int kernel_num);

#endif  // CONTOUR_EXPAND_PYTORCH_H


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/convex_iou.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
// modified from
// https://github.com/SDL-GuoZonghao/BeyondBoundingBox/tree/main/mmdet/ops/iou/src
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Dispatcher for convex_iou.
// Hands all arguments to DISPATCH_DEVICE_IMPL (from
// pytorch_device_registry.hpp, not visible here), which presumably selects
// the implementation registered for the tensors' device.
void convex_iou_impl(const Tensor pointsets, const Tensor polygons,
                     Tensor ious) {
  DISPATCH_DEVICE_IMPL(convex_iou_impl, pointsets, polygons, ious);
}

// Public entry point: forwards its arguments unchanged to convex_iou_impl.
void convex_iou(const Tensor pointsets, const Tensor polygons, Tensor ious) {
  convex_iou_impl(pointsets, polygons, ious);
}

// Dispatcher for convex_giou.
// Hands all arguments to DISPATCH_DEVICE_IMPL (from
// pytorch_device_registry.hpp, not visible here), which presumably selects
// the implementation registered for the tensors' device.
void convex_giou_impl(const Tensor pointsets, const Tensor polygons,
                      Tensor output) {
  DISPATCH_DEVICE_IMPL(convex_giou_impl, pointsets, polygons, output);
}

// Public entry point: forwards its arguments unchanged to convex_giou_impl.
void convex_giou(const Tensor pointsets, const Tensor polygons, Tensor output) {
  convex_giou_impl(pointsets, polygons, output);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/convex_iou_parrots.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include <parrots/compute/aten.hpp>
#include <parrots/extension.hpp>
#include <parrots/foundation/ssattrs.hpp>

#include "convex_iou_pytorch.h"
using namespace parrots;

#ifdef MMCV_WITH_CUDA
// Parrots CUDA adapter for convex_iou. The operator has no attributes.
// Inputs: ins[0] = pointsets, ins[1] = polygons.  Output: outs[0] = ious.
void convex_iou_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr,
                                     const OperatorBase::in_list_t& ins,
                                     OperatorBase::out_list_t& outs) {
  // Wrap the parrots arrays as ATen tensors.
  auto point_sets = buildATensor(ctx, ins[0]);
  auto polys = buildATensor(ctx, ins[1]);
  auto iou_out = buildATensor(ctx, outs[0]);

  convex_iou(point_sets, polys, iou_out);
}

// Parrots CUDA adapter for convex_giou. The operator has no attributes.
// Inputs: ins[0] = pointsets, ins[1] = polygons.  Output: outs[0] = output.
void convex_giou_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr,
                                      const OperatorBase::in_list_t& ins,
                                      OperatorBase::out_list_t& outs) {
  // Wrap the parrots arrays as ATen tensors.
  auto point_sets = buildATensor(ctx, ins[0]);
  auto polys = buildATensor(ctx, ins[1]);
  auto result = buildATensor(ctx, outs[0]);

  convex_giou(point_sets, polys, result);
}

// Registers the parrots operator `convex_iou`: no attributes, two inputs,
// one output, CUDA implementation only.
PARROTS_EXTENSION_REGISTER(convex_iou)
    .input(2)
    .output(1)
    .apply(convex_iou_forward_cuda_parrots)
    .done();

// Registers the parrots operator `convex_giou`: no attributes, two inputs,
// one output, CUDA implementation only.
PARROTS_EXTENSION_REGISTER(convex_giou)
    .input(2)
    .output(1)
    .apply(convex_giou_forward_cuda_parrots)
    .done();

#endif


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/convex_iou_pytorch.h
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef CONVEX_IOU_PYTORCH_H
#define CONVEX_IOU_PYTORCH_H
#include <torch/extension.h>
using namespace at;

// Device-dispatching entry point for convex IoU (defined in convex_iou.cpp).
void convex_iou(const Tensor pointsets, const Tensor polygons, Tensor ious);

// Device-dispatching entry point for convex GIoU (defined in convex_iou.cpp).
void convex_giou(const Tensor pointsets, const Tensor polygons, Tensor output);

#endif  // CONVEX_IOU_PYTORCH_H


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/corner_pool.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
// Modified from
// https://github.com/princeton-vl/CornerNet-Lite/tree/master/core/models/py_utils/_cpools/src
#include "pytorch_cpp_helper.hpp"

Tensor bottom_pool_forward(Tensor input) {
  // Work on a same-shaped buffer that starts as a copy of the input.
  Tensor result = at::zeros_like(input);
  const int64_t rows = input.size(2);
  result.copy_(input);

  // Doubling scan along dim 2: each pass writes into rows [shift, rows) the
  // element-wise max of those rows and rows [0, rows - shift). Both operands
  // are cloned because the destination is a view into `result`.
  int64_t shift = 1;
  while (shift < rows) {
    Tensor dest = at::slice(result, 2, shift, rows);
    Tensor current = dest.clone();
    Tensor shifted = at::slice(result, 2, 0, rows - shift).clone();
    at::max_out(dest, current, shifted);
    shift *= 2;
  }

  return result;
}

// Backward pass of bottom pooling.
//
// Scans dim 2 of `input` from index 0 upwards while tracking, per
// (batch, channel, column), the running maximum and the row index where it
// occurred. Each row of `grad_output` is accumulated into the `output` row
// that currently holds that maximum. Returns a tensor shaped like `input`.
//
// Scratch tensors are created from input.options() rather than a hard-coded
// CUDA/float configuration, so the routine also works for host tensors (this
// function is registered for HostContext as well) and for non-float dtypes.
Tensor bottom_pool_backward(Tensor input, Tensor grad_output) {
  auto output = at::zeros_like(input);

  // Tensor::size returns int64_t; keep that width to avoid narrowing.
  const int64_t batch = input.size(0);
  const int64_t channel = input.size(1);
  const int64_t height = input.size(2);
  const int64_t width = input.size(3);

  const auto value_options = input.options();
  const auto index_options = value_options.dtype(at::kLong);
  const auto mask_options = value_options.dtype(at::kBool);

  auto max_val = torch::zeros({batch, channel, width}, value_options);
  auto max_ind = torch::zeros({batch, channel, width}, index_options);

  // Seed the running maximum with row 0; its arg-max index is 0.
  auto input_temp = input.select(2, 0);
  max_val.copy_(input_temp);

  max_ind.fill_(0);

  // Row 0 of the gradient always flows to row 0.
  auto output_temp = output.select(2, 0);
  auto grad_output_temp = grad_output.select(2, 0);
  output_temp.copy_(grad_output_temp);

  // View of max_ind: in-place updates to max_ind below are visible through it.
  auto un_max_ind = max_ind.unsqueeze(2);
  auto gt_mask = torch::zeros({batch, channel, width}, mask_options);
  auto max_temp = torch::zeros({batch, channel, width}, value_options);
  for (int64_t ind = 0; ind < height - 1; ++ind) {
    input_temp = input.select(2, ind + 1);
    // Strict comparison: on ties the earlier index is kept.
    at::gt_out(gt_mask, input_temp, max_val);

    at::masked_select_out(max_temp, input_temp, gt_mask);
    max_val.masked_scatter_(gt_mask, max_temp);
    max_ind.masked_fill_(gt_mask, ind + 1);

    grad_output_temp = grad_output.select(2, ind + 1).unsqueeze(2);
    output.scatter_add_(2, un_max_ind, grad_output_temp);
  }

  return output;
}

Tensor left_pool_forward(Tensor input) {
  // Work on a same-shaped buffer that starts as a copy of the input.
  Tensor result = at::zeros_like(input);
  const int64_t cols = input.size(3);
  result.copy_(input);

  // Doubling scan along dim 3: each pass writes into columns
  // [0, cols - shift) the element-wise max of those columns and columns
  // [shift, cols). Both operands are cloned because the destination is a view
  // into `result`.
  int64_t shift = 1;
  while (shift < cols) {
    Tensor dest = at::slice(result, 3, 0, cols - shift);
    Tensor current = dest.clone();
    Tensor shifted = at::slice(result, 3, shift, cols).clone();
    at::max_out(dest, current, shifted);
    shift *= 2;
  }

  return result;
}

// Backward pass of left pooling.
//
// Scans dim 3 of `input` from the last index down to 0 while tracking, per
// (batch, channel, row), the running maximum and the column index where it
// occurred. Each column of `grad_output` is accumulated into the `output`
// column that currently holds that maximum. Returns a tensor shaped like
// `input`.
//
// Scratch tensors are created from input.options() rather than a hard-coded
// CUDA/float configuration, so the routine also works for host tensors (this
// function is registered for HostContext as well) and for non-float dtypes.
Tensor left_pool_backward(Tensor input, Tensor grad_output) {
  auto output = at::zeros_like(input);

  // Tensor::size returns int64_t; keep that width to avoid narrowing.
  const int64_t batch = input.size(0);
  const int64_t channel = input.size(1);
  const int64_t height = input.size(2);
  const int64_t width = input.size(3);

  const auto value_options = input.options();
  const auto index_options = value_options.dtype(at::kLong);
  const auto mask_options = value_options.dtype(at::kBool);

  auto max_val = torch::zeros({batch, channel, height}, value_options);
  auto max_ind = torch::zeros({batch, channel, height}, index_options);

  // Seed the running maximum with the last column.
  auto input_temp = input.select(3, width - 1);
  max_val.copy_(input_temp);

  max_ind.fill_(width - 1);

  // The last gradient column always flows to the last column.
  auto output_temp = output.select(3, width - 1);
  auto grad_output_temp = grad_output.select(3, width - 1);
  output_temp.copy_(grad_output_temp);

  // View of max_ind: in-place updates to max_ind below are visible through it.
  auto un_max_ind = max_ind.unsqueeze(3);
  auto gt_mask = torch::zeros({batch, channel, height}, mask_options);
  auto max_temp = torch::zeros({batch, channel, height}, value_options);
  for (int64_t ind = 1; ind < width; ++ind) {
    const int64_t col = width - ind - 1;
    input_temp = input.select(3, col);
    // Strict comparison: on ties the previously recorded index is kept.
    at::gt_out(gt_mask, input_temp, max_val);

    at::masked_select_out(max_temp, input_temp, gt_mask);
    max_val.masked_scatter_(gt_mask, max_temp);
    max_ind.masked_fill_(gt_mask, col);

    grad_output_temp = grad_output.select(3, col).unsqueeze(3);
    output.scatter_add_(3, un_max_ind, grad_output_temp);
  }

  return output;
}

Tensor right_pool_forward(Tensor input) {
  // Work on a same-shaped buffer that starts as a copy of the input.
  Tensor result = at::zeros_like(input);
  const int64_t cols = input.size(3);
  result.copy_(input);

  // Doubling scan along dim 3: each pass writes into columns [shift, cols)
  // the element-wise max of those columns and columns [0, cols - shift).
  // Both operands are cloned because the destination is a view into `result`.
  int64_t shift = 1;
  while (shift < cols) {
    Tensor dest = at::slice(result, 3, shift, cols);
    Tensor current = dest.clone();
    Tensor shifted = at::slice(result, 3, 0, cols - shift).clone();
    at::max_out(dest, current, shifted);
    shift *= 2;
  }

  return result;
}

// Backward pass of right pooling.
//
// Scans dim 3 of `input` from index 0 upwards while tracking, per
// (batch, channel, row), the running maximum and the column index where it
// occurred. Each column of `grad_output` is accumulated into the `output`
// column that currently holds that maximum. Returns a tensor shaped like
// `input`.
//
// Scratch tensors are created from input.options() rather than a hard-coded
// CUDA/float configuration, so the routine also works for host tensors (this
// function is registered for HostContext as well) and for non-float dtypes.
Tensor right_pool_backward(Tensor input, Tensor grad_output) {
  Tensor output = at::zeros_like(input);

  // Tensor::size returns int64_t; keep that width to avoid narrowing.
  const int64_t batch = input.size(0);
  const int64_t channel = input.size(1);
  const int64_t height = input.size(2);
  const int64_t width = input.size(3);

  const auto value_options = input.options();
  const auto index_options = value_options.dtype(at::kLong);
  const auto mask_options = value_options.dtype(at::kBool);

  auto max_val = torch::zeros({batch, channel, height}, value_options);
  auto max_ind = torch::zeros({batch, channel, height}, index_options);

  // Seed the running maximum with column 0; its arg-max index is 0.
  auto input_temp = input.select(3, 0);
  max_val.copy_(input_temp);

  max_ind.fill_(0);

  // Column 0 of the gradient always flows to column 0.
  auto output_temp = output.select(3, 0);
  auto grad_output_temp = grad_output.select(3, 0);
  output_temp.copy_(grad_output_temp);

  // View of max_ind: in-place updates to max_ind below are visible through it.
  auto un_max_ind = max_ind.unsqueeze(3);
  auto gt_mask = torch::zeros({batch, channel, height}, mask_options);
  auto max_temp = torch::zeros({batch, channel, height}, value_options);
  for (int64_t ind = 0; ind < width - 1; ++ind) {
    input_temp = input.select(3, ind + 1);
    // Strict comparison: on ties the earlier index is kept.
    at::gt_out(gt_mask, input_temp, max_val);

    at::masked_select_out(max_temp, input_temp, gt_mask);
    max_val.masked_scatter_(gt_mask, max_temp);
    max_ind.masked_fill_(gt_mask, ind + 1);

    grad_output_temp = grad_output.select(3, ind + 1).unsqueeze(3);
    output.scatter_add_(3, un_max_ind, grad_output_temp);
  }

  return output;
}

Tensor top_pool_forward(Tensor input) {
  // Work on a same-shaped buffer that starts as a copy of the input.
  Tensor result = at::zeros_like(input);
  const int64_t rows = input.size(2);
  result.copy_(input);

  // Doubling scan along dim 2: each pass writes into rows [0, rows - shift)
  // the element-wise max of those rows and rows [shift, rows). Both operands
  // are cloned because the destination is a view into `result`.
  int64_t shift = 1;
  while (shift < rows) {
    Tensor dest = at::slice(result, 2, 0, rows - shift);
    Tensor current = dest.clone();
    Tensor shifted = at::slice(result, 2, shift, rows).clone();
    at::max_out(dest, current, shifted);
    shift *= 2;
  }

  return result;
}

// Backward pass of top pooling.
//
// Scans dim 2 of `input` from the last index down to 0 while tracking, per
// (batch, channel, column), the running maximum and the row index where it
// occurred. Each row of `grad_output` is accumulated into the `output` row
// that currently holds that maximum. Returns a tensor shaped like `input`.
//
// Scratch tensors are created from input.options() rather than a hard-coded
// CUDA/float configuration, so the routine also works for host tensors (this
// function is registered for HostContext as well) and for non-float dtypes.
Tensor top_pool_backward(Tensor input, Tensor grad_output) {
  auto output = at::zeros_like(input);

  // Tensor::size returns int64_t; keep that width to avoid narrowing.
  const int64_t batch = input.size(0);
  const int64_t channel = input.size(1);
  const int64_t height = input.size(2);
  const int64_t width = input.size(3);

  const auto value_options = input.options();
  const auto index_options = value_options.dtype(at::kLong);
  const auto mask_options = value_options.dtype(at::kBool);

  auto max_val = torch::zeros({batch, channel, width}, value_options);
  auto max_ind = torch::zeros({batch, channel, width}, index_options);

  // Seed the running maximum with the last row.
  auto input_temp = input.select(2, height - 1);
  max_val.copy_(input_temp);

  max_ind.fill_(height - 1);

  // The last gradient row always flows to the last row.
  auto output_temp = output.select(2, height - 1);
  auto grad_output_temp = grad_output.select(2, height - 1);
  output_temp.copy_(grad_output_temp);

  // View of max_ind: in-place updates to max_ind below are visible through it.
  auto un_max_ind = max_ind.unsqueeze(2);
  auto gt_mask = torch::zeros({batch, channel, width}, mask_options);
  auto max_temp = torch::zeros({batch, channel, width}, value_options);
  for (int64_t ind = 1; ind < height; ++ind) {
    const int64_t row = height - ind - 1;
    input_temp = input.select(2, row);
    // Strict comparison: on ties the previously recorded index is kept.
    at::gt_out(gt_mask, input_temp, max_val);

    at::masked_select_out(max_temp, input_temp, gt_mask);
    max_val.masked_scatter_(gt_mask, max_temp);
    max_ind.masked_fill_(gt_mask, row);

    grad_output_temp = grad_output.select(2, row).unsqueeze(2);
    output.scatter_add_(2, un_max_ind, grad_output_temp);
  }

  return output;
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/corner_pool_parrots.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include <parrots/compute/aten.hpp>
#include <parrots/extension.hpp>
#include <parrots/foundation/ssattrs.hpp>

#include "corner_pool_pytorch.h"

using namespace parrots;

#ifdef MMCV_WITH_CUDA
// CudaContext entry point: wraps ins[0], runs bottom_pool_forward and passes
// the returned tensor to updateDArray for outs[0]. `attr` is not read.
void bottom_pool_forward_parrots(CudaContext& ctx, const SSElement& attr,
                                 const OperatorBase::in_list_t& ins,
                                 OperatorBase::out_list_t& outs) {
  at::Tensor src = buildATensor(ctx, ins[0]);
  at::Tensor pooled = bottom_pool_forward(src);
  updateDArray(ctx, pooled, outs[0]);
}

// CudaContext entry point: wraps ins[0] (input) and ins[1] (grad_output),
// runs bottom_pool_backward and passes the returned tensor to updateDArray
// for outs[0]. `attr` is not read.
void bottom_pool_backward_parrots(CudaContext& ctx, const SSElement& attr,
                                  const OperatorBase::in_list_t& ins,
                                  OperatorBase::out_list_t& outs) {
  at::Tensor src = buildATensor(ctx, ins[0]);
  at::Tensor upstream_grad = buildATensor(ctx, ins[1]);
  at::Tensor grad_in = bottom_pool_backward(src, upstream_grad);
  updateDArray(ctx, grad_in, outs[0]);
}

// CudaContext entry point: wraps ins[0], runs left_pool_forward and passes
// the returned tensor to updateDArray for outs[0]. `attr` is not read.
void left_pool_forward_parrots(CudaContext& ctx, const SSElement& attr,
                               const OperatorBase::in_list_t& ins,
                               OperatorBase::out_list_t& outs) {
  at::Tensor src = buildATensor(ctx, ins[0]);
  at::Tensor pooled = left_pool_forward(src);
  updateDArray(ctx, pooled, outs[0]);
}

// CudaContext entry point: wraps ins[0] (input) and ins[1] (grad_output),
// runs left_pool_backward and passes the returned tensor to updateDArray for
// outs[0]. `attr` is not read.
void left_pool_backward_parrots(CudaContext& ctx, const SSElement& attr,
                                const OperatorBase::in_list_t& ins,
                                OperatorBase::out_list_t& outs) {
  at::Tensor src = buildATensor(ctx, ins[0]);
  at::Tensor upstream_grad = buildATensor(ctx, ins[1]);
  at::Tensor grad_in = left_pool_backward(src, upstream_grad);
  updateDArray(ctx, grad_in, outs[0]);
}

// CudaContext entry point: wraps ins[0], runs right_pool_forward and passes
// the returned tensor to updateDArray for outs[0]. `attr` is not read.
void right_pool_forward_parrots(CudaContext& ctx, const SSElement& attr,
                                const OperatorBase::in_list_t& ins,
                                OperatorBase::out_list_t& outs) {
  at::Tensor src = buildATensor(ctx, ins[0]);
  at::Tensor pooled = right_pool_forward(src);
  updateDArray(ctx, pooled, outs[0]);
}

// CudaContext entry point: wraps ins[0] (input) and ins[1] (grad_output),
// runs right_pool_backward and passes the returned tensor to updateDArray
// for outs[0]. `attr` is not read.
void right_pool_backward_parrots(CudaContext& ctx, const SSElement& attr,
                                 const OperatorBase::in_list_t& ins,
                                 OperatorBase::out_list_t& outs) {
  at::Tensor src = buildATensor(ctx, ins[0]);
  at::Tensor upstream_grad = buildATensor(ctx, ins[1]);
  at::Tensor grad_in = right_pool_backward(src, upstream_grad);
  updateDArray(ctx, grad_in, outs[0]);
}

// CudaContext entry point: wraps ins[0], runs top_pool_forward and passes the
// returned tensor to updateDArray for outs[0]. `attr` is not read.
void top_pool_forward_parrots(CudaContext& ctx, const SSElement& attr,
                              const OperatorBase::in_list_t& ins,
                              OperatorBase::out_list_t& outs) {
  at::Tensor src = buildATensor(ctx, ins[0]);
  at::Tensor pooled = top_pool_forward(src);
  updateDArray(ctx, pooled, outs[0]);
}

// CudaContext entry point: wraps ins[0] (input) and ins[1] (grad_output),
// runs top_pool_backward and passes the returned tensor to updateDArray for
// outs[0]. `attr` is not read.
void top_pool_backward_parrots(CudaContext& ctx, const SSElement& attr,
                               const OperatorBase::in_list_t& ins,
                               OperatorBase::out_list_t& outs) {
  at::Tensor src = buildATensor(ctx, ins[0]);
  at::Tensor upstream_grad = buildATensor(ctx, ins[1]);
  at::Tensor grad_in = top_pool_backward(src, upstream_grad);
  updateDArray(ctx, grad_in, outs[0]);
}
#endif

// HostContext entry point: wraps ins[0], runs bottom_pool_forward and passes
// the returned tensor to updateDArray for outs[0]. `attr` is not read.
void bottom_pool_forward_parrots_cpu(HostContext& ctx, const SSElement& attr,
                                     const OperatorBase::in_list_t& ins,
                                     OperatorBase::out_list_t& outs) {
  at::Tensor src = buildATensor(ctx, ins[0]);
  at::Tensor pooled = bottom_pool_forward(src);
  updateDArray(ctx, pooled, outs[0]);
}

// HostContext entry point: wraps ins[0] (input) and ins[1] (grad_output),
// runs bottom_pool_backward and passes the returned tensor to updateDArray
// for outs[0]. `attr` is not read.
void bottom_pool_backward_parrots_cpu(HostContext& ctx, const SSElement& attr,
                                      const OperatorBase::in_list_t& ins,
                                      OperatorBase::out_list_t& outs) {
  at::Tensor src = buildATensor(ctx, ins[0]);
  at::Tensor upstream_grad = buildATensor(ctx, ins[1]);
  at::Tensor grad_in = bottom_pool_backward(src, upstream_grad);
  updateDArray(ctx, grad_in, outs[0]);
}

// HostContext entry point: wraps ins[0], runs left_pool_forward and passes
// the returned tensor to updateDArray for outs[0]. `attr` is not read.
void left_pool_forward_parrots_cpu(HostContext& ctx, const SSElement& attr,
                                   const OperatorBase::in_list_t& ins,
                                   OperatorBase::out_list_t& outs) {
  at::Tensor src = buildATensor(ctx, ins[0]);
  at::Tensor pooled = left_pool_forward(src);
  updateDArray(ctx, pooled, outs[0]);
}

// HostContext entry point: wraps ins[0] (input) and ins[1] (grad_output),
// runs left_pool_backward and passes the returned tensor to updateDArray for
// outs[0]. `attr` is not read.
void left_pool_backward_parrots_cpu(HostContext& ctx, const SSElement& attr,
                                    const OperatorBase::in_list_t& ins,
                                    OperatorBase::out_list_t& outs) {
  at::Tensor src = buildATensor(ctx, ins[0]);
  at::Tensor upstream_grad = buildATensor(ctx, ins[1]);
  at::Tensor grad_in = left_pool_backward(src, upstream_grad);
  updateDArray(ctx, grad_in, outs[0]);
}

// HostContext entry point: wraps ins[0], runs right_pool_forward and passes
// the returned tensor to updateDArray for outs[0]. `attr` is not read.
void right_pool_forward_parrots_cpu(HostContext& ctx, const SSElement& attr,
                                    const OperatorBase::in_list_t& ins,
                                    OperatorBase::out_list_t& outs) {
  at::Tensor src = buildATensor(ctx, ins[0]);
  at::Tensor pooled = right_pool_forward(src);
  updateDArray(ctx, pooled, outs[0]);
}

// HostContext entry point: wraps ins[0] (input) and ins[1] (grad_output),
// runs right_pool_backward and passes the returned tensor to updateDArray
// for outs[0]. `attr` is not read.
void right_pool_backward_parrots_cpu(HostContext& ctx, const SSElement& attr,
                                     const OperatorBase::in_list_t& ins,
                                     OperatorBase::out_list_t& outs) {
  at::Tensor src = buildATensor(ctx, ins[0]);
  at::Tensor upstream_grad = buildATensor(ctx, ins[1]);
  at::Tensor grad_in = right_pool_backward(src, upstream_grad);
  updateDArray(ctx, grad_in, outs[0]);
}

// HostContext entry point: wraps ins[0], runs top_pool_forward and passes the
// returned tensor to updateDArray for outs[0]. `attr` is not read.
void top_pool_forward_parrots_cpu(HostContext& ctx, const SSElement& attr,
                                  const OperatorBase::in_list_t& ins,
                                  OperatorBase::out_list_t& outs) {
  at::Tensor src = buildATensor(ctx, ins[0]);
  at::Tensor pooled = top_pool_forward(src);
  updateDArray(ctx, pooled, outs[0]);
}

// HostContext entry point: wraps ins[0] (input) and ins[1] (grad_output),
// runs top_pool_backward and passes the returned tensor to updateDArray for
// outs[0]. `attr` is not read.
void top_pool_backward_parrots_cpu(HostContext& ctx, const SSElement& attr,
                                   const OperatorBase::in_list_t& ins,
                                   OperatorBase::out_list_t& outs) {
  at::Tensor src = buildATensor(ctx, ins[0]);
  at::Tensor upstream_grad = buildATensor(ctx, ins[1]);
  at::Tensor grad_in = top_pool_backward(src, upstream_grad);
  updateDArray(ctx, grad_in, outs[0]);
}

// Extension registrations for the four corner-pool directions.
// Forward ops take 1 input, backward ops take 2 (input, grad_output); every
// op has 1 output and no attributes. The CudaContext handler is attached only
// when MMCV_WITH_CUDA is defined; the HostContext handler is always attached.

// bottom pool
PARROTS_EXTENSION_REGISTER(bottom_pool_forward)
    .input(1)
    .output(1)
#ifdef MMCV_WITH_CUDA
    .apply(bottom_pool_forward_parrots)
#endif
    .apply(bottom_pool_forward_parrots_cpu)
    .done();

PARROTS_EXTENSION_REGISTER(bottom_pool_backward)
    .input(2)
    .output(1)
#ifdef MMCV_WITH_CUDA
    .apply(bottom_pool_backward_parrots)
#endif
    .apply(bottom_pool_backward_parrots_cpu)
    .done();

// top pool
PARROTS_EXTENSION_REGISTER(top_pool_forward)
    .input(1)
    .output(1)
#ifdef MMCV_WITH_CUDA
    .apply(top_pool_forward_parrots)
#endif
    .apply(top_pool_forward_parrots_cpu)
    .done();

PARROTS_EXTENSION_REGISTER(top_pool_backward)
    .input(2)
    .output(1)
#ifdef MMCV_WITH_CUDA
    .apply(top_pool_backward_parrots)
#endif
    .apply(top_pool_backward_parrots_cpu)
    .done();

// left pool
PARROTS_EXTENSION_REGISTER(left_pool_forward)
    .input(1)
    .output(1)
#ifdef MMCV_WITH_CUDA
    .apply(left_pool_forward_parrots)
#endif
    .apply(left_pool_forward_parrots_cpu)
    .done();

PARROTS_EXTENSION_REGISTER(left_pool_backward)
    .input(2)
    .output(1)
#ifdef MMCV_WITH_CUDA
    .apply(left_pool_backward_parrots)
#endif
    .apply(left_pool_backward_parrots_cpu)
    .done();

// right pool
PARROTS_EXTENSION_REGISTER(right_pool_forward)
    .input(1)
    .output(1)
#ifdef MMCV_WITH_CUDA
    .apply(right_pool_forward_parrots)
#endif
    .apply(right_pool_forward_parrots_cpu)
    .done();

PARROTS_EXTENSION_REGISTER(right_pool_backward)
    .input(2)
    .output(1)
#ifdef MMCV_WITH_CUDA
    .apply(right_pool_backward_parrots)
#endif
    .apply(right_pool_backward_parrots_cpu)
    .done();


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/corner_pool_pytorch.h
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef CORNER_POOL_PYTORCH_H
#define CORNER_POOL_PYTORCH_H
#include <torch/extension.h>

// Corner-pool operators, one forward/backward pair per direction.
// Each forward takes the input tensor and returns the pooled tensor; each
// backward takes the forward input plus grad_output and returns a tensor.
at::Tensor bottom_pool_forward(at::Tensor input);
at::Tensor bottom_pool_backward(at::Tensor input, at::Tensor grad_output);
at::Tensor left_pool_forward(at::Tensor input);
at::Tensor left_pool_backward(at::Tensor input, at::Tensor grad_output);
at::Tensor right_pool_forward(at::Tensor input);
at::Tensor right_pool_backward(at::Tensor input, at::Tensor grad_output);
at::Tensor top_pool_forward(at::Tensor input);
at::Tensor top_pool_backward(at::Tensor input, at::Tensor grad_output);

#endif  // CORNER_POOL_PYTORCH_H


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/correlation.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved.
#include <iostream>

#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Forward correlation dispatch point. Passes every argument unchanged to
// DISPATCH_DEVICE_IMPL under the key `correlation_forward_impl`.
// NOTE(review): the macro comes from pytorch_device_registry.hpp and
// presumably selects the backend from the tensors' device — confirm there.
void correlation_forward_impl(Tensor input1, Tensor input2, Tensor output,
                              int kH, int kW, int patchH, int patchW, int padH,
                              int padW, int dilationH, int dilationW,
                              int dilation_patchH, int dilation_patchW, int dH,
                              int dW) {
  DISPATCH_DEVICE_IMPL(correlation_forward_impl, input1, input2, output, kH, kW,
                       patchH, patchW, padH, padW, dilationH, dilationW,
                       dilation_patchH, dilation_patchW, dH, dW);
}

// Backward correlation dispatch point. Passes every argument unchanged to
// DISPATCH_DEVICE_IMPL under the key `correlation_backward_impl`.
// NOTE(review): the macro comes from pytorch_device_registry.hpp and
// presumably selects the backend from the tensors' device — confirm there.
void correlation_backward_impl(Tensor grad_output, Tensor input1, Tensor input2,
                               Tensor grad_input1, Tensor grad_input2, int kH,
                               int kW, int patchH, int patchW, int padH,
                               int padW, int dilationH, int dilationW,
                               int dilation_patchH, int dilation_patchW, int dH,
                               int dW) {
  DISPATCH_DEVICE_IMPL(correlation_backward_impl, grad_output, input1, input2,
                       grad_input1, grad_input2, kH, kW, patchH, patchW, padH,
                       padW, dilationH, dilationW, dilation_patchH,
                       dilation_patchW, dH, dW);
}

// Public forward entry point: a plain pass-through to
// correlation_forward_impl with the arguments in the same order.
void correlation_forward(Tensor input1, Tensor input2, Tensor output, int kH,
                         int kW, int patchH, int patchW, int padH, int padW,
                         int dilationH, int dilationW, int dilation_patchH,
                         int dilation_patchW, int dH, int dW) {
  correlation_forward_impl(input1, input2, output,
                           kH, kW,
                           patchH, patchW,
                           padH, padW,
                           dilationH, dilationW,
                           dilation_patchH, dilation_patchW,
                           dH, dW);
}

// Public backward entry point: a plain pass-through to
// correlation_backward_impl with the arguments in the same order.
void correlation_backward(Tensor grad_output, Tensor input1, Tensor input2,
                          Tensor grad_input1, Tensor grad_input2, int kH,
                          int kW, int patchH, int patchW, int padH, int padW,
                          int dilationH, int dilationW, int dilation_patchH,
                          int dilation_patchW, int dH, int dW) {
  correlation_backward_impl(grad_output,
                            input1, input2,
                            grad_input1, grad_input2,
                            kH, kW,
                            patchH, patchW,
                            padH, padW,
                            dilationH, dilationW,
                            dilation_patchH, dilation_patchW,
                            dH, dW);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/correlation_parrots.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include <parrots/compute/aten.hpp>
#include <parrots/extension.hpp>
#include <parrots/foundation/ssattrs.hpp>

#include "correlation_pytorch.h"

using namespace parrots;

#ifdef MMCV_WITH_CUDA
// CudaContext entry point for correlation_forward: reads the twelve integer
// attributes by name, wraps two inputs and one output, and calls
// correlation_forward.
void correlation_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr,
                                      const OperatorBase::in_list_t& ins,
                                      OperatorBase::out_list_t& outs) {
  // Filled in by the SSAttrs lookups below.
  int kH, kW;
  int patchH, patchW;
  int padH, padW;
  int dilationH, dilationW;
  int dilation_patchH, dilation_patchW;
  int dH, dW;

  SSAttrs(attr)
      .get<int>("kH", kH)
      .get<int>("kW", kW)
      .get<int>("patchH", patchH)
      .get<int>("patchW", patchW)
      .get<int>("padH", padH)
      .get<int>("padW", padW)
      .get<int>("dilationH", dilationH)
      .get<int>("dilationW", dilationW)
      .get<int>("dilation_patchH", dilation_patchH)
      .get<int>("dilation_patchW", dilation_patchW)
      .get<int>("dH", dH)
      .get<int>("dW", dW)
      .done();

  auto first = buildATensor(ctx, ins[0]);
  auto second = buildATensor(ctx, ins[1]);
  auto result = buildATensor(ctx, outs[0]);

  correlation_forward(first, second, result,
                      kH, kW, patchH, patchW, padH, padW,
                      dilationH, dilationW,
                      dilation_patchH, dilation_patchW,
                      dH, dW);
}

// CudaContext entry point for correlation_backward: reads the twelve integer
// attributes by name, wraps three inputs and two outputs, and calls
// correlation_backward.
void correlation_backward_cuda_parrots(CudaContext& ctx, const SSElement& attr,
                                       const OperatorBase::in_list_t& ins,
                                       OperatorBase::out_list_t& outs) {
  // Filled in by the SSAttrs lookups below.
  int kH, kW;
  int patchH, patchW;
  int padH, padW;
  int dilationH, dilationW;
  int dilation_patchH, dilation_patchW;
  int dH, dW;

  SSAttrs(attr)
      .get<int>("kH", kH)
      .get<int>("kW", kW)
      .get<int>("patchH", patchH)
      .get<int>("patchW", patchW)
      .get<int>("padH", padH)
      .get<int>("padW", padW)
      .get<int>("dilationH", dilationH)
      .get<int>("dilationW", dilationW)
      .get<int>("dilation_patchH", dilation_patchH)
      .get<int>("dilation_patchW", dilation_patchW)
      .get<int>("dH", dH)
      .get<int>("dW", dW)
      .done();

  // Inputs arrive as (grad_output, input1, input2).
  auto upstream_grad = buildATensor(ctx, ins[0]);
  auto first = buildATensor(ctx, ins[1]);
  auto second = buildATensor(ctx, ins[2]);

  // Outputs are (grad_input1, grad_input2).
  auto first_grad = buildATensor(ctx, outs[0]);
  auto second_grad = buildATensor(ctx, outs[1]);

  correlation_backward(upstream_grad, first, second, first_grad, second_grad,
                       kH, kW, patchH, patchW, padH, padW,
                       dilationH, dilationW,
                       dilation_patchH, dilation_patchW,
                       dH, dW);
}
#endif

// HostContext entry point for correlation_forward: reads the twelve integer
// attributes by name, wraps two inputs and one output, and calls
// correlation_forward.
void correlation_forward_cpu_parrots(HostContext& ctx, const SSElement& attr,
                                     const OperatorBase::in_list_t& ins,
                                     OperatorBase::out_list_t& outs) {
  // Filled in by the SSAttrs lookups below.
  int kH, kW;
  int patchH, patchW;
  int padH, padW;
  int dilationH, dilationW;
  int dilation_patchH, dilation_patchW;
  int dH, dW;

  SSAttrs(attr)
      .get<int>("kH", kH)
      .get<int>("kW", kW)
      .get<int>("patchH", patchH)
      .get<int>("patchW", patchW)
      .get<int>("padH", padH)
      .get<int>("padW", padW)
      .get<int>("dilationH", dilationH)
      .get<int>("dilationW", dilationW)
      .get<int>("dilation_patchH", dilation_patchH)
      .get<int>("dilation_patchW", dilation_patchW)
      .get<int>("dH", dH)
      .get<int>("dW", dW)
      .done();

  auto first = buildATensor(ctx, ins[0]);
  auto second = buildATensor(ctx, ins[1]);
  auto result = buildATensor(ctx, outs[0]);

  correlation_forward(first, second, result,
                      kH, kW, patchH, patchW, padH, padW,
                      dilationH, dilationW,
                      dilation_patchH, dilation_patchW,
                      dH, dW);
}

// HostContext entry point for correlation_backward: reads the twelve integer
// attributes by name, wraps three inputs and two outputs, and calls
// correlation_backward.
void correlation_backward_cpu_parrots(HostContext& ctx, const SSElement& attr,
                                      const OperatorBase::in_list_t& ins,
                                      OperatorBase::out_list_t& outs) {
  // Filled in by the SSAttrs lookups below.
  int kH, kW;
  int patchH, patchW;
  int padH, padW;
  int dilationH, dilationW;
  int dilation_patchH, dilation_patchW;
  int dH, dW;

  SSAttrs(attr)
      .get<int>("kH", kH)
      .get<int>("kW", kW)
      .get<int>("patchH", patchH)
      .get<int>("patchW", patchW)
      .get<int>("padH", padH)
      .get<int>("padW", padW)
      .get<int>("dilationH", dilationH)
      .get<int>("dilationW", dilationW)
      .get<int>("dilation_patchH", dilation_patchH)
      .get<int>("dilation_patchW", dilation_patchW)
      .get<int>("dH", dH)
      .get<int>("dW", dW)
      .done();

  // Inputs arrive as (grad_output, input1, input2).
  auto upstream_grad = buildATensor(ctx, ins[0]);
  auto first = buildATensor(ctx, ins[1]);
  auto second = buildATensor(ctx, ins[2]);

  // Outputs are (grad_input1, grad_input2).
  auto first_grad = buildATensor(ctx, outs[0]);
  auto second_grad = buildATensor(ctx, outs[1]);

  correlation_backward(upstream_grad, first, second, first_grad, second_grad,
                       kH, kW, patchH, patchW, padH, padW,
                       dilationH, dilationW,
                       dilation_patchH, dilation_patchW,
                       dH, dW);
}

// Register `correlation_forward`: twelve named attributes (the same names the
// wrappers read through SSAttrs), 2 inputs, 1 output. The HostContext handler
// is always attached; the CudaContext handler only with MMCV_WITH_CUDA.
PARROTS_EXTENSION_REGISTER(correlation_forward)
    .attr("kH")
    .attr("kW")
    .attr("patchH")
    .attr("patchW")
    .attr("padH")
    .attr("padW")
    .attr("dilationH")
    .attr("dilationW")
    .attr("dilation_patchH")
    .attr("dilation_patchW")
    .attr("dH")
    .attr("dW")
    .input(2)
    .output(1)
    .apply(correlation_forward_cpu_parrots)
#ifdef MMCV_WITH_CUDA
    .apply(correlation_forward_cuda_parrots)
#endif
    .done();

// Register `correlation_backward`: same attribute list, 3 inputs
// (grad_output, input1, input2) and 2 outputs (grad_input1, grad_input2).
PARROTS_EXTENSION_REGISTER(correlation_backward)
    .attr("kH")
    .attr("kW")
    .attr("patchH")
    .attr("patchW")
    .attr("padH")
    .attr("padW")
    .attr("dilationH")
    .attr("dilationW")
    .attr("dilation_patchH")
    .attr("dilation_patchW")
    .attr("dH")
    .attr("dW")
    .input(3)
    .output(2)
    .apply(correlation_backward_cpu_parrots)
#ifdef MMCV_WITH_CUDA
    .apply(correlation_backward_cuda_parrots)
#endif
    .done();


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/correlation_pytorch.h
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef CORRELATION_PYTORCH_H
#define CORRELATION_PYTORCH_H
#include <torch/extension.h>
using namespace at;

// Forward correlation entry point used by the parrots wrappers: two input
// tensors, one output tensor, followed by twelve integer parameters.
void correlation_forward(Tensor input1, Tensor input2, Tensor output, int kH,
                         int kW, int patchH, int patchW, int padH, int padW,
                         int dilationH, int dilationW, int dilation_patchH,
                         int dilation_patchW, int dH, int dW);

// Backward correlation entry point: grad_output and the two forward inputs,
// two gradient tensors to be filled, then the same twelve integer parameters.
void correlation_backward(Tensor grad_output, Tensor input1, Tensor input2,
                          Tensor grad_input1, Tensor grad_input2, int kH,
                          int kW, int patchH, int patchW, int padH, int padW,
                          int dilationH, int dilationW, int dilation_patchH,
                          int dilation_patchW, int dH, int dW);

#endif  // CORRELATION_PYTORCH_H


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/cudabind.cpp
================================================
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Launcher prototypes for assign_score_withk. Declarations only; the
// definitions are expected in a separate translation unit (presumably the
// CUDA sources — confirm in the build).
void AssignScoreWithKForwardCUDAKernelLauncher(
    int B, int N0, int N1, int M, int K, int O, int aggregate,
    const Tensor& points, const Tensor& centers, const Tensor& scores,
    const Tensor& knn_idx, Tensor& output);

void AssignScoreWithKBackwardCUDAKernelLauncher(
    int B, int N0, int N1, int M, int K, int O, int aggregate,
    const Tensor& grad_out, const Tensor& points, const Tensor& centers,
    const Tensor& scores, const Tensor& knn_idx, Tensor& grad_points,
    Tensor& grad_centers, Tensor& grad_scores);

// CUDA backend of assign_score_withk (forward): passes every argument, in the
// same order, to AssignScoreWithKForwardCUDAKernelLauncher.
void assign_score_withk_forward_cuda(int B, int N0, int N1, int M, int K, int O,
                                     int aggregate, const Tensor& points,
                                     const Tensor& centers,
                                     const Tensor& scores,
                                     const Tensor& knn_idx, Tensor& output) {
  AssignScoreWithKForwardCUDAKernelLauncher(B, N0, N1, M, K, O, aggregate,
                                            points, centers, scores, knn_idx,
                                            output);
}

// CUDA backend of assign_score_withk (backward): passes every argument, in
// the same order, to AssignScoreWithKBackwardCUDAKernelLauncher.
void assign_score_withk_backward_cuda(
    int B, int N0, int N1, int M, int K, int O, int aggregate,
    const Tensor& grad_out, const Tensor& points, const Tensor& centers,
    const Tensor& scores, const Tensor& knn_idx, Tensor& grad_points,
    Tensor& grad_centers, Tensor& grad_scores) {
  AssignScoreWithKBackwardCUDAKernelLauncher(B, N0, N1, M, K, O, aggregate,
                                             grad_out,
                                             points, centers, scores, knn_idx,
                                             grad_points, grad_centers,
                                             grad_scores);
}

// Prototypes of the device-generic entry points; they serve as the registry
// keys in the REGISTER_DEVICE_IMPL calls below.
void assign_score_withk_forward_impl(int B, int N0, int N1, int M, int K, int O,
                                     int aggregate, const Tensor& points,
                                     const Tensor& centers,
                                     const Tensor& scores,
                                     const Tensor& knn_idx, Tensor& output);

void assign_score_withk_backward_impl(
    int B, int N0, int N1, int M, int K, int O, int aggregate,
    const Tensor& grad_out, const Tensor& points, const Tensor& centers,
    const Tensor& scores, const Tensor& knn_idx, Tensor& grad_points,
    Tensor& grad_centers, Tensor& grad_scores);

// Associate the *_cuda wrappers with the CUDA device for each key.
REGISTER_DEVICE_IMPL(assign_score_withk_forward_impl, CUDA,
                     assign_score_withk_forward_cuda);
REGISTER_DEVICE_IMPL(assign_score_withk_backward_impl, CUDA,
                     assign_score_withk_backward_cuda);

// Launcher prototype for ball_query. Declaration only; the definition is
// expected in a separate translation unit.
void BallQueryForwardCUDAKernelLauncher(int b, int n, int m, float min_radius,
                                        float max_radius, int nsample,
                                        const Tensor new_xyz, const Tensor xyz,
                                        Tensor idx);

// CUDA backend of ball_query (forward): passes every argument, in the same
// order, to BallQueryForwardCUDAKernelLauncher.
void ball_query_forward_cuda(int b, int n, int m, float min_radius,
                             float max_radius, int nsample,
                             const Tensor new_xyz, const Tensor xyz,
                             Tensor idx) {
  BallQueryForwardCUDAKernelLauncher(b, n, m,
                                     min_radius, max_radius, nsample,
                                     new_xyz, xyz, idx);
}

// Prototype of the device-generic entry point, used as the registry key.
void ball_query_forward_impl(int b, int n, int m, float min_radius,
                             float max_radius, int nsample,
                             const Tensor new_xyz, const Tensor xyz,
                             Tensor idx);
// Associate ball_query_forward_cuda with the CUDA device for that key.
REGISTER_DEVICE_IMPL(ball_query_forward_impl, CUDA, ball_query_forward_cuda);

// Launcher prototype for bbox_overlaps. Declaration only; the definition is
// expected in a separate translation unit.
void BBoxOverlapsCUDAKernelLauncher(const Tensor bboxes1, const Tensor bboxes2,
                                    Tensor ious, const int mode,
                                    const bool aligned, const int offset);

// CUDA backend of bbox_overlaps: passes every argument, in the same order, to
// BBoxOverlapsCUDAKernelLauncher.
void bbox_overlaps_cuda(const Tensor bboxes1, const Tensor bboxes2, Tensor ious,
                        const int mode, const bool aligned, const int offset) {
  BBoxOverlapsCUDAKernelLauncher(bboxes1, bboxes2, ious,
                                 mode, aligned, offset);
}

// Prototype of the device-generic entry point, used as the registry key.
void bbox_overlaps_impl(const Tensor bboxes1, const Tensor bboxes2, Tensor ious,
                        const int mode, const bool aligned, const int offset);
// Associate bbox_overlaps_cuda with the CUDA device for that key.
REGISTER_DEVICE_IMPL(bbox_overlaps_impl, CUDA, bbox_overlaps_cuda);

// Launcher prototypes for border_align. Declarations only; the definitions
// are expected in a separate translation unit.
void BorderAlignForwardCUDAKernelLauncher(const Tensor& input,
                                          const Tensor& boxes, Tensor output,
                                          Tensor argmax_idx,
                                          const int pool_size);

void BorderAlignBackwardCUDAKernelLauncher(const Tensor& grad_output,
                                           const Tensor& boxes,
                                           const Tensor& argmax_idx,
                                           Tensor grad_input,
                                           const int pool_size);

// CUDA backend of border_align (forward): passes every argument, in the same
// order, to BorderAlignForwardCUDAKernelLauncher.
void border_align_forward_cuda(const Tensor& input, const Tensor& boxes,
                               Tensor output, Tensor argmax_idx,
                               const int pool_size) {
  BorderAlignForwardCUDAKernelLauncher(input, boxes,
                                       output, argmax_idx,
                                       pool_size);
}

// CUDA-side entry for the border-align backward pass: passes all arguments
// straight through to BorderAlignBackwardCUDAKernelLauncher.
void border_align_backward_cuda(const Tensor& grad_output,
                                const Tensor& boxes,
                                const Tensor& argmax_idx,
                                Tensor grad_input,
                                const int pool_size) {
  BorderAlignBackwardCUDAKernelLauncher(grad_output, boxes,
                                        argmax_idx, grad_input,
                                        pool_size);
}

// Declaration of the border-align forward entry point named in the
// registration below (definition not in this part of the file).
void border_align_forward_impl(const Tensor& input, const Tensor& boxes,
                               Tensor output, Tensor argmax_idx,
                               const int pool_size);

// Declaration of the border-align backward entry point named in the
// registration below (definition not in this part of the file).
void border_align_backward_impl(const Tensor& grad_output, const Tensor& boxes,
                                const Tensor& argmax_idx, Tensor grad_input,
                                const int pool_size);

// Tie the CUDA wrappers to their *_impl entry points under the CUDA key.
// NOTE(review): REGISTER_DEVICE_IMPL is defined elsewhere; presumably a
// dispatch-registry hook -- confirm against the macro.
REGISTER_DEVICE_IMPL(border_align_forward_impl, CUDA,
                     border_align_forward_cuda);
REGISTER_DEVICE_IMPL(border_align_backward_impl, CUDA,
                     border_align_backward_cuda);

// Forward declaration only: unlike most ops in this part of the file, there is
// no wrapper body here; box_iou_rotated_cuda itself is defined elsewhere.
void box_iou_rotated_cuda(const Tensor boxes1, const Tensor boxes2, Tensor ious,
                          const int mode_flag, const bool aligned);

// Declaration of the entry point named in the registration below.
void box_iou_rotated_impl(const Tensor boxes1, const Tensor boxes2, Tensor ious,
                          const int mode_flag, const bool aligned);
// Ties box_iou_rotated_cuda to box_iou_rotated_impl under the CUDA key.
// NOTE(review): macro defined elsewhere; presumably a dispatch-registry hook.
REGISTER_DEVICE_IMPL(box_iou_rotated_impl, CUDA, box_iou_rotated_cuda);

// Forward declaration of the CARAFE forward launcher (defined outside this
// part of the file).
void CARAFEForwardCUDAKernelLauncher(const Tensor features, const Tensor masks,
                                     Tensor rfeatures, Tensor routput,
                                     Tensor rmasks, Tensor output,
                                     const int kernel_size,
                                     const int group_size,
                                     const int scale_factor);

// Forward declaration of the CARAFE backward launcher (defined outside this
// part of the file).
void CARAFEBackwardCUDAKernelLauncher(
    const Tensor top_grad, const Tensor rfeatures, const Tensor masks,
    Tensor rtop_grad, Tensor rbottom_grad_hs, Tensor rbottom_grad,
    Tensor rmask_grad, Tensor bottom_grad, Tensor mask_grad,
    const int kernel_size, const int group_size, const int scale_factor);

// CUDA-side entry for the CARAFE forward pass: relays every argument, in
// order, to CARAFEForwardCUDAKernelLauncher.
void carafe_forward_cuda(Tensor features, Tensor masks, Tensor rfeatures,
                         Tensor routput, Tensor rmasks, Tensor output,
                         int kernel_size, int group_size, int scale_factor) {
  CARAFEForwardCUDAKernelLauncher(features, masks,
                                  rfeatures, routput, rmasks, output,
                                  kernel_size, group_size, scale_factor);
}

// CUDA-side entry for the CARAFE backward pass: relays every argument, in
// order, to CARAFEBackwardCUDAKernelLauncher.
void carafe_backward_cuda(Tensor top_grad, Tensor rfeatures, Tensor masks,
                          Tensor rtop_grad, Tensor rbottom_grad_hs,
                          Tensor rbottom_grad, Tensor rmask_grad,
                          Tensor bottom_grad, Tensor mask_grad,
                          int kernel_size, int group_size, int scale_factor) {
  CARAFEBackwardCUDAKernelLauncher(top_grad, rfeatures, masks,
                                   rtop_grad, rbottom_grad_hs, rbottom_grad,
                                   rmask_grad, bottom_grad, mask_grad,
                                   kernel_size, group_size, scale_factor);
}

// Declaration of the CARAFE forward entry point named in the registration
// below (definition not in this part of the file).
void carafe_forward_impl(Tensor features, Tensor masks, Tensor rfeatures,
                         Tensor routput, Tensor rmasks, Tensor output,
                         int kernel_size, int group_size, int scale_factor);

// Declaration of the CARAFE backward entry point named in the registration
// below (definition not in this part of the file).
void carafe_backward_impl(Tensor top_grad, Tensor rfeatures, Tensor masks,
                          Tensor rtop_grad, Tensor rbottom_grad_hs,
                          Tensor rbottom_grad, Tensor rmask_grad,
                          Tensor bottom_grad, Tensor mask_grad, int kernel_size,
                          int group_size, int scale_factor);

// Tie the CUDA wrappers to their *_impl entry points under the CUDA key.
// NOTE(review): macro defined elsewhere; presumably a dispatch-registry hook.
REGISTER_DEVICE_IMPL(carafe_forward_impl, CUDA, carafe_forward_cuda);
REGISTER_DEVICE_IMPL(carafe_backward_impl, CUDA, carafe_backward_cuda);

// Forward declaration of the naive-CARAFE forward launcher (defined outside
// this part of the file).
void CARAFENAIVEForwardCUDAKernelLauncher(const Tensor features,
                                          const Tensor masks, Tensor output,
                                          const int kernel_size,
                                          const int group_size,
                                          const int scale_factor);

// Forward declaration of the naive-CARAFE backward launcher (defined outside
// this part of the file).
void CARAFENAIVEBackwardCUDAKernelLauncher(
    const Tensor top_grad, const Tensor features, const Tensor masks,
    Tensor bottom_grad, Tensor mask_grad, const int kernel_size,
    const int group_size, const int scale_factor);

// CUDA-side entry for the naive-CARAFE forward pass: relays every argument,
// in order, to CARAFENAIVEForwardCUDAKernelLauncher.
void carafe_naive_forward_cuda(Tensor features, Tensor masks, Tensor output,
                               int kernel_size, int group_size,
                               int scale_factor) {
  CARAFENAIVEForwardCUDAKernelLauncher(features, masks, output,
                                       kernel_size, group_size,
                                       scale_factor);
}

// CUDA-side entry for the naive-CARAFE backward pass: relays every argument,
// in order, to CARAFENAIVEBackwardCUDAKernelLauncher.
void carafe_naive_backward_cuda(Tensor top_grad, Tensor features, Tensor masks,
                                Tensor bottom_grad, Tensor mask_grad,
                                int kernel_size, int group_size,
                                int scale_factor) {
  CARAFENAIVEBackwardCUDAKernelLauncher(top_grad, features, masks,
                                        bottom_grad, mask_grad,
                                        kernel_size, group_size,
                                        scale_factor);
}
// Declaration of the naive-CARAFE forward entry point named in the
// registration below (definition not in this part of the file).
void carafe_naive_forward_impl(Tensor features, Tensor masks, Tensor output,
                               int kernel_size, int group_size,
                               int scale_factor);

// Declaration of the naive-CARAFE backward entry point named in the
// registration below (definition not in this part of the file).
void carafe_naive_backward_impl(Tensor top_grad, Tensor features, Tensor masks,
                                Tensor bottom_grad, Tensor mask_grad,
                                int kernel_size, int group_size,
                                int scale_factor);

// Tie the CUDA wrappers to their *_impl entry points under the CUDA key.
// NOTE(review): macro defined elsewhere; presumably a dispatch-registry hook.
REGISTER_DEVICE_IMPL(carafe_naive_forward_impl, CUDA,
                     carafe_naive_forward_cuda);
REGISTER_DEVICE_IMPL(carafe_naive_backward_impl, CUDA,
                     carafe_naive_backward_cuda);

// Forward declaration of the correlation forward launcher (defined outside
// this part of the file).
void CorrelationForwardCUDAKernelLauncher(Tensor input1, Tensor input2,
                                          Tensor output, int kH, int kW,
                                          int patchH, int patchW, int padH,
                                          int padW, int dilationH,
                                          int dilationW, int dilation_patchH,
                                          int dilation_patchW, int dH, int dW);

// Forward declaration of the correlation backward launcher (defined outside
// this part of the file).
void CorrelationBackwardCUDAKernelLauncher(Tensor grad_output, Tensor input1,
                                           Tensor input2, Tensor grad_input1,
                                           Tensor grad_input2, int kH, int kW,
                                           int patchH, int patchW, int padH,
                                           int padW, int dilationH,
                                           int dilationW, int dilation_patchH,
                                           int dilation_patchW, int dH, int dW);

// CUDA-side entry for the correlation forward pass: relays every argument, in
// order, to CorrelationForwardCUDAKernelLauncher.
void correlation_forward_cuda(Tensor input1, Tensor input2, Tensor output,
                              int kH, int kW, int patchH, int patchW,
                              int padH, int padW, int dilationH, int dilationW,
                              int dilation_patchH, int dilation_patchW,
                              int dH, int dW) {
  CorrelationForwardCUDAKernelLauncher(input1, input2, output,
                                       kH, kW, patchH, patchW,
                                       padH, padW, dilationH, dilationW,
                                       dilation_patchH, dilation_patchW,
                                       dH, dW);
}

// CUDA-side entry for the correlation backward pass: relays every argument,
// in order, to CorrelationBackwardCUDAKernelLauncher.
void correlation_backward_cuda(Tensor grad_output, Tensor input1, Tensor input2,
                               Tensor grad_input1, Tensor grad_input2,
                               int kH, int kW, int patchH, int patchW,
                               int padH, int padW, int dilationH, int dilationW,
                               int dilation_patchH, int dilation_patchW,
                               int dH, int dW) {
  CorrelationBackwardCUDAKernelLauncher(grad_output, input1, input2,
                                        grad_input1, grad_input2,
                                        kH, kW, patchH, patchW,
                                        padH, padW, dilationH, dilationW,
                                        dilation_patchH, dilation_patchW,
                                        dH, dW);
}

// Declaration of the correlation forward entry point named in the
// registration below (definition not in this part of the file).
void correlation_forward_impl(Tensor input1, Tensor input2, Tensor output,
                              int kH, int kW, int patchH, int patchW, int padH,
                              int padW, int dilationH, int dilationW,
                              int dilation_patchH, int dilation_patchW, int dH,
                              int dW);

// Declaration of the correlation backward entry point named in the
// registration below (definition not in this part of the file).
void correlation_backward_impl(Tensor grad_output, Tensor input1, Tensor input2,
                               Tensor grad_input1, Tensor grad_input2, int kH,
                               int kW, int patchH, int patchW, int padH,
                               int padW, int dilationH, int dilationW,
                               int dilation_patchH, int dilation_patchW, int dH,
                               int dW);

// Tie the CUDA wrappers to their *_impl entry points under the CUDA key.
// NOTE(review): macro defined elsewhere; presumably a dispatch-registry hook.
REGISTER_DEVICE_IMPL(correlation_forward_impl, CUDA, correlation_forward_cuda);
REGISTER_DEVICE_IMPL(correlation_backward_impl, CUDA,
                     correlation_backward_cuda);

// Forward declarations of the three deformable-convolution CUDA routines.
// No wrapper bodies exist for them in this part of the file; the *_cuda
// functions themselves are defined elsewhere and registered further below.
void deformable_im2col_cuda(Tensor data_im, Tensor data_offset,
                            const int channels, const int height,
                            const int width, const int ksize_h,
                            const int ksize_w, const int pad_h, const int pad_w,
                            const int stride_h, const int stride_w,
                            const int dilation_h, const int dilation_w,
                            const int parallel_imgs, const int deformable_group,
                            Tensor data_col);

// Same scalar parameter list as deformable_im2col_cuda; the tensors are
// data_col/data_offset in and grad_im last.
void deformable_col2im_cuda(Tensor data_col, Tensor data_offset,
                            const int channels, const int height,
                            const int width, const int ksize_h,
                            const int ksize_w, const int pad_h, const int pad_w,
                            const int stride_h, const int stride_w,
                            const int dilation_h, const int dilation_w,
                            const int parallel_imgs, const int deformable_group,
                            Tensor grad_im);

// Variant taking both data_col and data_im, with grad_offset as the last
// tensor argument.
void deformable_col2im_coord_cuda(
    Tensor data_col, Tensor data_im, Tensor data_offset, const int channels,
    const int height, const int width, const int ksize_h, const int ksize_w,
    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
    const int dilation_h, const int dilation_w, const int parallel_imgs,
    const int deformable_group, Tensor grad_offset);

// Declarations of the deformable-convolution entry points named in the
// registrations below. Each signature mirrors the *_cuda declaration of the
// same name; the definitions are not in this part of the file.
void deformable_im2col_impl(Tensor data_im, Tensor data_offset,
                            const int channels, const int height,
                            const int width, const int ksize_h,
                            const int ksize_w, const int pad_h, const int pad_w,
                            const int stride_h, const int stride_w,
                            const int dilation_h, const int dilation_w,
                            const int parallel_imgs, const int deformable_group,
                            Tensor data_col);

void deformable_col2im_impl(Tensor data_col, Tensor data_offset,
                            const int channels, const int height,
                            const int width, const int ksize_h,
                            const int ksize_w, const int pad_h, const int pad_w,
                            const int stride_h, const int stride_w,
                            const int dilation_h, const int dilation_w,
                            const int parallel_imgs, const int deformable_group,
                            Tensor grad_im);

void deformable_col2im_coord_impl(
    Tensor data_col, Tensor data_im, Tensor data_offset, const int channels,
    const int height, const int width, const int ksize_h, const int ksize_w,
    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
    const int dilation_h, const int dilation_w, const int parallel_imgs,
    const int deformable_group, Tensor grad_offset);

// Tie each *_cuda routine to its *_impl entry point under the CUDA key.
// NOTE(review): REGISTER_DEVICE_IMPL is defined elsewhere; presumably a
// dispatch-registry hook -- confirm against the macro.
REGISTER_DEVICE_IMPL(deformable_im2col_impl, CUDA, deformable_im2col_cuda);
REGISTER_DEVICE_IMPL(deformable_col2im_impl, CUDA, deformable_col2im_cuda);
REGISTER_DEVICE_IMPL(deformable_col2im_coord_impl, CUDA,
                     deformable_col2im_coord_cuda);

// Forward declaration of the deformable RoI pool forward launcher (defined
// outside this part of the file).
void DeformRoIPoolForwardCUDAKernelLauncher(Tensor input, Tensor rois,
                                            Tensor offset, Tensor output,
                                            int pooled_height, int pooled_width,
                                            float spatial_scale,
                                            int sampling_ratio, float gamma);

// Forward declaration of the deformable RoI pool backward launcher (defined
// outside this part of the file).
void DeformRoIPoolBackwardCUDAKernelLauncher(
    Tensor grad_output, Tensor input, Tensor rois, Tensor offset,
    Tensor grad_input, Tensor grad_offset, int pooled_height, int pooled_width,
    float spatial_scale, int sampling_ratio, float gamma);

// CUDA-side entry for the deformable RoI pool forward pass: relays every
// argument, in order, to DeformRoIPoolForwardCUDAKernelLauncher.
void deform_roi_pool_forward_cuda(Tensor input, Tensor rois, Tensor offset,
                                  Tensor output,
                                  int pooled_height, int pooled_width,
                                  float spatial_scale, int sampling_ratio,
                                  float gamma) {
  DeformRoIPoolForwardCUDAKernelLauncher(input, rois, offset, output,
                                         pooled_height, pooled_width,
                                         spatial_scale, sampling_ratio,
                                         gamma);
}

// CUDA-side entry for the deformable RoI pool backward pass: relays every
// argument, in order, to DeformRoIPoolBackwardCUDAKernelLauncher.
void deform_roi_pool_backward_cuda(Tensor grad_output, Tensor input,
                                   Tensor rois, Tensor offset,
                                   Tensor grad_input, Tensor grad_offset,
                                   int pooled_height, int pooled_width,
                                   float spatial_scale, int sampling_ratio,
                                   float gamma) {
  DeformRoIPoolBackwardCUDAKernelLauncher(grad_output, input, rois, offset,
                                          grad_input, grad_offset,
                                          pooled_height, pooled_width,
                                          spatial_scale, sampling_ratio,
                                          gamma);
}

// Declaration of the deformable RoI pool forward entry point named in the
// registration below (definition not in this part of the file).
void deform_roi_pool_forward_impl(Tensor input, Tensor rois, Tensor offset,
                                  Tensor output, int pooled_height,
                                  int pooled_width, float spatial_scale,
                                  int sampling_ratio, float gamma);

// Declaration of the deformable RoI pool backward entry point named in the
// registration below (definition not in this part of the file).
void deform_roi_pool_backward_impl(Tensor grad_output, Tensor input,
                                   Tensor rois, Tensor offset,
                                   Tensor grad_input, Tensor grad_offset,
                                   int pooled_height, int pooled_width,
                                   float spatial_scale, int sampling_ratio,
                                   float gamma);

// Tie the CUDA wrappers to their *_impl entry points under the CUDA key.
// NOTE(review): macro defined elsewhere; presumably a dispatch-registry hook.
REGISTER_DEVICE_IMPL(deform_roi_pool_forward_impl, CUDA,
                     deform_roi_pool_forward_cuda);
REGISTER_DEVICE_IMPL(deform_roi_pool_backward_impl, CUDA,
                     deform_roi_pool_backward_cuda);

// Forward declarations of the four focal-loss launchers (sigmoid and softmax,
// forward and backward). All are defined outside this part of the file.
void SigmoidFocalLossForwardCUDAKernelLauncher(Tensor input, Tensor target,
                                               Tensor weight, Tensor output,
                                               const float gamma,
                                               const float alpha);

void SigmoidFocalLossBackwardCUDAKernelLauncher(Tensor input, Tensor target,
                                                Tensor weight,
                                                Tensor grad_input,
                                                const float gamma,
                                                const float alpha);

// The softmax launchers name their first tensor `softmax` rather than `input`.
void SoftmaxFocalLossForwardCUDAKernelLauncher(Tensor softmax, Tensor target,
                                               Tensor weight, Tensor output,
                                               const float gamma,
                                               const float alpha);

// The softmax backward launcher takes an extra `buff` tensor that the sigmoid
// backward launcher does not have.
void SoftmaxFocalLossBackwardCUDAKernelLauncher(Tensor softmax, Tensor target,
                                                Tensor weight, Tensor buff,
                                                Tensor grad_input,
                                                const float gamma,
                                                const float alpha);

// CUDA-side entry for the sigmoid focal loss forward pass: relays every
// argument, in order, to SigmoidFocalLossForwardCUDAKernelLauncher.
void sigmoid_focal_loss_forward_cuda(Tensor input, Tensor target,
                                     Tensor weight, Tensor output,
                                     float gamma, float alpha) {
  SigmoidFocalLossForwardCUDAKernelLauncher(input, target,
                                            weight, output,
                                            gamma, alpha);
}

// CUDA-side entry for the sigmoid focal loss backward pass: relays every
// argument, in order, to SigmoidFocalLossBackwardCUDAKernelLauncher.
void sigmoid_focal_loss_backward_cuda(Tensor input, Tensor target,
                                      Tensor weight, Tensor grad_input,
                                      float gamma, float alpha) {
  SigmoidFocalLossBackwardCUDAKernelLauncher(input, target,
                                             weight, grad_input,
                                             gamma, alpha);
}

// CUDA-side entry for the softmax focal loss forward pass: relays every
// argument, in order, to SoftmaxFocalLossForwardCUDAKernelLauncher. The
// launcher's first parameter is named `softmax`; `input` is passed there.
void softmax_focal_loss_forward_cuda(Tensor input, Tensor target,
                                     Tensor weight, Tensor output,
                                     float gamma, float alpha) {
  SoftmaxFocalLossForwardCUDAKernelLauncher(input, target,
                                            weight, output,
                                            gamma, alpha);
}

// CUDA-side entry for the softmax focal loss backward pass: relays every
// argument, in order, to SoftmaxFocalLossBackwardCUDAKernelLauncher. The
// launcher's first parameter is named `softmax`; `input` is passed there.
void softmax_focal_loss_backward_cuda(Tensor input, Tensor target,
                                      Tensor weight, Tensor buff,
                                      Tensor grad_input,
                                      float gamma, float alpha) {
  SoftmaxFocalLossBackwardCUDAKernelLauncher(input, target,
                                             weight, buff,
                                             grad_input,
                                             gamma, alpha);
}

// Declarations of the four focal-loss entry points named in the registrations
// below; their definitions are not in this part of the file.
void sigmoid_focal_loss_forward_impl(Tensor input, Tensor target, Tensor weight,
                                     Tensor output, float gamma, float alpha);

void sigmoid_focal_loss_backward_impl(Tensor input, Tensor target,
                                      Tensor weight, Tensor grad_input,
                                      float gamma, float alpha);

void softmax_focal_loss_forward_impl(Tensor input, Tensor target, Tensor weight,
                                     Tensor output, float gamma, float alpha);

void softmax_focal_loss_backward_impl(Tensor input, Tensor target,
                                      Tensor weight, Tensor buff,
                                      Tensor grad_input, float gamma,
                                      float alpha);

// Tie each CUDA wrapper to its *_impl entry point under the CUDA key.
// NOTE(review): REGISTER_DEVICE_IMPL is defined elsewhere; presumably a
// dispatch-registry hook -- confirm against the macro.
REGISTER_DEVICE_IMPL(sigmoid_focal_loss_forward_impl, CUDA,
                     sigmoid_focal_loss_forward_cuda);
REGISTER_DEVICE_IMPL(sigmoid_focal_loss_backward_impl, CUDA,
                     sigmoid_focal_loss_backward_cuda);
REGISTER_DEVICE_IMPL(softmax_focal_loss_forward_impl, CUDA,
                     softmax_focal_loss_forward_cuda);
REGISTER_DEVICE_IMPL(softmax_focal_loss_backward_impl, CUDA,
                     softmax_focal_loss_backward_cuda);

// Forward declarations of the furthest-point-sampling launchers. Unlike most
// launchers in this part of the file, they take raw pointers rather than
// Tensor objects. Both are defined outside this part of the file.
void FurthestPointSamplingForwardCUDAKernelLauncher(int b, int n, int m,
                                                    const float* dataset,
                                                    float* temp, int* idxs);

void FurthestPointSamplingWithDistForwardCUDAKernelLauncher(
    int b, int n, int m, const float* dataset, float* temp, int* idxs);

// CUDA-side entry for furthest point sampling. Pulls typed raw pointers out
// of the three tensors (float, float, int) and hands them, together with
// b/n/m, to FurthestPointSamplingForwardCUDAKernelLauncher.
void furthest_point_sampling_forward_cuda(Tensor points_tensor,
                                          Tensor temp_tensor,
                                          Tensor idx_tensor,
                                          int b, int n, int m) {
  // Pointers are extracted in the same order as before: points, temp, idx.
  const float* points_ptr = points_tensor.data_ptr<float>();
  float* scratch_ptr = temp_tensor.data_ptr<float>();
  int* index_ptr = idx_tensor.data_ptr<int>();

  FurthestPointSamplingForwardCUDAKernelLauncher(b, n, m, points_ptr,
                                                 scratch_ptr, index_ptr);
}

// CUDA-side entry for the "with dist" furthest point sampling variant. Pulls
// typed raw pointers out of the three tensors (float, float, int) and hands
// them, together with b/n/m, to
// FurthestPointSamplingWithDistForwardCUDAKernelLauncher.
void furthest_point_sampling_with_dist_forward_cuda(Tensor points_tensor,
                                                    Tensor temp_tensor,
                                                    Tensor idx_tensor,
                                                    int b, int n, int m) {
  // Pointers are extracted in the same order as before: points, temp, idx.
  const float* points_ptr = points_tensor.data_ptr<float>();
  float* scratch_ptr = temp_tensor.data_ptr<float>();
  int* index_ptr = idx_tensor.data_ptr<int>();

  FurthestPointSamplingWithDistForwardCUDAKernelLauncher(
      b, n, m, points_ptr, scratch_ptr, index_ptr);
}

// Declaration of the furthest-point-sampling entry point named in the
// registration below (definition not in this part of the file).
void furthest_point_sampling_forward_impl(Tensor points_tensor,
                                          Tensor temp_tensor, Tensor idx_tensor,
                                          int b, int n, int m);

// Declaration of the "with dist" variant's entry point named in the
// registration below (definition not in this part of the file).
void furthest_point_sampling_with_dist_forward_impl(Tensor points_tensor,
                                                    Tensor temp_tensor,
                                                    Tensor idx_tensor, int b,
                                                    int n, int m);

// Tie the CUDA wrappers to their *_impl entry points under the CUDA key.
// NOTE(review): macro defined elsewhere; presumably a dispatch-registry hook.
REGISTER_DEVICE_IMPL(furthest_point_sampling_forward_impl, CUDA,
                     furthest_point_sampling_forward_cuda);
REGISTER_DEVICE_IMPL(furthest_point_sampling_with_dist_forward_impl, CUDA,
                     furthest_point_sampling_with_dist_forward_cuda);

// Forward declaration of the fused bias + leaky-ReLU op. It returns a tensor,
// and no wrapper body exists in this part of the file: the function declared
// here is registered directly below.
torch::Tensor fused_bias_leakyrelu_op(const torch::Tensor& input,
                                      const torch::Tensor& bias,
                                      const torch::Tensor& refer, int act,
                                      int grad, float alpha, float scale);

// Declaration of the entry point named in the registration below; same
// signature as fused_bias_leakyrelu_op.
torch::Tensor fused_bias_leakyrelu_op_impl(const torch::Tensor& input,
                                           const torch::Tensor& bias,
                                           const torch::Tensor& refer, int act,
                                           int grad, float alpha, float scale);
// Ties fused_bias_leakyrelu_op to fused_bias_leakyrelu_op_impl under the CUDA
// key. NOTE(review): macro defined elsewhere; presumably a dispatch-registry
// hook.
REGISTER_DEVICE_IMPL(fused_bias_leakyrelu_op_impl, CUDA,
                     fused_bias_leakyrelu_op);

// Forward declaration of the gather-points forward launcher (defined outside
// this part of the file).
void GatherPointsForwardCUDAKernelLauncher(int b, int c, int n, int npoints,
                                           const Tensor points,
                                           const Tensor idx, Tensor out);

// Forward declaration of the gather-points backward launcher (defined outside
// this part of the file).
void GatherPointsBackwardCUDAKernelLauncher(int b, int c, int n, int npoints,
                                            const Tensor grad_out,
                                            const Tensor idx,
                                            Tensor grad_points);

// CUDA-side entry for the gather-points forward pass: relays every argument,
// in order, to GatherPointsForwardCUDAKernelLauncher.
void gather_points_forward_cuda(int b, int c, int n, int npoints,
                                const Tensor points, const Tensor idx,
                                Tensor out) {
  GatherPointsForwardCUDAKernelLauncher(b, c, n, npoints, points, idx, out);
}

// CUDA-side entry for the gather-points backward pass: relays every argument,
// in order, to GatherPointsBackwardCUDAKernelLauncher.
void gather_points_backward_cuda(int b, int c, int n, int npoints,
                                 const Tensor grad_out, const Tensor idx,
                                 Tensor grad_points) {
  GatherPointsBackwardCUDAKernelLauncher(b, c, n, npoints, grad_out, idx,
                                         grad_points);
}

// Declaration of the gather-points forward entry point named in the
// registration below (definition not in this part of the file).
void gather_points_forward_impl(int b, int c, int n, int npoints,
                                const Tensor points, const Tensor idx,
                                Tensor out);

// Declaration of the gather-points backward entry point named in the
// registration below (definition not in this part of the file).
void gather_points_backward_impl(int b, int c, int n, int npoints,
                                 const Tensor grad_out, const Tensor idx,
                                 Tensor grad_points);

// Tie the CUDA wrappers to their *_impl entry points under the CUDA key.
// NOTE(review): macro defined elsewhere; presumably a dispatch-registry hook.
REGISTER_DEVICE_IMPL(gather_points_forward_impl, CUDA,
                     gather_points_forward_cuda);
REGISTER_DEVICE_IMPL(gather_points_backward_impl, CUDA,
                     gather_points_backward_cuda);

// Forward declaration of the group-points forward launcher (defined outside
// this part of the file).
void GroupPointsForwardCUDAKernelLauncher(int b, int c, int n, int npoints,
                                          int nsample, const Tensor points,
                                          const Tensor idx, Tensor out);

// Forward declaration of the group-points backward launcher (defined outside
// this part of the file).
void GroupPointsBackwardCUDAKernelLauncher(int b, int c, int n, int npoints,
                                           int nsample, const Tensor grad_out,
                                           const Tensor idx,
                                           Tensor grad_points);

// CUDA-side entry for the group-points forward pass: relays every argument,
// in order, to GroupPointsForwardCUDAKernelLauncher.
void group_points_forward_cuda(int b, int c, int n, int npoints, int nsample,
                               const Tensor points, const Tensor idx,
                               Tensor out) {
  GroupPointsForwardCUDAKernelLauncher(b, c, n, npoints, nsample, points, idx,
                                       out);
}

// CUDA-side entry for the group-points backward pass: relays every argument,
// in order, to GroupPointsBackwardCUDAKernelLauncher.
void group_points_backward_cuda(int b, int c, int n, int npoints, int nsample,
                                const Tensor grad_out, const Tensor idx,
                                Tensor grad_points) {
  GroupPointsBackwardCUDAKernelLauncher(b, c, n, npoints, nsample, grad_out,
                                        idx, grad_points);
}

// Declaration of the group-points forward entry point named in the
// registration below (definition not in this part of the file).
void group_points_forward_impl(int b, int c, int n, int npoints, int nsample,
                               const Tensor points, const Tensor idx,
                               Tensor out);

// Declaration of the group-points backward entry point named in the
// registration below (definition not in this part of the file).
void group_points_backward_impl(int b, int c, int n, int npoints, int nsample,
                                const Tensor grad_out, const Tensor idx,
                                Tensor grad_points);

// Tie the CUDA wrappers to their *_impl entry points under the CUDA key.
// NOTE(review): macro defined elsewhere; presumably a dispatch-registry hook.
REGISTER_DEVICE_IMPL(group_points_forward_impl, CUDA,
                     group_points_forward_cuda);
REGISTER_DEVICE_IMPL(group_points_backward_impl, CUDA,
                     group_points_backward_cuda);

// Forward declarations of the four IoU3D launchers. All are defined outside
// this part of the file.
void IoU3DBoxesOverlapBevForwardCUDAKernelLauncher(const int num_a,
                                                   const Tensor boxes_a,
                                                   const int num_b,
                                                   const Tensor boxes_b,
                                                   Tensor ans_overlap);

void IoU3DBoxesIoUBevForwardCUDAKernelLauncher(const int num_a,
                                               const Tensor boxes_a,
                                               const int num_b,
                                               const Tensor boxes_b,
                                               Tensor ans_iou);

// The two NMS launchers take the mask as a raw unsigned long long pointer
// rather than a Tensor.
void IoU3DNMSForwardCUDAKernelLauncher(const Tensor boxes,
                                       unsigned long long* mask, int boxes_num,
                                       float nms_overlap_thresh);

void IoU3DNMSNormalForwardCUDAKernelLauncher(const Tensor boxes,
                                             unsigned long long* mask,
                                             int boxes_num,
                                             float nms_overlap_thresh);

// CUDA-side entry for the IoU3D BEV overlap computation: relays every
// argument, in order, to IoU3DBoxesOverlapBevForwardCUDAKernelLauncher.
void iou3d_boxes_overlap_bev_forward_cuda(const int num_a, const Tensor boxes_a,
                                          const int num_b, const Tensor boxes_b,
                                          Tensor ans_overlap) {
  IoU3DBoxesOverlapBevForwardCUDAKernelLauncher(num_a, boxes_a, num_b, boxes_b,
                                                ans_overlap);
}

// CUDA-side entry for the IoU3D BEV IoU computation: relays every argument,
// in order, to IoU3DBoxesIoUBevForwardCUDAKernelLauncher.
void iou3d_boxes_iou_bev_forward_cuda(const int num_a, const Tensor boxes_a,
                                      const int num_b, const Tensor boxes_b,
                                      Tensor ans_iou) {
  IoU3DBoxesIoUBevForwardCUDAKernelLauncher(num_a, boxes_a, num_b, boxes_b,
                                            ans_iou);
}

// CUDA-side entry for IoU3D NMS: relays every argument, in order, to
// IoU3DNMSForwardCUDAKernelLauncher. `mask` is passed through as a raw
// pointer and is not dereferenced here.
void iou3d_nms_forward_cuda(const Tensor boxes, unsigned long long* mask,
                            int boxes_num, float nms_overlap_thresh) {
  IoU3DNMSForwardCUDAKernelLauncher(boxes, mask, boxes_num, nms_overlap_thresh);
}

// CUDA-side entry for the "normal" IoU3D NMS variant: relays every argument,
// in order, to IoU3DNMSNormalForwardCUDAKernelLauncher. `mask` is passed
// through as a raw pointer and is not dereferenced here.
void iou3d_nms_normal_forward_cuda(const Tensor boxes, unsigned long long* mask,
                                   int boxes_num, float nms_overlap_thresh) {
  IoU3DNMSNormalForwardCUDAKernelLauncher(boxes, mask, boxes_num,
                                          nms_overlap_thresh);
}

// Declarations of the four IoU3D entry points named in the registrations
// below; their definitions are not in this part of the file.
void iou3d_boxes_overlap_bev_forward_impl(const int num_a, const Tensor boxes_a,
                                          const int num_b, const Tensor boxes_b,
                                          Tensor ans_overlap);

void iou3d_boxes_iou_bev_forward_impl(const int num_a, const Tensor boxes_a,
                                      const int num_b, const Tensor boxes_b,
                                      Tensor ans_iou);

void iou3d_nms_forward_impl(const Tensor boxes, unsigned long long* mask,
                            int boxes_num, float nms_overlap_thresh);

void iou3d_nms_normal_forward_impl(const Tensor boxes, unsigned long long* mask,
                                   int boxes_num, float nms_overlap_thresh);

// Tie each CUDA wrapper to its *_impl entry point under the CUDA key.
// NOTE(review): REGISTER_DEVICE_IMPL is defined elsewhere; presumably a
// dispatch-registry hook -- confirm against the macro.
REGISTER_DEVICE_IMPL(iou3d_boxes_overlap_bev_forward_impl, CUDA,
                     iou3d_boxes_overlap_bev_forward_cuda);
REGISTER_DEVICE_IMPL(iou3d_boxes_iou_bev_forward_impl, CUDA,
                     iou3d_boxes_iou_bev_forward_cuda);
REGISTER_DEVICE_IMPL(iou3d_nms_forward_impl, CUDA, iou3d_nms_forward_cuda);
REGISTER_DEVICE_IMPL(iou3d_nms_normal_forward_impl, CUDA,
                     iou3d_nms_normal_forward_cuda);

// Forward declaration of the KNN forward launcher (defined outside this part
// of the file).
void KNNForwardCUDAKernelLauncher(int b, int n, int m, int nsample,
                                  const Tensor xyz, const Tensor new_xyz,
                                  Tensor idx, Tensor dist2);

// CUDA-side entry for KNN: relays every argument, in order, to
// KNNForwardCUDAKernelLauncher.
void knn_forward_cuda(int b, int n, int m, int nsample,
                      const Tensor xyz, const Tensor new_xyz,
                      Tensor idx, Tensor dist2) {
  KNNForwardCUDAKernelLauncher(b, n, m, nsample,
                               xyz, new_xyz,
                               idx, dist2);
}

// Declaration of the KNN entry point named in the registration below
// (definition not in this part of the file).
void knn_forward_impl(int b, int n, int m, int nsample, const Tensor xyz,
                      const Tensor new_xyz, Tensor idx, Tensor dist2);
// Ties knn_forward_cuda to knn_forward_impl under the CUDA key.
// NOTE(review): macro defined elsewhere; presumably a dispatch-registry hook.
REGISTER_DEVICE_IMPL(knn_forward_impl, CUDA, knn_forward_cuda);

// Forward declaration of the masked im2col launcher (defined outside this
// part of the file).
void MaskedIm2colForwardCUDAKernelLauncher(const Tensor bottom_data,
                                           const Tensor mask_h_idx,
                                           const Tensor mask_w_idx,
                                           Tensor top_data, const int kernel_h,
                                           const int kernel_w, const int pad_h,
                                           const int pad_w);

// Forward declaration of the masked col2im launcher (defined outside this
// part of the file). It takes height/width/channels where the im2col launcher
// takes kernel and padding sizes.
void MaskedCol2imForwardCUDAKernelLauncher(const Tensor bottom_data,
                                           const Tensor mask_h_idx,
                                           const Tensor mask_w_idx,
                                           Tensor top_data, const int height,
                                           const int width, const int channels);

// CUDA-side entry for masked im2col: relays every argument, in order, to
// MaskedIm2colForwardCUDAKernelLauncher (`im` and `col` land in the
// launcher's `bottom_data` and `top_data` parameters).
void masked_im2col_forward_cuda(const Tensor im,
                                const Tensor mask_h_idx,
                                const Tensor mask_w_idx,
                                Tensor col,
                                const int kernel_h, const int kernel_w,
                                const int pad_h, const int pad_w) {
  // Layout note carried over from the original author:
  //   im: (n, ic, h, w), kernel size (kh, kw)
  //   kernel: (oc, ic * kh * kw), col: (kh * kw * ic, ow * oh)
  MaskedIm2colForwardCUDAKernelLauncher(im, mask_h_idx, mask_w_idx, col,
                                        kernel_h, kernel_w,
                                        pad_h, pad_w);
}

// CUDA-side entry for masked col2im: relays every argument, in order, to
// MaskedCol2imForwardCUDAKernelLauncher (`col` and `im` land in the
// launcher's `bottom_data` and `top_data` parameters).
void masked_col2im_forward_cuda(const Tensor col,
                                const Tensor mask_h_idx,
                                const Tensor mask_w_idx,
                                Tensor im,
                                int height, int width, int channels) {
  // Layout note carried over from the original author:
  //   im: (n, ic, h, w), kernel size (kh, kw)
  //   kernel: (oc, ic * kh * kw), col: (kh * kw * ic, ow * oh)
  MaskedCol2imForwardCUDAKernelLauncher(col, mask_h_idx, mask_w_idx, im,
                                        height, width, channels);
}

// Declaration of the masked im2col entry point named in the registration
// below (definition not in this part of the file).
void masked_im2col_forward_impl(const Tensor im, const Tensor mask_h_idx,
                                const Tensor mask_w_idx, Tensor col,
                                const int kernel_h, const int kernel_w,
                                const int pad_h, const int pad_w);

// Declaration of the masked col2im entry point named in the registration
// below (definition not in this part of the file).
void masked_col2im_forward_impl(const Tensor col, const Tensor mask_h_idx,
                                const Tensor mask_w_idx, Tensor im, int height,
                                int width, int channels);

// Tie the CUDA wrappers to their *_impl entry points under the CUDA key.
// NOTE(review): macro defined elsewhere; presumably a dispatch-registry hook.
REGISTER_DEVICE_IMPL(masked_im2col_forward_impl, CUDA,
                     masked_im2col_forward_cuda);
REGISTER_DEVICE_IMPL(masked_col2im_forward_impl, CUDA,
                     masked_col2im_forward_cuda);

// Forward declarations of the three modulated deformable-convolution CUDA
// routines. No wrapper bodies exist for them in this part of the file; the
// *_cuda functions themselves are defined elsewhere. Compared with the plain
// deformable_* declarations they take an additional data_mask tensor.
void modulated_deformable_im2col_cuda(
    const Tensor data_im, const Tensor data_offset, const Tensor data_mask,
    const int batch_size, const int channels, const int height_im,
    const int width_im, const int height_col, const int width_col,
    const int kernel_h, const int kernel_w, const int pad_h, const int pad_w,
    const int stride_h, const int stride_w, const int dilation_h,
    const int dilation_w, const int deformable_group, Tensor data_col);

// Same scalar parameter list as the im2col variant; the last tensor is
// grad_im.
void modulated_deformable_col2im_cuda(
    const Tensor data_col, const Tensor data_offset, const Tensor data_mask,
    const int batch_size, const int channels, const int height_im,
    const int width_im, const int height_col, const int width_col,
    const int kernel_h, const int kernel_w, const int pad_h, const int pad_w,
    const int stride_h, const int stride_w, const int dilation_h,
    const int dilation_w, const int deformable_group, Tensor grad_im);

// Variant taking both data_col and data_im, with two trailing tensors:
// grad_offset and grad_mask.
void modulated_deformable_col2im_coord_cuda(
    const Tensor data_col, const Tensor data_im, const Tensor data_offset,
    const Tensor data_mask, const int batch_size, const int channels,
    const int height_im, const int width_im, const int height_col,
    const int width_col, const int kernel_h, const int kernel_w,
    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
    const int dilation_h, const int dilation_w, const int deformable_group,
    Tensor grad_offset, Tensor grad_mask);

void modulated_deformable_im2col_impl(
    const Tensor data_im, const Tensor data_offset, const Tensor data_mask,
    const int batch_size, const int channels, const int height_im,
    const int width_im, const int height_col, const int width_col,
    const int kernel_h, const int kernel_w, const int pad_h, const int pad_w,
    const int stride_h, const int stride_w, const int dilation_h,
    const int dilation_w, const int deformable_group, Tensor data_col);

void modulated_deformable_col2im_impl(
    const Tensor data_col, const Tensor data_offset, const Tensor data_mask,
    const int batch_size, const int channels, const int height_im,
    const int width_im, const int height_col, const int width_col,
    const int kernel_h, const int kernel_w, const int pad_h, const int pad_w,
    const int stride_h, const int stride_w, const int dilation_h,
    const int dilation_w, const int deformable_group, Tensor grad_im);

void modulated_deformable_col2im_coord_impl(
    const Tensor data_col, const Tensor data_im, const Tensor data_offset,
    const Tensor data_mask, const int batch_size, const int channels,
    const int height_im, const int width_im, const int height_col,
    const int width_col, const int kernel_h, const int kernel_w,
    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
    const int dilation_h, const int dilation_w, const int deformable_group,
    Tensor grad_offset, Tensor grad_mask);

REGISTER_DEVICE_IMPL(modulated_deformable_im2col_impl, CUDA,
                     modulated_deformable_im2col_cuda);
REGISTER_DEVICE_IMPL(modulated_deformable_col2im_impl, CUDA,
                     modulated_deformable_col2im_cuda);
REGISTER_DEVICE_IMPL(modulated_deformable_col2im_coord_impl, CUDA,
                     modulated_deformable_col2im_coord_cuda);

// Multi-scale deformable attention. The CUDA forward/backward functions are
// only declared here; their definitions are outside this part of the file.
// The forward returns a Tensor; the backward writes into the three non-const
// Tensor& parameters (grad_value, grad_sampling_loc, grad_attn_weight).
Tensor ms_deform_attn_cuda_forward(const Tensor& value,
                                   const Tensor& spatial_shapes,
                                   const Tensor& level_start_index,
                                   const Tensor& sampling_loc,
                                   const Tensor& attn_weight,
                                   const int im2col_step);

void ms_deform_attn_cuda_backward(
    const Tensor& value, const Tensor& spatial_shapes,
    const Tensor& level_start_index, const Tensor& sampling_loc,
    const Tensor& attn_weight, const Tensor& grad_output, Tensor& grad_value,
    Tensor& grad_sampling_loc, Tensor& grad_attn_weight, const int im2col_step);

// Device-dispatched counterparts with identical parameter lists.
Tensor ms_deform_attn_impl_forward(const Tensor& value,
                                   const Tensor& spatial_shapes,
                                   const Tensor& level_start_index,
                                   const Tensor& sampling_loc,
                                   const Tensor& attn_weight,
                                   const int im2col_step);

void ms_deform_attn_impl_backward(
    const Tensor& value, const Tensor& spatial_shapes,
    const Tensor& level_start_index, const Tensor& sampling_loc,
    const Tensor& attn_weight, const Tensor& grad_output, Tensor& grad_value,
    Tensor& grad_sampling_loc, Tensor& grad_attn_weight, const int im2col_step);

// Pair the impl entry points with the CUDA functions under the CUDA key.
// NOTE(review): exact effect depends on REGISTER_DEVICE_IMPL (defined
// elsewhere).
REGISTER_DEVICE_IMPL(ms_deform_attn_impl_forward, CUDA,
                     ms_deform_attn_cuda_forward);
REGISTER_DEVICE_IMPL(ms_deform_attn_impl_backward, CUDA,
                     ms_deform_attn_cuda_backward);

// Forward declaration of the NMS kernel launcher; defined outside this part of
// the file. Returns a Tensor.
Tensor NMSCUDAKernelLauncher(Tensor boxes, Tensor scores, float iou_threshold,
                             int offset);

// CUDA entry point for NMS: forwards all arguments unchanged to
// NMSCUDAKernelLauncher and returns its result.
Tensor nms_cuda(Tensor boxes, Tensor scores, float iou_threshold, int offset) {
  return NMSCUDAKernelLauncher(boxes, scores, iou_threshold, offset);
}

// Prototype of the device-dispatched NMS entry point, followed by the pairing
// of nms_cuda with it under the CUDA key.
// NOTE(review): exact effect depends on REGISTER_DEVICE_IMPL (defined
// elsewhere).
Tensor nms_impl(Tensor boxes, Tensor scores, float iou_threshold, int offset);
REGISTER_DEVICE_IMPL(nms_impl, CUDA, nms_cuda);

// Forward declarations of the points-in-boxes kernel launchers ("part" and
// "all" variants); defined outside this part of the file. Both take the same
// parameter list and write into box_idx_of_points.
void PointsInBoxesPartForwardCUDAKernelLauncher(int batch_size, int boxes_num,
                                                int pts_num, const Tensor boxes,
                                                const Tensor pts,
                                                Tensor box_idx_of_points);

void PointsInBoxesAllForwardCUDAKernelLauncher(int batch_size, int boxes_num,
                                               int pts_num, const Tensor boxes,
                                               const Tensor pts,
                                               Tensor box_idx_of_points);

// CUDA entry point for the "part" points-in-boxes query: forwards every
// argument unchanged to PointsInBoxesPartForwardCUDAKernelLauncher, which
// receives box_idx_of_points as its output tensor.
void points_in_boxes_part_forward_cuda(int batch_size, int boxes_num,
                                       int pts_num, const Tensor boxes,
                                       const Tensor pts,
                                       Tensor box_idx_of_points) {
  PointsInBoxesPartForwardCUDAKernelLauncher(batch_size, boxes_num, pts_num,
                                             boxes, pts, box_idx_of_points);
}

// CUDA entry point for the "all" points-in-boxes query: forwards every
// argument unchanged to PointsInBoxesAllForwardCUDAKernelLauncher, which
// receives box_idx_of_points as its output tensor.
void points_in_boxes_all_forward_cuda(int batch_size, int boxes_num,
                                      int pts_num, const Tensor boxes,
                                      const Tensor pts,
                                      Tensor box_idx_of_points) {
  PointsInBoxesAllForwardCUDAKernelLauncher(batch_size, boxes_num, pts_num,
                                            boxes, pts, box_idx_of_points);
}

// Prototypes of the device-dispatched points-in-boxes entry points; defined
// outside this part of the file.
void points_in_boxes_part_forward_impl(int batch_size, int boxes_num,
                                       int pts_num, const Tensor boxes,
                                       const Tensor pts,
                                       Tensor box_idx_of_points);

void points_in_boxes_all_forward_impl(int batch_size, int boxes_num,
                                      int pts_num, const Tensor boxes,
                                      const Tensor pts,
                                      Tensor box_idx_of_points);
// Pair each impl entry point with its CUDA wrapper under the CUDA key.
// NOTE(review): exact effect depends on REGISTER_DEVICE_IMPL (defined
// elsewhere).
REGISTER_DEVICE_IMPL(points_in_boxes_part_forward_impl, CUDA,
                     points_in_boxes_part_forward_cuda);
REGISTER_DEVICE_IMPL(points_in_boxes_all_forward_impl, CUDA,
                     points_in_boxes_all_forward_cuda);

// Forward declarations of the PSA mask kernel launchers (forward and
// backward); defined outside this part of the file.
void PSAMaskForwardCUDAKernelLauncher(const int psa_type, const Tensor input,
                                      Tensor output, const int num_,
                                      const int h_feature, const int w_feature,
                                      const int h_mask, const int w_mask,
                                      const int half_h_mask,
                                      const int half_w_mask);

void PSAMaskBackwardCUDAKernelLauncher(
    const int psa_type, const Tensor grad_output, Tensor grad_input,
    const int num_, const int h_feature, const int w_feature, const int h_mask,
    const int w_mask, const int half_h_mask, const int half_w_mask);

// CUDA entry point for the PSA mask forward pass: forwards every argument
// unchanged, in the same order, to PSAMaskForwardCUDAKernelLauncher.
void psamask_forward_cuda(const int psa_type, const Tensor input, Tensor output,
                          const int num_, const int h_feature,
                          const int w_feature, const int h_mask,
                          const int w_mask, const int half_h_mask,
                          const int half_w_mask) {
  PSAMaskForwardCUDAKernelLauncher(psa_type, input, output, num_, h_feature,
                                   w_feature, h_mask, w_mask, half_h_mask,
                                   half_w_mask);
}

// CUDA entry point for the PSA mask backward pass: forwards every argument
// unchanged, in the same order, to PSAMaskBackwardCUDAKernelLauncher.
void psamask_backward_cuda(const int psa_type, const Tensor grad_output,
                           Tensor grad_input, const int num_,
                           const int h_feature, const int w_feature,
                           const int h_mask, const int w_mask,
                           const int half_h_mask, const int half_w_mask) {
  PSAMaskBackwardCUDAKernelLauncher(psa_type, grad_output, grad_input, num_,
                                    h_feature, w_feature, h_mask, w_mask,
                                    half_h_mask, half_w_mask);
}

// Prototypes of the device-dispatched PSA mask entry points; defined outside
// this part of the file.
void psamask_forward_impl(const int psa_type, const Tensor input, Tensor output,
                          const int num_, const int h_feature,
                          const int w_feature, const int h_mask,
                          const int w_mask, const int half_h_mask,
                          const int half_w_mask);

void psamask_backward_impl(const int psa_type, const Tensor grad_output,
                           Tensor grad_input, const int num_,
                           const int h_feature, const int w_feature,
                           const int h_mask, const int w_mask,
                           const int half_h_mask, const int half_w_mask);
// Pair each impl entry point with its CUDA wrapper under the CUDA key.
// NOTE(review): exact effect depends on REGISTER_DEVICE_IMPL (defined
// elsewhere).
REGISTER_DEVICE_IMPL(psamask_forward_impl, CUDA, psamask_forward_cuda);
REGISTER_DEVICE_IMPL(psamask_backward_impl, CUDA, psamask_backward_cuda);

// Forward declarations of the RoI Align kernel launchers (forward and
// backward); defined outside this part of the file.
void ROIAlignForwardCUDAKernelLauncher(Tensor input, Tensor rois, Tensor output,
                                       Tensor argmax_y, Tensor argmax_x,
                                       int aligned_height, int aligned_width,
                                       float spatial_scale, int sampling_ratio,
                                       int pool_mode, bool aligned);

void ROIAlignBackwardCUDAKernelLauncher(Tensor grad_output, Tensor rois,
                                        Tensor argmax_y, Tensor argmax_x,
                                        Tensor grad_input, int aligned_height,
                                        int aligned_width, float spatial_scale,
                                        int sampling_ratio, int pool_mode,
                                        bool aligned);

// CUDA entry point for the RoI Align forward pass: forwards every argument
// unchanged, in the same order, to ROIAlignForwardCUDAKernelLauncher.
void roi_align_forward_cuda(Tensor input, Tensor rois, Tensor output,
                            Tensor argmax_y, Tensor argmax_x,
                            int aligned_height, int aligned_width,
                            float spatial_scale, int sampling_ratio,
                            int pool_mode, bool aligned) {
  ROIAlignForwardCUDAKernelLauncher(
      input, rois, output, argmax_y, argmax_x, aligned_height, aligned_width,
      spatial_scale, sampling_ratio, pool_mode, aligned);
}

// CUDA entry point for the RoI Align backward pass: forwards every argument
// unchanged, in the same order, to ROIAlignBackwardCUDAKernelLauncher.
void roi_align_backward_cuda(Tensor grad_output, Tensor rois, Tensor argmax_y,
                             Tensor argmax_x, Tensor grad_input,
                             int aligned_height, int aligned_width,
                             float spatial_scale, int sampling_ratio,
                             int pool_mode, bool aligned) {
  ROIAlignBackwardCUDAKernelLauncher(
      grad_output, rois, argmax_y, argmax_x, grad_input, aligned_height,
      aligned_width, spatial_scale, sampling_ratio, pool_mode, aligned);
}

// Prototypes of the device-dispatched RoI Align entry points; defined outside
// this part of the file.
void roi_align_forward_impl(Tensor input, Tensor rois, Tensor output,
                            Tensor argmax_y, Tensor argmax_x,
                            int aligned_height, int aligned_width,
                            float spatial_scale, int sampling_ratio,
                            int pool_mode, bool aligned);

void roi_align_backward_impl(Tensor grad_output, Tensor rois, Tensor argmax_y,
                             Tensor argmax_x, Tensor grad_input,
                             int aligned_height, int aligned_width,
                             float spatial_scale, int sampling_ratio,
                             int pool_mode, bool aligned);

// Pair each impl entry point with its CUDA wrapper under the CUDA key.
// NOTE(review): exact effect depends on REGISTER_DEVICE_IMPL (defined
// elsewhere).
REGISTER_DEVICE_IMPL(roi_align_forward_impl, CUDA, roi_align_forward_cuda);
REGISTER_DEVICE_IMPL(roi_align_backward_impl, CUDA, roi_align_backward_cuda);

// Forward declarations of the rotated RoI Align kernel launchers; defined
// outside this part of the file. Unlike the host wrappers, they take the
// tensor extents (channels, height, width, num_rois) as explicit ints.
void ROIAlignRotatedForwardCUDAKernelLauncher(
    const at::Tensor features, const at::Tensor rois, const float spatial_scale,
    const int sample_num, const bool aligned, const bool clockwise,
    const int channels, const int height, const int width, const int num_rois,
    const int pooled_height, const int pooled_width, at::Tensor output);

void ROIAlignRotatedBackwardCUDAKernelLauncher(
    const at::Tensor top_grad, const at::Tensor rois, const float spatial_scale,
    const int sample_num, const bool aligned, const bool clockwise,
    const int channels, const int height, const int width, const int num_rois,
    const int pooled_height, const int pooled_width, at::Tensor bottom_grad);

// CUDA entry point for the rotated RoI Align forward pass.
//
// Reads the RoI count and per-RoI width from `rois`, rejects any width other
// than 6, reads dims 1..3 of `features`, and hands everything to
// ROIAlignRotatedForwardCUDAKernelLauncher. `sample_ratio` is passed in the
// launcher's `sample_num` slot.
void roi_align_rotated_forward_cuda(Tensor features, Tensor rois, Tensor output,
                                    int aligned_height, int aligned_width,
                                    float spatial_scale, int sample_ratio,
                                    bool aligned, bool clockwise) {
  const int roi_count = rois.size(0);
  const int roi_row_len = rois.size(1);

  // Every RoI row has to hold exactly six values.
  if (roi_row_len != 6) {
    AT_ERROR("wrong roi size");
  }

  // Extents forwarded as the launcher's channels / height / width arguments.
  const int feat_channels = features.size(1);
  const int feat_height = features.size(2);
  const int feat_width = features.size(3);

  ROIAlignRotatedForwardCUDAKernelLauncher(
      features, rois, spatial_scale, sample_ratio, aligned, clockwise,
      feat_channels, feat_height, feat_width, roi_count, aligned_height,
      aligned_width, output);
}

// CUDA entry point for the rotated RoI Align backward pass.
//
// Reads the RoI count and per-RoI width from `rois`, rejects any width other
// than 6, reads dims 1..3 of `bottom_grad` (the gradient buffer handed to the
// launcher as its last argument), and calls
// ROIAlignRotatedBackwardCUDAKernelLauncher. `sample_ratio` is passed in the
// launcher's `sample_num` slot.
void roi_align_rotated_backward_cuda(Tensor top_grad, Tensor rois,
                                     Tensor bottom_grad, int aligned_height,
                                     int aligned_width, float spatial_scale,
                                     int sample_ratio, bool aligned,
                                     bool clockwise) {
  const int roi_count = rois.size(0);
  const int roi_row_len = rois.size(1);

  // Every RoI row has to hold exactly six values.
  if (roi_row_len != 6) {
    AT_ERROR("wrong roi size");
  }

  // Extents forwarded as the launcher's channels / height / width arguments.
  const int grad_channels = bottom_grad.size(1);
  const int grad_height = bottom_grad.size(2);
  const int grad_width = bottom_grad.size(3);

  ROIAlignRotatedBackwardCUDAKernelLauncher(
      top_grad, rois, spatial_scale, sample_ratio, aligned, clockwise,
      grad_channels, grad_height, grad_width, roi_count, aligned_height,
      aligned_width, bottom_grad);
}

// Prototypes of the device-dispatched rotated RoI Align entry points; defined
// outside this part of the file.
void roi_align_rotated_forward_impl(Tensor features, Tensor rois, Tensor output,
                                    int aligned_height, int aligned_width,
                                    float spatial_scale, int sample_ratio,
                                    bool aligned, bool clockwise);

void roi_align_rotated_backward_impl(Tensor top_grad, Tensor rois,
                                     Tensor bottom_grad, int aligned_height,
                                     int aligned_width, float spatial_scale,
                                     int sample_ratio, bool aligned,
                                     bool clockwise);
// Pair each impl entry point with its CUDA wrapper under the CUDA key.
// NOTE(review): exact effect depends on REGISTER_DEVICE_IMPL (defined
// elsewhere).
REGISTER_DEVICE_IMPL(roi_align_rotated_forward_impl, CUDA,
                     roi_align_rotated_forward_cuda);
REGISTER_DEVICE_IMPL(roi_align_rotated_backward_impl, CUDA,
                     roi_align_rotated_backward_cuda);

// Forward declarations of the RiRoI Align Rotated kernel launchers; defined
// outside this part of the file. They take the tensor extents and
// num_orientations as explicit ints.
void RiROIAlignRotatedForwardCUDAKernelLauncher(
    const at::Tensor features, const at::Tensor rois, const float spatial_scale,
    const int num_samples, const bool clockwise, const int channels,
    const int height, const int width, const int num_rois,
    const int pooled_height, const int pooled_width, const int num_orientations,
    at::Tensor output);

void RiROIAlignRotatedBackwardCUDAKernelLauncher(
    const at::Tensor top_grad, const at::Tensor rois, const float spatial_scale,
    const int num_samples, const bool clockwise, const int channels,
    const int height, const int width, const int num_rois,
    const int pooled_height, const int pooled_width, const int num_orientations,
    at::Tensor bottom_grad);

// CUDA entry point for the RiRoI Align Rotated forward pass.
//
// Validates the inputs, derives the per-orientation channel count from dim 1
// of `features`, and calls RiROIAlignRotatedForwardCUDAKernelLauncher.
//
// Raises (via AT_ERROR / CHECK_CONTIGUOUS) when:
//   - rois.size(1) is not 6,
//   - `features` or `rois` fails CHECK_CONTIGUOUS,
//   - num_orientations is not positive.
void riroi_align_rotated_forward_cuda(Tensor features, Tensor rois,
                                      Tensor output, int pooled_height,
                                      int pooled_width, float spatial_scale,
                                      int num_samples, int num_orientations,
                                      bool clockwise) {
  // Number of ROIs
  int num_rois = rois.size(0);
  int size_rois = rois.size(1);
  if (size_rois != 6) {
    AT_ERROR("wrong roi size");
  }
  CHECK_CONTIGUOUS(features);
  CHECK_CONTIGUOUS(rois);
  // Guard the division below: a zero divisor is undefined behaviour (usually
  // a fatal SIGFPE) and a negative one yields a meaningless channel count.
  if (num_orientations <= 0) {
    AT_ERROR("num_orientations must be positive");
  }
  // Dim 1 of `features` is split evenly across the orientations.
  int num_channels = features.size(1) / num_orientations;
  int data_height = features.size(2);
  int data_width = features.size(3);
  RiROIAlignRotatedForwardCUDAKernelLauncher(
      features, rois, spatial_scale, num_samples, clockwise, num_channels,
      data_height, data_width, num_rois, pooled_height, pooled_width,
      num_orientations, output);
}

// CUDA entry point for the RiRoI Align Rotated backward pass.
//
// Validates the inputs, derives the per-orientation channel count from dim 1
// of `bottom_grad`, and calls RiROIAlignRotatedBackwardCUDAKernelLauncher.
//
// Raises (via AT_ERROR / CHECK_CONTIGUOUS) when:
//   - rois.size(1) is not 6,
//   - `top_grad` or `rois` fails CHECK_CONTIGUOUS,
//   - num_orientations is not positive.
void riroi_align_rotated_backward_cuda(Tensor top_grad, Tensor rois,
                                       Tensor bottom_grad, int pooled_height,
                                       int pooled_width, float spatial_scale,
                                       int num_samples, int num_orientations,
                                       bool clockwise) {
  // Number of ROIs
  int num_rois = rois.size(0);
  int size_rois = rois.size(1);
  if (size_rois != 6) {
    AT_ERROR("wrong roi size");
  }
  CHECK_CONTIGUOUS(top_grad);
  CHECK_CONTIGUOUS(rois);
  // Guard the division below: a zero divisor is undefined behaviour (usually
  // a fatal SIGFPE) and a negative one yields a meaningless channel count.
  if (num_orientations <= 0) {
    AT_ERROR("num_orientations must be positive");
  }
  // Dim 1 of `bottom_grad` is split evenly across the orientations.
  int num_channels = bottom_grad.size(1) / num_orientations;
  int data_height = bottom_grad.size(2);
  int data_width = bottom_grad.size(3);
  RiROIAlignRotatedBackwardCUDAKernelLauncher(
      top_grad, rois, spatial_scale, num_samples, clockwise, num_channels,
      data_height, data_width, num_rois, pooled_height, pooled_width,
      num_orientations, bottom_grad);
}

// Prototypes of the device-dispatched RiRoI Align Rotated entry points;
// defined outside this part of the file.
void riroi_align_rotated_forward_impl(Tensor features, Tensor rois,
                                      Tensor output, int pooled_height,
                                      int pooled_width, float spatial_scale,
                                      int num_samples, int num_orientations,
                                      bool clockwise);

void riroi_align_rotated_backward_impl(Tensor top_grad, Tensor rois,
                                       Tensor bottom_grad, int pooled_height,
                                       int pooled_width, float spatial_scale,
                                       int num_samples, int num_orientations,
                                       bool clockwise);

// Pair each impl entry point with its CUDA wrapper under the CUDA key.
// NOTE(review): exact effect depends on REGISTER_DEVICE_IMPL (defined
// elsewhere).
REGISTER_DEVICE_IMPL(riroi_align_rotated_forward_impl, CUDA,
                     riroi_align_rotated_forward_cuda);
REGISTER_DEVICE_IMPL(riroi_align_rotated_backward_impl, CUDA,
                     riroi_align_rotated_backward_cuda);

// Forward declarations of the RoI-aware 3D pooling kernel launchers (forward
// and backward); defined outside this part of the file.
void RoiawarePool3dForwardCUDAKernelLauncher(
    int boxes_num, int pts_num, int channels, int max_pts_each_voxel, int out_x,
    int out_y, int out_z, const Tensor rois, const Tensor pts,
    const Tensor pts_feature, Tensor argmax, Tensor pts_idx_of_voxels,
    Tensor pooled_features, int pool_method);

void RoiawarePool3dBackwardCUDAKernelLauncher(
    int boxes_num, int out_x, int out_y, int out_z, int channels,
    int max_pts_each_voxel, const Tensor pts_idx_of_voxels, const Tensor argmax,
    const Tensor grad_out, Tensor grad_in, int pool_method);

// CUDA entry point for the RoI-aware 3D pooling forward pass: forwards every
// argument unchanged, in the same order, to
// RoiawarePool3dForwardCUDAKernelLauncher.
void roiaware_pool3d_forward_cuda(int boxes_num, int pts_num, int channels,
                                  int max_pts_each_voxel, int out_x, int out_y,
                                  int out_z, const Tensor rois,
                                  const Tensor pts, const Tensor pts_feature,
                                  Tensor argmax, Tensor pts_idx_of_voxels,
                                  Tensor pooled_features, int pool_method) {
  RoiawarePool3dForwardCUDAKernelLauncher(
      boxes_num, pts_num, channels, max_pts_each_voxel, out_x, out_y, out_z,
      rois, pts, pts_feature, argmax, pts_idx_of_voxels, pooled_features,
      pool_method);
}

// CUDA entry point for the RoI-aware 3D pooling backward pass: forwards every
// argument unchanged, in the same order, to
// RoiawarePool3dBackwardCUDAKernelLauncher.
void roiaware_pool3d_backward_cuda(int boxes_num, int out_x, int out_y,
                                   int out_z, int channels,
                                   int max_pts_each_voxel,
                                   const Tensor pts_idx_of_voxels,
                                   const Tensor argmax, const Tensor grad_out,
                                   Tensor grad_in, int pool_method) {
  RoiawarePool3dBackwardCUDAKernelLauncher(
      boxes_num, out_x, out_y, out_z, channels, max_pts_each_voxel,
      pts_idx_of_voxels, argmax, grad_out, grad_in, pool_method);
}

// Prototypes of the device-dispatched RoI-aware 3D pooling entry points;
// defined outside this part of the file.
void roiaware_pool3d_forward_impl(int boxes_num, int pts_num, int channels,
                                  int max_pts_each_voxel, int out_x, int out_y,
                                  int out_z, const Tensor rois,
                                  const Tensor pts, const Tensor pts_feature,
                                  Tensor argmax, Tensor pts_idx_of_voxels,
                                  Tensor pooled_features, int pool_method);

void roiaware_pool3d_backward_impl(int boxes_num, int out_x, int out_y,
                                   int out_z, int channels,
                                   int max_pts_each_voxel,
                                   const Tensor pts_idx_of_voxels,
                                   const Tensor argmax, const Tensor grad_out,
                                   Tensor grad_in, int pool_method);

// Pair each impl entry point with its CUDA wrapper under the CUDA key.
// NOTE(review): exact effect depends on REGISTER_DEVICE_IMPL (defined
// elsewhere).
REGISTER_DEVICE_IMPL(roiaware_pool3d_forward_impl, CUDA,
                     roiaware_pool3d_forward_cuda);
REGISTER_DEVICE_IMPL(roiaware_pool3d_backward_impl, CUDA,
                     roiaware_pool3d_backward_cuda);

// Forward declaration of the RoI point 3D pooling kernel launcher; defined
// outside this part of the file.
void RoIPointPool3dForwardCUDAKernelLauncher(
    int batch_size, int pts_num, int boxes_num, int feature_in_len,
    int sampled_pts_num, const Tensor xyz, const Tensor boxes3d,
    const Tensor pts_feature, Tensor pooled_features, Tensor pooled_empty_flag);

// CUDA entry point for the RoI point 3D pooling forward pass: forwards every
// argument unchanged, in the same order, to
// RoIPointPool3dForwardCUDAKernelLauncher.
void roipoint_pool3d_forward_cuda(int batch_size, int pts_num, int boxes_num,
                                  int feature_in_len, int sampled_pts_num,
                                  const Tensor xyz, const Tensor boxes3d,
                                  const Tensor pts_feature,
                                  Tensor pooled_features,
                                  Tensor pooled_empty_flag) {
  RoIPointPool3dForwardCUDAKernelLauncher(
      batch_size, pts_num, boxes_num, feature_in_len, sampled_pts_num, xyz,
      boxes3d, pts_feature, pooled_features, pooled_empty_flag);
}

// Prototype of the device-dispatched RoI point 3D pooling entry point; defined
// outside this part of the file.
void roipoint_pool3d_forward_impl(int batch_size, int pts_num, int boxes_num,
                                  int feature_in_len, int sampled_pts_num,
                                  const Tensor xyz, const Tensor boxes3d,
                                  const Tensor pts_feature,
                                  Tensor pooled_features,
                                  Tensor pooled_empty_flag);
// Pair the impl entry point with its CUDA wrapper under the CUDA key.
// NOTE(review): exact effect depends on REGISTER_DEVICE_IMPL (defined
// elsewhere).
REGISTER_DEVICE_IMPL(roipoint_pool3d_forward_impl, CUDA,
                     roipoint_pool3d_forward_cuda);

// Forward declarations of the RoI Pool kernel launchers (forward and
// backward); defined outside this part of the file.
void ROIPoolForwardCUDAKernelLauncher(Tensor input, Tensor rois, Tensor output,
                                      Tensor argmax, int pooled_height,
                                      int pooled_width, float spatial_scale);

void ROIPoolBackwardCUDAKernelLauncher(Tensor grad_output, Tensor rois,
                                       Tensor argmax, Tensor grad_input,
                                       int pooled_height, int pooled_width,
                                       float spatial_scale);

// CUDA entry point for the RoI Pool forward pass: forwards every argument
// unchanged, in the same order, to ROIPoolForwardCUDAKernelLauncher.
void roi_pool_forward_cuda(Tensor input, Tensor rois, Tensor output,
                           Tensor argmax, int pooled_height, int pooled_width,
                           float spatial_scale) {
  ROIPoolForwardCUDAKernelLauncher(input, rois, output, argmax, pooled_height,
                                   pooled_width, spatial_scale);
}

// CUDA entry point for the RoI Pool backward pass: forwards every argument
// unchanged, in the same order, to ROIPoolBackwardCUDAKernelLauncher.
void roi_pool_backward_cuda(Tensor grad_output, Tensor rois, Tensor argmax,
                            Tensor grad_input, int pooled_height,
                            int pooled_width, float spatial_scale) {
  ROIPoolBackwardCUDAKernelLauncher(grad_output, rois, argmax, grad_input,
                                    pooled_height, pooled_width, spatial_scale);
}

// Prototypes of the device-dispatched RoI Pool entry points; defined outside
// this part of the file.
void roi_pool_forward_impl(Tensor input, Tensor rois, Tensor output,
                           Tensor argmax, int pooled_height, int pooled_width,
                           float spatial_scale);
void roi_pool_backward_impl(Tensor grad_output, Tensor rois, Tensor argmax,
                            Tensor grad_input, int pooled_height,
                            int pooled_width, float spatial_scale);
// Pair each impl entry point with its CUDA wrapper under the CUDA key.
// NOTE(review): exact effect depends on REGISTER_DEVICE_IMPL (defined
// elsewhere).
REGISTER_DEVICE_IMPL(roi_pool_forward_impl, CUDA, roi_pool_forward_cuda);
REGISTER_DEVICE_IMPL(roi_pool_backward_impl, CUDA, roi_pool_backward_cuda);

// Reduction selector taken by the dynamic point-to-voxel functions below as
// their `reduce_type` argument.
// NOTE(review): the same type may be declared in other translation units;
// keep the enumerator values in sync if it is ever changed.
typedef enum { SUM = 0, MEAN = 1, MAX = 2 } reduce_t;

// Forward declarations of the dynamic point-to-voxel kernel launchers; defined
// outside this part of the file. The forward launcher returns a vector of
// tensors; the backward launcher takes grad_feats by non-const reference.
std::vector<at::Tensor> DynamicPointToVoxelForwardCUDAKernelLauncher(
    const at::Tensor& feats, const at::Tensor& coors,
    const reduce_t reduce_type);

void DynamicPointToVoxelBackwardCUDAKernelLauncher(
    at::Tensor& grad_feats, const at::Tensor& grad_reduced_feats,
    const at::Tensor& feats, const at::Tensor& reduced_feats,
    const at::Tensor& coors_map, const at::Tensor& reduce_count,
    const reduce_t reduce_type);

// CUDA entry point for the dynamic point-to-voxel forward pass: forwards all
// arguments unchanged to DynamicPointToVoxelForwardCUDAKernelLauncher and
// returns the vector of tensors it produces.
std::vector<torch::Tensor> dynamic_point_to_voxel_forward_cuda(
    const torch::Tensor& feats, const torch::Tensor& coors,
    const reduce_t reduce_type) {
  return DynamicPointToVoxelForwardCUDAKernelLauncher(feats, coors,
                                                      reduce_type);
}

// CUDA entry point for the dynamic point-to-voxel backward pass: forwards all
// arguments unchanged to DynamicPointToVoxelBackwardCUDAKernelLauncher.
// `coors_idx` is passed in the launcher's `coors_map` slot; `grad_feats` is
// passed by non-const reference.
void dynamic_point_to_voxel_backward_cuda(
    torch::Tensor& grad_feats, const torch::Tensor& grad_reduced_feats,
    const torch::Tensor& feats, const torch::Tensor& reduced_feats,
    const torch::Tensor& coors_idx, const torch::Tensor& reduce_count,
    const reduce_t reduce_type) {
  DynamicPointToVoxelBackwardCUDAKernelLauncher(grad_feats, grad_reduced_feats,
                                                feats, reduced_feats, coors_idx,
                                                reduce_count, reduce_type);
}

// Prototypes of the device-dispatched dynamic point-to-voxel entry points;
// defined outside this part of the file.
std::vector<torch::Tensor> dynamic_point_to_voxel_forward_impl(
    const torch::Tensor& feats, const torch::Tensor& coors,
    const reduce_t reduce_type);

void dynamic_point_to_voxel_backward_impl(
    torch::Tensor& grad_feats, const torch::Tensor& grad_reduced_feats,
    const torch::Tensor& feats, const torch::Tensor& reduced_feats,
    const torch::Tensor& coors_idx, const torch::Tensor& reduce_count,
    const reduce_t reduce_type);

// Pair each impl entry point with its CUDA wrapper under the CUDA key.
// NOTE(review): exact effect depends on REGISTER_DEVICE_IMPL (defined
// elsewhere).
REGISTER_DEVICE_IMPL(dynamic_point_to_voxel_forward_impl, CUDA,
                     dynamic_point_to_voxel_forward_cuda);
REGISTER_DEVICE_IMPL(dynamic_point_to_voxel_backward_impl, CUDA,
                     dynamic_point_to_voxel_backward_cuda);

// Forward declarations of the five SyncBN kernel launchers (forward mean,
// forward var, forward output, backward param, backward data); defined
// outside this part of the file.
void SyncBNForwardMeanCUDAKernelLauncher(const Tensor input, Tensor mean);

void SyncBNForwardVarCUDAKernelLauncher(const Tensor input, const Tensor mean,
                                        Tensor var);

void SyncBNForwardOutputCUDAKernelLauncher(
    const Tensor input, const Tensor mean, const Tensor var,
    Tensor running_mean, Tensor running_var, const Tensor weight,
    const Tensor bias, Tensor norm, Tensor std, Tensor output, float eps,
    float momentum, int group_size);

void SyncBNBackwardParamCUDAKernelLauncher(const Tensor grad_output,
                                           const Tensor norm,
                                           Tensor grad_weight,
                                           Tensor grad_bias);

void SyncBNBackwardDataCUDAKernelLauncher(const Tensor grad_output,
                                          const Tensor weight,
                                          const Tensor grad_weight,
                                          const Tensor grad_bias,
                                          const Tensor norm, const Tensor std,
                                          Tensor grad_input);

// CUDA entry point for the SyncBN forward-mean step: forwards both arguments
// unchanged to SyncBNForwardMeanCUDAKernelLauncher.
void sync_bn_forward_mean_cuda(const Tensor input, Tensor mean) {
  SyncBNForwardMeanCUDAKernelLauncher(input, mean);
}

// CUDA entry point for the SyncBN forward-variance step: forwards all
// arguments unchanged to SyncBNForwardVarCUDAKernelLauncher.
void sync_bn_forward_var_cuda(const Tensor input, const Tensor mean,
                              Tensor var) {
  SyncBNForwardVarCUDAKernelLauncher(input, mean, var);
}

// CUDA entry point for the SyncBN forward-output step: forwards every
// argument unchanged, in the same order, to
// SyncBNForwardOutputCUDAKernelLauncher.
void sync_bn_forward_output_cuda(const Tensor input, const Tensor mean,
                                 const Tensor var, Tensor running_mean,
                                 Tensor running_var, const Tensor weight,
                                 const Tensor bias, Tensor norm, Tensor std,
                                 Tensor output, float eps, float momentum,
                                 int group_size) {
  SyncBNForwardOutputCUDAKernelLauncher(input, mean, var, running_mean,
                                        running_var, weight, bias, norm, std,
                                        output, eps, momentum, group_size);
}

// CUDA entry point for the SyncBN backward-parameter step: forwards all
// arguments unchanged to SyncBNBackwardParamCUDAKernelLauncher.
void sync_bn_backward_param_cuda(const Tensor grad_output, const Tensor norm,
                                 Tensor grad_weight, Tensor grad_bias) {
  SyncBNBackwardParamCUDAKernelLauncher(grad_output, norm, grad_weight,
                                        grad_bias);
}

// CUDA entry point for the SyncBN backward-data step: forwards every argument
// unchanged, in the same order, to SyncBNBackwardDataCUDAKernelLauncher.
void sync_bn_backward_data_cuda(const Tensor grad_output, const Tensor weight,
                                const Tensor grad_weight,
                                const Tensor grad_bias, const Tensor norm,
                                const Tensor std, Tensor grad_input) {
  SyncBNBackwardDataCUDAKernelLauncher(grad_output, weight, grad_weight,
                                       grad_bias, norm, std, grad_input);
}

// Prototypes of the five device-dispatched SyncBN entry points; defined
// outside this part of the file.
void sync_bn_forward_mean_impl(const Tensor input, Tensor mean);

void sync_bn_forward_var_impl(const Tensor input, const Tensor mean,
                              Tensor var);

void sync_bn_forward_output_impl(const Tensor input, const Tensor mean,
                                 const Tensor var, Tensor running_mean,
                                 Tensor running_var, const Tensor weight,
                                 const Tensor bias, Tensor norm, Tensor std,
                                 Tensor output, float eps, float momentum,
                                 int group_size);

void sync_bn_backward_param_impl(const Tensor grad_output, const Tensor norm,
                                 Tensor grad_weight, Tensor grad_bias);

void sync_bn_backward_data_impl(const Tensor grad_output, const Tensor weight,
                                const Tensor grad_weight,
                                const Tensor grad_bias, const Tensor norm,
                                const Tensor std, Tensor grad_input);

// Pair each impl entry point with its CUDA wrapper under the CUDA key.
// NOTE(review): exact effect depends on REGISTER_DEVICE_IMPL (defined
// elsewhere).
REGISTER_DEVICE_IMPL(sync_bn_forward_mean_impl, CUDA,
                     sync_bn_forward_mean_cuda);
REGISTER_DEVICE_IMPL(sync_bn_forward_var_impl, CUDA, sync_bn_forward_var_cuda);
REGISTER_DEVICE_IMPL(sync_bn_forward_output_impl, CUDA,
                     sync_bn_forward_output_cuda);
REGISTER_DEVICE_IMPL(sync_bn_backward_param_impl, CUDA,
                     sync_bn_backward_param_cuda);
REGISTER_DEVICE_IMPL(sync_bn_backward_data_impl, CUDA,
                     sync_bn_backward_data_cuda);

// Forward declarations of the three-interpolate kernel launchers; defined
// outside this part of the file. Note the differing order of the last two
// ints: (b, c, m, n) for forward versus (b, c, n, m) for backward.
void ThreeInterpolateForwardCUDAKernelLauncher(int b, int c, int m, int n,
                                               const Tensor points,
                                               const Tensor idx,
                                               const Tensor weight, Tensor out);

void ThreeInterpolateBackwardCUDAKernelLauncher(int b, int c, int n, int m,
                                                const Tensor grad_out,
                                                const Tensor idx,
                                                const Tensor weight,
                                                Tensor grad_points);

// CUDA wrapper for the three_interpolate forward pass: passes every argument
// through, unchanged and in the same order, to
// ThreeInterpolateForwardCUDAKernelLauncher.
void three_interpolate_forward_cuda(int b, int c, int m, int n,
                                    const Tensor points, const Tensor idx,
                                    const Tensor weight, Tensor out) {
  ThreeInterpolateForwardCUDAKernelLauncher(b, c, m, n, points, idx, weight,
                                            out);
};

// CUDA wrapper for the three_interpolate backward pass: passes every argument
// through, unchanged and in the same order, to
// ThreeInterpolateBackwardCUDAKernelLauncher. The size arguments are ordered
// (n, m), unlike the forward wrapper's (m, n).
void three_interpolate_backward_cuda(int b, int c, int n, int m,
                                     const Tensor grad_out, const Tensor idx,
                                     const Tensor weight, Tensor grad_points) {
  ThreeInterpolateBackwardCUDAKernelLauncher(b, c, n, m, grad_out, idx, weight,
                                             grad_points);
};

// Forward declarations of the three_interpolate `*_impl` entry points; their
// signatures match the `*_cuda` wrappers registered below.
void three_interpolate_forward_impl(int b, int c, int m, int n,
                                    const Tensor points, const Tensor idx,
                                    const Tensor weight, Tensor out);

void three_interpolate_backward_impl(int b, int c, int n, int m,
                                     const Tensor grad_out, const Tensor idx,
                                     const Tensor weight, Tensor grad_points);
// Pair each `*_impl` entry point with its CUDA wrapper.
REGISTER_DEVICE_IMPL(three_interpolate_forward_impl, CUDA,
                     three_interpolate_forward_cuda);
REGISTER_DEVICE_IMPL(three_interpolate_backward_impl, CUDA,
                     three_interpolate_backward_cuda);

// Kernel launcher for three_nn forward. Only declared here; it is called by
// three_nn_forward_cuda.
void ThreeNNForwardCUDAKernelLauncher(int b, int n, int m, const Tensor unknown,
                                      const Tensor known, Tensor dist2,
                                      Tensor idx);

// CUDA wrapper for three_nn forward: passes every argument through, unchanged
// and in the same order, to ThreeNNForwardCUDAKernelLauncher.
void three_nn_forward_cuda(int b, int n, int m, const Tensor unknown,
                           const Tensor known, Tensor dist2, Tensor idx) {
  ThreeNNForwardCUDAKernelLauncher(b, n, m, unknown, known, dist2, idx);
};

// Forward declaration of the three_nn `*_impl` entry point, followed by the
// registration of its CUDA wrapper.
void three_nn_forward_impl(int b, int n, int m, const Tensor unknown,
                           const Tensor known, Tensor dist2, Tensor idx);
REGISTER_DEVICE_IMPL(three_nn_forward_impl, CUDA, three_nn_forward_cuda);

// Kernel launchers for tin_shift. Only declared here; they are called by the
// `tin_shift_*_cuda` wrappers that follow.
void TINShiftForwardCUDAKernelLauncher(Tensor input, Tensor shift,
                                       Tensor output);

void TINShiftBackwardCUDAKernelLauncher(Tensor grad_output, Tensor shift,
                                        Tensor grad_input);

// CUDA wrapper for tin_shift forward: passes (input, shift, output) straight
// to TINShiftForwardCUDAKernelLauncher.
void tin_shift_forward_cuda(Tensor input, Tensor shift, Tensor output) {
  TINShiftForwardCUDAKernelLauncher(input, shift, output);
}

// CUDA wrapper for tin_shift backward: passes (grad_output, shift, grad_input)
// straight to TINShiftBackwardCUDAKernelLauncher.
void tin_shift_backward_cuda(Tensor grad_output, Tensor shift,
                             Tensor grad_input) {
  TINShiftBackwardCUDAKernelLauncher(grad_output, shift, grad_input);
}

// Forward declarations of the tin_shift `*_impl` entry points, followed by the
// registration of their CUDA wrappers.
void tin_shift_forward_impl(Tensor input, Tensor shift, Tensor output);
void tin_shift_backward_impl(Tensor grad_output, Tensor shift,
                             Tensor grad_input);
REGISTER_DEVICE_IMPL(tin_shift_forward_impl, CUDA, tin_shift_forward_cuda);
REGISTER_DEVICE_IMPL(tin_shift_backward_impl, CUDA, tin_shift_backward_cuda);

// upfirdn2d: unlike the neighbouring ops there is no `*_cuda` wrapper here;
// `upfirdn2d_op` (declared only, defined elsewhere) is registered directly.
torch::Tensor upfirdn2d_op(const torch::Tensor& input,
                           const torch::Tensor& kernel, int up_x, int up_y,
                           int down_x, int down_y, int pad_x0, int pad_x1,
                           int pad_y0, int pad_y1);

// `*_impl` entry point with the same signature as upfirdn2d_op.
torch::Tensor upfirdn2d_op_impl(const torch::Tensor& input,
                                const torch::Tensor& kernel, int up_x, int up_y,
                                int down_x, int down_y, int pad_x0, int pad_x1,
                                int pad_y0, int pad_y1);
REGISTER_DEVICE_IMPL(upfirdn2d_op_impl, CUDA, upfirdn2d_op);

// Kernel launchers for voxelization. Only declared here. Both declare a
// default of NDim = 3; the `*_cuda` wrappers below always pass NDim
// explicitly.
int HardVoxelizeForwardCUDAKernelLauncher(
    const at::Tensor& points, at::Tensor& voxels, at::Tensor& coors,
    at::Tensor& num_points_per_voxel, const std::vector<float> voxel_size,
    const std::vector<float> coors_range, const int max_points,
    const int max_voxels, const int NDim = 3);

void DynamicVoxelizeForwardCUDAKernelLauncher(
    const at::Tensor& points, at::Tensor& coors,
    const std::vector<float> voxel_size, const std::vector<float> coors_range,
    const int NDim = 3);

// CUDA wrapper for hard voxelization: passes every argument through,
// unchanged and in the same order, to HardVoxelizeForwardCUDAKernelLauncher
// and returns the launcher's int result as-is.
int hard_voxelize_forward_cuda(const at::Tensor& points, at::Tensor& voxels,
                               at::Tensor& coors,
                               at::Tensor& num_points_per_voxel,
                               const std::vector<float> voxel_size,
                               const std::vector<float> coors_range,
                               const int max_points, const int max_voxels,
                               const int NDim) {
  return HardVoxelizeForwardCUDAKernelLauncher(
      points, voxels, coors, num_points_per_voxel, voxel_size, coors_range,
      max_points, max_voxels, NDim);
};

// CUDA wrapper for dynamic voxelization: passes every argument through,
// unchanged and in the same order, to
// DynamicVoxelizeForwardCUDAKernelLauncher.
void dynamic_voxelize_forward_cuda(const at::Tensor& points, at::Tensor& coors,
                                   const std::vector<float> voxel_size,
                                   const std::vector<float> coors_range,
                                   const int NDim) {
  DynamicVoxelizeForwardCUDAKernelLauncher(points, coors, voxel_size,
                                           coors_range, NDim);
};

// Forward declarations of the voxelization `*_impl` entry points; their
// signatures match the `*_cuda` wrappers registered below.
int hard_voxelize_forward_impl(const at::Tensor& points, at::Tensor& voxels,
                               at::Tensor& coors,
                               at::Tensor& num_points_per_voxel,
                               const std::vector<float> voxel_size,
                               const std::vector<float> coors_range,
                               const int max_points, const int max_voxels,
                               const int NDim);

void dynamic_voxelize_forward_impl(const at::Tensor& points, at::Tensor& coors,
                                   const std::vector<float> voxel_size,
                                   const std::vector<float> coors_range,
                                   const int NDim);

// Pair each `*_impl` entry point with its CUDA wrapper.
REGISTER_DEVICE_IMPL(hard_voxelize_forward_impl, CUDA,
                     hard_voxelize_forward_cuda);
REGISTER_DEVICE_IMPL(dynamic_voxelize_forward_impl, CUDA,
                     dynamic_voxelize_forward_cuda);

// Kernel launchers for rotated_feature_align. Only declared here; they are
// called by the `rotated_feature_align_*_cuda` wrappers that follow.
void RotatedFeatureAlignForwardCUDAKernelLauncher(const Tensor features,
                                                  const Tensor best_bboxes,
                                                  const float spatial_scale,
                                                  const int points,
                                                  Tensor output);

void RotatedFeatureAlignBackwardCUDAKernelLauncher(const Tensor top_grad,
                                                   const Tensor best_bboxes,
                                                   const float spatial_scale,
                                                   const int points,
                                                   Tensor bottom_grad);

// CUDA wrapper for rotated_feature_align forward: passes every argument
// through, unchanged and in the same order, to
// RotatedFeatureAlignForwardCUDAKernelLauncher.
void rotated_feature_align_forward_cuda(const Tensor features,
                                        const Tensor best_bboxes,
                                        const float spatial_scale,
                                        const int points, Tensor output) {
  RotatedFeatureAlignForwardCUDAKernelLauncher(features, best_bboxes,
                                               spatial_scale, points, output);
};

// CUDA wrapper for rotated_feature_align backward: passes every argument
// through, unchanged and in the same order, to
// RotatedFeatureAlignBackwardCUDAKernelLauncher.
void rotated_feature_align_backward_cuda(const Tensor top_grad,
                                         const Tensor best_bboxes,
                                         const float spatial_scale,
                                         const int points, Tensor bottom_grad) {
  RotatedFeatureAlignBackwardCUDAKernelLauncher(
      top_grad, best_bboxes, spatial_scale, points, bottom_grad);
};

// Forward declarations of the rotated_feature_align `*_impl` entry points;
// their signatures match the `*_cuda` wrappers registered below.
void rotated_feature_align_forward_impl(const Tensor features,
                                        const Tensor best_bboxes,
                                        const float spatial_scale,
                                        const int points, Tensor output);

void rotated_feature_align_backward_impl(const Tensor top_grad,
                                         const Tensor best_bboxes,
                                         const float spatial_scale,
                                         const int points, Tensor bottom_grad);

// Pair each `*_impl` entry point with its CUDA wrapper.
REGISTER_DEVICE_IMPL(rotated_feature_align_forward_impl, CUDA,
                     rotated_feature_align_forward_cuda);
REGISTER_DEVICE_IMPL(rotated_feature_align_backward_impl, CUDA,
                     rotated_feature_align_backward_cuda);

// Kernel launcher for points_in_polygons forward. Only declared here. Its
// parameter order is (points, polygons, rows, cols, output), i.e. `output`
// comes last.
void PointsInPolygonsForwardCUDAKernelLauncher(const at::Tensor points,
                                               const at::Tensor polygons,
                                               const int rows, const int cols,
                                               at::Tensor output);

// CUDA wrapper for points_in_polygons forward. It receives
// (points, polygons, output, rows, cols) and reorders the arguments to the
// launcher's (points, polygons, rows, cols, output) before calling it.
void points_in_polygons_forward_cuda(const Tensor points, const Tensor polygons,
                                     Tensor output, const int rows,
                                     const int cols) {
  PointsInPolygonsForwardCUDAKernelLauncher(points, polygons, rows, cols,
                                            output);
};

// Forward declaration of the points_in_polygons `*_impl` entry point (same
// parameter order as the `*_cuda` wrapper), followed by its CUDA registration.
void points_in_polygons_forward_impl(const Tensor points, const Tensor polygons,
                                     Tensor output, const int rows,
                                     const int cols);

REGISTER_DEVICE_IMPL(points_in_polygons_forward_impl, CUDA,
                     points_in_polygons_forward_cuda);

// Kernel launcher for min_area_polygons. Only declared here; it is called by
// min_area_polygons_cuda.
void MinAreaPolygonsCUDAKernelLauncher(const Tensor pointsets, Tensor polygons);

// CUDA wrapper for min_area_polygons: passes (pointsets, polygons) straight to
// MinAreaPolygonsCUDAKernelLauncher.
void min_area_polygons_cuda(const Tensor pointsets, Tensor polygons) {
  MinAreaPolygonsCUDAKernelLauncher(pointsets, polygons);
}

// Forward declaration of the min_area_polygons `*_impl` entry point, followed
// by the registration of its CUDA wrapper.
void min_area_polygons_impl(const Tensor pointsets, Tensor polygons);

REGISTER_DEVICE_IMPL(min_area_polygons_impl, CUDA, min_area_polygons_cuda);

// Kernel launchers for active_rotated_filter. Only declared here; they are
// called by the `active_rotated_filter_*_cuda` wrappers that follow.
void ActiveRotatedFilterForwardCUDAKernelLauncher(const Tensor input,
                                                  const Tensor indices,
                                                  Tensor output);

void ActiveRotatedFilterBackwardCUDAKernelLauncher(const Tensor grad_out,
                                                   const Tensor indices,
                                                   Tensor grad_in);

// CUDA wrapper for active_rotated_filter forward: passes
// (input, indices, output) straight to
// ActiveRotatedFilterForwardCUDAKernelLauncher.
void active_rotated_filter_forward_cuda(const Tensor input,
                                        const Tensor indices, Tensor output) {
  ActiveRotatedFilterForwardCUDAKernelLauncher(input, indices, output);
};

// CUDA wrapper for active_rotated_filter backward: passes
// (grad_out, indices, grad_in) straight to
// ActiveRotatedFilterBackwardCUDAKernelLauncher.
void active_rotated_filter_backward_cuda(const Tensor grad_out,
                                         const Tensor indices, Tensor grad_in) {
  ActiveRotatedFilterBackwardCUDAKernelLauncher(grad_out, indices, grad_in);
};

// Forward declarations of the active_rotated_filter `*_impl` entry points;
// their signatures match the `*_cuda` wrappers registered below.
void active_rotated_filter_forward_impl(const Tensor input,
                                        const Tensor indices, Tensor output);

void active_rotated_filter_backward_impl(const Tensor grad_out,
                                         const Tensor indices, Tensor grad_in);

// Pair each `*_impl` entry point with its CUDA wrapper.
REGISTER_DEVICE_IMPL(active_rotated_filter_forward_impl, CUDA,
                     active_rotated_filter_forward_cuda);
REGISTER_DEVICE_IMPL(active_rotated_filter_backward_impl, CUDA,
                     active_rotated_filter_backward_cuda);

// Kernel launchers for convex IoU / GIoU. Only declared here; they are called
// by convex_iou_cuda and convex_giou_cuda.
void ConvexIoUCUDAKernelLauncher(const Tensor pointsets, const Tensor polygons,
                                 Tensor ious);

void ConvexGIoUCUDAKernelLauncher(const Tensor pointsets, const Tensor polygons,
                                  Tensor output);

// CUDA wrapper for convex IoU: passes (pointsets, polygons, ious) straight to
// ConvexIoUCUDAKernelLauncher.
void convex_iou_cuda(const Tensor pointsets, const Tensor polygons,
                     Tensor ious) {
  ConvexIoUCUDAKernelLauncher(pointsets, polygons, ious);
}

// CUDA wrapper for convex GIoU: passes (pointsets, polygons, output) straight
// to ConvexGIoUCUDAKernelLauncher.
void convex_giou_cuda(const Tensor pointsets, const Tensor polygons,
                      Tensor output) {
  ConvexGIoUCUDAKernelLauncher(pointsets, polygons, output);
}

// Forward declarations of the convex IoU / GIoU `*_impl` entry points; their
// signatures match the `*_cuda` wrappers registered below.
void convex_iou_impl(const Tensor pointsets, const Tensor polygons,
                     Tensor ious);

void convex_giou_impl(const Tensor pointsets, const Tensor polygons,
                      Tensor output);

// Pair each `*_impl` entry point with its CUDA wrapper.
REGISTER_DEVICE_IMPL(convex_iou_impl, CUDA, convex_iou_cuda);
REGISTER_DEVICE_IMPL(convex_giou_impl, CUDA, convex_giou_cuda);


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/deform_conv.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Device-dispatching entry point for deformable im2col. All arguments are
// handed, unchanged and in the same order, to DISPATCH_DEVICE_IMPL under the
// key `deformable_im2col_impl`. (The macro is defined outside this file,
// presumably in pytorch_device_registry.hpp, and is expected to select the
// implementation registered for the tensors' device.)
// In this file `data_col` is the buffer the callers read after the call, so
// it is treated as the output.
void deformable_im2col_impl(Tensor data_im, Tensor data_offset,
                            const int channels, const int height,
                            const int width, const int ksize_h,
                            const int ksize_w, const int pad_h, const int pad_w,
                            const int stride_h, const int stride_w,
                            const int dilation_h, const int dilation_w,
                            const int parallel_imgs, const int deformable_group,
                            Tensor data_col) {
  DISPATCH_DEVICE_IMPL(deformable_im2col_impl, data_im, data_offset, channels,
                       height, width, ksize_h, ksize_w, pad_h, pad_w, stride_h,
                       stride_w, dilation_h, dilation_w, parallel_imgs,
                       deformable_group, data_col);
}

// Device-dispatching entry point for deformable col2im. All arguments are
// handed, unchanged and in the same order, to DISPATCH_DEVICE_IMPL under the
// key `deformable_col2im_impl`. (The macro is defined outside this file,
// presumably in pytorch_device_registry.hpp.)
// The caller in this file passes a slice of gradInput as `grad_im`, so it is
// presumably the tensor the implementation writes to.
void deformable_col2im_impl(Tensor data_col, Tensor data_offset,
                            const int channels, const int height,
                            const int width, const int ksize_h,
                            const int ksize_w, const int pad_h, const int pad_w,
                            const int stride_h, const int stride_w,
                            const int dilation_h, const int dilation_w,
                            const int parallel_imgs, const int deformable_group,
                            Tensor grad_im) {
  DISPATCH_DEVICE_IMPL(deformable_col2im_impl, data_col, data_offset, channels,
                       height, width, ksize_h, ksize_w, pad_h, pad_w, stride_h,
                       stride_w, dilation_h, dilation_w, parallel_imgs,
                       deformable_group, grad_im);
}

// Device-dispatching entry point for the offset-gradient variant of deformable
// col2im. All arguments are handed, unchanged and in the same order, to
// DISPATCH_DEVICE_IMPL under the key `deformable_col2im_coord_impl`. (The
// macro is defined outside this file, presumably in
// pytorch_device_registry.hpp.)
// The caller in this file passes a slice of gradOffset as `grad_offset`, so
// it is presumably the tensor the implementation writes to.
void deformable_col2im_coord_impl(
    Tensor data_col, Tensor data_im, Tensor data_offset, const int channels,
    const int height, const int width, const int ksize_h, const int ksize_w,
    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
    const int dilation_h, const int dilation_w, const int parallel_imgs,
    const int deformable_group, Tensor grad_offset) {
  DISPATCH_DEVICE_IMPL(deformable_col2im_coord_impl, data_col, data_im,
                       data_offset, channels, height, width, ksize_h, ksize_w,
                       pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w,
                       parallel_imgs, deformable_group, grad_offset);
}

// Validates the tensor shapes and hyper-parameters shared by the deformable
// convolution forward/backward entry points. Throws (via TORCH_CHECK) on the
// first violated constraint; returns normally otherwise.
//
// Args:
//   input: 3D (C, H, W) or 4D (N, C, H, W) input feature map.
//   offset: sampling offsets with the same rank as `input`; must have
//     deformable_group * 2 * kH * kW channels and the output spatial size.
//   gradOutput: optional gradient w.r.t. the output (may be NULL). When given,
//     its channel count and spatial size are validated as well.
//   weight: contiguous 4D kernel (nOutputPlane, nInputPlane / group, kH, kW).
//   kH, kW: kernel height / width.
//   dH, dW: stride along height / width.
//   padH, padW: zero padding along height / width.
//   dilationH, dilationW: dilation along height / width.
//   group: number of convolution groups.
//   deformable_group: number of offset groups.
//
// Note: TORCH_CHECK concatenates its message arguments and does not interpret
// printf-style format specifiers, so every message is built from pieces.
void deform_conv_shape_check(at::Tensor input, at::Tensor offset,
                             at::Tensor *gradOutput, at::Tensor weight, int kH,
                             int kW, int dH, int dW, int padH, int padW,
                             int dilationH, int dilationW, int group,
                             int deformable_group) {
  TORCH_CHECK(weight.ndimension() == 4,
              "4D weight tensor (nOutputPlane,nInputPlane,kH,kW) expected, "
              "but got: ",
              weight.ndimension());

  TORCH_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous");

  TORCH_CHECK(kW > 0 && kH > 0,
              "kernel size should be greater than zero, but got kH: ", kH,
              " kW: ", kW);

  TORCH_CHECK((weight.size(2) == kH && weight.size(3) == kW),
              "kernel size should be consistent with weight, but got kH: ", kH,
              " kW: ", kW, " weight.size(2): ", weight.size(2),
              ", weight.size(3): ", weight.size(3));

  TORCH_CHECK(dW > 0 && dH > 0,
              "stride should be greater than zero, but got dH: ", dH,
              " dW: ", dW);

  TORCH_CHECK(dilationW > 0 && dilationH > 0,
              "dilation should be greater than 0, but got dilationH: ",
              dilationH, " dilationW: ", dilationW);

  // Both values are used as divisors (here and in the callers); reject zero
  // or negative values before they can trigger a division by zero.
  TORCH_CHECK(group > 0 && deformable_group > 0,
              "group and deformable_group should be greater than zero, but "
              "got group: ",
              group, " deformable_group: ", deformable_group);

  int ndim = input.ndimension();
  TORCH_CHECK(ndim == 3 || ndim == 4,
              "3D or 4D input tensor expected but got: ", ndim);

  // Indices of the channel / height / width dimensions; each shifts by one
  // when a leading batch dimension is present.
  const int dimf = (ndim == 4) ? 1 : 0;
  const int dimh = dimf + 1;
  const int dimw = dimf + 2;

  long nInputPlane = weight.size(1) * group;
  long inputHeight = input.size(dimh);
  long inputWidth = input.size(dimw);
  long nOutputPlane = weight.size(0);
  long outputHeight =
      (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1;
  long outputWidth =
      (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1;

  TORCH_CHECK(nInputPlane % deformable_group == 0,
              "input channels must be divisible by deformable group size");

  TORCH_CHECK(outputWidth >= 1 && outputHeight >= 1, "Given input size: (",
              nInputPlane, " x ", inputHeight, " x ", inputWidth,
              "). Calculated output size: (", nOutputPlane, " x ",
              outputHeight, " x ", outputWidth,
              "). Output size is too small");

  // Use dimf (not a hard-coded 1) so that unbatched 3D input is checked
  // against its channel dimension rather than its height.
  TORCH_CHECK(input.size(dimf) == nInputPlane,
              "invalid number of input planes, expected: ", nInputPlane,
              ", but got: ", input.size(dimf));

  TORCH_CHECK((inputHeight >= kH && inputWidth >= kW),
              "input image is smaller than kernel");

  // The offset tensor follows the same (optional batch, C, H, W) layout as
  // the input, so index it with the same dimension indices.
  TORCH_CHECK(
      (offset.size(dimh) == outputHeight && offset.size(dimw) == outputWidth),
      "invalid spatial size of offset, expected height: ", outputHeight,
      " width: ", outputWidth, ", but got height: ", offset.size(dimh),
      " width: ", offset.size(dimw));

  TORCH_CHECK((offset.size(dimf) == deformable_group * 2 * kH * kW),
              "invalid number of channels of offset");

  if (gradOutput != NULL) {
    TORCH_CHECK(gradOutput->size(dimf) == nOutputPlane,
                "invalid number of gradOutput planes, expected: ",
                nOutputPlane, ", but got: ", gradOutput->size(dimf));

    TORCH_CHECK((gradOutput->size(dimh) == outputHeight &&
                 gradOutput->size(dimw) == outputWidth),
                "invalid size of gradOutput, expected height: ", outputHeight,
                " width: ", outputWidth,
                " , but got height: ", gradOutput->size(dimh),
                " width: ", gradOutput->size(dimw));
  }
}

// Deformable convolution forward pass.
//
// The batch is processed in blocks of `im2col_step` images: each block is
// unfolded with deformable_im2col_impl and multiplied by the (grouped) weight.
// The result is written into the storage of `output`.
//
// Args:
//   input: (N, C_in, H, W) or unbatched (C_in, H, W) feature map.
//   weight: contiguous (C_out, C_in / group, kH, kW) kernel.
//   offset: sampling offsets, same batching as `input`.
//   output: pre-allocated result tensor; must hold
//     N * C_out * outH * outW elements and be view-compatible.
//   columns, ones: legacy scratch buffers. They are only validated; a fresh
//     `columns` buffer is allocated internally and `ones` is not used.
//   kW, kH / dW, dH / padW, padH / dilationW, dilationH: kernel size, stride,
//     padding and dilation.
//   group, deformable_group: convolution groups and offset groups.
//   im2col_step: images processed per im2col call; must be positive and
//     divide the batch size.
//
// All tensors are taken by value, so reshaping them only rebinds the local
// handles. No in-place shape mutation is performed on the caller's tensors.
void deform_conv_forward(Tensor input, Tensor weight, Tensor offset,
                         Tensor output, Tensor columns, Tensor ones, int kW,
                         int kH, int dW, int dH, int padW, int padH,
                         int dilationW, int dilationH, int group,
                         int deformable_group, int im2col_step) {
  if (input.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
    CHECK_CUDA_INPUT(input);
    CHECK_CUDA_INPUT(offset);
    CHECK_CUDA_INPUT(weight);
    CHECK_CUDA_INPUT(output);
    CHECK_CUDA_INPUT(columns);
    CHECK_CUDA_INPUT(ones);
#else
    AT_ERROR("DeformConv is not compiled with GPU support");
#endif
  } else {
    CHECK_CPU_INPUT(input);
    CHECK_CPU_INPUT(offset);
    CHECK_CPU_INPUT(weight);
    CHECK_CPU_INPUT(output);
    CHECK_CPU_INPUT(columns);
    CHECK_CPU_INPUT(ones);
  }

  deform_conv_shape_check(input, offset, NULL, weight, kH, kW, dH, dW, padH,
                          padW, dilationH, dilationW, group, deformable_group);
  at::DeviceGuard guard(input.device());

  if (input.ndimension() == 3) {
    // Force a batch dimension. The out-of-place form is used on purpose: the
    // in-place unsqueeze_ would change the shape of the caller's tensors,
    // because the by-value handles share their TensorImpl with the caller.
    input = input.unsqueeze(0);
    offset = offset.unsqueeze(0);
  }

  long batchSize = input.size(0);
  long nInputPlane = input.size(1);
  long inputHeight = input.size(2);
  long inputWidth = input.size(3);

  long nOutputPlane = weight.size(0);

  long outputWidth =
      (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1;
  long outputHeight =
      (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1;

  TORCH_CHECK((offset.size(0) == batchSize), "invalid batch size of offset");
  // Checked before the first division so that im2col_step == 0 is reported
  // as an error instead of dividing by zero.
  TORCH_CHECK(im2col_step > 0 && batchSize % im2col_step == 0,
              "batch size (", batchSize,
              ") must be divisible by a positive im2col_step, but got "
              "im2col_step: ",
              im2col_step);

  const long nBlocks = batchSize / im2col_step;

  // `output` becomes a view of the caller's storage, so the final copy_
  // delivers the result to the caller.
  output = output.view(
      {nBlocks, im2col_step, nOutputPlane, outputHeight, outputWidth});
  columns = at::zeros(
      {nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth},
      input.options());

  input = input.view(
      {nBlocks, im2col_step, nInputPlane, inputHeight, inputWidth});
  offset = offset.view({nBlocks, im2col_step, deformable_group * 2 * kH * kW,
                        outputHeight, outputWidth});

  Tensor output_buffer = at::zeros(
      {nBlocks, nOutputPlane, im2col_step * outputHeight, outputWidth},
      output.options());

  output_buffer = output_buffer.view(
      {output_buffer.size(0), group, output_buffer.size(1) / group,
       output_buffer.size(2), output_buffer.size(3)});

  // Per-group views, created once. `columns_g` aliases the storage of
  // `columns`, so it sees the data written by each im2col call.
  Tensor weight_g = weight.view({group, weight.size(0) / group, weight.size(1),
                                 weight.size(2), weight.size(3)});
  Tensor columns_g =
      columns.view({group, columns.size(0) / group, columns.size(1)});

  for (long elt = 0; elt < nBlocks; elt++) {
    deformable_im2col_impl(input[elt], offset[elt], nInputPlane, inputHeight,
                           inputWidth, kH, kW, padH, padW, dH, dW, dilationH,
                           dilationW, im2col_step, deformable_group, columns);

    for (int g = 0; g < group; g++) {
      output_buffer[elt][g] = output_buffer[elt][g]
                                  .flatten(1)
                                  .addmm_(weight_g[g].flatten(1), columns_g[g])
                                  .view_as(output_buffer[elt][g]);
    }
  }

  // (block, group, C_out/group, step*outH, outW)
  //   -> (block, C_out, step, outH, outW) -> (block, step, C_out, outH, outW)
  output_buffer = output_buffer.view(
      {nBlocks, nOutputPlane, im2col_step, outputHeight, outputWidth});
  output_buffer.transpose_(1, 2);
  output.copy_(output_buffer);
}

// Deformable convolution backward pass w.r.t. the input and the offsets.
//
// For each block of `im2col_step` images the column gradient is computed as
// weight^T * gradOutput (per group). It is then passed to
// deformable_col2im_coord_impl with the matching gradOffset slice and to
// deformable_col2im_impl with the matching gradInput slice.
//
// Args:
//   input: (N, C_in, H, W) or unbatched (C_in, H, W) feature map.
//   offset: sampling offsets, same batching as `input`.
//   gradOutput: gradient w.r.t. the convolution output.
//   gradInput: pre-allocated tensor with as many elements as `input`; it is
//     reshaped through views, so results land in the caller's storage.
//   gradOffset: pre-allocated tensor with as many elements as `offset`;
//     handled the same way as gradInput.
//   weight: contiguous (C_out, C_in / group, kH, kW) kernel.
//   columns: legacy scratch buffer; only validated, a fresh buffer is
//     allocated internally.
//   kW, kH / dW, dH / padW, padH / dilationW, dilationH: kernel size, stride,
//     padding and dilation.
//   group, deformable_group: convolution groups and offset groups.
//   im2col_step: images processed per block; must be positive and divide the
//     batch size.
//
// All tensors are taken by value, so the reshapes below only rebind local
// handles and nothing has to be restored before returning.
void deform_conv_backward_input(Tensor input, Tensor offset, Tensor gradOutput,
                                Tensor gradInput, Tensor gradOffset,
                                Tensor weight, Tensor columns, int kW, int kH,
                                int dW, int dH, int padW, int padH,
                                int dilationW, int dilationH, int group,
                                int deformable_group, int im2col_step) {
  if (input.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
    CHECK_CUDA_INPUT(input);
    CHECK_CUDA_INPUT(offset);
    CHECK_CUDA_INPUT(gradOutput);
    CHECK_CUDA_INPUT(gradInput);
    CHECK_CUDA_INPUT(gradOffset);
    CHECK_CUDA_INPUT(weight);
    CHECK_CUDA_INPUT(columns);
#else
    AT_ERROR("DeformConv is not compiled with GPU support");
#endif
  } else {
    CHECK_CPU_INPUT(input);
    CHECK_CPU_INPUT(offset);
    CHECK_CPU_INPUT(gradOutput);
    CHECK_CPU_INPUT(gradInput);
    CHECK_CPU_INPUT(gradOffset);
    CHECK_CPU_INPUT(weight);
    CHECK_CPU_INPUT(columns);
  }
  deform_conv_shape_check(input, offset, &gradOutput, weight, kH, kW, dH, dW,
                          padH, padW, dilationH, dilationW, group,
                          deformable_group);

  at::DeviceGuard guard(input.device());

  if (input.ndimension() == 3) {
    // Force a batch dimension (non-mutating views of the caller's tensors).
    input = input.view({1, input.size(0), input.size(1), input.size(2)});
    offset = offset.view({1, offset.size(0), offset.size(1), offset.size(2)});
    gradOutput = gradOutput.view(
        {1, gradOutput.size(0), gradOutput.size(1), gradOutput.size(2)});
  }

  long batchSize = input.size(0);
  long nInputPlane = input.size(1);
  long inputHeight = input.size(2);
  long inputWidth = input.size(3);

  long nOutputPlane = weight.size(0);

  long outputWidth =
      (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1;
  long outputHeight =
      (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1;

  TORCH_CHECK((offset.size(0) == batchSize), "invalid batch size of offset");
  // Checked before the first division so that im2col_step == 0 is reported
  // as an error instead of dividing by zero.
  TORCH_CHECK(im2col_step > 0 && batchSize % im2col_step == 0,
              "batch size (", batchSize,
              ") must be divisible by a positive im2col_step, but got "
              "im2col_step: ",
              im2col_step);

  const long nBlocks = batchSize / im2col_step;

  columns = at::zeros(
      {nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth},
      input.options());

  // Change the order of grad output to (block, C_out, step, outH, outW). The
  // transpose is applied to the local view only.
  gradOutput = gradOutput.view(
      {nBlocks, im2col_step, nOutputPlane, outputHeight, outputWidth});
  gradOutput.transpose_(1, 2);

  // gradInput / gradOffset are views of the caller's storage, so the col2im
  // results written into their slices reach the caller.
  gradInput = gradInput.view(
      {nBlocks, im2col_step, nInputPlane, inputHeight, inputWidth});
  input = input.view(
      {nBlocks, im2col_step, nInputPlane, inputHeight, inputWidth});
  gradOffset =
      gradOffset.view({nBlocks, im2col_step, deformable_group * 2 * kH * kW,
                       outputHeight, outputWidth});
  offset = offset.view({nBlocks, im2col_step, deformable_group * 2 * kH * kW,
                        outputHeight, outputWidth});

  // Per-group views, created once. `columns_g` aliases the storage of
  // `columns`, which is what the col2im calls below read.
  Tensor weight_g = weight.view({group, weight.size(0) / group, weight.size(1),
                                 weight.size(2), weight.size(3)});
  Tensor columns_g =
      columns.view({group, columns.size(0) / group, columns.size(1)});
  Tensor gradOutput_g = gradOutput.view(
      {gradOutput.size(0), group, gradOutput.size(1) / group,
       gradOutput.size(2), gradOutput.size(3), gradOutput.size(4)});

  for (long elt = 0; elt < nBlocks; elt++) {
    for (int g = 0; g < group; g++) {
      // beta = 0 discards the previous block's columns; alpha = 1.
      columns_g[g] =
          columns_g[g].addmm_(weight_g[g].flatten(1).transpose(0, 1),
                              gradOutput_g[elt][g].flatten(1), 0.0f, 1.0f);
    }

    deformable_col2im_coord_impl(columns, input[elt], offset[elt], nInputPlane,
                                 inputHeight, inputWidth, kH, kW, padH, padW,
                                 dH, dW, dilationH, dilationW, im2col_step,
                                 deformable_group, gradOffset[elt]);

    deformable_col2im_impl(columns, offset[elt], nInputPlane, inputHeight,
                           inputWidth, kH, kW, padH, padW, dH, dW, dilationH,
                           dilationW, im2col_step, deformable_group,
                           gradInput[elt]);
  }
}

// Deformable convolution backward pass w.r.t. the weight.
//
// For each block of `im2col_step` images the input is unfolded with
// deformable_im2col_impl and `scale * gradOutput * columns^T` is accumulated
// (per group) into `gradWeight`.
//
// Args:
//   input: (N, C_in, H, W) or unbatched (C_in, H, W) feature map.
//   offset: sampling offsets, same batching as `input`.
//   gradOutput: gradient w.r.t. the convolution output.
//   gradWeight: contiguous (C_out, C_in / group, kH, kW) accumulator; it is
//     updated in place through views of the caller's storage.
//   columns, ones: legacy scratch buffers. They are only validated; a fresh
//     `columns` buffer is allocated internally and `ones` is not used.
//   kW, kH / dW, dH / padW, padH / dilationW, dilationH: kernel size, stride,
//     padding and dilation.
//   group, deformable_group: convolution groups and offset groups.
//   scale: factor applied to the gradient contribution (alpha of addmm_).
//   im2col_step: images processed per block; must be positive and divide the
//     batch size.
//
// All tensors are taken by value, so the reshapes below only rebind local
// handles and nothing has to be restored before returning.
void deform_conv_backward_parameters(Tensor input, Tensor offset,
                                     Tensor gradOutput, Tensor gradWeight,
                                     Tensor columns, Tensor ones, int kW,
                                     int kH, int dW, int dH, int padW, int padH,
                                     int dilationW, int dilationH, int group,
                                     int deformable_group, float scale,
                                     int im2col_step) {
  if (input.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
    CHECK_CUDA_INPUT(input);
    CHECK_CUDA_INPUT(offset);
    CHECK_CUDA_INPUT(gradOutput);
    CHECK_CUDA_INPUT(gradWeight);
    CHECK_CUDA_INPUT(columns);
    CHECK_CUDA_INPUT(ones);
#else
    AT_ERROR("DeformConv is not compiled with GPU support");
#endif
  } else {
    CHECK_CPU_INPUT(input);
    CHECK_CPU_INPUT(offset);
    CHECK_CPU_INPUT(gradOutput);
    CHECK_CPU_INPUT(gradWeight);
    CHECK_CPU_INPUT(columns);
    CHECK_CPU_INPUT(ones);
  }

  deform_conv_shape_check(input, offset, &gradOutput, gradWeight, kH, kW, dH,
                          dW, padH, padW, dilationH, dilationW, group,
                          deformable_group);
  at::DeviceGuard guard(input.device());

  if (input.ndimension() == 3) {
    // Force a batch dimension (non-mutating views of the caller's tensors).
    // `offset` gets one too, as in deform_conv_backward_input; otherwise the
    // batch-size check below compares the offset channel count against 1.
    input = input.view(
        at::IntList({1, input.size(0), input.size(1), input.size(2)}));
    offset = offset.view({1, offset.size(0), offset.size(1), offset.size(2)});
    gradOutput = gradOutput.view(
        {1, gradOutput.size(0), gradOutput.size(1), gradOutput.size(2)});
  }

  long batchSize = input.size(0);
  long nInputPlane = input.size(1);
  long inputHeight = input.size(2);
  long inputWidth = input.size(3);

  long nOutputPlane = gradWeight.size(0);

  long outputWidth =
      (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1;
  long outputHeight =
      (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1;

  TORCH_CHECK((offset.size(0) == batchSize), "invalid batch size of offset");
  // Checked before the first division so that im2col_step == 0 is reported
  // as an error instead of dividing by zero.
  TORCH_CHECK(im2col_step > 0 && batchSize % im2col_step == 0,
              "batch size (", batchSize,
              ") must be divisible by a positive im2col_step, but got "
              "im2col_step: ",
              im2col_step);

  const long nBlocks = batchSize / im2col_step;

  columns = at::zeros(
      {nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth},
      input.options());

  // Reorder grad output to (block, C_out, step, outH, outW). The transpose is
  // applied to the local view only.
  gradOutput = gradOutput.view(
      {nBlocks, im2col_step, nOutputPlane, outputHeight, outputWidth});
  gradOutput.transpose_(1, 2);

  // Materialise the reordered gradient contiguously so that the step and
  // height dimensions can be merged with a plain view.
  Tensor gradOutputBuffer = at::zeros_like(gradOutput);
  gradOutputBuffer = gradOutputBuffer.view(
      {nBlocks, nOutputPlane, im2col_step, outputHeight, outputWidth});
  gradOutputBuffer = gradOutputBuffer.contiguous();
  gradOutputBuffer.copy_(gradOutput);
  gradOutputBuffer = gradOutputBuffer.view(
      {nBlocks, nOutputPlane, im2col_step * outputHeight, outputWidth});

  input = input.view(
      {nBlocks, im2col_step, nInputPlane, inputHeight, inputWidth});
  offset = offset.view({nBlocks, im2col_step, deformable_group * 2 * kH * kW,
                        outputHeight, outputWidth});

  // Per-group views, created once. `columns_g` aliases the storage of
  // `columns`, so it sees the data written by each im2col call;
  // `gradWeight_g` aliases the caller's gradWeight storage.
  Tensor gradOutputBuffer_g = gradOutputBuffer.view(
      {gradOutputBuffer.size(0), group, gradOutputBuffer.size(1) / group,
       gradOutputBuffer.size(2), gradOutputBuffer.size(3)});
  Tensor columns_g =
      columns.view({group, columns.size(0) / group, columns.size(1)});
  Tensor gradWeight_g =
      gradWeight.view({group, gradWeight.size(0) / group, gradWeight.size(1),
                       gradWeight.size(2), gradWeight.size(3)});

  for (long elt = 0; elt < nBlocks; elt++) {
    deformable_im2col_impl(input[elt], offset[elt], nInputPlane, inputHeight,
                           inputWidth, kH, kW, padH, padW, dH, dW, dilationH,
                           dilationW, im2col_step, deformable_group, columns);

    for (int g = 0; g < group; g++) {
      // beta = 1 keeps the accumulated gradient; alpha = scale.
      gradWeight_g[g] = gradWeight_g[g]
                            .flatten(1)
                            .addmm_(gradOutputBuffer_g[elt][g].flatten(1),
                                    columns_g[g].transpose(1, 0), 1.0, scale)
                            .view_as(gradWeight_g[g]);
    }
  }
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/deform_conv_parrots.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include <parrots/compute/aten.hpp>
#include <parrots/extension.hpp>
#include <parrots/foundation/ssattrs.hpp>

#include "deform_conv_pytorch.h"

using namespace parrots;

#ifdef MMCV_WITH_CUDA
// Parrots CUDA adapter for the `deform_conv_forward` op.
//
// Reads the eleven integer attributes by name, wraps the three inputs and the
// three outputs with buildATensor(), and hands everything to
// deform_conv_forward() (declared in deform_conv_pytorch.h).
void deform_conv_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr,
                                      const OperatorBase::in_list_t& ins,
                                      OperatorBase::out_list_t& outs) {
  int kW, kH;
  int dW, dH;
  int padW, padH;
  int dilationW, dilationH;
  int group, deformable_group;
  int im2col_step;

  // Attribute names mirror the .attr(...) entries of the op registration.
  SSAttrs(attr)
      .get<int>("kW", kW).get<int>("kH", kH)
      .get<int>("dW", dW).get<int>("dH", dH)
      .get<int>("padW", padW).get<int>("padH", padH)
      .get<int>("dilationW", dilationW).get<int>("dilationH", dilationH)
      .get<int>("group", group)
      .get<int>("deformable_group", deformable_group)
      .get<int>("im2col_step", im2col_step)
      .done();

  // Inputs: ins[0..2] -> input, weight, offset.
  const auto& input_t = buildATensor(ctx, ins[0]);
  const auto& weight_t = buildATensor(ctx, ins[1]);
  const auto& offset_t = buildATensor(ctx, ins[2]);

  // Outputs: outs[0..2] -> output, columns, ones.
  auto output_t = buildATensor(ctx, outs[0]);
  auto columns_t = buildATensor(ctx, outs[1]);
  auto ones_t = buildATensor(ctx, outs[2]);

  deform_conv_forward(input_t, weight_t, offset_t, output_t, columns_t, ones_t,
                      kW, kH, dW, dH, padW, padH, dilationW, dilationH, group,
                      deformable_group, im2col_step);
}

// Parrots CUDA adapter for the `deform_conv_backward_input` op.
//
// Reads the integer attributes by name, wraps three inputs and four outputs
// with buildATensor(), and calls deform_conv_backward_input().
void deform_conv_backward_input_cuda_parrots(CudaContext& ctx,
                                             const SSElement& attr,
                                             const OperatorBase::in_list_t& ins,
                                             OperatorBase::out_list_t& outs) {
  int kW, kH;
  int dW, dH;
  int padW, padH;
  int dilationW, dilationH;
  int group, deformable_group;
  int im2col_step;

  // Attribute names mirror the .attr(...) entries of the op registration.
  SSAttrs(attr)
      .get<int>("kW", kW).get<int>("kH", kH)
      .get<int>("dW", dW).get<int>("dH", dH)
      .get<int>("padW", padW).get<int>("padH", padH)
      .get<int>("dilationW", dilationW).get<int>("dilationH", dilationH)
      .get<int>("group", group)
      .get<int>("deformable_group", deformable_group)
      .get<int>("im2col_step", im2col_step)
      .done();

  // Inputs: ins[0..2] -> input, offset, gradOutput.
  const auto& input_t = buildATensor(ctx, ins[0]);
  const auto& offset_t = buildATensor(ctx, ins[1]);
  const auto& grad_output_t = buildATensor(ctx, ins[2]);

  // Outputs: outs[0..3] -> gradInput, gradOffset, weight, columns.
  // Note that `weight` is taken from the output list here, not from `ins`.
  auto grad_input_t = buildATensor(ctx, outs[0]);
  auto grad_offset_t = buildATensor(ctx, outs[1]);
  auto weight_t = buildATensor(ctx, outs[2]);
  auto columns_t = buildATensor(ctx, outs[3]);

  deform_conv_backward_input(input_t, offset_t, grad_output_t, grad_input_t,
                             grad_offset_t, weight_t, columns_t, kW, kH, dW,
                             dH, padW, padH, dilationW, dilationH, group,
                             deformable_group, im2col_step);
}

// Parrots CUDA adapter for the `deform_conv_backward_parameters` op.
//
// Reads the integer attributes plus the float `scale` by name, wraps three
// inputs and three outputs with buildATensor(), and calls
// deform_conv_backward_parameters().
void deform_conv_backward_parameters_cuda_parrots(
    CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins,
    OperatorBase::out_list_t& outs) {
  int kW, kH;
  int dW, dH;
  int padW, padH;
  int dilationW, dilationH;
  int group, deformable_group;
  float scale;
  int im2col_step;

  // Attribute names mirror the .attr(...) entries of the op registration.
  SSAttrs(attr)
      .get<int>("kW", kW).get<int>("kH", kH)
      .get<int>("dW", dW).get<int>("dH", dH)
      .get<int>("padW", padW).get<int>("padH", padH)
      .get<int>("dilationW", dilationW).get<int>("dilationH", dilationH)
      .get<int>("group", group)
      .get<int>("deformable_group", deformable_group)
      .get<float>("scale", scale)
      .get<int>("im2col_step", im2col_step)
      .done();

  // Inputs: ins[0..2] -> input, offset, gradOutput.
  const auto& input_t = buildATensor(ctx, ins[0]);
  const auto& offset_t = buildATensor(ctx, ins[1]);
  const auto& grad_output_t = buildATensor(ctx, ins[2]);

  // Outputs: outs[0..2] -> gradWeight, columns, ones.
  auto grad_weight_t = buildATensor(ctx, outs[0]);
  auto columns_t = buildATensor(ctx, outs[1]);
  auto ones_t = buildATensor(ctx, outs[2]);

  deform_conv_backward_parameters(input_t, offset_t, grad_output_t,
                                  grad_weight_t, columns_t, ones_t, kW, kH, dW,
                                  dH, padW, padH, dilationW, dilationH, group,
                                  deformable_group, scale, im2col_step);
}
#endif

// Parrots host (CPU) adapter for the `deform_conv_forward` op.
//
// Same flow as the CUDA adapter, but driven by a HostContext: read the
// attributes, wrap inputs/outputs with buildATensor(), call
// deform_conv_forward().
void deform_conv_forward_cpu_parrots(HostContext& ctx, const SSElement& attr,
                                     const OperatorBase::in_list_t& ins,
                                     OperatorBase::out_list_t& outs) {
  int kW, kH;
  int dW, dH;
  int padW, padH;
  int dilationW, dilationH;
  int group, deformable_group;
  int im2col_step;

  // Attribute names mirror the .attr(...) entries of the op registration.
  SSAttrs(attr)
      .get<int>("kW", kW).get<int>("kH", kH)
      .get<int>("dW", dW).get<int>("dH", dH)
      .get<int>("padW", padW).get<int>("padH", padH)
      .get<int>("dilationW", dilationW).get<int>("dilationH", dilationH)
      .get<int>("group", group)
      .get<int>("deformable_group", deformable_group)
      .get<int>("im2col_step", im2col_step)
      .done();

  // Inputs: ins[0..2] -> input, weight, offset.
  const auto& input_t = buildATensor(ctx, ins[0]);
  const auto& weight_t = buildATensor(ctx, ins[1]);
  const auto& offset_t = buildATensor(ctx, ins[2]);

  // Outputs: outs[0..2] -> output, columns, ones.
  auto output_t = buildATensor(ctx, outs[0]);
  auto columns_t = buildATensor(ctx, outs[1]);
  auto ones_t = buildATensor(ctx, outs[2]);

  deform_conv_forward(input_t, weight_t, offset_t, output_t, columns_t, ones_t,
                      kW, kH, dW, dH, padW, padH, dilationW, dilationH, group,
                      deformable_group, im2col_step);
}

// Parrots host (CPU) adapter for the `deform_conv_backward_input` op.
//
// Reads the integer attributes by name, wraps three inputs and four outputs
// with buildATensor(), and calls deform_conv_backward_input().
void deform_conv_backward_input_cpu_parrots(HostContext& ctx,
                                            const SSElement& attr,
                                            const OperatorBase::in_list_t& ins,
                                            OperatorBase::out_list_t& outs) {
  int kW, kH;
  int dW, dH;
  int padW, padH;
  int dilationW, dilationH;
  int group, deformable_group;
  int im2col_step;

  // Attribute names mirror the .attr(...) entries of the op registration.
  SSAttrs(attr)
      .get<int>("kW", kW).get<int>("kH", kH)
      .get<int>("dW", dW).get<int>("dH", dH)
      .get<int>("padW", padW).get<int>("padH", padH)
      .get<int>("dilationW", dilationW).get<int>("dilationH", dilationH)
      .get<int>("group", group)
      .get<int>("deformable_group", deformable_group)
      .get<int>("im2col_step", im2col_step)
      .done();

  // Inputs: ins[0..2] -> input, offset, gradOutput.
  const auto& input_t = buildATensor(ctx, ins[0]);
  const auto& offset_t = buildATensor(ctx, ins[1]);
  const auto& grad_output_t = buildATensor(ctx, ins[2]);

  // Outputs: outs[0..3] -> gradInput, gradOffset, weight, columns.
  // Note that `weight` is taken from the output list here, not from `ins`.
  auto grad_input_t = buildATensor(ctx, outs[0]);
  auto grad_offset_t = buildATensor(ctx, outs[1]);
  auto weight_t = buildATensor(ctx, outs[2]);
  auto columns_t = buildATensor(ctx, outs[3]);

  deform_conv_backward_input(input_t, offset_t, grad_output_t, grad_input_t,
                             grad_offset_t, weight_t, columns_t, kW, kH, dW,
                             dH, padW, padH, dilationW, dilationH, group,
                             deformable_group, im2col_step);
}

// Parrots host (CPU) adapter for the `deform_conv_backward_parameters` op.
//
// Reads the integer attributes plus the float `scale` by name, wraps three
// inputs and three outputs with buildATensor(), and calls
// deform_conv_backward_parameters().
void deform_conv_backward_parameters_cpu_parrots(
    HostContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins,
    OperatorBase::out_list_t& outs) {
  int kW, kH;
  int dW, dH;
  int padW, padH;
  int dilationW, dilationH;
  int group, deformable_group;
  float scale;
  int im2col_step;

  // Attribute names mirror the .attr(...) entries of the op registration.
  SSAttrs(attr)
      .get<int>("kW", kW).get<int>("kH", kH)
      .get<int>("dW", dW).get<int>("dH", dH)
      .get<int>("padW", padW).get<int>("padH", padH)
      .get<int>("dilationW", dilationW).get<int>("dilationH", dilationH)
      .get<int>("group", group)
      .get<int>("deformable_group", deformable_group)
      .get<float>("scale", scale)
      .get<int>("im2col_step", im2col_step)
      .done();

  // Inputs: ins[0..2] -> input, offset, gradOutput.
  const auto& input_t = buildATensor(ctx, ins[0]);
  const auto& offset_t = buildATensor(ctx, ins[1]);
  const auto& grad_output_t = buildATensor(ctx, ins[2]);

  // Outputs: outs[0..2] -> gradWeight, columns, ones.
  auto grad_weight_t = buildATensor(ctx, outs[0]);
  auto columns_t = buildATensor(ctx, outs[1]);
  auto ones_t = buildATensor(ctx, outs[2]);

  deform_conv_backward_parameters(input_t, offset_t, grad_output_t,
                                  grad_weight_t, columns_t, ones_t, kW, kH, dW,
                                  dH, padW, padH, dilationW, dilationH, group,
                                  deformable_group, scale, im2col_step);
}

// Registers the `deform_conv_forward` op with parrots: eleven named
// attributes, 3 inputs and 3 outputs. The attribute names must stay in sync
// with the SSAttrs(...).get(...) calls in the adapters above. The CPU adapter
// is always attached; the CUDA adapter only when MMCV_WITH_CUDA is defined.
PARROTS_EXTENSION_REGISTER(deform_conv_forward)
    .attr("kW")
    .attr("kH")
    .attr("dW")
    .attr("dH")
    .attr("padW")
    .attr("padH")
    .attr("dilationW")
    .attr("dilationH")
    .attr("group")
    .attr("deformable_group")
    .attr("im2col_step")
    .input(3)
    .output(3)
    .apply(deform_conv_forward_cpu_parrots)
#ifdef MMCV_WITH_CUDA
    .apply(deform_conv_forward_cuda_parrots)
#endif
    .done();

// Registers the `deform_conv_backward_input` op with parrots: eleven named
// attributes, 3 inputs and 4 outputs (the adapters read outs[0..3]). The CPU
// adapter is always attached; the CUDA adapter only when MMCV_WITH_CUDA is
// defined.
PARROTS_EXTENSION_REGISTER(deform_conv_backward_input)
    .attr("kW")
    .attr("kH")
    .attr("dW")
    .attr("dH")
    .attr("padW")
    .attr("padH")
    .attr("dilationW")
    .attr("dilationH")
    .attr("group")
    .attr("deformable_group")
    .attr("im2col_step")
    .input(3)
    .output(4)
    .apply(deform_conv_backward_input_cpu_parrots)
#ifdef MMCV_WITH_CUDA
    .apply(deform_conv_backward_input_cuda_parrots)
#endif
    .done();

// Registers the `deform_conv_backward_parameters` op with parrots: twelve
// named attributes (the eleven shared ones plus "scale", which the adapters
// read as float), 3 inputs and 3 outputs. The CPU adapter is always attached;
// the CUDA adapter only when MMCV_WITH_CUDA is defined.
PARROTS_EXTENSION_REGISTER(deform_conv_backward_parameters)
    .attr("kW")
    .attr("kH")
    .attr("dW")
    .attr("dH")
    .attr("padW")
    .attr("padH")
    .attr("dilationW")
    .attr("dilationH")
    .attr("group")
    .attr("deformable_group")
    .attr("scale")
    .attr("im2col_step")
    .input(3)
    .output(3)
    .apply(deform_conv_backward_parameters_cpu_parrots)
#ifdef MMCV_WITH_CUDA
    .apply(deform_conv_backward_parameters_cuda_parrots)
#endif
    .done();


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/deform_conv_pytorch.h
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef DEFORM_CONV_PYTORCH_H
#define DEFORM_CONV_PYTORCH_H
#include <torch/extension.h>
using namespace at;

// Forward entry point called by the parrots adapters in
// deform_conv_parrots.cpp. Only declared here; the definition lives outside
// this header. All tensors are passed by value.
// NOTE(review): `columns` and `ones` are supplied from the op's output list by
// the adapters — presumably scratch buffers; confirm against the definition.
void deform_conv_forward(Tensor input, Tensor weight, Tensor offset,
                         Tensor output, Tensor columns, Tensor ones, int kW,
                         int kH, int dW, int dH, int padW, int padH,
                         int dilationW, int dilationH, int group,
                         int deformable_group, int im2col_step);

// Backward-input entry point called by the parrots adapters in
// deform_conv_parrots.cpp. Only declared here; the definition lives outside
// this header. All tensors are passed by value.
// NOTE(review): the adapters pass `weight` from the op's output list — verify
// this is intended by the Python-side caller.
void deform_conv_backward_input(Tensor input, Tensor offset, Tensor gradOutput,
                                Tensor gradInput, Tensor gradOffset,
                                Tensor weight, Tensor columns, int kW, int kH,
                                int dW, int dH, int padW, int padH,
                                int dilationW, int dilationH, int group,
                                int deformable_group, int im2col_step);

// Backward-parameters entry point called by the parrots adapters in
// deform_conv_parrots.cpp. Only declared here; the definition lives outside
// this header. Compared with the other two entry points it takes an extra
// float `scale` placed before `im2col_step`.
void deform_conv_backward_parameters(Tensor input, Tensor offset,
                                     Tensor gradOutput, Tensor gradWeight,
                                     Tensor columns, Tensor ones, int kW,
                                     int kH, int dW, int dH, int padW, int padH,
                                     int dilationW, int dilationH, int group,
                                     int deformable_group, float scale,
                                     int im2col_step);

#endif  // DEFORM_CONV_PYTORCH_H


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/deform_roi_pool.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Dispatch shim for the deformable RoI pooling forward pass: passes every
// argument, unchanged and in order, to DISPATCH_DEVICE_IMPL keyed by this
// function's own name.
// NOTE(review): DISPATCH_DEVICE_IMPL is defined outside this file (presumably
// in pytorch_device_registry.hpp) and is expected to pick the implementation
// registered for the tensors' device — confirm.
void deform_roi_pool_forward_impl(Tensor input, Tensor rois, Tensor offset,
                                  Tensor output, int pooled_height,
                                  int pooled_width, float spatial_scale,
                                  int sampling_ratio, float gamma) {
  DISPATCH_DEVICE_IMPL(deform_roi_pool_forward_impl, input, rois, offset,
                       output, pooled_height, pooled_width, spatial_scale,
                       sampling_ratio, gamma);
}

// Dispatch shim for the deformable RoI pooling backward pass: passes every
// argument, unchanged and in order, to DISPATCH_DEVICE_IMPL keyed by this
// function's own name.
// NOTE(review): DISPATCH_DEVICE_IMPL is defined outside this file (presumably
// in pytorch_device_registry.hpp) and is expected to pick the implementation
// registered for the tensors' device — confirm.
void deform_roi_pool_backward_impl(Tensor grad_output, Tensor input,
                                   Tensor rois, Tensor offset,
                                   Tensor grad_input, Tensor grad_offset,
                                   int pooled_height, int pooled_width,
                                   float spatial_scale, int sampling_ratio,
                                   float gamma) {
  DISPATCH_DEVICE_IMPL(deform_roi_pool_backward_impl, grad_output, input, rois,
                       offset, grad_input, grad_offset, pooled_height,
                       pooled_width, spatial_scale, sampling_ratio, gamma);
}

// Public forward entry point: a thin wrapper that forwards all arguments to
// deform_roi_pool_forward_impl() and returns nothing.
// NOTE(review): the parrots adapter in deform_roi_pool_parrots.cpp calls
// deform_roi_pool_forward_cuda(), not this function — confirm where that
// symbol is defined and whether this wrapper is reachable in parrots builds.
void deform_roi_pool_forward(Tensor input, Tensor rois, Tensor offset,
                             Tensor output, int pooled_height, int pooled_width,
                             float spatial_scale, int sampling_ratio,
                             float gamma) {
  deform_roi_pool_forward_impl(input, rois, offset, output, pooled_height,
                               pooled_width, spatial_scale, sampling_ratio,
                               gamma);
}

// Public backward entry point: a thin wrapper that forwards all arguments to
// deform_roi_pool_backward_impl() and returns nothing.
// NOTE(review): the parrots adapter in deform_roi_pool_parrots.cpp calls
// deform_roi_pool_backward_cuda(), not this function — confirm where that
// symbol is defined and whether this wrapper is reachable in parrots builds.
void deform_roi_pool_backward(Tensor grad_output, Tensor input, Tensor rois,
                              Tensor offset, Tensor grad_input,
                              Tensor grad_offset, int pooled_height,
                              int pooled_width, float spatial_scale,
                              int sampling_ratio, float gamma) {
  deform_roi_pool_backward_impl(grad_output, input, rois, offset, grad_input,
                                grad_offset, pooled_height, pooled_width,
                                spatial_scale, sampling_ratio, gamma);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/deform_roi_pool_parrots.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include <parrots/compute/aten.hpp>
#include <parrots/extension.hpp>
#include <parrots/foundation/ssattrs.hpp>

#include "deform_roi_pool_pytorch.h"

using namespace parrots;

#ifdef MMCV_WITH_CUDA
/*void deform_roi_pool_forward_cuda(Tensor input, Tensor rois, Tensor offset,
 *                                  Tensor output, int pooled_height,
 *                                  int pooled_width, float spatial_scale,
 *                                  int sampling_ratio, float gamma);
 */
// Parrots CUDA adapter for the `deform_roi_pool_forward` op.
//
// Reads the five attributes by name, wraps three inputs and one output with
// buildATensor(), and calls deform_roi_pool_forward_cuda().
void deform_roi_pool_forward_cuda_parrots(CudaContext& ctx,
                                          const SSElement& attr,
                                          const OperatorBase::in_list_t& ins,
                                          OperatorBase::out_list_t& outs) {
  int pooled_height, pooled_width;
  float spatial_scale;
  int sampling_ratio;
  float gamma;

  // Attribute names mirror the .attr(...) entries of the op registration.
  SSAttrs(attr)
      .get<int>("pooled_height", pooled_height).get<int>("pooled_width", pooled_width)
      .get<float>("spatial_scale", spatial_scale)
      .get<int>("sampling_ratio", sampling_ratio)
      .get<float>("gamma", gamma)
      .done();

  // Inputs: ins[0..2] -> input, rois, offset.
  const auto& input_t = buildATensor(ctx, ins[0]);
  const auto& rois_t = buildATensor(ctx, ins[1]);
  const auto& offset_t = buildATensor(ctx, ins[2]);

  // Single output: outs[0] -> output.
  auto output_t = buildATensor(ctx, outs[0]);

  deform_roi_pool_forward_cuda(input_t, rois_t, offset_t, output_t,
                               pooled_height, pooled_width, spatial_scale,
                               sampling_ratio, gamma);
}

/*void deform_roi_pool_backward_cuda(Tensor grad_output, Tensor input,
 *                                   Tensor rois, Tensor offset,
 *                                   Tensor grad_input, Tensor grad_offset,
 *                                   int pooled_height, int pooled_width,
 *                                   float spatial_scale, int sampling_ratio,
 *                                   float gamma);
 */
// Parrots CUDA adapter for the `deform_roi_pool_backward` op.
//
// Reads the five attributes by name, wraps four inputs and two outputs with
// buildATensor(), and calls deform_roi_pool_backward_cuda().
void deform_roi_pool_backward_cuda_parrots(CudaContext& ctx,
                                           const SSElement& attr,
                                           const OperatorBase::in_list_t& ins,
                                           OperatorBase::out_list_t& outs) {
  int pooled_height, pooled_width;
  float spatial_scale;
  int sampling_ratio;
  float gamma;

  // Attribute names mirror the .attr(...) entries of the op registration.
  SSAttrs(attr)
      .get<int>("pooled_height", pooled_height).get<int>("pooled_width", pooled_width)
      .get<float>("spatial_scale", spatial_scale)
      .get<int>("sampling_ratio", sampling_ratio)
      .get<float>("gamma", gamma)
      .done();

  // Inputs: ins[0..3] -> grad_output, input, rois, offset.
  const auto& grad_output_t = buildATensor(ctx, ins[0]);
  const auto& input_t = buildATensor(ctx, ins[1]);
  const auto& rois_t = buildATensor(ctx, ins[2]);
  const auto& offset_t = buildATensor(ctx, ins[3]);

  // Outputs: outs[0..1] -> grad_input, grad_offset.
  auto grad_input_t = buildATensor(ctx, outs[0]);
  auto grad_offset_t = buildATensor(ctx, outs[1]);

  deform_roi_pool_backward_cuda(grad_output_t, input_t, rois_t, offset_t,
                                grad_input_t, grad_offset_t, pooled_height,
                                pooled_width, spatial_scale, sampling_ratio,
                                gamma);
}

// Registers the `deform_roi_pool_forward` op with parrots: five named
// attributes, 3 inputs and 1 output. Only a CUDA adapter is attached, and the
// whole registration sits inside the MMCV_WITH_CUDA guard, so the op is not
// registered at all in builds without CUDA.
PARROTS_EXTENSION_REGISTER(deform_roi_pool_forward)
    .attr("pooled_height")
    .attr("pooled_width")
    .attr("spatial_scale")
    .attr("sampling_ratio")
    .attr("gamma")
    .input(3)
    .output(1)
    .apply(deform_roi_pool_forward_cuda_parrots)
    .done();

// Registers the `deform_roi_pool_backward` op with parrots: five named
// attributes, 4 inputs and 2 outputs. Only a CUDA adapter is attached, and the
// whole registration sits inside the MMCV_WITH_CUDA guard, so the op is not
// registered at all in builds without CUDA.
PARROTS_EXTENSION_REGISTER(deform_roi_pool_backward)
    .attr("pooled_height")
    .attr("pooled_width")
    .attr("spatial_scale")
    .attr("sampling_ratio")
    .attr("gamma")
    .input(4)
    .output(2)
    .apply(deform_roi_pool_backward_cuda_parrots)
    .done();
#endif


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/deform_roi_pool_pytorch.h
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef DEFORM_ROI_POOL_PYTORCH_H
#define DEFORM_ROI_POOL_PYTORCH_H
#include <torch/extension.h>
using namespace at;

// Forward entry point called by deform_roi_pool_forward_cuda_parrots().
// Only declared here; the definition lives outside this header.
// All tensors are passed by value.
void deform_roi_pool_forward_cuda(Tensor input, Tensor rois, Tensor offset,
                                  Tensor output, int pooled_height,
                                  int pooled_width, float spatial_scale,
                                  int sampling_ratio, float gamma);

// Backward entry point called by deform_roi_pool_backward_cuda_parrots().
// Only declared here; the definition lives outside this header.
// All tensors are passed by value.
void deform_roi_pool_backward_cuda(Tensor grad_output, Tensor input,
                                   Tensor rois, Tensor offset,
                                   Tensor grad_input, Tensor grad_offset,
                                   int pooled_height, int pooled_width,
                                   float spatial_scale, int sampling_ratio,
                                   float gamma);
#endif  // DEFORM_ROI_POOL_PYTORCH_H


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/focal_loss.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Dispatch shim for the sigmoid focal loss forward pass: passes every
// argument, unchanged and in order, to DISPATCH_DEVICE_IMPL keyed by this
// function's own name.
// NOTE(review): DISPATCH_DEVICE_IMPL is defined outside this file (presumably
// in pytorch_device_registry.hpp) — confirm how it selects the backend.
void sigmoid_focal_loss_forward_impl(Tensor input, Tensor target, Tensor weight,
                                     Tensor output, float gamma, float alpha) {
  DISPATCH_DEVICE_IMPL(sigmoid_focal_loss_forward_impl, input, target, weight,
                       output, gamma, alpha);
}

// Dispatch shim for the sigmoid focal loss backward pass: passes every
// argument, unchanged and in order, to DISPATCH_DEVICE_IMPL keyed by this
// function's own name.
// NOTE(review): DISPATCH_DEVICE_IMPL is defined outside this file (presumably
// in pytorch_device_registry.hpp) — confirm how it selects the backend.
void sigmoid_focal_loss_backward_impl(Tensor input, Tensor target,
                                      Tensor weight, Tensor grad_input,
                                      float gamma, float alpha) {
  DISPATCH_DEVICE_IMPL(sigmoid_focal_loss_backward_impl, input, target, weight,
                       grad_input, gamma, alpha);
}

// Dispatch shim for the softmax focal loss forward pass: passes every
// argument, unchanged and in order, to DISPATCH_DEVICE_IMPL keyed by this
// function's own name.
// NOTE(review): DISPATCH_DEVICE_IMPL is defined outside this file (presumably
// in pytorch_device_registry.hpp) — confirm how it selects the backend.
void softmax_focal_loss_forward_impl(Tensor input, Tensor target, Tensor weight,
                                     Tensor output, float gamma, float alpha) {
  DISPATCH_DEVICE_IMPL(softmax_focal_loss_forward_impl, input, target, weight,
                       output, gamma, alpha);
}

// Dispatch shim for the softmax focal loss backward pass: passes every
// argument, unchanged and in order, to DISPATCH_DEVICE_IMPL keyed by this
// function's own name. Unlike the sigmoid variant it carries an extra `buff`
// tensor ahead of `grad_input`.
// NOTE(review): DISPATCH_DEVICE_IMPL is defined outside this file (presumably
// in pytorch_device_registry.hpp) — confirm how it selects the backend.
void softmax_focal_loss_backward_impl(Tensor input, Tensor target,
                                      Tensor weight, Tensor buff,
                                      Tensor grad_input, float gamma,
                                      float alpha) {
  DISPATCH_DEVICE_IMPL(softmax_focal_loss_backward_impl, input, target, weight,
                       buff, grad_input, gamma, alpha);
}

// Public entry point: forwards all arguments to
// sigmoid_focal_loss_forward_impl() and returns nothing.
// NOTE(review): focal_loss_parrots.cpp calls the *_cuda-suffixed symbol, not
// this wrapper — confirm where that symbol is defined.
void sigmoid_focal_loss_forward(Tensor input, Tensor target, Tensor weight,
                                Tensor output, float gamma, float alpha) {
  sigmoid_focal_loss_forward_impl(input, target, weight, output, gamma, alpha);
}

// Public entry point: forwards all arguments to
// sigmoid_focal_loss_backward_impl() and returns nothing.
// NOTE(review): focal_loss_parrots.cpp calls the *_cuda-suffixed symbol, not
// this wrapper — confirm where that symbol is defined.
void sigmoid_focal_loss_backward(Tensor input, Tensor target, Tensor weight,
                                 Tensor grad_input, float gamma, float alpha) {
  sigmoid_focal_loss_backward_impl(input, target, weight, grad_input, gamma,
                                   alpha);
}

// Public entry point: forwards all arguments to
// softmax_focal_loss_forward_impl() and returns nothing.
// NOTE(review): focal_loss_parrots.cpp calls the *_cuda-suffixed symbol, not
// this wrapper — confirm where that symbol is defined.
void softmax_focal_loss_forward(Tensor input, Tensor target, Tensor weight,
                                Tensor output, float gamma, float alpha) {
  softmax_focal_loss_forward_impl(input, target, weight, output, gamma, alpha);
}

// Public entry point: forwards all arguments (including the extra `buff`
// tensor) to softmax_focal_loss_backward_impl() and returns nothing.
// NOTE(review): focal_loss_parrots.cpp calls the *_cuda-suffixed symbol, not
// this wrapper — confirm where that symbol is defined.
void softmax_focal_loss_backward(Tensor input, Tensor target, Tensor weight,
                                 Tensor buff, Tensor grad_input, float gamma,
                                 float alpha) {
  softmax_focal_loss_backward_impl(input, target, weight, buff, grad_input,
                                   gamma, alpha);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/focal_loss_parrots.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include <parrots/compute/aten.hpp>
#include <parrots/extension.hpp>
#include <parrots/foundation/ssattrs.hpp>

#include "focal_loss_pytorch.h"

using namespace parrots;

#ifdef MMCV_WITH_CUDA
// Parrots CUDA adapter for the `sigmoid_focal_loss_forward` op.
//
// Reads the float attributes "gamma" and "alpha", wraps three inputs and one
// output with buildATensor(), and calls sigmoid_focal_loss_forward_cuda().
void sigmoid_focal_loss_forward_cuda_parrots(CudaContext& ctx,
                                             const SSElement& attr,
                                             const OperatorBase::in_list_t& ins,
                                             OperatorBase::out_list_t& outs) {
  float gamma, alpha;
  SSAttrs(attr)
      .get<float>("gamma", gamma)
      .get<float>("alpha", alpha)
      .done();

  // Inputs: ins[0..2] -> input, target, weight.
  const auto& input_t = buildATensor(ctx, ins[0]);
  const auto& target_t = buildATensor(ctx, ins[1]);
  const auto& weight_t = buildATensor(ctx, ins[2]);

  // Single output: outs[0] -> output.
  auto output_t = buildATensor(ctx, outs[0]);

  sigmoid_focal_loss_forward_cuda(input_t, target_t, weight_t, output_t, gamma,
                                  alpha);
}

// Parrots CUDA adapter for the `sigmoid_focal_loss_backward` op.
//
// Reads the float attributes "gamma" and "alpha", wraps three inputs and one
// output with buildATensor(), and calls sigmoid_focal_loss_backward_cuda().
void sigmoid_focal_loss_backward_cuda_parrots(
    CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins,
    OperatorBase::out_list_t& outs) {
  float gamma, alpha;
  SSAttrs(attr)
      .get<float>("gamma", gamma)
      .get<float>("alpha", alpha)
      .done();

  // Inputs: ins[0..2] -> input, target, weight.
  const auto& input_t = buildATensor(ctx, ins[0]);
  const auto& target_t = buildATensor(ctx, ins[1]);
  const auto& weight_t = buildATensor(ctx, ins[2]);

  // Single output: outs[0] -> grad_input.
  auto grad_input_t = buildATensor(ctx, outs[0]);

  sigmoid_focal_loss_backward_cuda(input_t, target_t, weight_t, grad_input_t,
                                   gamma, alpha);
}

// Parrots CUDA adapter for the `softmax_focal_loss_forward` op.
//
// Reads the float attributes "gamma" and "alpha", wraps three inputs and one
// output with buildATensor(), and calls softmax_focal_loss_forward_cuda().
void softmax_focal_loss_forward_cuda_parrots(CudaContext& ctx,
                                             const SSElement& attr,
                                             const OperatorBase::in_list_t& ins,
                                             OperatorBase::out_list_t& outs) {
  float gamma, alpha;
  SSAttrs(attr)
      .get<float>("gamma", gamma)
      .get<float>("alpha", alpha)
      .done();

  // Inputs: ins[0..2] -> input, target, weight.
  const auto& input_t = buildATensor(ctx, ins[0]);
  const auto& target_t = buildATensor(ctx, ins[1]);
  const auto& weight_t = buildATensor(ctx, ins[2]);

  // Single output: outs[0] -> output.
  auto output_t = buildATensor(ctx, outs[0]);

  softmax_focal_loss_forward_cuda(input_t, target_t, weight_t, output_t, gamma,
                                  alpha);
}

// Parrots CUDA adapter for the `softmax_focal_loss_backward` op.
//
// Reads the float attributes "gamma" and "alpha", wraps three inputs and two
// outputs with buildATensor(), and calls softmax_focal_loss_backward_cuda().
void softmax_focal_loss_backward_cuda_parrots(
    CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins,
    OperatorBase::out_list_t& outs) {
  float gamma, alpha;
  SSAttrs(attr)
      .get<float>("gamma", gamma)
      .get<float>("alpha", alpha)
      .done();

  // Inputs: ins[0..2] -> input, target, weight.
  const auto& input_t = buildATensor(ctx, ins[0]);
  const auto& target_t = buildATensor(ctx, ins[1]);
  const auto& weight_t = buildATensor(ctx, ins[2]);

  // Outputs: outs[0] -> buff, outs[1] -> grad_input.
  auto buff_t = buildATensor(ctx, outs[0]);
  auto grad_input_t = buildATensor(ctx, outs[1]);

  softmax_focal_loss_backward_cuda(input_t, target_t, weight_t, buff_t,
                                   grad_input_t, gamma, alpha);
}

// Registers the `sigmoid_focal_loss_forward` op with parrots: attributes
// "gamma" and "alpha", 3 inputs and 1 output. Only a CUDA adapter is attached
// and the registration sits inside the MMCV_WITH_CUDA guard.
PARROTS_EXTENSION_REGISTER(sigmoid_focal_loss_forward)
    .attr("gamma")
    .attr("alpha")
    .input(3)
    .output(1)
    .apply(sigmoid_focal_loss_forward_cuda_parrots)
    .done();

// Registers the `sigmoid_focal_loss_backward` op with parrots: attributes
// "gamma" and "alpha", 3 inputs and 1 output. Only a CUDA adapter is attached
// and the registration sits inside the MMCV_WITH_CUDA guard.
PARROTS_EXTENSION_REGISTER(sigmoid_focal_loss_backward)
    .attr("gamma")
    .attr("alpha")
    .input(3)
    .output(1)
    .apply(sigmoid_focal_loss_backward_cuda_parrots)
    .done();

// Registers the `softmax_focal_loss_forward` op with parrots: attributes
// "gamma" and "alpha", 3 inputs and 1 output. Only a CUDA adapter is attached
// and the registration sits inside the MMCV_WITH_CUDA guard.
PARROTS_EXTENSION_REGISTER(softmax_focal_loss_forward)
    .attr("gamma")
    .attr("alpha")
    .input(3)
    .output(1)
    .apply(softmax_focal_loss_forward_cuda_parrots)
    .done();

// Registers the `softmax_focal_loss_backward` op with parrots: attributes
// "gamma" and "alpha", 3 inputs and 2 outputs (the adapter reads outs[0] as
// `buff` and outs[1] as `grad_input`). Only a CUDA adapter is attached and the
// registration sits inside the MMCV_WITH_CUDA guard.
PARROTS_EXTENSION_REGISTER(softmax_focal_loss_backward)
    .attr("gamma")
    .attr("alpha")
    .input(3)
    .output(2)
    .apply(softmax_focal_loss_backward_cuda_parrots)
    .done();
#endif


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/focal_loss_pytorch.h
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef FOCAL_LOSS_PYTORCH_H
#define FOCAL_LOSS_PYTORCH_H
#include <torch/extension.h>
using namespace at;

// Called by sigmoid_focal_loss_forward_cuda_parrots(). Only declared here;
// the definition lives outside this header. Tensors are passed by value.
void sigmoid_focal_loss_forward_cuda(Tensor input, Tensor target, Tensor weight,
                                     Tensor output, float gamma, float alpha);

// Called by sigmoid_focal_loss_backward_cuda_parrots(). Only declared here;
// the definition lives outside this header. Tensors are passed by value.
void sigmoid_focal_loss_backward_cuda(Tensor input, Tensor target,
                                      Tensor weight, Tensor grad_input,
                                      float gamma, float alpha);

// Called by softmax_focal_loss_forward_cuda_parrots(). Only declared here;
// the definition lives outside this header. Tensors are passed by value.
void softmax_focal_loss_forward_cuda(Tensor input, Tensor target, Tensor weight,
                                     Tensor output, float gamma, float alpha);

// Called by softmax_focal_loss_backward_cuda_parrots(). Only declared here;
// the definition lives outside this header. Takes an extra `buff` tensor
// ahead of `grad_input`. Tensors are passed by value.
void softmax_focal_loss_backward_cuda(Tensor input, Tensor target,
                                      Tensor weight, Tensor buff,
                                      Tensor grad_input, float gamma,
                                      float alpha);
#endif  // FOCAL_LOSS_PYTORCH_H


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/furthest_point_sample.cpp
================================================
// Modified from
// https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/sampling.cpp

#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Dispatch shim for furthest point sampling: passes every argument, unchanged
// and in order, to DISPATCH_DEVICE_IMPL keyed by this function's own name.
// NOTE(review): DISPATCH_DEVICE_IMPL is defined outside this file (presumably
// in pytorch_device_registry.hpp) — confirm how it selects the backend.
// NOTE(review): b / n / m look like batch size, point count and sample count,
// but nothing in this file establishes that — confirm against the kernel.
void furthest_point_sampling_forward_impl(Tensor points_tensor,
                                          Tensor temp_tensor, Tensor idx_tensor,
                                          int b, int n, int m) {
  DISPATCH_DEVICE_IMPL(furthest_point_sampling_forward_impl, points_tensor,
                       temp_tensor, idx_tensor, b, n, m);
}

// Dispatch shim for the "with_dist" variant of furthest point sampling:
// passes every argument, unchanged and in order, to DISPATCH_DEVICE_IMPL keyed
// by this function's own name.
// NOTE(review): DISPATCH_DEVICE_IMPL is defined outside this file (presumably
// in pytorch_device_registry.hpp) — confirm how it selects the backend.
void furthest_point_sampling_with_dist_forward_impl(Tensor points_tensor,
                                                    Tensor temp_tensor,
                                                    Tensor idx_tensor, int b,
                                                    int n, int m) {
  DISPATCH_DEVICE_IMPL(furthest_point_sampling_with_dist_forward_impl,
                       points_tensor, temp_tensor, idx_tensor, b, n, m);
}

// Public entry point (declared in furthest_point_sample_pytorch.h and called
// by the parrots adapter): forwards all arguments to
// furthest_point_sampling_forward_impl() and returns nothing.
void furthest_point_sampling_forward(Tensor points_tensor, Tensor temp_tensor,
                                     Tensor idx_tensor, int b, int n, int m) {
  furthest_point_sampling_forward_impl(points_tensor, temp_tensor, idx_tensor,
                                       b, n, m);
}

// Public entry point (declared in furthest_point_sample_pytorch.h and called
// by the parrots adapter): forwards all arguments to
// furthest_point_sampling_with_dist_forward_impl() and returns nothing.
void furthest_point_sampling_with_dist_forward(Tensor points_tensor,
                                               Tensor temp_tensor,
                                               Tensor idx_tensor, int b, int n,
                                               int m) {
  furthest_point_sampling_with_dist_forward_impl(points_tensor, temp_tensor,
                                                 idx_tensor, b, n, m);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/furthest_point_sample_parrots.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include <parrots/compute/aten.hpp>
#include <parrots/extension.hpp>
#include <parrots/foundation/ssattrs.hpp>

#include "furthest_point_sample_pytorch.h"

using namespace parrots;

#ifdef MMCV_WITH_CUDA
// Parrots CUDA adapter for the `furthest_point_sampling_forward` op.
//
// Reads the integer attributes "b", "n" and "m", wraps two inputs and one
// output with buildATensor(), and calls furthest_point_sampling_forward().
void furthest_point_sample_forward_cuda_parrots(
    CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins,
    OperatorBase::out_list_t& outs) {
  int b;
  int n;
  int m;
  SSAttrs(attr)
      .get<int>("b", b)
      .get<int>("n", n)
      .get<int>("m", m)
      .done();

  // Inputs: ins[0] -> points, ins[1] -> temp.
  auto points_t = buildATensor(ctx, ins[0]);
  auto temp_t = buildATensor(ctx, ins[1]);

  // Single output: outs[0] -> idx.
  auto idx_t = buildATensor(ctx, outs[0]);

  furthest_point_sampling_forward(points_t, temp_t, idx_t, b, n, m);
}

// Parrots CUDA adapter for the `furthest_point_sampling_with_dist_forward` op.
//
// Reads the integer attributes "b", "n" and "m", wraps two inputs and one
// output with buildATensor(), and calls
// furthest_point_sampling_with_dist_forward().
void furthest_point_sampling_with_dist_forward_cuda_parrots(
    CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins,
    OperatorBase::out_list_t& outs) {
  int b;
  int n;
  int m;
  SSAttrs(attr)
      .get<int>("b", b)
      .get<int>("n", n)
      .get<int>("m", m)
      .done();

  // Inputs: ins[0] -> points, ins[1] -> temp.
  auto points_t = buildATensor(ctx, ins[0]);
  auto temp_t = buildATensor(ctx, ins[1]);

  // Single output: outs[0] -> idx.
  auto idx_t = buildATensor(ctx, outs[0]);

  furthest_point_sampling_with_dist_forward(points_t, temp_t, idx_t, b, n, m);
}
// Registers the `furthest_point_sampling_forward` op with parrots: attributes
// "b", "n", "m", 2 inputs and 1 output. Only a CUDA adapter is attached and
// the registration sits inside the MMCV_WITH_CUDA guard. Note the adapter is
// spelled `furthest_point_sample_...` while the op is `..._sampling_...`.
PARROTS_EXTENSION_REGISTER(furthest_point_sampling_forward)
    .attr("b")
    .attr("n")
    .attr("m")
    .input(2)
    .output(1)
    .apply(furthest_point_sample_forward_cuda_parrots)
    .done();

// Registers the `furthest_point_sampling_with_dist_forward` op with parrots:
// attributes "b", "n", "m", 2 inputs and 1 output. Only a CUDA adapter is
// attached and the registration sits inside the MMCV_WITH_CUDA guard.
PARROTS_EXTENSION_REGISTER(furthest_point_sampling_with_dist_forward)
    .attr("b")
    .attr("n")
    .attr("m")
    .input(2)
    .output(1)
    .apply(furthest_point_sampling_with_dist_forward_cuda_parrots)
    .done();
#endif


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/furthest_point_sample_pytorch.h
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef FURTHEST_POINT_SAMPLE_PYTORCH_H
#define FURTHEST_POINT_SAMPLE_PYTORCH_H
#include <torch/extension.h>
using namespace at;

// Called by furthest_point_sample_forward_cuda_parrots(). Only declared here;
// the definition lives outside this header. Tensors are passed by value.
void furthest_point_sampling_forward(Tensor points_tensor, Tensor temp_tensor,
                                     Tensor idx_tensor, int b, int n, int m);

// Called by furthest_point_sampling_with_dist_forward_cuda_parrots(). Only
// declared here; the definition lives outside this header. Tensors are passed
// by value.
void furthest_point_sampling_with_dist_forward(Tensor points_tensor,
                                               Tensor temp_tensor,
                                               Tensor idx_tensor, int b, int n,
                                               int m);
#endif  // FURTHEST_POINT_SAMPLE_PYTORCH_H


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/fused_bias_leakyrelu.cpp
================================================
// Modified from
// https://github.com/rosinality/stylegan2-pytorch/blob/master/op/fused_bias_act.cpp

/*
Copyright (c) 2021, NVIDIA Corporation. All rights reserved.

NVIDIA Source Code License for StyleGAN2 with Adaptive Discriminator
Augmentation (ADA)
=======================================================================

1. Definitions

"Licensor" means any person or entity that distributes its Work.

"Software" means the original work of authorship made available under
this License.

"Work" means the Software and any additions to or derivative works of
the Software that are made available under this License.

The terms "reproduce," "reproduction," "derivative works," and
"distribution" have the meaning as provided under U.S. copyright law;
provided, however, that for the purposes of this License, derivative
works shall not include works that remain separable from, or merely
link (or bind by name) to the interfaces of, the Work.

Works, including the Software, are "made available" under this License
by including in or with the Work either (a) a copyright notice
referencing the applicability of this License to the Work, or (b) a
copy of this License.

2. License Grants

    2.1 Copyright Grant. Subject to the terms and conditions of this
    License, each Licensor grants to you a perpetual, worldwide,
    non-exclusive, royalty-free, copyright license to reproduce,
    prepare derivative works of, publicly display, publicly perform,
    sublicense and distribute its Work and any resulting derivative
    works in any form.

3. Limitations

    3.1 Redistribution. You may reproduce or distribute the Work only
    if (a) you do so under this License, (b) you include a complete
    copy of this License with your distribution, and (c) you retain
    without modification any copyright, patent, trademark, or
    attribution notices that are present in the Work.

    3.2 Derivative Works. You may specify that additional or different
    terms apply to the use, reproduction, and distribution of your
    derivative works of the Work ("Your Terms") only if (a) Your Terms
    provide that the use limitation in Section 3.3 applies to your
    derivative works, and (b) you identify the specific derivative
    works that are subject to Your Terms. Notwithstanding Your Terms,
    this License (including the redistribution requirements in Section
    3.1) will continue to apply to the Work itself.

    3.3 Use Limitation. The Work and any derivative works thereof only
    may be used or intended for use non-commercially. Notwithstanding
    the foregoing, NVIDIA and its affiliates may use the Work and any
    derivative works commercially. As used herein, "non-commercially"
    means for research or evaluation purposes only.

    3.4 Patent Claims. If you bring or threaten to bring a patent claim
    against any Licensor (including any claim, cross-claim or
    counterclaim in a lawsuit) to enforce any patents that you allege
    are infringed by any Work, then your rights under this License from
    such Licensor (including the grant in Section 2.1) will terminate
    immediately.

    3.5 Trademarks. This License does not grant any rights to use any
    Licensor’s or its affiliates’ names, logos, or trademarks, except
    as necessary to reproduce the notices described in this License.

    3.6 Termination. If you violate any term of this License, then your
    rights under this License (including the grant in Section 2.1) will
    terminate immediately.

4. Disclaimer of Warranty.

THE WORK IS PROVIDED "AS IS" WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR
NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER
THIS LICENSE.

5. Limitation of Liability.

EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL
THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE
SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT,
INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF
OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK
(INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION,
LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER
COMMERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF
THE POSSIBILITY OF SUCH DAMAGES.

=======================================================================
*/

#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Dispatch shim for the fused bias + leaky-ReLU op: passes every argument
// unchanged to the implementation registered under
// fused_bias_leakyrelu_op_impl (selected by DISPATCH_DEVICE_IMPL, which is
// defined in pytorch_device_registry.hpp) and returns its result.
torch::Tensor fused_bias_leakyrelu_op_impl(const torch::Tensor& input,
                                           const torch::Tensor& bias,
                                           const torch::Tensor& refer, int act,
                                           int grad, float alpha, float scale) {
  return DISPATCH_DEVICE_IMPL(fused_bias_leakyrelu_op_impl, input, bias, refer,
                              act, grad, alpha, scale);
}

// Public entry point for the fused bias + leaky-ReLU op. Forwards all
// arguments to fused_bias_leakyrelu_op_impl and returns the tensor it
// produces; no validation or conversion happens here.
torch::Tensor fused_bias_leakyrelu(const torch::Tensor& input,
                                   const torch::Tensor& bias,
                                   const torch::Tensor& refer, int act,
                                   int grad, float alpha, float scale) {
  return fused_bias_leakyrelu_op_impl(input, bias, refer, act, grad, alpha,
                                      scale);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/fused_bias_parrots.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include <torch/extension.h>

#include <parrots/compute/aten.hpp>
#include <parrots/extension.hpp>
#include <parrots/foundation/ssattrs.hpp>
using namespace at;
using namespace parrots;

// Forward declaration of the op entry point used by the Parrots binding
// below; the definition is in another translation unit.
torch::Tensor fused_bias_leakyrelu(const torch::Tensor &input,
                                   const torch::Tensor &bias,
                                   const torch::Tensor &refer, int act,
                                   int grad, float alpha, float scale);

// Parrots operator body for fused_bias_leakyrelu. Reads the attributes
// "act", "grad" (int) and "alpha", "scale" (float), wraps the three inputs as
// ATen tensors, runs fused_bias_leakyrelu and stores the returned tensor into
// outs[0] through updateDArray.
void fused_bias_leakyrelu_parrots(CudaContext &ctx, const SSElement &attr,
                                  const OperatorBase::in_list_t &ins,
                                  OperatorBase::out_list_t &outs) {
  int act;
  int grad;
  float alpha;
  float scale;
  SSAttrs(attr)
      .get<int>("act", act)
      .get<int>("grad", grad)
      .get<float>("alpha", alpha)
      .get<float>("scale", scale)
      .done();

  const auto &input_tensor = buildATensor(ctx, ins[0]);
  const auto &bias_tensor = buildATensor(ctx, ins[1]);
  const auto &refer_tensor = buildATensor(ctx, ins[2]);

  // Unlike most bindings in this directory the op returns a fresh tensor, so
  // the result has to be copied/bound into the Parrots output afterwards.
  auto result = fused_bias_leakyrelu(input_tensor, bias_tensor, refer_tensor,
                                     act, grad, alpha, scale);
  updateDArray(ctx, result, outs[0]);
}

// Registers the Parrots operator fused_bias_leakyrelu: attributes act, grad,
// alpha, scale; three inputs; one output; handled by
// fused_bias_leakyrelu_parrots.
PARROTS_EXTENSION_REGISTER(fused_bias_leakyrelu)
    .attr("act")
    .attr("grad")
    .attr("alpha")
    .attr("scale")
    .input(3)
    .output(1)
    .apply(fused_bias_leakyrelu_parrots)
    .done();


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/gather_points.cpp
================================================
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Dispatch shim: forwards the arguments unchanged to the implementation
// registered under gather_points_forward_impl via DISPATCH_DEVICE_IMPL.
// `out` is the tensor the selected implementation is expected to write.
void gather_points_forward_impl(int b, int c, int n, int npoints,
                                const Tensor points, const Tensor idx,
                                Tensor out) {
  DISPATCH_DEVICE_IMPL(gather_points_forward_impl, b, c, n, npoints, points,
                       idx, out);
}

// Dispatch shim: forwards the arguments unchanged to the implementation
// registered under gather_points_backward_impl via DISPATCH_DEVICE_IMPL.
// `grad_points` is the tensor the selected implementation is expected to
// write.
void gather_points_backward_impl(int b, int c, int n, int npoints,
                                 const Tensor grad_out, const Tensor idx,
                                 Tensor grad_points) {
  DISPATCH_DEVICE_IMPL(gather_points_backward_impl, b, c, n, npoints, grad_out,
                       idx, grad_points);
}

// Public forward entry point. Only reorders the arguments (sizes first, then
// tensors) and delegates to gather_points_forward_impl.
void gather_points_forward(Tensor points_tensor, Tensor idx_tensor,
                           Tensor out_tensor, int b, int c, int n,
                           int npoints) {
  gather_points_forward_impl(b, c, n, npoints, points_tensor, idx_tensor,
                             out_tensor);
}

// Public backward entry point. Only reorders the arguments (sizes first, then
// tensors) and delegates to gather_points_backward_impl.
void gather_points_backward(Tensor grad_out_tensor, Tensor idx_tensor,
                            Tensor grad_points_tensor, int b, int c, int n,
                            int npoints) {
  gather_points_backward_impl(b, c, n, npoints, grad_out_tensor, idx_tensor,
                              grad_points_tensor);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/gather_points_parrots.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include <parrots/compute/aten.hpp>
#include <parrots/extension.hpp>
#include <parrots/foundation/ssattrs.hpp>

#include "gather_points_pytorch.h"

using namespace parrots;

#ifdef MMCV_WITH_CUDA
// Parrots operator body for gather_points_forward. Reads the integer
// attributes "b", "c", "n", "npoints", wraps ins[0], ins[1] and outs[0] as
// ATen tensors and calls gather_points_forward.
void gather_points_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr,
                                        const OperatorBase::in_list_t& ins,
                                        OperatorBase::out_list_t& outs) {
  int b;
  int c;
  int n;
  int npoints;
  SSAttrs(attr)
      .get<int>("b", b)
      .get<int>("c", c)
      .get<int>("n", n)
      .get<int>("npoints", npoints)
      .done();

  auto points = buildATensor(ctx, ins[0]);
  auto idx = buildATensor(ctx, ins[1]);
  // Built from the existing outs[0]; the callee writes the gathered values.
  auto out = buildATensor(ctx, outs[0]);

  gather_points_forward(points, idx, out, b, c, n, npoints);
}

// Parrots operator body for gather_points_backward. Reads the integer
// attributes "b", "c", "n", "npoints", wraps ins[0], ins[1] and outs[0] as
// ATen tensors and calls gather_points_backward.
void gather_points_backward_cuda_parrots(CudaContext& ctx,
                                         const SSElement& attr,
                                         const OperatorBase::in_list_t& ins,
                                         OperatorBase::out_list_t& outs) {
  int b;
  int c;
  int n;
  int npoints;
  SSAttrs(attr)
      .get<int>("b", b)
      .get<int>("c", c)
      .get<int>("n", n)
      .get<int>("npoints", npoints)
      .done();

  auto grad_out = buildATensor(ctx, ins[0]);
  auto idx = buildATensor(ctx, ins[1]);
  // Built from the existing outs[0]; the callee writes the input gradient.
  auto grad_points = buildATensor(ctx, outs[0]);

  gather_points_backward(grad_out, idx, grad_points, b, c, n, npoints);
}

// Registers the Parrots operator gather_points_forward: attributes b, c, n,
// npoints; two inputs; one output; handled by
// gather_points_forward_cuda_parrots.
PARROTS_EXTENSION_REGISTER(gather_points_forward)
    .attr("b")
    .attr("c")
    .attr("n")
    .attr("npoints")
    .input(2)
    .output(1)
    .apply(gather_points_forward_cuda_parrots)
    .done();

// Registers the Parrots operator gather_points_backward: attributes b, c, n,
// npoints; two inputs; one output; handled by
// gather_points_backward_cuda_parrots.
PARROTS_EXTENSION_REGISTER(gather_points_backward)
    .attr("b")
    .attr("c")
    .attr("n")
    .attr("npoints")
    .input(2)
    .output(1)
    .apply(gather_points_backward_cuda_parrots)
    .done();
#endif


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/gather_points_pytorch.h
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef GATHER_POINTS_PYTORCH_H
#define GATHER_POINTS_PYTORCH_H
#include <torch/extension.h>
using namespace at;

// Forward pass entry point called by the Parrots binding; out_tensor is the
// output argument.
void gather_points_forward(Tensor points_tensor, Tensor idx_tensor,
                           Tensor out_tensor, int b, int c, int n, int npoints);

// Backward pass entry point called by the Parrots binding;
// grad_points_tensor is the output argument.
void gather_points_backward(Tensor grad_out_tensor, Tensor idx_tensor,
                            Tensor grad_points_tensor, int b, int c, int n,
                            int npoints);
#endif  // GATHER_POINTS_PYTORCH_H


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/group_points.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved.
// Modified from
// https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/group_points.cpp

#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Dispatch shim: forwards the arguments unchanged to the implementation
// registered under group_points_forward_impl via DISPATCH_DEVICE_IMPL.
// `out` is the tensor the selected implementation is expected to write.
void group_points_forward_impl(int b, int c, int n, int npoints, int nsample,
                               const Tensor points, const Tensor idx,
                               Tensor out) {
  DISPATCH_DEVICE_IMPL(group_points_forward_impl, b, c, n, npoints, nsample,
                       points, idx, out);
}

// Dispatch shim: forwards the arguments unchanged to the implementation
// registered under group_points_backward_impl via DISPATCH_DEVICE_IMPL.
// `grad_points` is the tensor the selected implementation is expected to
// write.
void group_points_backward_impl(int b, int c, int n, int npoints, int nsample,
                                const Tensor grad_out, const Tensor idx,
                                Tensor grad_points) {
  DISPATCH_DEVICE_IMPL(group_points_backward_impl, b, c, n, npoints, nsample,
                       grad_out, idx, grad_points);
}

// Public forward entry point for point grouping. Reorders the arguments into
// the (sizes..., tensors...) layout of group_points_forward_impl and delegates
// to it, exactly as group_points_backward delegates to
// group_points_backward_impl.
void group_points_forward(Tensor points_tensor, Tensor idx_tensor,
                          Tensor out_tensor, int b, int c, int n, int npoints,
                          int nsample) {
  // Route through the *_impl wrapper instead of expanding
  // DISPATCH_DEVICE_IMPL a second time here, so that the device dispatch for
  // this op is written in exactly one place.
  group_points_forward_impl(b, c, n, npoints, nsample, points_tensor,
                            idx_tensor, out_tensor);
}

// Public backward entry point. Only reorders the arguments (sizes first, then
// tensors) and delegates to group_points_backward_impl.
void group_points_backward(Tensor grad_out_tensor, Tensor idx_tensor,
                           Tensor grad_points_tensor, int b, int c, int n,
                           int npoints, int nsample) {
  group_points_backward_impl(b, c, n, npoints, nsample, grad_out_tensor,
                             idx_tensor, grad_points_tensor);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/group_points_parrots.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include <parrots/compute/aten.hpp>
#include <parrots/extension.hpp>
#include <parrots/foundation/ssattrs.hpp>

#include "group_points_pytorch.h"

using namespace parrots;

#ifdef MMCV_WITH_CUDA
// Parrots operator body for group_points_forward. Reads the integer
// attributes "b", "c", "n", "npoints", "nsample", wraps ins[0], ins[1] and
// outs[0] as ATen tensors and calls group_points_forward.
void group_points_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr,
                                       const OperatorBase::in_list_t& ins,
                                       OperatorBase::out_list_t& outs) {
  int b;
  int c;
  int n;
  int npoints;
  int nsample;
  SSAttrs(attr)
      .get<int>("b", b)
      .get<int>("c", c)
      .get<int>("n", n)
      .get<int>("npoints", npoints)
      .get<int>("nsample", nsample)
      .done();

  auto points = buildATensor(ctx, ins[0]);
  auto idx = buildATensor(ctx, ins[1]);
  // Built from the existing outs[0]; the callee writes the grouped features.
  auto out = buildATensor(ctx, outs[0]);

  group_points_forward(points, idx, out, b, c, n, npoints, nsample);
}

// Parrots operator body for group_points_backward. Reads the integer
// attributes "b", "c", "n", "npoints", "nsample", wraps ins[0], ins[1] and
// outs[0] as ATen tensors and calls group_points_backward.
void group_points_backward_cuda_parrots(CudaContext& ctx, const SSElement& attr,
                                        const OperatorBase::in_list_t& ins,
                                        OperatorBase::out_list_t& outs) {
  int b;
  int c;
  int n;
  int npoints;
  int nsample;
  SSAttrs(attr)
      .get<int>("b", b)
      .get<int>("c", c)
      .get<int>("n", n)
      .get<int>("npoints", npoints)
      .get<int>("nsample", nsample)
      .done();

  auto grad_out = buildATensor(ctx, ins[0]);
  auto idx = buildATensor(ctx, ins[1]);
  // Built from the existing outs[0]; the callee writes the input gradient.
  auto grad_points = buildATensor(ctx, outs[0]);

  group_points_backward(grad_out, idx, grad_points, b, c, n, npoints, nsample);
}

// Registers the Parrots operator group_points_forward: attributes b, c, n,
// npoints, nsample; two inputs; one output; handled by
// group_points_forward_cuda_parrots.
PARROTS_EXTENSION_REGISTER(group_points_forward)
    .attr("b")
    .attr("c")
    .attr("n")
    .attr("npoints")
    .attr("nsample")
    .input(2)
    .output(1)
    .apply(group_points_forward_cuda_parrots)
    .done();

// Registers the Parrots operator group_points_backward: attributes b, c, n,
// npoints, nsample; two inputs; one output; handled by
// group_points_backward_cuda_parrots.
PARROTS_EXTENSION_REGISTER(group_points_backward)
    .attr("b")
    .attr("c")
    .attr("n")
    .attr("npoints")
    .attr("nsample")
    .input(2)
    .output(1)
    .apply(group_points_backward_cuda_parrots)
    .done();
#endif


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/group_points_pytorch.h
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef GROUP_POINTS_PYTORCH_H
#define GROUP_POINTS_PYTORCH_H
#include <torch/extension.h>
using namespace at;

// Forward pass entry point called by the Parrots binding; out_tensor is the
// output argument.
void group_points_forward(Tensor points_tensor, Tensor idx_tensor,
                          Tensor out_tensor, int b, int c, int n, int npoints,
                          int nsample);

// Backward pass entry point called by the Parrots binding;
// grad_points_tensor is the output argument.
void group_points_backward(Tensor grad_out_tensor, Tensor idx_tensor,
                           Tensor grad_points_tensor, int b, int c, int n,
                           int npoints, int nsample);

#endif  // GROUP_POINTS_PYTORCH_H


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/info.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
// modified from
// https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/csrc/vision.cpp
#include "pytorch_cpp_helper.hpp"

#ifdef MMCV_WITH_CUDA
#ifndef HIP_DIFF
#include <cuda_runtime_api.h>
// Returns the CUDART_VERSION macro value seen at compile time, i.e. the CUDA
// runtime headers this file was built against (not the runtime loaded later).
int get_cudart_version() { return CUDART_VERSION; }
#endif
#endif

// Returns the CUDA version this extension was compiled with, formatted as
// "major.minor" with ".patch" appended only when the last decimal digit of the
// version number is non-zero. Returns "rocm not available" for HIP builds and
// "not available" when MMCV_WITH_CUDA is not defined.
std::string get_compiling_cuda_version() {
#if !defined(MMCV_WITH_CUDA)
  return std::string("not available");
#elif defined(HIP_DIFF)
  return std::string("rocm not available");
#else
  // Formatting copied from
  // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/cuda/detail/CUDAHooks.cpp#L231
  // The arithmetic treats the value as major * 1000 + minor * 10 + patch.
  const int version = get_cudart_version();
  std::ostringstream text;
  text << (version / 1000) << "." << (version / 10 % 100);
  const int patch = version % 10;
  if (patch != 0) {
    text << "." << patch;
  }
  return text.str();
#endif
}

// similar to
// https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/Version.cpp
// Builds a short description of the compiler used for this translation unit
// from its predefined macros: "GCC <major>.<minor>" (only when not clang),
// "clang <major>.<minor>.<patch>", and/or "MSVC <_MSC_FULL_VER>". The pieces
// are concatenated without a separator when more than one applies, and the
// result is empty for compilers that define none of these macros.
std::string get_compiler_version() {
  std::ostringstream text;

#if defined(__GNUC__) && !defined(__clang__)
  // clang also defines __GNUC__, hence the explicit exclusion.
  text << "GCC " << __GNUC__ << "." << __GNUC_MINOR__;
#endif

#if defined(__clang_major__)
  text << "clang " << __clang_major__ << "." << __clang_minor__ << "."
       << __clang_patchlevel__;
#endif

#if defined(_MSC_VER)
  text << "MSVC " << _MSC_FULL_VER;
#endif

  return text.str();
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/iou3d.cpp
================================================
// Modified from
// https://github.com/open-mmlab/OpenPCDet/blob/master/pcdet/ops/iou3d_nms/src/iou3d_nms.cpp

/*
3D IoU Calculation and Rotated NMS(modified from 2D NMS written by others)
Written by Shaoshuai Shi
All Rights Reserved 2019-2020.
*/

#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Number of bits in an unsigned long long. The NMS routines below use it as
// the number of boxes covered by one bitmask word (col_blocks, nblock,
// inblock). NOTE(review): the name suggests it is also the device-side thread
// block size -- confirm against the kernel launcher.
const int THREADS_PER_BLOCK_NMS = sizeof(unsigned long long) * 8;

// Dispatch shim: forwards the arguments unchanged to the implementation
// registered under iou3d_boxes_overlap_bev_forward_impl via
// DISPATCH_DEVICE_IMPL. `ans_overlap` is the output tensor.
void iou3d_boxes_overlap_bev_forward_impl(const int num_a, const Tensor boxes_a,
                                          const int num_b, const Tensor boxes_b,
                                          Tensor ans_overlap) {
  DISPATCH_DEVICE_IMPL(iou3d_boxes_overlap_bev_forward_impl, num_a, boxes_a,
                       num_b, boxes_b, ans_overlap);
}

// Dispatch shim: forwards the arguments unchanged to the implementation
// registered under iou3d_boxes_iou_bev_forward_impl via DISPATCH_DEVICE_IMPL.
// `ans_iou` is the output tensor.
void iou3d_boxes_iou_bev_forward_impl(const int num_a, const Tensor boxes_a,
                                      const int num_b, const Tensor boxes_b,
                                      Tensor ans_iou) {
  DISPATCH_DEVICE_IMPL(iou3d_boxes_iou_bev_forward_impl, num_a, boxes_a, num_b,
                       boxes_b, ans_iou);
}

// Dispatch shim: forwards the arguments unchanged to the implementation
// registered under iou3d_nms_forward_impl via DISPATCH_DEVICE_IMPL.
// `mask` is a raw pointer to the bitmask buffer; iou3d_nms_forward passes the
// storage of a (boxes_num, col_blocks) int64 tensor allocated with the same
// options as `boxes`.
void iou3d_nms_forward_impl(const Tensor boxes, unsigned long long *mask,
                            int boxes_num, float nms_overlap_thresh) {
  DISPATCH_DEVICE_IMPL(iou3d_nms_forward_impl, boxes, mask, boxes_num,
                       nms_overlap_thresh);
}

// Dispatch shim: forwards the arguments unchanged to the implementation
// registered under iou3d_nms_normal_forward_impl via DISPATCH_DEVICE_IMPL.
// `mask` is a raw pointer to the bitmask buffer; iou3d_nms_normal_forward
// passes the storage of a (boxes_num, col_blocks) int64 tensor allocated with
// the same options as `boxes`.
void iou3d_nms_normal_forward_impl(const Tensor boxes, unsigned long long *mask,
                                   int boxes_num, float nms_overlap_thresh) {
  DISPATCH_DEVICE_IMPL(iou3d_nms_normal_forward_impl, boxes, mask, boxes_num,
                       nms_overlap_thresh);
}

// Computes the pairwise BEV overlap between two box sets by delegating to
// iou3d_boxes_overlap_bev_forward_impl.
//   boxes_a:     (N, 5) [x1, y1, x2, y2, ry]
//   boxes_b:     (M, 5)
//   ans_overlap: (N, M), written by the implementation
void iou3d_boxes_overlap_bev_forward(Tensor boxes_a, Tensor boxes_b,
                                     Tensor ans_overlap) {
  // Row counts are narrowed to int because that is what the impl takes.
  const int count_a = boxes_a.size(0);
  const int count_b = boxes_b.size(0);

  iou3d_boxes_overlap_bev_forward_impl(count_a, boxes_a, count_b, boxes_b,
                                       ans_overlap);
}

// Computes the pairwise BEV IoU between two box sets by delegating to
// iou3d_boxes_iou_bev_forward_impl.
//   boxes_a: (N, 5) [x1, y1, x2, y2, ry]
//   boxes_b: (M, 5)
//   ans_iou: (N, M), written by the implementation
void iou3d_boxes_iou_bev_forward(Tensor boxes_a, Tensor boxes_b,
                                 Tensor ans_iou) {
  // Row counts are narrowed to int because that is what the impl takes.
  const int count_a = boxes_a.size(0);
  const int count_b = boxes_b.size(0);

  iou3d_boxes_iou_bev_forward_impl(count_a, boxes_a, count_b, boxes_b,
                                   ans_iou);
}

// Rotated NMS over BEV boxes.
//
// The dispatched implementation fills a (boxes_num, col_blocks) bitmask; this
// function then copies the mask to the CPU and performs the greedy selection
// on the host: a box is kept if no previously kept box has marked it in the
// running "removed" bitset, and every kept box ORs its mask row into that
// bitset.
//
//   boxes:    (N, 5) [x1, y1, x2, y2, ry], must be contiguous
//   keep:     (N) int64, must be contiguous; receives the kept indices in
//             ascending order (written through a host pointer, so it is
//             expected to live in CPU memory -- confirm against the caller)
//   keep_num: int64 tensor; element 0 receives the number of kept boxes
//   nms_overlap_thresh: threshold forwarded to the implementation
void iou3d_nms_forward(Tensor boxes, Tensor keep, Tensor keep_num,
                       float nms_overlap_thresh) {
  CHECK_CONTIGUOUS(boxes);
  CHECK_CONTIGUOUS(keep);

  int boxes_num = boxes.size(0);
  int64_t *keep_data = keep.data_ptr<int64_t>();
  int64_t *keep_num_data = keep_num.data_ptr<int64_t>();

  // One mask word holds THREADS_PER_BLOCK_NMS bits; round up.
  const int col_blocks =
      (boxes_num + THREADS_PER_BLOCK_NMS - 1) / THREADS_PER_BLOCK_NMS;

  Tensor mask =
      at::empty({boxes_num, col_blocks}, boxes.options().dtype(at::kLong));
  unsigned long long *mask_data =
      (unsigned long long *)mask.data_ptr<int64_t>();
  iou3d_nms_forward_impl(boxes, mask_data, boxes_num, nms_overlap_thresh);

  // The selection loop below reads the mask through a host pointer.
  at::Tensor mask_cpu = mask.to(at::kCPU);
  const unsigned long long *mask_host =
      (const unsigned long long *)mask_cpu.data_ptr<int64_t>();

  // std::vector value-initialises its elements, so the bitset starts out all
  // zero without a memset (which would also index an empty vector when there
  // are no boxes).
  std::vector<unsigned long long> remv_cpu(col_blocks);

  int num_to_keep = 0;

  for (int i = 0; i < boxes_num; i++) {
    int nblock = i / THREADS_PER_BLOCK_NMS;
    int inblock = i % THREADS_PER_BLOCK_NMS;

    if (!(remv_cpu[nblock] & (1ULL << inblock))) {
      keep_data[num_to_keep++] = i;
      // 64-bit row offset: i * col_blocks can exceed INT_MAX for very large
      // inputs.
      const unsigned long long *p =
          mask_host + static_cast<int64_t>(i) * col_blocks;
      // Words before nblock only describe boxes that were already visited.
      for (int j = nblock; j < col_blocks; j++) {
        remv_cpu[j] |= p[j];
      }
    }
  }

  // Written once, after the loop, so the count is also stored (as 0) when the
  // loop body never runs.
  *keep_num_data = num_to_keep;
}

// NMS over BEV boxes using the "normal" overlap implementation
// (iou3d_nms_normal_forward_impl).
//
// The dispatched implementation fills a (boxes_num, col_blocks) bitmask; this
// function then copies the mask to the CPU and performs the greedy selection
// on the host: a box is kept if no previously kept box has marked it in the
// running "removed" bitset, and every kept box ORs its mask row into that
// bitset.
//
//   boxes:    (N, 5) [x1, y1, x2, y2, ry], must be contiguous
//   keep:     (N) int64, must be contiguous; receives the kept indices in
//             ascending order (written through a host pointer, so it is
//             expected to live in CPU memory -- confirm against the caller)
//   keep_num: int64 tensor; element 0 receives the number of kept boxes
//   nms_overlap_thresh: threshold forwarded to the implementation
void iou3d_nms_normal_forward(Tensor boxes, Tensor keep, Tensor keep_num,
                              float nms_overlap_thresh) {
  CHECK_CONTIGUOUS(boxes);
  CHECK_CONTIGUOUS(keep);

  int boxes_num = boxes.size(0);
  int64_t *keep_data = keep.data_ptr<int64_t>();
  int64_t *keep_num_data = keep_num.data_ptr<int64_t>();

  // One mask word holds THREADS_PER_BLOCK_NMS bits; round up.
  const int col_blocks =
      (boxes_num + THREADS_PER_BLOCK_NMS - 1) / THREADS_PER_BLOCK_NMS;

  Tensor mask =
      at::empty({boxes_num, col_blocks}, boxes.options().dtype(at::kLong));
  unsigned long long *mask_data =
      (unsigned long long *)mask.data_ptr<int64_t>();
  iou3d_nms_normal_forward_impl(boxes, mask_data, boxes_num,
                                nms_overlap_thresh);

  // The selection loop below reads the mask through a host pointer.
  at::Tensor mask_cpu = mask.to(at::kCPU);
  const unsigned long long *mask_host =
      (const unsigned long long *)mask_cpu.data_ptr<int64_t>();

  // std::vector value-initialises its elements, so the bitset starts out all
  // zero without a memset (which would also index an empty vector when there
  // are no boxes).
  std::vector<unsigned long long> remv_cpu(col_blocks);
  int num_to_keep = 0;

  for (int i = 0; i < boxes_num; i++) {
    int nblock = i / THREADS_PER_BLOCK_NMS;
    int inblock = i % THREADS_PER_BLOCK_NMS;

    if (!(remv_cpu[nblock] & (1ULL << inblock))) {
      keep_data[num_to_keep++] = i;
      // 64-bit row offset: i * col_blocks can exceed INT_MAX for very large
      // inputs.
      const unsigned long long *p =
          mask_host + static_cast<int64_t>(i) * col_blocks;
      // Words before nblock only describe boxes that were already visited.
      for (int j = nblock; j < col_blocks; j++) {
        remv_cpu[j] |= p[j];
      }
    }
  }

  *keep_num_data = num_to_keep;
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/iou3d_parrots.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include <parrots/compute/aten.hpp>
#include <parrots/extension.hpp>
#include <parrots/foundation/ssattrs.hpp>

#include "iou3d_pytorch.h"

using namespace parrots;

#ifdef MMCV_WITH_CUDA
// Parrots operator body for iou3d_boxes_iou_bev_forward. Takes no attributes:
// wraps ins[0], ins[1] and outs[0] as ATen tensors and calls
// iou3d_boxes_iou_bev_forward.
void iou3d_boxes_iou_bev_forward_cuda_parrots(
    CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins,
    OperatorBase::out_list_t& outs) {
  auto first_boxes = buildATensor(ctx, ins[0]);
  auto second_boxes = buildATensor(ctx, ins[1]);
  // Built from the existing outs[0]; the callee writes the IoU values.
  auto iou_out = buildATensor(ctx, outs[0]);

  iou3d_boxes_iou_bev_forward(first_boxes, second_boxes, iou_out);
}

// Parrots operator body for iou3d_nms_forward. Reads the float attribute
// "nms_overlap_thresh", wraps ins[0] (boxes) and the two outputs (kept
// indices, kept count) as ATen tensors and calls iou3d_nms_forward.
void iou3d_nms_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr,
                                    const OperatorBase::in_list_t& ins,
                                    OperatorBase::out_list_t& outs) {
  float nms_overlap_thresh;
  SSAttrs(attr)
      .get<float>("nms_overlap_thresh", nms_overlap_thresh)
      .done();

  auto boxes_tensor = buildATensor(ctx, ins[0]);
  auto keep_tensor = buildATensor(ctx, outs[0]);
  auto keep_count_tensor = buildATensor(ctx, outs[1]);

  iou3d_nms_forward(boxes_tensor, keep_tensor, keep_count_tensor,
                    nms_overlap_thresh);
}

// Parrots operator body for iou3d_nms_normal_forward. Reads the float
// attribute "nms_overlap_thresh", wraps ins[0] (boxes) and the two outputs
// (kept indices, kept count) as ATen tensors and calls
// iou3d_nms_normal_forward.
void iou3d_nms_normal_forward_cuda_parrots(CudaContext& ctx,
                                           const SSElement& attr,
                                           const OperatorBase::in_list_t& ins,
                                           OperatorBase::out_list_t& outs) {
  float nms_overlap_thresh;
  SSAttrs(attr)
      .get<float>("nms_overlap_thresh", nms_overlap_thresh)
      .done();

  auto boxes_tensor = buildATensor(ctx, ins[0]);
  auto keep_tensor = buildATensor(ctx, outs[0]);
  auto keep_count_tensor = buildATensor(ctx, outs[1]);

  iou3d_nms_normal_forward(boxes_tensor, keep_tensor, keep_count_tensor,
                           nms_overlap_thresh);
}

// Registers the Parrots operator iou3d_boxes_iou_bev_forward: no attributes;
// two inputs; one output; handled by
// iou3d_boxes_iou_bev_forward_cuda_parrots.
PARROTS_EXTENSION_REGISTER(iou3d_boxes_iou_bev_forward)
    .input(2)
    .output(1)
    .apply(iou3d_boxes_iou_bev_forward_cuda_parrots)
    .done();

// Registers the Parrots operator iou3d_nms_forward: attribute
// nms_overlap_thresh; one input; two outputs; handled by
// iou3d_nms_forward_cuda_parrots.
PARROTS_EXTENSION_REGISTER(iou3d_nms_forward)
    .attr("nms_overlap_thresh")
    .input(1)
    .output(2)
    .apply(iou3d_nms_forward_cuda_parrots)
    .done();

// Registers the Parrots operator iou3d_nms_normal_forward: attribute
// nms_overlap_thresh; one input; two outputs; handled by
// iou3d_nms_normal_forward_cuda_parrots.
PARROTS_EXTENSION_REGISTER(iou3d_nms_normal_forward)
    .attr("nms_overlap_thresh")
    .input(1)
    .output(2)
    .apply(iou3d_nms_normal_forward_cuda_parrots)
    .done();
#endif


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/iou3d_pytorch.h
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef IOU_3D_PYTORCH_H
#define IOU_3D_PYTORCH_H
#include <torch/extension.h>
using namespace at;

// Pairwise BEV IoU between boxes_a and boxes_b; ans_iou is the output.
void iou3d_boxes_iou_bev_forward(Tensor boxes_a, Tensor boxes_b,
                                 Tensor ans_iou);

// NMS entry point called by the Parrots binding; keep and keep_num are the
// outputs (kept indices and their count).
void iou3d_nms_forward(Tensor boxes, Tensor keep, Tensor keep_num,
                       float nms_overlap_thresh);

// Same interface as iou3d_nms_forward, backed by the "normal" NMS
// implementation.
void iou3d_nms_normal_forward(Tensor boxes, Tensor keep, Tensor keep_num,
                              float nms_overlap_thresh);

#endif  // IOU_3D_PYTORCH_H


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/knn.cpp
================================================
// Modified from
// https://github.com/CVMI-Lab/PAConv/tree/main/scene_seg/lib/pointops/src/knnquery_heap

#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Dispatch shim: forwards the arguments unchanged to the implementation
// registered under knn_forward_impl via DISPATCH_DEVICE_IMPL. `idx` and
// `dist2` are the tensors the selected implementation is expected to write.
void knn_forward_impl(int b, int n, int m, int nsample, const Tensor xyz,
                      const Tensor new_xyz, Tensor idx, Tensor dist2) {
  DISPATCH_DEVICE_IMPL(knn_forward_impl, b, n, m, nsample, xyz, new_xyz, idx,
                       dist2);
}

// Public k-nearest-neighbour entry point. Only reorders the arguments (sizes
// first, then tensors) and delegates to knn_forward_impl.
void knn_forward(Tensor xyz_tensor, Tensor new_xyz_tensor, Tensor idx_tensor,
                 Tensor dist2_tensor, int b, int n, int m, int nsample) {
  knn_forward_impl(b, n, m, nsample, xyz_tensor, new_xyz_tensor, idx_tensor,
                   dist2_tensor);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/knn_parrots.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include <parrots/compute/aten.hpp>
#include <parrots/extension.hpp>
#include <parrots/foundation/ssattrs.hpp>

#include "knn_pytorch.h"

using namespace parrots;

#ifdef MMCV_WITH_CUDA
// Parrots operator body for knn_forward. Reads the integer attributes "b",
// "n", "m", "nsample", wraps the two inputs and the two outputs as ATen
// tensors and calls knn_forward.
void knn_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr,
                              const OperatorBase::in_list_t& ins,
                              OperatorBase::out_list_t& outs) {
  int b;
  int n;
  int m;
  int nsample;
  SSAttrs(attr)
      .get<int>("b", b)
      .get<int>("n", n)
      .get<int>("m", m)
      .get<int>("nsample", nsample)
      .done();

  auto xyz = buildATensor(ctx, ins[0]);
  auto new_xyz = buildATensor(ctx, ins[1]);
  // Both results are built from the existing output slots; the callee fills
  // them.
  auto idx = buildATensor(ctx, outs[0]);
  auto dist2 = buildATensor(ctx, outs[1]);

  knn_forward(xyz, new_xyz, idx, dist2, b, n, m, nsample);
}

// Registers the Parrots operator knn_forward: attributes b, n, m, nsample;
// two inputs; two outputs; handled by knn_forward_cuda_parrots.
PARROTS_EXTENSION_REGISTER(knn_forward)
    .attr("b")
    .attr("n")
    .attr("m")
    .attr("nsample")
    .input(2)
    .output(2)
    .apply(knn_forward_cuda_parrots)
    .done();
#endif


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/knn_pytorch.h
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef KNN_PYTORCH_H
#define KNN_PYTORCH_H
#include <torch/extension.h>
using namespace at;

// k-nearest-neighbour entry point called by the Parrots binding; idx_tensor
// and dist2_tensor are the outputs.
void knn_forward(Tensor xyz_tensor, Tensor new_xyz_tensor, Tensor idx_tensor,
                 Tensor dist2_tensor, int b, int n, int m, int nsample);
#endif  // KNN_PYTORCH_H


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/masked_conv2d.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Dispatch shim: forwards the arguments unchanged to the implementation
// registered under masked_im2col_forward_impl via DISPATCH_DEVICE_IMPL.
// `col` is the tensor the selected implementation is expected to write.
void masked_im2col_forward_impl(const Tensor im, const Tensor mask_h_idx,
                                const Tensor mask_w_idx, Tensor col,
                                const int kernel_h, const int kernel_w,
                                const int pad_h, const int pad_w) {
  DISPATCH_DEVICE_IMPL(masked_im2col_forward_impl, im, mask_h_idx, mask_w_idx,
                       col, kernel_h, kernel_w, pad_h, pad_w);
}

// Dispatch shim: forwards the arguments unchanged to the implementation
// registered under masked_col2im_forward_impl via DISPATCH_DEVICE_IMPL.
// `im` is the tensor the selected implementation is expected to write.
void masked_col2im_forward_impl(const Tensor col, const Tensor mask_h_idx,
                                const Tensor mask_w_idx, Tensor im, int height,
                                int width, int channels) {
  DISPATCH_DEVICE_IMPL(masked_col2im_forward_impl, col, mask_h_idx, mask_w_idx,
                       im, height, width, channels);
}

// Public masked im2col entry point; passes every argument straight through to
// masked_im2col_forward_impl.
void masked_im2col_forward(const Tensor im, const Tensor mask_h_idx,
                           const Tensor mask_w_idx, Tensor col,
                           const int kernel_h, const int kernel_w,
                           const int pad_h, const int pad_w) {
  masked_im2col_forward_impl(im, mask_h_idx, mask_w_idx, col, kernel_h,
                             kernel_w, pad_h, pad_w);
}

// Public masked col2im entry point; passes every argument straight through to
// masked_col2im_forward_impl.
void masked_col2im_forward(const Tensor col, const Tensor mask_h_idx,
                           const Tensor mask_w_idx, Tensor im, int height,
                           int width, int channels) {
  masked_col2im_forward_impl(col, mask_h_idx, mask_w_idx, im, height, width,
                             channels);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/masked_conv2d_parrots.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include <parrots/compute/aten.hpp>
#include <parrots/extension.hpp>
#include <parrots/foundation/ssattrs.hpp>

#include "masked_conv2d_pytorch.h"

using namespace parrots;

#ifdef MMCV_WITH_CUDA
// Parrots operator body for masked_im2col_forward. Reads the integer
// attributes "kernel_h", "kernel_w", "pad_h", "pad_w", wraps the three inputs
// and the output as ATen tensors and calls masked_im2col_forward_cuda.
//   im: (n, ic, h, w), kernel size (kh, kw)
//   kernel: (oc, ic * kh * kw), col: (kh * kw * ic, ow * oh)
// NOTE(review): this calls the *_cuda symbol declared in
// masked_conv2d_pytorch.h, while masked_conv2d.cpp in this directory defines
// masked_im2col_forward (no suffix) -- confirm where the *_cuda definition is
// linked from.
void masked_im2col_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr,
                                        const OperatorBase::in_list_t& ins,
                                        OperatorBase::out_list_t& outs) {
  int kernel_h;
  int kernel_w;
  int pad_h;
  int pad_w;
  SSAttrs(attr)
      .get<int>("kernel_h", kernel_h)
      .get<int>("kernel_w", kernel_w)
      .get<int>("pad_h", pad_h)
      .get<int>("pad_w", pad_w)
      .done();

  const auto& image = buildATensor(ctx, ins[0]);
  const auto& rows_idx = buildATensor(ctx, ins[1]);
  const auto& cols_idx = buildATensor(ctx, ins[2]);
  // Built from the existing outs[0]; the callee writes the column buffer.
  auto columns = buildATensor(ctx, outs[0]);

  masked_im2col_forward_cuda(image, rows_idx, cols_idx, columns, kernel_h,
                             kernel_w, pad_h, pad_w);
}

// Parrots adapter for masked col2im on CUDA: unpacks height/width/channels,
// wraps ins[0..2] / outs[0] as ATen tensors and calls
// masked_col2im_forward_cuda.
void masked_col2im_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr,
                                        const OperatorBase::in_list_t& ins,
                                        OperatorBase::out_list_t& outs) {
  // Layout notes kept from the original author:
  //   im: (n, ic, h, w), kernel size (kh, kw)
  //   kernel: (oc, ic * kh * kw), col: (kh * kw * ic, ow * oh)
  int height;
  int width;
  int channels;
  SSAttrs(attr)
      .get<int>("height", height)
      .get<int>("width", width)
      .get<int>("channels", channels)
      .done();

  // Inputs, in registration order.
  const auto& col_in = buildATensor(ctx, ins[0]);
  const auto& rows_idx = buildATensor(ctx, ins[1]);
  const auto& cols_idx = buildATensor(ctx, ins[2]);

  // Single output that receives the reconstructed image.
  auto image_out = buildATensor(ctx, outs[0]);

  masked_col2im_forward_cuda(col_in, rows_idx, cols_idx, image_out, height,
                             width, channels);
}

// Registers the parrots op "masked_im2col_forward": four attributes
// (kernel_h, kernel_w, pad_h, pad_w), 3 inputs, 1 output, implemented by the
// CUDA adapter above. The attribute names must match the keys read by the
// adapter's SSAttrs(...).get calls.
PARROTS_EXTENSION_REGISTER(masked_im2col_forward)
    .attr("kernel_h")
    .attr("kernel_w")
    .attr("pad_h")
    .attr("pad_w")
    .input(3)
    .output(1)
    .apply(masked_im2col_forward_cuda_parrots)
    .done();

// Registers the parrots op "masked_col2im_forward": three attributes
// (height, width, channels), 3 inputs, 1 output, implemented by the CUDA
// adapter above.
PARROTS_EXTENSION_REGISTER(masked_col2im_forward)
    .attr("height")
    .attr("width")
    .attr("channels")
    .input(3)
    .output(1)
    .apply(masked_col2im_forward_cuda_parrots)
    .done();
#endif


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/masked_conv2d_pytorch.h
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef MASKED_CONV2D_PYTORCH_H
#define MASKED_CONV2D_PYTORCH_H
#include <torch/extension.h>
using namespace at;

// Declaration only; the definition lives outside this header.
// Called by the parrots adapter with (im, mask_h_idx, mask_w_idx) as inputs
// and `col` as the output tensor.
void masked_im2col_forward_cuda(const Tensor im, const Tensor mask_h_idx,
                                const Tensor mask_w_idx, Tensor col,
                                const int kernel_h, const int kernel_w,
                                const int pad_h, const int pad_w);

// Declaration only; the definition lives outside this header.
// Called by the parrots adapter with (col, mask_h_idx, mask_w_idx) as inputs
// and `im` as the output tensor.
void masked_col2im_forward_cuda(const Tensor col, const Tensor mask_h_idx,
                                const Tensor mask_w_idx, Tensor im, int height,
                                int width, int channels);
#endif  // MASKED_CONV2D_PYTORCH_H


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/min_area_polygons.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Hands the call to DISPATCH_DEVICE_IMPL under the key
// min_area_polygons_impl. The macro is defined outside this file;
// presumably it selects a backend from the tensors' device (confirm in
// pytorch_device_registry.hpp).
void min_area_polygons_impl(const Tensor pointsets, Tensor polygons) {
  DISPATCH_DEVICE_IMPL(min_area_polygons_impl, pointsets, polygons);
}

// Public entry point: forwards both tensors unchanged to
// min_area_polygons_impl. `polygons` is the tensor handed over as output.
void min_area_polygons(const Tensor pointsets, Tensor polygons) {
  min_area_polygons_impl(pointsets, polygons);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/min_area_polygons_parrots.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include <parrots/compute/aten.hpp>
#include <parrots/extension.hpp>
#include <parrots/foundation/ssattrs.hpp>

#include "min_area_polygons_pytorch.h"

using namespace parrots;

#ifdef MMCV_WITH_CUDA
// Parrots adapter for min_area_polygons on CUDA. The op has no attributes, so
// `attr` is unused; ins[0] and outs[0] are wrapped as ATen tensors and passed
// straight through.
void min_area_polygons_cuda_parrots(CudaContext& ctx, const SSElement& attr,
                                    const OperatorBase::in_list_t& ins,
                                    OperatorBase::out_list_t& outs) {
  auto point_sets = buildATensor(ctx, ins[0]);
  auto polygons_out = buildATensor(ctx, outs[0]);

  min_area_polygons(point_sets, polygons_out);
}

// Registers the parrots op "min_area_polygons": no attributes, 1 input,
// 1 output, implemented by the CUDA adapter above.
PARROTS_EXTENSION_REGISTER(min_area_polygons)
    .input(1)
    .output(1)
    .apply(min_area_polygons_cuda_parrots)
    .done();

#endif


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/min_area_polygons_pytorch.h
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef MIN_AREA_POLYGONS_PYTORCH_H
#define MIN_AREA_POLYGONS_PYTORCH_H
#include <torch/extension.h>
using namespace at;

// Declaration only; the definition lives outside this header. `polygons` is
// the tensor the parrots adapter passes as the op's output.
void min_area_polygons(const Tensor pointsets, Tensor polygons);

#endif  // MIN_AREA_POLYGONS_PYTORCH_H


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/modulated_deform_conv.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Device-dispatched im2col step of modulated deformable convolution.
// Hands all arguments, in order, to DISPATCH_DEVICE_IMPL under the key
// modulated_deformable_im2col_impl (macro defined outside this file;
// presumably selects a backend from the tensors' device). `data_col` is the
// only non-const tensor and receives the result.
void modulated_deformable_im2col_impl(
    const Tensor data_im, const Tensor data_offset, const Tensor data_mask,
    const int batch_size, const int channels, const int height_im,
    const int width_im, const int height_col, const int width_col,
    const int kernel_h, const int kernel_w, const int pad_h, const int pad_w,
    const int stride_h, const int stride_w, const int dilation_h,
    const int dilation_w, const int deformable_group, Tensor data_col) {
  DISPATCH_DEVICE_IMPL(modulated_deformable_im2col_impl, data_im, data_offset,
                       data_mask, batch_size, channels, height_im, width_im,
                       height_col, width_col, kernel_h, kernel_w, pad_h, pad_w,
                       stride_h, stride_w, dilation_h, dilation_w,
                       deformable_group, data_col);
}

// Device-dispatched col2im step (used by the backward pass for the gradient
// w.r.t. the input data). Hands all arguments, in order, to
// DISPATCH_DEVICE_IMPL under the key modulated_deformable_col2im_impl.
// `grad_im` is the only non-const tensor and receives the result.
void modulated_deformable_col2im_impl(
    const Tensor data_col, const Tensor data_offset, const Tensor data_mask,
    const int batch_size, const int channels, const int height_im,
    const int width_im, const int height_col, const int width_col,
    const int kernel_h, const int kernel_w, const int pad_h, const int pad_w,
    const int stride_h, const int stride_w, const int dilation_h,
    const int dilation_w, const int deformable_group, Tensor grad_im) {
  DISPATCH_DEVICE_IMPL(modulated_deformable_col2im_impl, data_col, data_offset,
                       data_mask, batch_size, channels, height_im, width_im,
                       height_col, width_col, kernel_h, kernel_w, pad_h, pad_w,
                       stride_h, stride_w, dilation_h, dilation_w,
                       deformable_group, grad_im);
}

// Device-dispatched col2im "coord" step (used by the backward pass for the
// gradient w.r.t. the sampling coordinates). Hands all arguments, in order,
// to DISPATCH_DEVICE_IMPL under the key
// modulated_deformable_col2im_coord_impl. `grad_offset` and `grad_mask` are
// the non-const tensors that receive the results.
void modulated_deformable_col2im_coord_impl(
    const Tensor data_col, const Tensor data_im, const Tensor data_offset,
    const Tensor data_mask, const int batch_size, const int channels,
    const int height_im, const int width_im, const int height_col,
    const int width_col, const int kernel_h, const int kernel_w,
    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
    const int dilation_h, const int dilation_w, const int deformable_group,
    Tensor grad_offset, Tensor grad_mask) {
  DISPATCH_DEVICE_IMPL(modulated_deformable_col2im_coord_impl, data_col,
                       data_im, data_offset, data_mask, batch_size, channels,
                       height_im, width_im, height_col, width_col, kernel_h,
                       kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h,
                       dilation_w, deformable_group, grad_offset, grad_mask);
}

// Forward pass of modulated deformable convolution, computed one sample at a
// time as im2col followed by a per-group matrix multiply.
//
// Arguments (shapes as they are read by the code below):
//   input    4-D, indexed as (batch, channels, height, width).
//   weight   4-D, indexed as (channels_out, channels / group, kernel_h,
//            kernel_w).
//   bias     added as a (1, bias.size(0), 1, 1) view when with_bias is true.
//   ones     replaced locally by an all-ones plane if it is not 2-D or too
//            small; not otherwise used in the forward pass.
//   offset, mask  indexed per sample and passed to the im2col step.
//   output   viewed as (batch, channels_out, height_out, width_out) and
//            zeroed before accumulation.
//   columns  rebound locally to a fresh zero buffer of shape
//            (channels * kernel_h * kernel_w, height_out * width_out).
//   kernel_h/kernel_w must equal weight.size(2)/weight.size(3).
//
// Raises (via AT_ERROR) when the kernel size attributes disagree with the
// weight tensor, or when input channels != weight.size(1) * group.
void modulated_deform_conv_forward(
    Tensor input, Tensor weight, Tensor bias, Tensor ones, Tensor offset,
    Tensor mask, Tensor output, Tensor columns, int kernel_h, int kernel_w,
    const int stride_h, const int stride_w, const int pad_h, const int pad_w,
    const int dilation_h, const int dilation_w, const int group,
    const int deformable_group, const bool with_bias) {
  at::DeviceGuard guard(input.device());

  const int batch = input.size(0);
  const int channels = input.size(1);
  const int height = input.size(2);
  const int width = input.size(3);

  const int channels_out = weight.size(0);
  const int channels_kernel = weight.size(1);
  const int kernel_h_ = weight.size(2);
  const int kernel_w_ = weight.size(3);

  // AT_ERROR concatenates its arguments (it does not expand printf-style
  // placeholders), so the message is assembled piecewise. The requested
  // kernel size is reported first, the weight tensor's size second.
  if (kernel_h_ != kernel_h || kernel_w_ != kernel_w)
    AT_ERROR("Input shape and kernel shape won't match: (", kernel_h, " x ",
             kernel_w, " vs ", kernel_h_, " x ", kernel_w_, ").");
  if (channels != channels_kernel * group)
    AT_ERROR("Input shape and kernel channels won't match: (", channels,
             " vs ", channels_kernel * group, ").");

  // Standard convolution output-size formula with padding, dilation, stride.
  const int height_out =
      (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1;
  const int width_out =
      (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1;

  if (ones.ndimension() != 2 ||
      ones.size(0) * ones.size(1) < height_out * width_out) {
    // Resize plane and fill with ones...
    ones = at::ones({height_out, width_out}, input.options());
  }

  // resize output
  output = output.view({batch, channels_out, height_out, width_out}).zero_();
  // resize temporary columns
  columns =
      at::zeros({channels * kernel_h * kernel_w, 1 * height_out * width_out},
                input.options());

  // Expose the group dimension: (batch, group, channels_out / group, H, W).
  output = output.view({output.size(0), group, output.size(1) / group,
                        output.size(2), output.size(3)});

  for (int b = 0; b < batch; b++) {
    // Fill `columns` for sample b (batch_size argument is 1).
    modulated_deformable_im2col_impl(
        input[b], offset[b], mask[b], 1, channels, height, width, height_out,
        width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w,
        dilation_h, dilation_w, deformable_group, columns);

    // divide into group
    weight = weight.view({group, weight.size(0) / group, weight.size(1),
                          weight.size(2), weight.size(3)});
    columns = columns.view({group, columns.size(0) / group, columns.size(1)});

    for (int g = 0; g < group; g++) {
      // output[b][g] += weight[g] (flattened) x columns[g]
      output[b][g] = output[b][g]
                         .flatten(1)
                         .addmm_(weight[g].flatten(1), columns[g])
                         .view_as(output[b][g]);
    }

    // Restore the ungrouped views so the next iteration starts from the same
    // shapes.
    weight = weight.view({weight.size(0) * weight.size(1), weight.size(2),
                          weight.size(3), weight.size(4)});
    columns =
        columns.view({columns.size(0) * columns.size(1), columns.size(2)});
  }

  // Merge the group dimension back into the channel dimension.
  output = output.view({output.size(0), output.size(1) * output.size(2),
                        output.size(3), output.size(4)});

  if (with_bias) {
    output += bias.view({1, bias.size(0), 1, 1});
  }
}

// Backward pass of modulated deformable convolution, computed one sample at a
// time. For each sample it:
//   1. forms the column gradient as weight^T x grad_output (per group),
//   2. scatters it to grad_offset / grad_mask (col2im_coord) and to
//      grad_input (col2im),
//   3. recomputes the forward columns (im2col) and accumulates grad_weight
//      and, when with_bias is true, grad_bias.
//
// Arguments (shapes as they are read by the code below):
//   input        4-D, indexed as (batch, channels, height, width).
//   weight       4-D, indexed as (channels_out, channels / group, kernel_h,
//                kernel_w).
//   ones         replaced locally by an all-ones plane if it is not 2-D or
//                too small; used to sum grad_output for grad_bias.
//   columns      rebound locally to a fresh zero buffer.
//   grad_input   viewed as (batch, channels, height, width).
//   grad_weight, grad_bias  accumulated into via addmm_.
//   grad_offset, grad_mask  indexed per sample and written by the
//                col2im_coord step.
//   grad_output  4-D; temporarily viewed with an explicit group dimension.
//
// Raises (via AT_ERROR) when the kernel size attributes disagree with the
// weight tensor, or when input channels != weight.size(1) * group.
void modulated_deform_conv_backward(
    Tensor input, Tensor weight, Tensor bias, Tensor ones, Tensor offset,
    Tensor mask, Tensor columns, Tensor grad_input, Tensor grad_weight,
    Tensor grad_bias, Tensor grad_offset, Tensor grad_mask, Tensor grad_output,
    int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h,
    int pad_w, int dilation_h, int dilation_w, int group, int deformable_group,
    const bool with_bias) {
  at::DeviceGuard guard(input.device());

  const int batch = input.size(0);
  const int channels = input.size(1);
  const int height = input.size(2);
  const int width = input.size(3);

  const int channels_kernel = weight.size(1);
  const int kernel_h_ = weight.size(2);
  const int kernel_w_ = weight.size(3);
  // AT_ERROR concatenates its arguments (it does not expand printf-style
  // placeholders), so the message is assembled piecewise. The requested
  // kernel size is reported first, the weight tensor's size second.
  if (kernel_h_ != kernel_h || kernel_w_ != kernel_w)
    AT_ERROR("Input shape and kernel shape won't match: (", kernel_h, " x ",
             kernel_w, " vs ", kernel_h_, " x ", kernel_w_, ").");
  if (channels != channels_kernel * group)
    AT_ERROR("Input shape and kernel channels won't match: (", channels,
             " vs ", channels_kernel * group, ").");

  // Standard convolution output-size formula with padding, dilation, stride.
  const int height_out =
      (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1;
  const int width_out =
      (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1;

  if (ones.ndimension() != 2 ||
      ones.size(0) * ones.size(1) < height_out * width_out) {
    // Resize plane and fill with ones...
    ones = at::ones({height_out, width_out}, input.options());
  }

  grad_input = grad_input.view({batch, channels, height, width});
  columns = at::zeros({channels * kernel_h * kernel_w, height_out * width_out},
                      input.options());

  // Expose the group dimension: (batch, group, channels_out / group, H, W).
  grad_output =
      grad_output.view({grad_output.size(0), group, grad_output.size(1) / group,
                        grad_output.size(2), grad_output.size(3)});

  for (int b = 0; b < batch; b++) {
    // divide into groups
    columns = columns.view({group, columns.size(0) / group, columns.size(1)});
    weight = weight.view({group, weight.size(0) / group, weight.size(1),
                          weight.size(2), weight.size(3)});

    for (int g = 0; g < group; g++) {
      // Trailing scalars are (beta = 0, alpha = 1): columns[g] is overwritten
      // with weight[g]^T x grad_output[b][g].
      columns[g].addmm_(weight[g].flatten(1).transpose(0, 1),
                        grad_output[b][g].flatten(1), 0.0f, 1.0f);
    }

    // Restore the ungrouped views expected by the col2im steps.
    columns =
        columns.view({columns.size(0) * columns.size(1), columns.size(2)});
    weight = weight.view({weight.size(0) * weight.size(1), weight.size(2),
                          weight.size(3), weight.size(4)});

    // gradient w.r.t. input coordinate data
    modulated_deformable_col2im_coord_impl(
        columns, input[b], offset[b], mask[b], 1, channels, height, width,
        height_out, width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h,
        stride_w, dilation_h, dilation_w, deformable_group, grad_offset[b],
        grad_mask[b]);
    // gradient w.r.t. input data
    modulated_deformable_col2im_impl(
        columns, offset[b], mask[b], 1, channels, height, width, height_out,
        width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w,
        dilation_h, dilation_w, deformable_group, grad_input[b]);

    // gradient w.r.t. weight, dWeight should accumulate across the batch and
    // group
    // `columns` is reused here: im2col refills it with the forward columns.
    modulated_deformable_im2col_impl(
        input[b], offset[b], mask[b], 1, channels, height, width, height_out,
        width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w,
        dilation_h, dilation_w, deformable_group, columns);

    columns = columns.view({group, columns.size(0) / group, columns.size(1)});
    grad_weight = grad_weight.view({group, grad_weight.size(0) / group,
                                    grad_weight.size(1), grad_weight.size(2),
                                    grad_weight.size(3)});
    if (with_bias)
      grad_bias = grad_bias.view({group, grad_bias.size(0) / group});

    for (int g = 0; g < group; g++) {
      // grad_weight[g] += grad_output[b][g] x columns[g]^T
      grad_weight[g] =
          grad_weight[g]
              .flatten(1)
              .addmm_(grad_output[b][g].flatten(1), columns[g].transpose(0, 1))
              .view_as(grad_weight[g]);
      if (with_bias) {
        // Multiplying by a column of ones sums grad_output over the spatial
        // positions.
        grad_bias[g] =
            grad_bias[g]
                .view({-1, 1})
                .addmm_(grad_output[b][g].flatten(1), ones.view({-1, 1}))
                .view(-1);
      }
    }

    // Restore the ungrouped views so the next iteration starts from the same
    // shapes.
    columns =
        columns.view({columns.size(0) * columns.size(1), columns.size(2)});
    grad_weight = grad_weight.view({grad_weight.size(0) * grad_weight.size(1),
                                    grad_weight.size(2), grad_weight.size(3),
                                    grad_weight.size(4)});
    if (with_bias)
      grad_bias = grad_bias.view({grad_bias.size(0) * grad_bias.size(1)});
  }
  // Merge the group dimension of grad_output back into the channel dimension.
  grad_output = grad_output.view({grad_output.size(0) * grad_output.size(1),
                                  grad_output.size(2), grad_output.size(3),
                                  grad_output.size(4)});
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/modulated_deform_conv_parrots.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include <parrots/compute/aten.hpp>
#include <parrots/extension.hpp>
#include <parrots/foundation/ssattrs.hpp>

#include "modulated_deform_conv_pytorch.h"

using namespace parrots;

#ifdef MMCV_WITH_CUDA
// Parrots adapter (CUDA context) for modulated_deform_conv_forward: reads the
// eleven integer attributes, wraps ins[0..5] and outs[0..1] as ATen tensors,
// then delegates.
void modulated_deform_conv_forward_cuda_parrots(
    CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins,
    OperatorBase::out_list_t& outs) {
  int kernel_h, kernel_w;
  int stride_h, stride_w;
  int pad_h, pad_w;
  int dilation_h, dilation_w;
  int group, deformable_group;
  int with_bias;  // read as int; converted to bool at the call below
  SSAttrs(attr)
      .get<int>("kernel_h", kernel_h)
      .get<int>("kernel_w", kernel_w)
      .get<int>("stride_h", stride_h)
      .get<int>("stride_w", stride_w)
      .get<int>("pad_h", pad_h)
      .get<int>("pad_w", pad_w)
      .get<int>("dilation_h", dilation_h)
      .get<int>("dilation_w", dilation_w)
      .get<int>("group", group)
      .get<int>("deformable_group", deformable_group)
      .get<int>("with_bias", with_bias)
      .done();

  // Inputs, in registration order.
  const auto& in_data = buildATensor(ctx, ins[0]);
  const auto& in_weight = buildATensor(ctx, ins[1]);
  const auto& in_bias = buildATensor(ctx, ins[2]);
  const auto& in_ones = buildATensor(ctx, ins[3]);
  const auto& in_offset = buildATensor(ctx, ins[4]);
  const auto& in_mask = buildATensor(ctx, ins[5]);

  // Outputs.
  auto out_data = buildATensor(ctx, outs[0]);
  auto out_columns = buildATensor(ctx, outs[1]);

  modulated_deform_conv_forward(in_data, in_weight, in_bias, in_ones,
                                in_offset, in_mask, out_data, out_columns,
                                kernel_h, kernel_w, stride_h, stride_w, pad_h,
                                pad_w, dilation_h, dilation_w, group,
                                deformable_group, with_bias);
}

// Parrots adapter (CUDA context) for modulated_deform_conv_backward: reads
// the eleven integer attributes, wraps ins[0..5] and outs[0..6] as ATen
// tensors, then delegates. Note that grad_output is taken from outs[6].
void modulated_deform_conv_backward_cuda_parrots(
    CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins,
    OperatorBase::out_list_t& outs) {
  int kernel_h, kernel_w;
  int stride_h, stride_w;
  int pad_h, pad_w;
  int dilation_h, dilation_w;
  int group, deformable_group;
  int with_bias;  // read as int; converted to bool at the call below
  SSAttrs(attr)
      .get<int>("kernel_h", kernel_h)
      .get<int>("kernel_w", kernel_w)
      .get<int>("stride_h", stride_h)
      .get<int>("stride_w", stride_w)
      .get<int>("pad_h", pad_h)
      .get<int>("pad_w", pad_w)
      .get<int>("dilation_h", dilation_h)
      .get<int>("dilation_w", dilation_w)
      .get<int>("group", group)
      .get<int>("deformable_group", deformable_group)
      .get<int>("with_bias", with_bias)
      .done();

  // Inputs, in registration order.
  const auto& in_data = buildATensor(ctx, ins[0]);
  const auto& in_weight = buildATensor(ctx, ins[1]);
  const auto& in_bias = buildATensor(ctx, ins[2]);
  const auto& in_ones = buildATensor(ctx, ins[3]);
  const auto& in_offset = buildATensor(ctx, ins[4]);
  const auto& in_mask = buildATensor(ctx, ins[5]);

  // Outputs, in registration order.
  auto col_buf = buildATensor(ctx, outs[0]);
  auto d_input = buildATensor(ctx, outs[1]);
  auto d_weight = buildATensor(ctx, outs[2]);
  auto d_bias = buildATensor(ctx, outs[3]);
  auto d_offset = buildATensor(ctx, outs[4]);
  auto d_mask = buildATensor(ctx, outs[5]);
  auto d_output = buildATensor(ctx, outs[6]);

  modulated_deform_conv_backward(
      in_data, in_weight, in_bias, in_ones, in_offset, in_mask, col_buf,
      d_input, d_weight, d_bias, d_offset, d_mask, d_output, kernel_h,
      kernel_w, stride_h, stride_w, pad_h, pad_w, dilation_h, dilation_w,
      group, deformable_group, with_bias);
}
#endif

// Parrots adapter (host context) for modulated_deform_conv_forward: reads the
// eleven integer attributes, wraps ins[0..5] and outs[0..1] as ATen tensors,
// then delegates.
void modulated_deform_conv_forward_cpu_parrots(
    HostContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins,
    OperatorBase::out_list_t& outs) {
  int kernel_h, kernel_w;
  int stride_h, stride_w;
  int pad_h, pad_w;
  int dilation_h, dilation_w;
  int group, deformable_group;
  int with_bias;  // read as int; converted to bool at the call below
  SSAttrs(attr)
      .get<int>("kernel_h", kernel_h)
      .get<int>("kernel_w", kernel_w)
      .get<int>("stride_h", stride_h)
      .get<int>("stride_w", stride_w)
      .get<int>("pad_h", pad_h)
      .get<int>("pad_w", pad_w)
      .get<int>("dilation_h", dilation_h)
      .get<int>("dilation_w", dilation_w)
      .get<int>("group", group)
      .get<int>("deformable_group", deformable_group)
      .get<int>("with_bias", with_bias)
      .done();

  // Inputs, in registration order.
  const auto& in_data = buildATensor(ctx, ins[0]);
  const auto& in_weight = buildATensor(ctx, ins[1]);
  const auto& in_bias = buildATensor(ctx, ins[2]);
  const auto& in_ones = buildATensor(ctx, ins[3]);
  const auto& in_offset = buildATensor(ctx, ins[4]);
  const auto& in_mask = buildATensor(ctx, ins[5]);

  // Outputs.
  auto out_data = buildATensor(ctx, outs[0]);
  auto out_columns = buildATensor(ctx, outs[1]);

  modulated_deform_conv_forward(in_data, in_weight, in_bias, in_ones,
                                in_offset, in_mask, out_data, out_columns,
                                kernel_h, kernel_w, stride_h, stride_w, pad_h,
                                pad_w, dilation_h, dilation_w, group,
                                deformable_group, with_bias);
}

// Parrots adapter (host context) for modulated_deform_conv_backward: reads
// the eleven integer attributes, wraps ins[0..5] and outs[0..6] as ATen
// tensors, then delegates. Note that grad_output is taken from outs[6].
void modulated_deform_conv_backward_cpu_parrots(
    HostContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins,
    OperatorBase::out_list_t& outs) {
  int kernel_h, kernel_w;
  int stride_h, stride_w;
  int pad_h, pad_w;
  int dilation_h, dilation_w;
  int group, deformable_group;
  int with_bias;  // read as int; converted to bool at the call below
  SSAttrs(attr)
      .get<int>("kernel_h", kernel_h)
      .get<int>("kernel_w", kernel_w)
      .get<int>("stride_h", stride_h)
      .get<int>("stride_w", stride_w)
      .get<int>("pad_h", pad_h)
      .get<int>("pad_w", pad_w)
      .get<int>("dilation_h", dilation_h)
      .get<int>("dilation_w", dilation_w)
      .get<int>("group", group)
      .get<int>("deformable_group", deformable_group)
      .get<int>("with_bias", with_bias)
      .done();

  // Inputs, in registration order.
  const auto& in_data = buildATensor(ctx, ins[0]);
  const auto& in_weight = buildATensor(ctx, ins[1]);
  const auto& in_bias = buildATensor(ctx, ins[2]);
  const auto& in_ones = buildATensor(ctx, ins[3]);
  const auto& in_offset = buildATensor(ctx, ins[4]);
  const auto& in_mask = buildATensor(ctx, ins[5]);

  // Outputs, in registration order.
  auto col_buf = buildATensor(ctx, outs[0]);
  auto d_input = buildATensor(ctx, outs[1]);
  auto d_weight = buildATensor(ctx, outs[2]);
  auto d_bias = buildATensor(ctx, outs[3]);
  auto d_offset = buildATensor(ctx, outs[4]);
  auto d_mask = buildATensor(ctx, outs[5]);
  auto d_output = buildATensor(ctx, outs[6]);

  modulated_deform_conv_backward(
      in_data, in_weight, in_bias, in_ones, in_offset, in_mask, col_buf,
      d_input, d_weight, d_bias, d_offset, d_mask, d_output, kernel_h,
      kernel_w, stride_h, stride_w, pad_h, pad_w, dilation_h, dilation_w,
      group, deformable_group, with_bias);
}
// Registers the parrots op "modulated_deform_conv_forward": eleven
// attributes, 6 inputs, 2 outputs. The host adapter is always attached; the
// CUDA adapter is attached only when MMCV_WITH_CUDA is defined. The attribute
// names must match the keys read by the adapters' SSAttrs(...).get calls.
PARROTS_EXTENSION_REGISTER(modulated_deform_conv_forward)
    .attr("kernel_h")
    .attr("kernel_w")
    .attr("stride_h")
    .attr("stride_w")
    .attr("pad_h")
    .attr("pad_w")
    .attr("dilation_h")
    .attr("dilation_w")
    .attr("group")
    .attr("deformable_group")
    .attr("with_bias")
    .input(6)
    .output(2)
    .apply(modulated_deform_conv_forward_cpu_parrots)
#ifdef MMCV_WITH_CUDA
    .apply(modulated_deform_conv_forward_cuda_parrots)
#endif
    .done();

// Registers the parrots op "modulated_deform_conv_backward": eleven
// attributes, 6 inputs, 7 outputs (the adapters read grad_output from
// outs[6]). The host adapter is always attached; the CUDA adapter is attached
// only when MMCV_WITH_CUDA is defined.
PARROTS_EXTENSION_REGISTER(modulated_deform_conv_backward)
    .attr("kernel_h")
    .attr("kernel_w")
    .attr("stride_h")
    .attr("stride_w")
    .attr("pad_h")
    .attr("pad_w")
    .attr("dilation_h")
    .attr("dilation_w")
    .attr("group")
    .attr("deformable_group")
    .attr("with_bias")
    .input(6)
    .output(7)
    .apply(modulated_deform_conv_backward_cpu_parrots)
#ifdef MMCV_WITH_CUDA
    .apply(modulated_deform_conv_backward_cuda_parrots)
#endif
    .done();


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/modulated_deform_conv_pytorch.h
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef MODULATED_DEFORM_CONV_PYTORCH_H
#define MODULATED_DEFORM_CONV_PYTORCH_H
#include <torch/extension.h>
using namespace at;

// Declaration only; the definition lives outside this header.
// The parrots adapters pass six input tensors (input, weight, bias, ones,
// offset, mask) followed by two output tensors (output, columns) and the
// convolution hyper-parameters.
void modulated_deform_conv_forward(
    Tensor input, Tensor weight, Tensor bias, Tensor ones, Tensor offset,
    Tensor mask, Tensor output, Tensor columns, int kernel_h, int kernel_w,
    const int stride_h, const int stride_w, const int pad_h, const int pad_w,
    const int dilation_h, const int dilation_w, const int group,
    const int deformable_group, const bool with_bias);

// Declaration only; the definition lives outside this header.
// The parrots adapters pass six input tensors (input, weight, bias, ones,
// offset, mask) followed by seven tensors taken from the op's output list
// (columns, grad_input, grad_weight, grad_bias, grad_offset, grad_mask,
// grad_output) and the convolution hyper-parameters.
void modulated_deform_conv_backward(
    Tensor input, Tensor weight, Tensor bias, Tensor ones, Tensor offset,
    Tensor mask, Tensor columns, Tensor grad_input, Tensor grad_weight,
    Tensor grad_bias, Tensor grad_offset, Tensor grad_mask, Tensor grad_output,
    int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h,
    int pad_w, int dilation_h, int dilation_w, int group, int deformable_group,
    const bool with_bias);
#endif  // MODULATED_DEFORM_CONV_PYTORCH_H


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/ms_deform_attn.cpp
================================================
/*!
**************************************************************************************************
* Deformable DETR
* Copyright (c) 2020 SenseTime. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 [see LICENSE for details]
**************************************************************************************************
* Modified from
*https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
**************************************************************************************************
*/

#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Device-dispatched forward of multi-scale deformable attention.
// Hands all arguments, in order, to DISPATCH_DEVICE_IMPL under the key
// ms_deform_attn_impl_forward and returns whatever the selected
// implementation returns. The macro is defined outside this file; presumably
// it selects a backend from the tensors' device.
Tensor ms_deform_attn_impl_forward(const Tensor &value,
                                   const Tensor &spatial_shapes,
                                   const Tensor &level_start_index,
                                   const Tensor &sampling_loc,
                                   const Tensor &attn_weight,
                                   const int im2col_step) {
  return DISPATCH_DEVICE_IMPL(ms_deform_attn_impl_forward, value,
                              spatial_shapes, level_start_index, sampling_loc,
                              attn_weight, im2col_step);
}

// Device-dispatched backward of multi-scale deformable attention.
// Hands all arguments, in order, to DISPATCH_DEVICE_IMPL under the key
// ms_deform_attn_impl_backward. The three non-const references (grad_value,
// grad_sampling_loc, grad_attn_weight) are the tensors that receive results.
void ms_deform_attn_impl_backward(
    const Tensor &value, const Tensor &spatial_shapes,
    const Tensor &level_start_index, const Tensor &sampling_loc,
    const Tensor &attn_weight, const Tensor &grad_output, Tensor &grad_value,
    Tensor &grad_sampling_loc, Tensor &grad_attn_weight,
    const int im2col_step) {
  DISPATCH_DEVICE_IMPL(ms_deform_attn_impl_backward, value, spatial_shapes,
                       level_start_index, sampling_loc, attn_weight,
                       grad_output, grad_value, grad_sampling_loc,
                       grad_attn_weight, im2col_step);
}

// Public forward entry point for multi-scale deformable attention.
// Pins the current device to that of `value` for the duration of the call,
// then returns the result of ms_deform_attn_impl_forward.
Tensor ms_deform_attn_forward(const Tensor &value, const Tensor &spatial_shapes,
                              const Tensor &level_start_index,
                              const Tensor &sampling_loc,
                              const Tensor &attn_weight,
                              const int im2col_step) {
  at::DeviceGuard device_guard(value.device());
  Tensor attended = ms_deform_attn_impl_forward(
      value, spatial_shapes, level_start_index, sampling_loc, attn_weight,
      im2col_step);
  return attended;
}

// Public backward entry point for multi-scale deformable attention.
// Pins the current device to that of `value` for the duration of the call,
// then delegates to ms_deform_attn_impl_backward, which receives the three
// gradient tensors by non-const reference.
void ms_deform_attn_backward(const Tensor &value, const Tensor &spatial_shapes,
                             const Tensor &level_start_index,
                             const Tensor &sampling_loc,
                             const Tensor &attn_weight,
                             const Tensor &grad_output, Tensor &grad_value,
                             Tensor &grad_sampling_loc,
                             Tensor &grad_attn_weight, const int im2col_step) {
  at::DeviceGuard device_guard(value.device());
  ms_deform_attn_impl_backward(
      value, spatial_shapes, level_start_index, sampling_loc, attn_weight,
      grad_output, grad_value, grad_sampling_loc, grad_attn_weight,
      im2col_step);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/ms_deform_attn_parrots.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include <torch/extension.h>

#include <parrots/compute/aten.hpp>
#include <parrots/extension.hpp>
#include <parrots/foundation/ssattrs.hpp>
using namespace at;
using namespace parrots;

// Forward declaration; the definition lives in another translation unit.
// Returns the output tensor, which the adapter below copies into outs[0].
Tensor ms_deform_attn_forward(const Tensor &value, const Tensor &spatial_shapes,
                              const Tensor &level_start_index,
                              const Tensor &sampling_loc,
                              const Tensor &attn_weight, const int im2col_step);

// Forward declaration; the definition lives in another translation unit.
// The three gradient tensors are taken by non-const reference.
void ms_deform_attn_backward(const Tensor &value, const Tensor &spatial_shapes,
                             const Tensor &level_start_index,
                             const Tensor &sampling_loc,
                             const Tensor &attn_weight,
                             const Tensor &grad_output, Tensor &grad_value,
                             Tensor &grad_sampling_loc,
                             Tensor &grad_attn_weight, const int im2col_step);

// Parrots adapter for ms_deform_attn_forward: reads im2col_step, wraps
// ins[0..4] as ATen tensors, runs the forward and copies the returned tensor
// into outs[0] with updateDArray.
void ms_deform_attn_forward_parrots(CudaContext &ctx, const SSElement &attr,
                                    const OperatorBase::in_list_t &ins,
                                    OperatorBase::out_list_t &outs) {
  int im2col_step;
  SSAttrs(attr).get<int>("im2col_step", im2col_step).done();

  // Inputs, in registration order.
  const auto &values = buildATensor(ctx, ins[0]);
  const auto &shapes = buildATensor(ctx, ins[1]);
  const auto &level_starts = buildATensor(ctx, ins[2]);
  const auto &sample_locs = buildATensor(ctx, ins[3]);
  const auto &attn_weights = buildATensor(ctx, ins[4]);

  auto result = ms_deform_attn_forward(values, shapes, level_starts,
                                       sample_locs, attn_weights, im2col_step);
  updateDArray(ctx, result, outs[0]);
}

// Parrots adapter for ms_deform_attn_backward: reads im2col_step, wraps
// ins[0..5] and outs[0..2] as ATen tensors and delegates. The gradient
// tensors are passed by reference to the callee.
void ms_deform_attn_backward_parrots(CudaContext &ctx, const SSElement &attr,
                                     const OperatorBase::in_list_t &ins,
                                     OperatorBase::out_list_t &outs) {
  int im2col_step;
  SSAttrs(attr).get<int>("im2col_step", im2col_step).done();

  // Inputs, in registration order.
  const auto &values = buildATensor(ctx, ins[0]);
  const auto &shapes = buildATensor(ctx, ins[1]);
  const auto &level_starts = buildATensor(ctx, ins[2]);
  const auto &sample_locs = buildATensor(ctx, ins[3]);
  const auto &attn_weights = buildATensor(ctx, ins[4]);
  const auto &d_out = buildATensor(ctx, ins[5]);

  // Outputs, in registration order.
  auto d_value = buildATensor(ctx, outs[0]);
  auto d_sample_locs = buildATensor(ctx, outs[1]);
  auto d_attn_weights = buildATensor(ctx, outs[2]);

  ms_deform_attn_backward(values, shapes, level_starts, sample_locs,
                          attn_weights, d_out, d_value, d_sample_locs,
                          d_attn_weights, im2col_step);
}

// Registers the parrots op "ms_deform_attn_forward": one attribute
// (im2col_step), 5 inputs, 1 output, implemented by the adapter above.
PARROTS_EXTENSION_REGISTER(ms_deform_attn_forward)
    .attr("im2col_step")
    .input(5)
    .output(1)
    .apply(ms_deform_attn_forward_parrots)
    .done();

// Registers the parrots op "ms_deform_attn_backward": one attribute
// (im2col_step), 6 inputs, 3 outputs, implemented by the adapter above.
PARROTS_EXTENSION_REGISTER(ms_deform_attn_backward)
    .attr("im2col_step")
    .input(6)
    .output(3)
    .apply(ms_deform_attn_backward_parrots)
    .done();


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/nms.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Device-dispatched NMS. Hands all arguments, in order, to
// DISPATCH_DEVICE_IMPL under the key nms_impl and returns its result. The
// macro is defined outside this file; presumably it selects a backend from
// the tensors' device.
Tensor nms_impl(Tensor boxes, Tensor scores, float iou_threshold, int offset) {
  return DISPATCH_DEVICE_IMPL(nms_impl, boxes, scores, iou_threshold, offset);
}

// Device-dispatched soft-NMS. Hands all arguments, in order, to
// DISPATCH_DEVICE_IMPL under the key softnms_impl and returns its result.
Tensor softnms_impl(Tensor boxes, Tensor scores, Tensor dets,
                    float iou_threshold, float sigma, float min_score,
                    int method, int offset) {
  return DISPATCH_DEVICE_IMPL(softnms_impl, boxes, scores, dets, iou_threshold,
                              sigma, min_score, method, offset);
}

// Device-dispatched NMS-match. Hands both arguments to DISPATCH_DEVICE_IMPL
// under the key nms_match_impl and returns the resulting nested vector of
// ints unchanged.
std::vector<std::vector<int> > nms_match_impl(Tensor dets,
                                              float iou_threshold) {
  return DISPATCH_DEVICE_IMPL(nms_match_impl, dets, iou_threshold);
}

// Public NMS entry point: forwards all arguments to nms_impl and returns its
// result unchanged.
Tensor nms(Tensor boxes, Tensor scores, float iou_threshold, int offset) {
  Tensor kept = nms_impl(boxes, scores, iou_threshold, offset);
  return kept;
}

// Public soft-NMS entry point: forwards all arguments to softnms_impl and
// returns its result unchanged.
Tensor softnms(Tensor boxes, Tensor scores, Tensor dets, float iou_threshold,
               float sigma, float min_score, int method, int offset) {
  Tensor kept = softnms_impl(boxes, scores, dets, iou_threshold, sigma,
                             min_score, method, offset);
  return kept;
}

// Public NMS-match entry point: forwards both arguments to nms_match_impl and
// returns its nested vector unchanged.
std::vector<std::vector<int> > nms_match(Tensor dets, float iou_threshold) {
  std::vector<std::vector<int> > matched = nms_match_impl(dets, iou_threshold);
  return matched;
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/nms_parrots.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include <parrots/compute/aten.hpp>
#include <parrots/extension.hpp>
#include <parrots/foundation/ssattrs.hpp>

#include "nms_pytorch.h"

using namespace parrots;

// Parrots handler wrapping
//   Tensor nms(Tensor boxes, Tensor scores, float iou_threshold, int offset);
// Attributes read: "iou_threshold", "offset".
// Inputs: ins[0] -> boxes, ins[1] -> scores.
// The tensor returned by nms() is written to outs[0] via updateDArray.
template <typename T>
void nms_parrots(T& ctx, const SSElement& attr,
                 const OperatorBase::in_list_t& ins,
                 OperatorBase::out_list_t& outs) {
  // Scalar arguments arrive as operator attributes.
  int offset;
  float iou_threshold;
  SSAttrs(attr)
      .get("iou_threshold", iou_threshold)
      .get("offset", offset)
      .done();

  // Wrap the Parrots inputs as ATen tensors.
  at::Tensor boxes = buildATensor(ctx, ins[0]);
  at::Tensor scores = buildATensor(ctx, ins[1]);

  auto result = nms(boxes, scores, iou_threshold, offset);
  updateDArray(ctx, result, outs[0]);
}

// Parrots handler wrapping
//   Tensor softnms(Tensor boxes, Tensor scores, Tensor dets,
//                  float iou_threshold, float sigma, float min_score,
//                  int method, int offset);
// Attributes read: "iou_threshold", "sigma", "min_score", "method", "offset".
// Inputs: ins[0] -> boxes, ins[1] -> scores, ins[2] -> dets.
// The tensor returned by softnms() is written to outs[0] via updateDArray.
template <typename T>
void softnms_parrots(T& ctx, const SSElement& attr,
                     const OperatorBase::in_list_t& ins,
                     OperatorBase::out_list_t& outs) {
  // Scalar arguments arrive as operator attributes.
  float iou_threshold;
  float sigma;
  float min_score;
  int method;
  int offset;
  SSAttrs(attr)
      .get("iou_threshold", iou_threshold)
      .get("sigma", sigma)
      .get("min_score", min_score)
      .get("method", method)
      .get("offset", offset)
      .done();

  // Wrap the Parrots inputs as ATen tensors.
  at::Tensor boxes = buildATensor(ctx, ins[0]);
  at::Tensor scores = buildATensor(ctx, ins[1]);
  at::Tensor dets = buildATensor(ctx, ins[2]);

  auto result = softnms(boxes, scores, dets, iou_threshold, sigma, min_score,
                        method, offset);
  updateDArray(ctx, result, outs[0]);
}

// Parrots handler wrapping
//   std::vector<std::vector<int> > nms_match(Tensor dets,
//                                            float iou_threshold);
// Attribute read: "iou_threshold". Input: ins[0] -> dets.
//
// The ragged result of nms_match() is packed into a dense int32 tensor of
// shape (n, m), where n is the number of groups and m is the length of the
// longest group. Row i holds group i in its first out[i].size() columns; the
// remaining columns stay zero. The packed tensor is written to outs[0].
template <typename T>
void nms_match_parrots(T& ctx, const SSElement& attr,
                       const OperatorBase::in_list_t& ins,
                       OperatorBase::out_list_t& outs) {
  float iou_threshold;
  SSAttrs(attr).get("iou_threshold", iou_threshold).done();
  at::Tensor dets = buildATensor(ctx, ins[0]);
  auto out = nms_match(dets, iou_threshold);

  // Find the widest group so every row fits in the dense tensor.
  const int64_t n = static_cast<int64_t>(out.size());
  int64_t m = 0;
  for (int64_t i = 0; i < n; ++i) {
    const int64_t len = static_cast<int64_t>(out[i].size());
    if (m < len) m = len;
  }

  auto options = torch::TensorOptions().dtype(at::kInt);
  auto tensor = torch::zeros({n, m}, options);
  for (int64_t i = 0; i < n; ++i) {
    const int64_t len = static_cast<int64_t>(out[i].size());
    if (len == 0) continue;  // nothing to copy; row stays all-zero
    // Copy into the first `len` columns only. Assigning a length-`len`
    // tensor to the whole (1, m) row slice is a broadcasting copy, which
    // fails when len != m and repeats the value across the row when len == 1.
    tensor.select(0, i).narrow(0, 0, len).copy_(
        torch::from_blob(out[i].data(), {len}, options));
  }
  updateDArray(ctx, tensor, outs[0]);
}

// Parrots handler wrapping
//   Tensor nms_rotated(const Tensor dets, const Tensor scores,
//                      const Tensor order, const Tensor dets_sorted,
//                      const float iou_threshold, const int multi_label);
// Attributes read: "iou_threshold", "multi_label".
// Inputs: ins[0] -> dets, ins[1] -> scores, ins[2] -> order,
//         ins[3] -> dets_sorted.
// The tensor returned by nms_rotated() is written to outs[0].
template <typename T>
void nms_rotated_parrots(T& ctx, const SSElement& attr,
                         const OperatorBase::in_list_t& ins,
                         OperatorBase::out_list_t& outs) {
  // Scalar arguments arrive as operator attributes.
  int multi_label;
  float iou_threshold;
  SSAttrs(attr)
      .get("iou_threshold", iou_threshold)
      .get("multi_label", multi_label)
      .done();

  // Wrap the Parrots inputs as ATen tensors.
  at::Tensor dets = buildATensor(ctx, ins[0]);
  at::Tensor scores = buildATensor(ctx, ins[1]);
  at::Tensor order = buildATensor(ctx, ins[2]);
  at::Tensor dets_sorted = buildATensor(ctx, ins[3]);

  auto result =
      nms_rotated(dets, scores, order, dets_sorted, iou_threshold, multi_label);
  updateDArray(ctx, result, outs[0]);
}

// Parrots op `nms`: attributes "iou_threshold" and "offset", declared with
// input(2) / output(1). The HostContext handler is always attached; the
// CudaContext handler only when MMCV_WITH_CUDA is defined.
PARROTS_EXTENSION_REGISTER(nms)
    .attr("iou_threshold")
    .attr("offset")
    .input(2)
    .output(1)
    .apply(nms_parrots<HostContext>)
#ifdef MMCV_WITH_CUDA
    .apply(nms_parrots<CudaContext>)
#endif
    .done();

// Parrots op `softnms`: attributes "iou_threshold", "sigma", "min_score",
// "method" and "offset", declared with input(3) / output(1). The HostContext
// handler is always attached; the CudaContext handler only when
// MMCV_WITH_CUDA is defined.
PARROTS_EXTENSION_REGISTER(softnms)
    .attr("iou_threshold")
    .attr("sigma")
    .attr("min_score")
    .attr("method")
    .attr("offset")
    .input(3)
    .output(1)
    .apply(softnms_parrots<HostContext>)
#ifdef MMCV_WITH_CUDA
    .apply(softnms_parrots<CudaContext>)
#endif
    .done();

// Parrots op `nms_match`: attribute "iou_threshold", declared with
// input(1) / output(1). The HostContext handler is always attached; the
// CudaContext handler only when MMCV_WITH_CUDA is defined.
PARROTS_EXTENSION_REGISTER(nms_match)
    .attr("iou_threshold")
    .input(1)
    .output(1)
    .apply(nms_match_parrots<HostContext>)
#ifdef MMCV_WITH_CUDA
    .apply(nms_match_parrots<CudaContext>)
#endif
    .done();

// Parrots op `nms_rotated`: attributes "multi_label" and "iou_threshold",
// declared with input(4) / output(1). The HostContext handler is always
// attached; the CudaContext handler only when MMCV_WITH_CUDA is defined.
PARROTS_EXTENSION_REGISTER(nms_rotated)
    .attr("multi_label")
    .attr("iou_threshold")
    .input(4)
    .output(1)
    .apply(nms_rotated_parrots<HostContext>)
#ifdef MMCV_WITH_CUDA
    .apply(nms_rotated_parrots<CudaContext>)
#endif
    .done();


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/nms_pytorch.h
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef NMS_PYTORCH_H
#define NMS_PYTORCH_H
#include <torch/extension.h>

// NMS entry point used by the Parrots binding (defined in nms.cpp).
at::Tensor nms(at::Tensor boxes, at::Tensor scores, float iou_threshold,
               int offset);

// Soft-NMS entry point used by the Parrots binding (defined in nms.cpp).
at::Tensor softnms(at::Tensor boxes, at::Tensor scores, at::Tensor dets,
                   float iou_threshold, float sigma, float min_score,
                   int method, int offset);

// NMS matching entry point; returns one integer list per group.
std::vector<std::vector<int> > nms_match(at::Tensor dets, float iou_threshold);

// Rotated-box NMS entry point (defined in nms_rotated.cpp).
at::Tensor nms_rotated(const at::Tensor dets, const at::Tensor scores,
                       const at::Tensor order, const at::Tensor dets_sorted,
                       const float iou_threshold, const int multi_label);
#endif  // NMS_PYTORCH_H


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/nms_rotated.cpp
================================================
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
// modified from
// https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/csrc/nms_rotated/nms_rotated.h
#include "pytorch_cpp_helper.hpp"

// CPU backend for rotated NMS; defined elsewhere. Note that it takes only
// dets, scores and iou_threshold.
Tensor nms_rotated_cpu(const Tensor dets, const Tensor scores,
                       const float iou_threshold);

#ifdef MMCV_WITH_CUDA
// CUDA backend for rotated NMS; defined elsewhere and only declared when the
// build defines MMCV_WITH_CUDA.
Tensor nms_rotated_cuda(const Tensor dets, const Tensor scores,
                        const Tensor order, const Tensor dets_sorted,
                        const float iou_threshold, const int multi_label);
#endif

// Rotated-box NMS entry point: routes to the CUDA or CPU backend based on the
// device of `dets`.
//
// - CUDA tensors: forwards all arguments to nms_rotated_cuda when built with
//   MMCV_WITH_CUDA, otherwise raises "Not compiled with GPU support".
// - CPU tensors: calls nms_rotated_cpu with dets, scores and iou_threshold;
//   `order`, `dets_sorted` and `multi_label` are not used on this path.
//
// Raises an error if `dets` and `scores` are not both on CUDA or both off it.
Tensor nms_rotated(const Tensor dets, const Tensor scores, const Tensor order,
                   const Tensor dets_sorted, const float iou_threshold,
                   const int multi_label) {
  // Checked at runtime rather than with assert(): assert is compiled out
  // when NDEBUG is defined, which would let a mismatch reach the backend.
  if (dets.device().is_cuda() != scores.device().is_cuda()) {
    AT_ERROR("nms_rotated: dets and scores must be on the same device type");
  }
  if (dets.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
    return nms_rotated_cuda(dets, scores, order, dets_sorted, iou_threshold,
                            multi_label);
#else
    AT_ERROR("Not compiled with GPU support");
#endif
  }

  return nms_rotated_cpu(dets, scores, iou_threshold);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/pixel_group.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
// It is modified from https://github.com/WenmuZhou/PAN.pytorch

#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Device-dispatched pixel grouping. All arguments are forwarded unchanged
// through DISPATCH_DEVICE_IMPL (from pytorch_device_registry.hpp), which
// presumably selects the implementation registered for the tensors' device.
std::vector<std::vector<float>> pixel_group_impl(
    Tensor score, Tensor mask, Tensor embedding, Tensor kernel_label,
    Tensor kernel_contour, int kernel_region_num, float dis_threshold) {
  return DISPATCH_DEVICE_IMPL(pixel_group_impl, score, mask, embedding,
                              kernel_label, kernel_contour, kernel_region_num,
                              dis_threshold);
}

// Public entry point for pixel grouping. Each tensor argument is made
// contiguous before being handed to pixel_group_impl; the scalar arguments
// are passed through unchanged.
std::vector<std::vector<float>> pixel_group(
    Tensor score, Tensor mask, Tensor embedding, Tensor kernel_label,
    Tensor kernel_contour, int kernel_region_num, float distance_threshold) {
  return pixel_group_impl(score.contiguous(), mask.contiguous(),
                          embedding.contiguous(), kernel_label.contiguous(),
                          kernel_contour.contiguous(), kernel_region_num,
                          distance_threshold);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/pixel_group_parrots.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include <parrots/compute/aten.hpp>
#include <parrots/extension.hpp>
#include <parrots/foundation/ssattrs.hpp>

#include "pixel_group_pytorch.h"

using namespace parrots;
using namespace std;

// Parrots handler wrapping pixel_group().
// Attributes read: "kernel_region_num" (int), "distance_threshold" (float).
// Inputs: ins[0] -> score, ins[1] -> mask, ins[2] -> embedding,
//         ins[3] -> kernel_label, ins[4] -> kernel_contour.
//
// The ragged result (one float list per group) is flattened into a single
// float tensor of shape (1, L) laid out as:
//   [len(group 0), ..., len(group n-1), group 0 values..., group n-1 values...]
// and written to outs[0].
template <typename T>
void pixel_group_parrots(T& ctx, const SSElement& attr,
                         const OperatorBase::in_list_t& ins,
                         OperatorBase::out_list_t& outs) {
  // Scalar arguments arrive as operator attributes.
  float distance_threshold;
  int kernel_region_num;
  SSAttrs(attr)
      .get<int>("kernel_region_num", kernel_region_num)
      .get<float>("distance_threshold", distance_threshold)
      .done();

  // Wrap the Parrots inputs as ATen tensors.
  at::Tensor score = buildATensor(ctx, ins[0]);
  at::Tensor mask = buildATensor(ctx, ins[1]);
  at::Tensor embedding = buildATensor(ctx, ins[2]);
  at::Tensor kernel_label = buildATensor(ctx, ins[3]);
  at::Tensor kernel_contour = buildATensor(ctx, ins[4]);

  auto groups = pixel_group(score, mask, embedding, kernel_label,
                            kernel_contour, kernel_region_num,
                            distance_threshold);

  // Header: the length of every group, stored as floats.
  std::vector<float> flat;
  for (const auto& group : groups)
    flat.push_back(static_cast<float>(group.size()));
  // Payload: the groups' values, concatenated in order.
  for (const auto& group : groups)
    flat.insert(flat.end(), group.begin(), group.end());

  auto options = torch::TensorOptions().dtype(at::kFloat);
  auto tensor = torch::zeros({1, flat.size()}, options);
  // Copies the flattened buffer into the single row of `tensor`.
  tensor.slice(0, 0, 1) =
      torch::from_blob(flat.data(), {flat.size()}, options);
  updateDArray(ctx, tensor, outs[0]);
}

// Parrots op `pixel_group`: attributes "kernel_region_num" and
// "distance_threshold", declared with input(5) / output(1). The HostContext
// handler is always attached; the CudaContext handler only when
// MMCV_WITH_CUDA is defined.
PARROTS_EXTENSION_REGISTER(pixel_group)
    .attr("kernel_region_num")
    .attr("distance_threshold")
    .input(5)
    .output(1)
    .apply(pixel_group_parrots<HostContext>)
#ifdef MMCV_WITH_CUDA
    .apply(pixel_group_parrots<CudaContext>)
#endif
    .done();


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/pixel_group_pytorch.h
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef PIXEL_GROUP_PYTORCH_H
#define PIXEL_GROUP_PYTORCH_H
#include <torch/extension.h>
using namespace at;

// Pixel grouping entry point used by the Parrots binding (defined in
// pixel_group.cpp). Returns one float list per group.
std::vector<std::vector<float>> pixel_group(
    Tensor score, Tensor mask, Tensor embedding, Tensor kernel_label,
    Tensor kernel_contour, int kernel_region_num, float distance_threshold);

#endif  // PIXEL_GROUP_PYTORCH_H


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/points_in_boxes.cpp
================================================
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Device-dispatched "part" variant of points-in-boxes. All arguments are
// forwarded unchanged through DISPATCH_DEVICE_IMPL; the result is written
// into `box_idx_of_points` by the selected implementation.
void points_in_boxes_part_forward_impl(int batch_size, int boxes_num,
                                       int pts_num, const Tensor boxes,
                                       const Tensor pts,
                                       Tensor box_idx_of_points) {
  DISPATCH_DEVICE_IMPL(points_in_boxes_part_forward_impl, batch_size, boxes_num,
                       pts_num, boxes, pts, box_idx_of_points);
}

// Device-dispatched "all" variant of points-in-boxes. All arguments are
// forwarded unchanged through DISPATCH_DEVICE_IMPL; the result is written
// into `box_idx_of_points` by the selected implementation.
void points_in_boxes_all_forward_impl(int batch_size, int boxes_num,
                                      int pts_num, const Tensor boxes,
                                      const Tensor pts,
                                      Tensor box_idx_of_points) {
  DISPATCH_DEVICE_IMPL(points_in_boxes_all_forward_impl, batch_size, boxes_num,
                       pts_num, boxes, pts, box_idx_of_points);
}

// Entry point for the "part" variant of points-in-boxes.
//
// Expected layouts:
//   boxes_tensor:             (B, N, 7) [x, y, z, x_size, y_size, z_size, rz]
//                             in LiDAR coordinates; z is the bottom center.
//   pts_tensor:               (B, npoints, 3) [x, y, z] in LiDAR coordinates.
//   box_idx_of_points_tensor: (B, npoints), default -1.
//
// The batch size and box count are read from boxes_tensor, the point count
// from pts_tensor, and everything is forwarded to
// points_in_boxes_part_forward_impl.
void points_in_boxes_part_forward(Tensor boxes_tensor, Tensor pts_tensor,
                                  Tensor box_idx_of_points_tensor) {
  const int batch_size = boxes_tensor.size(0);
  const int boxes_num = boxes_tensor.size(1);
  const int pts_num = pts_tensor.size(1);

  points_in_boxes_part_forward_impl(batch_size, boxes_num, pts_num,
                                    boxes_tensor, pts_tensor,
                                    box_idx_of_points_tensor);
}

// Entry point for the "all" variant of points-in-boxes.
//
// Expected layouts:
//   boxes_tensor:             (B, N, 7) [x, y, z, x_size, y_size, z_size, rz]
//                             in LiDAR coordinates; z is the bottom center.
//   pts_tensor:               (B, npoints, 3) [x, y, z] in LiDAR coordinates.
//   box_idx_of_points_tensor: (B, npoints), default -1.
//
// The batch size and box count are read from boxes_tensor, the point count
// from pts_tensor, and everything is forwarded to
// points_in_boxes_all_forward_impl.
void points_in_boxes_all_forward(Tensor boxes_tensor, Tensor pts_tensor,
                                 Tensor box_idx_of_points_tensor) {
  const int batch_size = boxes_tensor.size(0);
  const int boxes_num = boxes_tensor.size(1);
  const int pts_num = pts_tensor.size(1);

  points_in_boxes_all_forward_impl(batch_size, boxes_num, pts_num,
                                   boxes_tensor, pts_tensor,
                                   box_idx_of_points_tensor);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/points_in_boxes_parrots.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include <parrots/compute/aten.hpp>
#include <parrots/extension.hpp>
#include <parrots/foundation/ssattrs.hpp>

#include "points_in_boxes_pytorch.h"

using namespace parrots;

#ifdef MMCV_WITH_CUDA
// CUDA Parrots handler for points_in_boxes_part_forward.
// Inputs: ins[0] -> boxes, ins[1] -> points. outs[0] is wrapped as an ATen
// tensor and passed as the destination argument. No attributes are read.
void points_in_boxes_part_forward_cuda_parrots(
    CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins,
    OperatorBase::out_list_t& outs) {
  auto boxes = buildATensor(ctx, ins[0]);
  auto points = buildATensor(ctx, ins[1]);
  auto box_idx_of_points = buildATensor(ctx, outs[0]);

  points_in_boxes_part_forward(boxes, points, box_idx_of_points);
}

// CUDA Parrots handler for points_in_boxes_all_forward.
// Inputs: ins[0] -> boxes, ins[1] -> points. outs[0] is wrapped as an ATen
// tensor and passed as the destination argument. No attributes are read.
void points_in_boxes_all_forward_cuda_parrots(
    CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins,
    OperatorBase::out_list_t& outs) {
  auto boxes = buildATensor(ctx, ins[0]);
  auto points = buildATensor(ctx, ins[1]);
  auto box_idx_of_points = buildATensor(ctx, outs[0]);

  points_in_boxes_all_forward(boxes, points, box_idx_of_points);
}

// Parrots op `points_in_boxes_part_forward`: no attributes, declared with
// input(2) / output(1). Only a CUDA handler is attached, and this
// registration sits inside the surrounding MMCV_WITH_CUDA guard.
PARROTS_EXTENSION_REGISTER(points_in_boxes_part_forward)
    .input(2)
    .output(1)
    .apply(points_in_boxes_part_forward_cuda_parrots)
    .done();

// Parrots op `points_in_boxes_all_forward`: no attributes, declared with
// input(2) / output(1). Only a CUDA handler is attached, and this
// registration sits inside the surrounding MMCV_WITH_CUDA guard.
PARROTS_EXTENSION_REGISTER(points_in_boxes_all_forward)
    .input(2)
    .output(1)
    .apply(points_in_boxes_all_forward_cuda_parrots)
    .done();
#endif

// Host (CPU) Parrots handler for points_in_boxes_cpu_forward.
// Inputs: ins[0] -> boxes, ins[1] -> points. outs[0] is wrapped as an ATen
// tensor and passed as the destination argument. No attributes are read.
void points_in_boxes_forward_cpu_parrots(HostContext& ctx,
                                         const SSElement& attr,
                                         const OperatorBase::in_list_t& ins,
                                         OperatorBase::out_list_t& outs) {
  auto boxes = buildATensor(ctx, ins[0]);
  auto points = buildATensor(ctx, ins[1]);
  auto pts_indices = buildATensor(ctx, outs[0]);

  points_in_boxes_cpu_forward(boxes, points, pts_indices);
}

// Parrots op `points_in_boxes_cpu_forward`: no attributes, declared with
// input(2) / output(1). Only the host (CPU) handler is attached.
PARROTS_EXTENSION_REGISTER(points_in_boxes_cpu_forward)
    .input(2)
    .output(1)
    .apply(points_in_boxes_forward_cpu_parrots)
    .done();


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/points_in_boxes_pytorch.h
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef POINTS_IN_BOXES_PYTORCH_H
#define POINTS_IN_BOXES_PYTORCH_H
#include <torch/extension.h>
using namespace at;

// "Part" variant entry point (defined in points_in_boxes.cpp); the result is
// written into box_idx_of_points_tensor.
void points_in_boxes_part_forward(Tensor boxes_tensor, Tensor pts_tensor,
                                  Tensor box_idx_of_points_tensor);

// "All" variant entry point (defined in points_in_boxes.cpp); the result is
// written into box_idx_of_points_tensor.
void points_in_boxes_all_forward(Tensor boxes_tensor, Tensor pts_tensor,
                                 Tensor box_idx_of_points_tensor);

// CPU entry point called by the host Parrots handler; its definition is not
// in points_in_boxes.cpp.
void points_in_boxes_cpu_forward(Tensor boxes_tensor, Tensor pts_tensor,
                                 Tensor pts_indices_tensor);

#endif  // POINTS_IN_BOXES_PYTORCH_H


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/points_in_polygons.cpp
================================================
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Device-dispatched points-in-polygons test. All arguments are forwarded
// unchanged through DISPATCH_DEVICE_IMPL; the result is written into `output`
// by the selected implementation.
void points_in_polygons_forward_impl(const Tensor points, const Tensor polygons,
                                     Tensor output, const int rows,
                                     const int cols) {
  DISPATCH_DEVICE_IMPL(points_in_polygons_forward_impl, points, polygons,
                       output, rows, cols);
}

// Entry point for the points-in-polygons test. The row count is the first
// dimension of `points` and the column count is the first dimension of
// `polygons`; both are forwarded to points_in_polygons_forward_impl together
// with the tensors.
void points_in_polygons_forward(Tensor points, Tensor polygons, Tensor output) {
  const int num_points = points.size(0);
  const int num_polygons = polygons.size(0);
  points_in_polygons_forward_impl(points, polygons, output, num_points,
                                  num_polygons);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/points_in_polygons_parrots.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include <parrots/compute/aten.hpp>
#include <parrots/extension.hpp>
#include <parrots/foundation/ssattrs.hpp>

#include "points_in_polygons_pytorch.h"

using namespace parrots;

#ifdef MMCV_WITH_CUDA
// CUDA Parrots handler for points_in_polygons_forward.
// Inputs: ins[0] -> points, ins[1] -> polygons. outs[0] is wrapped as an ATen
// tensor and passed as the destination argument. No attributes are read.
void points_in_polygons_cuda_parrots(CudaContext& ctx, const SSElement& attr,
                                     const OperatorBase::in_list_t& ins,
                                     OperatorBase::out_list_t& outs) {
  auto pts = buildATensor(ctx, ins[0]);
  auto polys = buildATensor(ctx, ins[1]);
  auto result = buildATensor(ctx, outs[0]);

  points_in_polygons_forward(pts, polys, result);
}

// Parrots op `points_in_polygons_forward`: no attributes, declared with
// input(2) / output(1). Only a CUDA handler is attached, and this
// registration sits inside the surrounding MMCV_WITH_CUDA guard.
PARROTS_EXTENSION_REGISTER(points_in_polygons_forward)
    .input(2)
    .output(1)
    .apply(points_in_polygons_cuda_parrots)
    .done();

#endif


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/points_in_polygons_pytorch.h
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef POINTS_IN_POLYGONS_PYTORCH_H
#define POINTS_IN_POLYGONS_PYTORCH_H
#include <torch/extension.h>
using namespace at;

// Points-in-polygons entry point (defined in points_in_polygons.cpp); the
// result is written into `output`.
void points_in_polygons_forward(Tensor points, Tensor polygons, Tensor output);

#endif  // POINTS_IN_POLYGONS_PYTORCH_H


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/psamask.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
// Modified from
// https://github.com/hszhao/semseg/blob/master/lib/psa/src
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Device-dispatched PSA mask forward pass. All arguments are forwarded
// unchanged through DISPATCH_DEVICE_IMPL; the result is written into `output`
// by the selected implementation.
void psamask_forward_impl(const int psa_type, const Tensor input, Tensor output,
                          const int num_, const int h_feature,
                          const int w_feature, const int h_mask,
                          const int w_mask, const int half_h_mask,
                          const int half_w_mask) {
  DISPATCH_DEVICE_IMPL(psamask_forward_impl, psa_type, input, output, num_,
                       h_feature, w_feature, h_mask, w_mask, half_h_mask,
                       half_w_mask);
}

// Device-dispatched PSA mask backward pass. All arguments are forwarded
// unchanged through DISPATCH_DEVICE_IMPL; the result is written into
// `grad_input` by the selected implementation.
void psamask_backward_impl(const int psa_type, const Tensor grad_output,
                           Tensor grad_input, const int num_,
                           const int h_feature, const int w_feature,
                           const int h_mask, const int w_mask,
                           const int half_h_mask, const int half_w_mask) {
  DISPATCH_DEVICE_IMPL(psamask_backward_impl, psa_type, grad_output, grad_input,
                       num_, h_feature, w_feature, h_mask, w_mask, half_h_mask,
                       half_w_mask);
}

// Public entry point for the PSA mask forward pass. Note the argument order
// differs from psamask_forward_impl: here the tensors come first and
// `psa_type` third, while the impl takes `psa_type` first.
void psamask_forward(const Tensor input, Tensor output, const int psa_type,
                     const int num_, const int h_feature, const int w_feature,
                     const int h_mask, const int w_mask, const int half_h_mask,
                     const int half_w_mask) {
  psamask_forward_impl(psa_type, input, output, num_, h_feature, w_feature,
                       h_mask, w_mask, half_h_mask, half_w_mask);
}

// Public entry point for the PSA mask backward pass. Note the argument order
// differs from psamask_backward_impl: here the tensors come first and
// `psa_type` third, while the impl takes `psa_type` first.
// NOTE(review): `const` sits on grad_input here but on grad_output in the
// impl. Top-level const on by-value parameters does not change the function
// signature, so this is only a readability inconsistency.
void psamask_backward(Tensor grad_output, const Tensor grad_input,
                      const int psa_type, const int num_, const int h_feature,
                      const int w_feature, const int h_mask, const int w_mask,
                      const int half_h_mask, const int half_w_mask) {
  psamask_backward_impl(psa_type, grad_output, grad_input, num_, h_feature,
                        w_feature, h_mask, w_mask, half_h_mask, half_w_mask);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/psamask_parrots.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include <parrots/compute/aten.hpp>
#include <parrots/extension.hpp>
#include <parrots/foundation/ssattrs.hpp>

#include "psamask_pytorch.h"
using namespace parrots;

#ifdef MMCV_WITH_CUDA
// CUDA Parrots handler for the PSA mask forward pass.
// Reads eight int attributes ("psa_type", "num_", "h_feature", "w_feature",
// "h_mask", "w_mask", "half_h_mask", "half_w_mask"), wraps ins[0] as the
// input and outs[0] as the destination, then calls psamask_forward_cuda.
void psamask_forward_cuda_parrots(CudaContext &ctx, const SSElement &attr,
                                  const OperatorBase::in_list_t &ins,
                                  OperatorBase::out_list_t &outs) {
  int psa_type;
  int num_;
  int h_feature, w_feature;
  int h_mask, w_mask;
  int half_h_mask, half_w_mask;
  SSAttrs(attr)
      .get<int>("psa_type", psa_type)
      .get<int>("num_", num_)
      .get<int>("h_feature", h_feature)
      .get<int>("w_feature", w_feature)
      .get<int>("h_mask", h_mask)
      .get<int>("w_mask", w_mask)
      .get<int>("half_h_mask", half_h_mask)
      .get<int>("half_w_mask", half_w_mask)
      .done();

  const auto src = buildATensor(ctx, ins[0]);
  auto dst = buildATensor(ctx, outs[0]);
  psamask_forward_cuda(psa_type, src, dst, num_, h_feature, w_feature, h_mask,
                       w_mask, half_h_mask, half_w_mask);
}

// CUDA Parrots handler for the PSA mask backward pass.
// Reads eight int attributes ("psa_type", "num_", "h_feature", "w_feature",
// "h_mask", "w_mask", "half_h_mask", "half_w_mask"), wraps ins[0] as the
// output gradient and outs[0] as the destination for the input gradient, then
// calls psamask_backward_cuda.
void psamask_backward_cuda_parrots(CudaContext &ctx, const SSElement &attr,
                                   const OperatorBase::in_list_t &ins,
                                   OperatorBase::out_list_t &outs) {
  int psa_type;
  int num_;
  int h_feature, w_feature;
  int h_mask, w_mask;
  int half_h_mask, half_w_mask;
  SSAttrs(attr)
      .get<int>("psa_type", psa_type)
      .get<int>("num_", num_)
      .get<int>("h_feature", h_feature)
      .get<int>("w_feature", w_feature)
      .get<int>("h_mask", h_mask)
      .get<int>("w_mask", w_mask)
      .get<int>("half_h_mask", half_h_mask)
      .get<int>("half_w_mask", half_w_mask)
      .done();

  const auto grad_out = buildATensor(ctx, ins[0]);
  auto grad_in = buildATensor(ctx, outs[0]);
  psamask_backward_cuda(psa_type, grad_out, grad_in, num_, h_feature,
                        w_feature, h_mask, w_mask, half_h_mask, half_w_mask);
}
#endif

// Host (CPU) Parrots handler for the PSA mask forward pass.
// Reads eight int attributes ("psa_type", "num_", "h_feature", "w_feature",
// "h_mask", "w_mask", "half_h_mask", "half_w_mask"), wraps ins[0] as the
// input and outs[0] as the destination, then calls psamask_forward_cpu.
void psamask_forward_cpu_parrots(HostContext &ctx, const SSElement &attr,
                                 const OperatorBase::in_list_t &ins,
                                 OperatorBase::out_list_t &outs) {
  int psa_type;
  int num_;
  int h_feature, w_feature;
  int h_mask, w_mask;
  int half_h_mask, half_w_mask;
  SSAttrs(attr)
      .get<int>("psa_type", psa_type)
      .get<int>("num_", num_)
      .get<int>("h_feature", h_feature)
      .get<int>("w_feature", w_feature)
      .get<int>("h_mask", h_mask)
      .get<int>("w_mask", w_mask)
      .get<int>("half_h_mask", half_h_mask)
      .get<int>("half_w_mask", half_w_mask)
      .done();

  const auto src = buildATensor(ctx, ins[0]);
  auto dst = buildATensor(ctx, outs[0]);
  psamask_forward_cpu(psa_type, src, dst, num_, h_feature, w_feature, h_mask,
                      w_mask, half_h_mask, half_w_mask);
}

// Host (CPU) Parrots handler for the PSA mask backward pass.
// Reads eight int attributes ("psa_type", "num_", "h_feature", "w_feature",
// "h_mask", "w_mask", "half_h_mask", "half_w_mask"), wraps ins[0] as the
// output gradient and outs[0] as the destination for the input gradient, then
// calls psamask_backward_cpu.
void psamask_backward_cpu_parrots(HostContext &ctx, const SSElement &attr,
                                  const OperatorBase::in_list_t &ins,
                                  OperatorBase::out_list_t &outs) {
  int psa_type;
  int num_;
  int h_feature, w_feature;
  int h_mask, w_mask;
  int half_h_mask, half_w_mask;
  SSAttrs(attr)
      .get<int>("psa_type", psa_type)
      .get<int>("num_", num_)
      .get<int>("h_feature", h_feature)
      .get<int>("w_feature", w_feature)
      .get<int>("h_mask", h_mask)
      .get<int>("w_mask", w_mask)
      .get<int>("half_h_mask", half_h_mask)
      .get<int>("half_w_mask", half_w_mask)
      .done();

  const auto grad_out = buildATensor(ctx, ins[0]);
  auto grad_in = buildATensor(ctx, outs[0]);
  psamask_backward_cpu(psa_type, grad_out, grad_in, num_, h_feature, w_feature,
                       h_mask, w_mask, half_h_mask, half_w_mask);
}

// Parrots op `psamask_forward`: eight attributes (psa_type, num_, h_feature,
// w_feature, h_mask, w_mask, half_h_mask, half_w_mask), declared with
// input(1) / output(1). The CPU handler is always attached; the CUDA handler
// only when MMCV_WITH_CUDA is defined.
PARROTS_EXTENSION_REGISTER(psamask_forward)
    .attr("psa_type")
    .attr("num_")
    .attr("h_feature")
    .attr("w_feature")
    .attr("h_mask")
    .attr("w_mask")
    .attr("half_h_mask")
    .attr("half_w_mask")
    .input(1)
    .output(1)
    .apply(psamask_forward_cpu_parrots)
#ifdef MMCV_WITH_CUDA
    .apply(psamask_forward_cuda_parrots)
#endif
    .done();

// Parrots op `psamask_backward`: eight attributes (psa_type, num_, h_feature,
// w_feature, h_mask, w_mask, half_h_mask, half_w_mask), declared with
// input(1) / output(1). The CPU handler is always attached; the CUDA handler
// only when MMCV_WITH_CUDA is defined.
PARROTS_EXTENSION_REGISTER(psamask_backward)
    .attr("psa_type")
    .attr("num_")
    .attr("h_feature")
    .attr("w_feature")
    .attr("h_mask")
    .attr("w_mask")
    .attr("half_h_mask")
    .attr("half_w_mask")
    .input(1)
    .output(1)
    .apply(psamask_backward_cpu_parrots)
#ifdef MMCV_WITH_CUDA
    .apply(psamask_backward_cuda_parrots)
#endif
    .done();


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/psamask_pytorch.h
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef PSAMASK_PYTORCH_H
#define PSAMASK_PYTORCH_H
#include <torch/extension.h>
using namespace at;

// Backend entry points called directly by the Parrots handlers in
// psamask_parrots.cpp. The CUDA pair is only declared when MMCV_WITH_CUDA is
// defined. Definitions live outside this header.
#ifdef MMCV_WITH_CUDA
// CUDA forward pass; the result is written into `output`.
void psamask_forward_cuda(const int psa_type, const Tensor input, Tensor output,
                          const int num_, const int h_feature,
                          const int w_feature, const int h_mask,
                          const int w_mask, const int half_h_mask,
                          const int half_w_mask);

// CUDA backward pass; the result is written into `grad_input`.
void psamask_backward_cuda(const int psa_type, const Tensor grad_output,
                           Tensor grad_input, const int num_,
                           const int h_feature, const int w_feature,
                           const int h_mask, const int w_mask,
                           const int half_h_mask, const int half_w_mask);
#endif
// CPU forward pass; the result is written into `output`.
void psamask_forward_cpu(const int psa_type, const Tensor input, Tensor output,
                         const int num_, const int h_feature,
                         const int w_feature, const int h_mask,
                         const int w_mask, const int half_h_mask,
                         const int half_w_mask);

// CPU backward pass; the result is written into `grad_input`.
void psamask_backward_cpu(const int psa_type, const Tensor grad_output,
                          Tensor grad_input, const int num_,
                          const int h_feature, const int w_feature,
                          const int h_mask, const int w_mask,
                          const int half_h_mask, const int half_w_mask);
#endif  // PSAMASK_PYTORCH_H


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/riroi_align_rotated.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Device-dispatched RiRoI-align-rotated forward pass. All arguments are
// forwarded unchanged through DISPATCH_DEVICE_IMPL; the result is written
// into `output` by the selected implementation.
void riroi_align_rotated_forward_impl(Tensor features, Tensor rois,
                                      Tensor output, int pooled_height,
                                      int pooled_width, float spatial_scale,
                                      int num_samples, int num_orientations,
                                      bool clockwise) {
  DISPATCH_DEVICE_IMPL(riroi_align_rotated_forward_impl, features, rois, output,
                       pooled_height, pooled_width, spatial_scale, num_samples,
                       num_orientations, clockwise);
}

// Device-dispatched RiRoI-align-rotated backward pass. All arguments are
// forwarded unchanged through DISPATCH_DEVICE_IMPL; the result is written
// into `bottom_grad` by the selected implementation.
void riroi_align_rotated_backward_impl(Tensor top_grad, Tensor rois,
                                       Tensor bottom_grad, int pooled_height,
                                       int pooled_width, float spatial_scale,
                                       int num_samples, int num_orientations,
                                       bool clockwise) {
  DISPATCH_DEVICE_IMPL(riroi_align_rotated_backward_impl, top_grad, rois,
                       bottom_grad, pooled_height, pooled_width, spatial_scale,
                       num_samples, num_orientations, clockwise);
}

// Public entry point for the RiRoI-align-rotated forward pass; a direct
// pass-through to riroi_align_rotated_forward_impl with the arguments in the
// same order.
void riroi_align_rotated_forward(Tensor features, Tensor rois, Tensor output,
                                 int pooled_height, int pooled_width,
                                 float spatial_scale, int num_samples,
                                 int num_orientations, bool clockwise) {
  riroi_align_rotated_forward_impl(features, rois, output, pooled_height,
                                   pooled_width, spatial_scale, num_samples,
                                   num_orientations, clockwise);
}

// Public entry point for the RiRoI-align-rotated backward pass; a direct
// pass-through to riroi_align_rotated_backward_impl with the arguments in
// the same order.
void riroi_align_rotated_backward(Tensor top_grad, Tensor rois,
                                  Tensor bottom_grad, int pooled_height,
                                  int pooled_width, float spatial_scale,
                                  int num_samples, int num_orientations,
                                  bool clockwise) {
  riroi_align_rotated_backward_impl(top_grad, rois, bottom_grad, pooled_height,
                                    pooled_width, spatial_scale, num_samples,
                                    num_orientations, clockwise);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/riroi_align_rotated_parrots.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include <parrots/compute/aten.hpp>
#include <parrots/extension.hpp>
#include <parrots/foundation/ssattrs.hpp>

#include "riroi_align_rotated_pytorch.h"
using namespace parrots;

#ifdef MMCV_WITH_CUDA
// CUDA Parrots handler for the RiRoI-align-rotated forward pass.
// Attributes read: "pooled_height", "pooled_width", "spatial_scale",
// "num_samples", "num_orientations", "clockwise".
// Inputs: ins[0] -> features, ins[1] -> rois. outs[0] is wrapped as an ATen
// tensor and passed as the destination argument.
void riroi_align_rotated_forward_cuda_parrots(
    CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins,
    OperatorBase::out_list_t& outs) {
  int pooled_height, pooled_width;
  float spatial_scale;
  int num_samples, num_orientations;
  bool clockwise;
  SSAttrs(attr)
      .get<int>("pooled_height", pooled_height)
      .get<int>("pooled_width", pooled_width)
      .get<float>("spatial_scale", spatial_scale)
      .get<int>("num_samples", num_samples)
      .get<int>("num_orientations", num_orientations)
      .get<bool>("clockwise", clockwise)
      .done();

  auto features = buildATensor(ctx, ins[0]);
  auto rois = buildATensor(ctx, ins[1]);
  auto pooled = buildATensor(ctx, outs[0]);
  riroi_align_rotated_forward(features, rois, pooled, pooled_height,
                              pooled_width, spatial_scale, num_samples,
                              num_orientations, clockwise);
}

// Parrots CUDA adapter for the riroi_align_rotated backward op.
// Reads the six op attributes, wraps ins[0], ins[1] and outs[0] as ATen
// tensors and hands everything to riroi_align_rotated_backward.
void riroi_align_rotated_backward_cuda_parrots(
    CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins,
    OperatorBase::out_list_t& outs) {
  int out_h, out_w, n_samples, n_orientations;
  float scale;
  bool is_clockwise;

  // Attribute keys mirror the .attr(...) list of the op registration.
  SSAttrs(attr)
      .get<int>("pooled_height", out_h)
      .get<int>("pooled_width", out_w)
      .get<float>("spatial_scale", scale)
      .get<int>("num_samples", n_samples)
      .get<int>("num_orientations", n_orientations)
      .get<bool>("clockwise", is_clockwise)
      .done();

  auto upstream_grad = buildATensor(ctx, ins[0]);
  auto boxes = buildATensor(ctx, ins[1]);
  auto downstream_grad = buildATensor(ctx, outs[0]);
  riroi_align_rotated_backward(upstream_grad, boxes, downstream_grad, out_h,
                               out_w, scale, n_samples, n_orientations,
                               is_clockwise);
}

// Registers the Parrots op riroi_align_rotated_forward: six attributes, two
// inputs and one output, served by riroi_align_rotated_forward_cuda_parrots.
// The attribute names are the keys that wrapper reads through SSAttrs.
// Only a CUDA implementation is attached; this block sits inside the
// MMCV_WITH_CUDA guard.
PARROTS_EXTENSION_REGISTER(riroi_align_rotated_forward)
    .attr("pooled_height")
    .attr("pooled_width")
    .attr("spatial_scale")
    .attr("num_samples")
    .attr("num_orientations")
    .attr("clockwise")
    .input(2)
    .output(1)
    .apply(riroi_align_rotated_forward_cuda_parrots)
    .done();

// Registers the Parrots op riroi_align_rotated_backward: six attributes, two
// inputs and one output, served by riroi_align_rotated_backward_cuda_parrots.
// The attribute names are the keys that wrapper reads through SSAttrs.
// Only a CUDA implementation is attached; this block sits inside the
// MMCV_WITH_CUDA guard.
PARROTS_EXTENSION_REGISTER(riroi_align_rotated_backward)
    .attr("pooled_height")
    .attr("pooled_width")
    .attr("spatial_scale")
    .attr("num_samples")
    .attr("num_orientations")
    .attr("clockwise")
    .input(2)
    .output(1)
    .apply(riroi_align_rotated_backward_cuda_parrots)
    .done();

#endif


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/riroi_align_rotated_pytorch.h
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef RIROI_ALIGN_ROTATED_PYTORCH_H
#define RIROI_ALIGN_ROTATED_PYTORCH_H
#include <torch/extension.h>
using namespace at;

// Forward pass of the RiRoI-align (rotated) op. Declaration only; the
// definition is provided by a separate translation unit.
// NOTE(review): output is presumably written in place -- confirm in the
// implementation.
void riroi_align_rotated_forward(Tensor features, Tensor rois, Tensor output,
                                 int pooled_height, int pooled_width,
                                 float spatial_scale, int num_samples,
                                 int num_orientations, bool clockwise);

// Backward pass of the RiRoI-align (rotated) op. Declaration only; the
// definition is provided by a separate translation unit.
// NOTE(review): bottom_grad is presumably the buffer that receives the
// gradient w.r.t. the features -- confirm in the implementation.
void riroi_align_rotated_backward(Tensor top_grad, Tensor rois,
                                  Tensor bottom_grad, int pooled_height,
                                  int pooled_width, float spatial_scale,
                                  int num_samples, int num_orientations,
                                  bool clockwise);

#endif  // RIROI_ALIGN_ROTATED_PYTORCH_H


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/roi_align.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Device-dispatch shim for the RoI-align forward pass: hands all arguments to
// DISPATCH_DEVICE_IMPL under the key roi_align_forward_impl.
// NOTE(review): the macro comes from pytorch_device_registry.hpp and presumably
// selects the implementation registered for the tensors' device -- confirm.
void roi_align_forward_impl(Tensor input, Tensor rois, Tensor output,
                            Tensor argmax_y, Tensor argmax_x,
                            int aligned_height, int aligned_width,
                            float spatial_scale, int sampling_ratio,
                            int pool_mode, bool aligned) {
  DISPATCH_DEVICE_IMPL(roi_align_forward_impl, input, rois, output, argmax_y,
                       argmax_x, aligned_height, aligned_width, spatial_scale,
                       sampling_ratio, pool_mode, aligned);
}

// Device-dispatch shim for the RoI-align backward pass: hands all arguments to
// DISPATCH_DEVICE_IMPL under the key roi_align_backward_impl.
// NOTE(review): the macro comes from pytorch_device_registry.hpp and presumably
// selects the implementation registered for the tensors' device -- confirm.
void roi_align_backward_impl(Tensor grad_output, Tensor rois, Tensor argmax_y,
                             Tensor argmax_x, Tensor grad_input,
                             int aligned_height, int aligned_width,
                             float spatial_scale, int sampling_ratio,
                             int pool_mode, bool aligned) {
  DISPATCH_DEVICE_IMPL(roi_align_backward_impl, grad_output, rois, argmax_y,
                       argmax_x, grad_input, aligned_height, aligned_width,
                       spatial_scale, sampling_ratio, pool_mode, aligned);
}

// Public entry point for the RoI-align forward pass. Forwards every argument,
// unchanged and in the same order, to roi_align_forward_impl.
void roi_align_forward(Tensor input, Tensor rois, Tensor output,
                       Tensor argmax_y, Tensor argmax_x, int aligned_height,
                       int aligned_width, float spatial_scale,
                       int sampling_ratio, int pool_mode, bool aligned) {
  roi_align_forward_impl(input, rois, output, argmax_y, argmax_x,
                         aligned_height, aligned_width, spatial_scale,
                         sampling_ratio, pool_mode, aligned);
}

// Public entry point for the RoI-align backward pass. Forwards every argument,
// unchanged and in the same order, to roi_align_backward_impl.
void roi_align_backward(Tensor grad_output, Tensor rois, Tensor argmax_y,
                        Tensor argmax_x, Tensor grad_input, int aligned_height,
                        int aligned_width, float spatial_scale,
                        int sampling_ratio, int pool_mode, bool aligned) {
  roi_align_backward_impl(grad_output, rois, argmax_y, argmax_x, grad_input,
                          aligned_height, aligned_width, spatial_scale,
                          sampling_ratio, pool_mode, aligned);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/roi_align_parrots.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include <parrots/compute/aten.hpp>
#include <parrots/extension.hpp>
#include <parrots/foundation/ssattrs.hpp>

#include "roi_align_pytorch.h"
using namespace parrots;

#ifdef MMCV_WITH_CUDA
// Parrots CUDA adapter for the roi_align forward op.
// Reads the six op attributes, wraps the two inputs and three outputs as ATen
// tensors and calls roi_align_forward_cuda.
void roi_align_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr,
                                    const OperatorBase::in_list_t& ins,
                                    OperatorBase::out_list_t& outs) {
  int out_h, out_w, ratio, mode;
  float scale;
  bool use_aligned;

  // Attribute keys mirror the .attr(...) list of the op registration.
  SSAttrs(attr)
      .get<int>("aligned_height", out_h)
      .get<int>("aligned_width", out_w)
      .get<float>("spatial_scale", scale)
      .get<int>("sampling_ratio", ratio)
      .get<int>("pool_mode", mode)
      .get<bool>("aligned", use_aligned)
      .done();

  const auto& features = buildATensor(ctx, ins[0]);
  const auto& boxes = buildATensor(ctx, ins[1]);
  auto pooled = buildATensor(ctx, outs[0]);
  auto max_y = buildATensor(ctx, outs[1]);
  auto max_x = buildATensor(ctx, outs[2]);
  roi_align_forward_cuda(features, boxes, pooled, max_y, max_x, out_h, out_w,
                         scale, ratio, mode, use_aligned);
}

// Parrots CUDA adapter for the roi_align backward op.
// Reads the six op attributes, wraps the four inputs and the single output as
// ATen tensors and calls roi_align_backward_cuda.
void roi_align_backward_cuda_parrots(CudaContext& ctx, const SSElement& attr,
                                     const OperatorBase::in_list_t& ins,
                                     OperatorBase::out_list_t& outs) {
  int out_h, out_w, ratio, mode;
  float scale;
  bool use_aligned;

  // Attribute keys mirror the .attr(...) list of the op registration.
  SSAttrs(attr)
      .get<int>("aligned_height", out_h)
      .get<int>("aligned_width", out_w)
      .get<float>("spatial_scale", scale)
      .get<int>("sampling_ratio", ratio)
      .get<int>("pool_mode", mode)
      .get<bool>("aligned", use_aligned)
      .done();

  const auto& upstream_grad = buildATensor(ctx, ins[0]);
  const auto& boxes = buildATensor(ctx, ins[1]);
  const auto& max_y = buildATensor(ctx, ins[2]);
  const auto& max_x = buildATensor(ctx, ins[3]);
  auto downstream_grad = buildATensor(ctx, outs[0]);
  roi_align_backward_cuda(upstream_grad, boxes, max_y, max_x, downstream_grad,
                          out_h, out_w, scale, ratio, mode, use_aligned);
}
#endif

// Parrots host (CPU) adapter for the roi_align forward op.
// Reads the six op attributes, wraps the two inputs and three outputs as ATen
// tensors and calls roi_align_forward_cpu.
void roi_align_forward_cpu_parrots(HostContext& ctx, const SSElement& attr,
                                   const OperatorBase::in_list_t& ins,
                                   OperatorBase::out_list_t& outs) {
  int out_h, out_w, ratio, mode;
  float scale;
  bool use_aligned;

  // Attribute keys mirror the .attr(...) list of the op registration.
  SSAttrs(attr)
      .get<int>("aligned_height", out_h)
      .get<int>("aligned_width", out_w)
      .get<float>("spatial_scale", scale)
      .get<int>("sampling_ratio", ratio)
      .get<int>("pool_mode", mode)
      .get<bool>("aligned", use_aligned)
      .done();

  const auto& features = buildATensor(ctx, ins[0]);
  const auto& boxes = buildATensor(ctx, ins[1]);
  auto pooled = buildATensor(ctx, outs[0]);
  auto max_y = buildATensor(ctx, outs[1]);
  auto max_x = buildATensor(ctx, outs[2]);
  roi_align_forward_cpu(features, boxes, pooled, max_y, max_x, out_h, out_w,
                        scale, ratio, mode, use_aligned);
}

// Parrots host (CPU) adapter for the roi_align backward op.
// Reads the six op attributes, wraps the four inputs and the single output as
// ATen tensors and calls roi_align_backward_cpu.
void roi_align_backward_cpu_parrots(HostContext& ctx, const SSElement& attr,
                                    const OperatorBase::in_list_t& ins,
                                    OperatorBase::out_list_t& outs) {
  int out_h, out_w, ratio, mode;
  float scale;
  bool use_aligned;

  // Attribute keys mirror the .attr(...) list of the op registration.
  SSAttrs(attr)
      .get<int>("aligned_height", out_h)
      .get<int>("aligned_width", out_w)
      .get<float>("spatial_scale", scale)
      .get<int>("sampling_ratio", ratio)
      .get<int>("pool_mode", mode)
      .get<bool>("aligned", use_aligned)
      .done();

  const auto& upstream_grad = buildATensor(ctx, ins[0]);
  const auto& boxes = buildATensor(ctx, ins[1]);
  const auto& max_y = buildATensor(ctx, ins[2]);
  const auto& max_x = buildATensor(ctx, ins[3]);
  auto downstream_grad = buildATensor(ctx, outs[0]);
  roi_align_backward_cpu(upstream_grad, boxes, max_y, max_x, downstream_grad,
                         out_h, out_w, scale, ratio, mode, use_aligned);
}

// Registers the Parrots op roi_align_forward: six attributes, two inputs and
// three outputs. The CPU wrapper is always attached; the CUDA wrapper is
// attached only when MMCV_WITH_CUDA is defined.
PARROTS_EXTENSION_REGISTER(roi_align_forward)
    .attr("aligned_height")
    .attr("aligned_width")
    .attr("spatial_scale")
    .attr("sampling_ratio")
    .attr("pool_mode")
    .attr("aligned")
    .input(2)
    .output(3)
    .apply(roi_align_forward_cpu_parrots)
#ifdef MMCV_WITH_CUDA
    .apply(roi_align_forward_cuda_parrots)
#endif
    .done();

// Registers the Parrots op roi_align_backward: six attributes, four inputs and
// one output. The CPU wrapper is always attached; the CUDA wrapper is attached
// only when MMCV_WITH_CUDA is defined.
PARROTS_EXTENSION_REGISTER(roi_align_backward)
    .attr("aligned_height")
    .attr("aligned_width")
    .attr("spatial_scale")
    .attr("sampling_ratio")
    .attr("pool_mode")
    .attr("aligned")
    .input(4)
    .output(1)
    .apply(roi_align_backward_cpu_parrots)
#ifdef MMCV_WITH_CUDA
    .apply(roi_align_backward_cuda_parrots)
#endif
    .done();


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/roi_align_pytorch.h
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef ROI_ALIGN_PYTORCH_H
#define ROI_ALIGN_PYTORCH_H
#include <torch/extension.h>
using namespace at;

#ifdef MMCV_WITH_CUDA
// CUDA implementation of the RoI-align forward pass. Declaration only; it is
// visible only when MMCV_WITH_CUDA is defined and is defined elsewhere.
void roi_align_forward_cuda(Tensor input, Tensor rois, Tensor output,
                            Tensor argmax_y, Tensor argmax_x,
                            int aligned_height, int aligned_width,
                            float spatial_scale, int sampling_ratio,
                            int pool_mode, bool aligned);

// CUDA implementation of the RoI-align backward pass. Declaration only; it is
// visible only when MMCV_WITH_CUDA is defined and is defined elsewhere.
void roi_align_backward_cuda(Tensor grad_output, Tensor rois, Tensor argmax_y,
                             Tensor argmax_x, Tensor grad_input,
                             int aligned_height, int aligned_width,
                             float spatial_scale, int sampling_ratio,
                             int pool_mode, bool aligned);
#endif

// CPU implementation of the RoI-align forward pass. Declaration only; the
// definition is provided by a separate translation unit.
void roi_align_forward_cpu(Tensor input, Tensor rois, Tensor output,
                           Tensor argmax_y, Tensor argmax_x, int aligned_height,
                           int aligned_width, float spatial_scale,
                           int sampling_ratio, int pool_mode, bool aligned);

// CPU implementation of the RoI-align backward pass. Declaration only; the
// definition is provided by a separate translation unit.
void roi_align_backward_cpu(Tensor grad_output, Tensor rois, Tensor argmax_y,
                            Tensor argmax_x, Tensor grad_input,
                            int aligned_height, int aligned_width,
                            float spatial_scale, int sampling_ratio,
                            int pool_mode, bool aligned);

#endif  // ROI_ALIGN_PYTORCH_H


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/roi_align_rotated.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Device-dispatch shim for the rotated RoI-align forward pass: hands all
// arguments to DISPATCH_DEVICE_IMPL under the key
// roi_align_rotated_forward_impl.
// NOTE(review): the macro comes from pytorch_device_registry.hpp and presumably
// selects the implementation registered for the tensors' device -- confirm.
void roi_align_rotated_forward_impl(Tensor features, Tensor rois, Tensor output,
                                    int aligned_height, int aligned_width,
                                    float spatial_scale, int sample_ratio,
                                    bool aligned, bool clockwise) {
  DISPATCH_DEVICE_IMPL(roi_align_rotated_forward_impl, features, rois, output,
                       aligned_height, aligned_width, spatial_scale,
                       sample_ratio, aligned, clockwise);
}

// Device-dispatch shim for the rotated RoI-align backward pass: hands all
// arguments to DISPATCH_DEVICE_IMPL under the key
// roi_align_rotated_backward_impl.
// NOTE(review): the macro comes from pytorch_device_registry.hpp and presumably
// selects the implementation registered for the tensors' device -- confirm.
void roi_align_rotated_backward_impl(Tensor top_grad, Tensor rois,
                                     Tensor bottom_grad, int aligned_height,
                                     int aligned_width, float spatial_scale,
                                     int sample_ratio, bool aligned,
                                     bool clockwise) {
  DISPATCH_DEVICE_IMPL(roi_align_rotated_backward_impl, top_grad, rois,
                       bottom_grad, aligned_height, aligned_width,
                       spatial_scale, sample_ratio, aligned, clockwise);
}

// Public entry point for the rotated RoI-align forward pass. Forwards every
// argument, unchanged and in the same order, to roi_align_rotated_forward_impl
// (sampling_ratio here is the callee's sample_ratio parameter).
void roi_align_rotated_forward(Tensor input, Tensor rois, Tensor output,
                               int aligned_height, int aligned_width,
                               float spatial_scale, int sampling_ratio,
                               bool aligned, bool clockwise) {
  roi_align_rotated_forward_impl(input, rois, output, aligned_height,
                                 aligned_width, spatial_scale, sampling_ratio,
                                 aligned, clockwise);
}

// Public entry point for the rotated RoI-align backward pass. Forwards every
// argument, unchanged and in the same order, to
// roi_align_rotated_backward_impl (sampling_ratio here is the callee's
// sample_ratio parameter).
void roi_align_rotated_backward(Tensor top_grad, Tensor rois,
                                Tensor bottom_grad, int aligned_height,
                                int aligned_width, float spatial_scale,
                                int sampling_ratio, bool aligned,
                                bool clockwise) {
  roi_align_rotated_backward_impl(top_grad, rois, bottom_grad, aligned_height,
                                  aligned_width, spatial_scale, sampling_ratio,
                                  aligned, clockwise);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/roi_align_rotated_parrots.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include <parrots/compute/aten.hpp>
#include <parrots/extension.hpp>
#include <parrots/foundation/ssattrs.hpp>

#include "roi_align_rotated_pytorch.h"
using namespace parrots;

#ifdef MMCV_WITH_CUDA
// Parrots CUDA adapter for the roi_align_rotated forward op.
// Reads the six op attributes, wraps ins[0], ins[1] and outs[0] as ATen
// tensors and calls roi_align_rotated_forward_cuda.
void roi_align_rotated_forward_cuda_parrots(CudaContext& ctx,
                                            const SSElement& attr,
                                            const OperatorBase::in_list_t& ins,
                                            OperatorBase::out_list_t& outs) {
  int out_h, out_w, n_samples;
  float scale;
  bool use_aligned, is_clockwise;

  // Attribute keys mirror the .attr(...) list of the op registration.
  SSAttrs(attr)
      .get<int>("pooled_height", out_h)
      .get<int>("pooled_width", out_w)
      .get<float>("spatial_scale", scale)
      .get<int>("sample_num", n_samples)
      .get<bool>("aligned", use_aligned)
      .get<bool>("clockwise", is_clockwise)
      .done();

  const auto& features = buildATensor(ctx, ins[0]);
  const auto& boxes = buildATensor(ctx, ins[1]);
  auto pooled = buildATensor(ctx, outs[0]);
  roi_align_rotated_forward_cuda(features, boxes, pooled, out_h, out_w, scale,
                                 n_samples, use_aligned, is_clockwise);
}

// Parrots CUDA adapter for the roi_align_rotated backward op.
// Reads the six op attributes, wraps ins[0], ins[1] and outs[0] as ATen
// tensors and calls roi_align_rotated_backward_cuda.
void roi_align_rotated_backward_cuda_parrots(CudaContext& ctx,
                                             const SSElement& attr,
                                             const OperatorBase::in_list_t& ins,
                                             OperatorBase::out_list_t& outs) {
  int out_h, out_w, n_samples;
  float scale;
  bool use_aligned, is_clockwise;

  // Attribute keys mirror the .attr(...) list of the op registration.
  SSAttrs(attr)
      .get<int>("pooled_height", out_h)
      .get<int>("pooled_width", out_w)
      .get<float>("spatial_scale", scale)
      .get<int>("sample_num", n_samples)
      .get<bool>("aligned", use_aligned)
      .get<bool>("clockwise", is_clockwise)
      .done();

  const auto& upstream_grad = buildATensor(ctx, ins[0]);
  const auto& boxes = buildATensor(ctx, ins[1]);
  auto downstream_grad = buildATensor(ctx, outs[0]);
  roi_align_rotated_backward_cuda(upstream_grad, boxes, downstream_grad, out_h,
                                  out_w, scale, n_samples, use_aligned,
                                  is_clockwise);
}
#endif

// Parrots host (CPU) adapter for the roi_align_rotated forward op.
// Reads the six op attributes, wraps ins[0], ins[1] and outs[0] as ATen
// tensors and calls roi_align_rotated_forward_cpu.
void roi_align_rotated_forward_cpu_parrots(HostContext& ctx,
                                           const SSElement& attr,
                                           const OperatorBase::in_list_t& ins,
                                           OperatorBase::out_list_t& outs) {
  int out_h, out_w, n_samples;
  float scale;
  bool use_aligned, is_clockwise;

  // Attribute keys mirror the .attr(...) list of the op registration.
  SSAttrs(attr)
      .get<int>("pooled_height", out_h)
      .get<int>("pooled_width", out_w)
      .get<float>("spatial_scale", scale)
      .get<int>("sample_num", n_samples)
      .get<bool>("aligned", use_aligned)
      .get<bool>("clockwise", is_clockwise)
      .done();

  const auto& features = buildATensor(ctx, ins[0]);
  const auto& boxes = buildATensor(ctx, ins[1]);
  auto pooled = buildATensor(ctx, outs[0]);
  roi_align_rotated_forward_cpu(features, boxes, pooled, out_h, out_w, scale,
                                n_samples, use_aligned, is_clockwise);
}

// Parrots host (CPU) adapter for the roi_align_rotated backward op.
// Reads the six op attributes, wraps ins[0], ins[1] and outs[0] as ATen
// tensors and calls roi_align_rotated_backward_cpu.
void roi_align_rotated_backward_cpu_parrots(HostContext& ctx,
                                            const SSElement& attr,
                                            const OperatorBase::in_list_t& ins,
                                            OperatorBase::out_list_t& outs) {
  int out_h, out_w, n_samples;
  float scale;
  bool use_aligned, is_clockwise;

  // Attribute keys mirror the .attr(...) list of the op registration.
  SSAttrs(attr)
      .get<int>("pooled_height", out_h)
      .get<int>("pooled_width", out_w)
      .get<float>("spatial_scale", scale)
      .get<int>("sample_num", n_samples)
      .get<bool>("aligned", use_aligned)
      .get<bool>("clockwise", is_clockwise)
      .done();

  const auto& upstream_grad = buildATensor(ctx, ins[0]);
  const auto& boxes = buildATensor(ctx, ins[1]);
  auto downstream_grad = buildATensor(ctx, outs[0]);
  roi_align_rotated_backward_cpu(upstream_grad, boxes, downstream_grad, out_h,
                                 out_w, scale, n_samples, use_aligned,
                                 is_clockwise);
}

// Registers the Parrots op roi_align_rotated_forward: six attributes, two
// inputs and one output. The CPU wrapper is always attached; the CUDA wrapper
// is attached only when MMCV_WITH_CUDA is defined.
PARROTS_EXTENSION_REGISTER(roi_align_rotated_forward)
    .attr("pooled_height")
    .attr("pooled_width")
    .attr("spatial_scale")
    .attr("sample_num")
    .attr("aligned")
    .attr("clockwise")
    .input(2)
    .output(1)
    .apply(roi_align_rotated_forward_cpu_parrots)
#ifdef MMCV_WITH_CUDA
    .apply(roi_align_rotated_forward_cuda_parrots)
#endif
    .done();

// Registers the Parrots op roi_align_rotated_backward: six attributes, two
// inputs and one output. The CPU wrapper is always attached; the CUDA wrapper
// is attached only when MMCV_WITH_CUDA is defined.
PARROTS_EXTENSION_REGISTER(roi_align_rotated_backward)
    .attr("pooled_height")
    .attr("pooled_width")
    .attr("spatial_scale")
    .attr("sample_num")
    .attr("aligned")
    .attr("clockwise")
    .input(2)
    .output(1)
    .apply(roi_align_rotated_backward_cpu_parrots)
#ifdef MMCV_WITH_CUDA
    .apply(roi_align_rotated_backward_cuda_parrots)
#endif
    .done();


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/roi_align_rotated_pytorch.h
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef ROI_ALIGN_ROTATED_PYTORCH_H
#define ROI_ALIGN_ROTATED_PYTORCH_H
#include <torch/extension.h>
using namespace at;

#ifdef MMCV_WITH_CUDA
// CUDA implementation of the rotated RoI-align forward pass. Declaration only;
// it is visible only when MMCV_WITH_CUDA is defined and is defined elsewhere.
void roi_align_rotated_forward_cuda(Tensor features, Tensor rois, Tensor output,
                                    int pooled_height, int pooled_width,
                                    float spatial_scale, int sample_num,
                                    bool aligned, bool clockwise);

// CUDA implementation of the rotated RoI-align backward pass. Declaration only;
// it is visible only when MMCV_WITH_CUDA is defined and is defined elsewhere.
void roi_align_rotated_backward_cuda(Tensor grad_output, Tensor rois,
                                     Tensor bottom_grad, int pooled_height,
                                     int pooled_width, float spatial_scale,
                                     int sample_num, bool aligned,
                                     bool clockwise);
#endif

// CPU implementation of the rotated RoI-align forward pass. Declaration only;
// the definition is provided by a separate translation unit.
void roi_align_rotated_forward_cpu(Tensor features, Tensor rois, Tensor output,
                                   int pooled_height, int pooled_width,
                                   float spatial_scale, int sample_num,
                                   bool aligned, bool clockwise);

// CPU implementation of the rotated RoI-align backward pass. Declaration only;
// the definition is provided by a separate translation unit.
void roi_align_rotated_backward_cpu(Tensor grad_output, Tensor rois,
                                    Tensor bottom_grad, int pooled_height,
                                    int pooled_width, float spatial_scale,
                                    int sample_num, bool aligned,
                                    bool clockwise);

#endif  // ROI_ALIGN_ROTATED_PYTORCH_H


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/roi_pool.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Device-dispatch shim for the RoI-pool forward pass: hands all arguments to
// DISPATCH_DEVICE_IMPL under the key roi_pool_forward_impl.
// NOTE(review): the macro comes from pytorch_device_registry.hpp and presumably
// selects the implementation registered for the tensors' device -- confirm.
void roi_pool_forward_impl(Tensor input, Tensor rois, Tensor output,
                           Tensor argmax, int pooled_height, int pooled_width,
                           float spatial_scale) {
  DISPATCH_DEVICE_IMPL(roi_pool_forward_impl, input, rois, output, argmax,
                       pooled_height, pooled_width, spatial_scale);
}

// Device-dispatch shim for the RoI-pool backward pass: hands all arguments to
// DISPATCH_DEVICE_IMPL under the key roi_pool_backward_impl.
// NOTE(review): the macro comes from pytorch_device_registry.hpp and presumably
// selects the implementation registered for the tensors' device -- confirm.
void roi_pool_backward_impl(Tensor grad_output, Tensor rois, Tensor argmax,
                            Tensor grad_input, int pooled_height,
                            int pooled_width, float spatial_scale) {
  DISPATCH_DEVICE_IMPL(roi_pool_backward_impl, grad_output, rois, argmax,
                       grad_input, pooled_height, pooled_width, spatial_scale);
}

// Public entry point for the RoI-pool forward pass. Forwards every argument,
// unchanged and in the same order, to roi_pool_forward_impl.
void roi_pool_forward(Tensor input, Tensor rois, Tensor output, Tensor argmax,
                      int pooled_height, int pooled_width,
                      float spatial_scale) {
  roi_pool_forward_impl(input, rois, output, argmax, pooled_height,
                        pooled_width, spatial_scale);
}

// Public entry point for the RoI-pool backward pass. Forwards every argument,
// unchanged and in the same order, to roi_pool_backward_impl.
void roi_pool_backward(Tensor grad_output, Tensor rois, Tensor argmax,
                       Tensor grad_input, int pooled_height, int pooled_width,
                       float spatial_scale) {
  roi_pool_backward_impl(grad_output, rois, argmax, grad_input, pooled_height,
                         pooled_width, spatial_scale);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/roi_pool_parrots.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include <parrots/compute/aten.hpp>
#include <parrots/extension.hpp>
#include <parrots/foundation/ssattrs.hpp>

#include "roi_pool_pytorch.h"
using namespace parrots;

#ifdef MMCV_WITH_CUDA
// Parrots CUDA adapter for the roi_pool forward op.
// Reads the three op attributes, wraps the two inputs and two outputs as ATen
// tensors and calls roi_pool_forward_cuda.
void roi_pool_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr,
                                   const OperatorBase::in_list_t& ins,
                                   OperatorBase::out_list_t& outs) {
  int out_h, out_w;
  float scale;

  // Attribute keys mirror the .attr(...) list of the op registration.
  SSAttrs(attr)
      .get<int>("pooled_height", out_h)
      .get<int>("pooled_width", out_w)
      .get<float>("spatial_scale", scale)
      .done();

  const auto& features = buildATensor(ctx, ins[0]);
  const auto& boxes = buildATensor(ctx, ins[1]);
  auto pooled = buildATensor(ctx, outs[0]);
  auto max_idx = buildATensor(ctx, outs[1]);
  roi_pool_forward_cuda(features, boxes, pooled, max_idx, out_h, out_w, scale);
}

// Parrots CUDA adapter for the roi_pool backward op.
// Reads the three op attributes, wraps the three inputs and the single output
// as ATen tensors and calls roi_pool_backward_cuda.
void roi_pool_backward_cuda_parrots(CudaContext& ctx, const SSElement& attr,
                                    const OperatorBase::in_list_t& ins,
                                    OperatorBase::out_list_t& outs) {
  int out_h, out_w;
  float scale;

  // Attribute keys mirror the .attr(...) list of the op registration.
  SSAttrs(attr)
      .get<int>("pooled_height", out_h)
      .get<int>("pooled_width", out_w)
      .get<float>("spatial_scale", scale)
      .done();

  const auto& upstream_grad = buildATensor(ctx, ins[0]);
  const auto& boxes = buildATensor(ctx, ins[1]);
  const auto& max_idx = buildATensor(ctx, ins[2]);
  auto downstream_grad = buildATensor(ctx, outs[0]);
  roi_pool_backward_cuda(upstream_grad, boxes, max_idx, downstream_grad, out_h,
                         out_w, scale);
}

// Registers the Parrots op roi_pool_forward: three attributes, two inputs and
// two outputs, served by roi_pool_forward_cuda_parrots. Only a CUDA
// implementation is attached; this block sits inside the MMCV_WITH_CUDA guard.
PARROTS_EXTENSION_REGISTER(roi_pool_forward)
    .attr("pooled_height")
    .attr("pooled_width")
    .attr("spatial_scale")
    .input(2)
    .output(2)
    .apply(roi_pool_forward_cuda_parrots)
    .done();

// Registers the Parrots op roi_pool_backward: three attributes, three inputs
// and one output, served by roi_pool_backward_cuda_parrots. Only a CUDA
// implementation is attached; this block sits inside the MMCV_WITH_CUDA guard.
PARROTS_EXTENSION_REGISTER(roi_pool_backward)
    .attr("pooled_height")
    .attr("pooled_width")
    .attr("spatial_scale")
    .input(3)
    .output(1)
    .apply(roi_pool_backward_cuda_parrots)
    .done();
#endif


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/roi_pool_pytorch.h
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef ROI_POOL_PYTORCH_H
#define ROI_POOL_PYTORCH_H
#include <torch/extension.h>
using namespace at;

#ifdef MMCV_WITH_CUDA
// CUDA implementation of the RoI-pool forward pass. Declaration only; it is
// visible only when MMCV_WITH_CUDA is defined and is defined elsewhere.
void roi_pool_forward_cuda(Tensor input, Tensor rois, Tensor output,
                           Tensor argmax, int pooled_height, int pooled_width,
                           float spatial_scale);

// CUDA implementation of the RoI-pool backward pass. Declaration only; it is
// visible only when MMCV_WITH_CUDA is defined and is defined elsewhere.
void roi_pool_backward_cuda(Tensor grad_output, Tensor rois, Tensor argmax,
                            Tensor grad_input, int pooled_height,
                            int pooled_width, float spatial_scale);
#endif
#endif  // ROI_POOL_PYTORCH_H


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/roiaware_pool3d.cpp
================================================
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Device-dispatch shim for the RoI-aware 3D pooling forward pass: hands all
// arguments to DISPATCH_DEVICE_IMPL under the key roiaware_pool3d_forward_impl.
// NOTE(review): the macro comes from pytorch_device_registry.hpp and presumably
// selects the implementation registered for the tensors' device -- confirm.
void roiaware_pool3d_forward_impl(int boxes_num, int pts_num, int channels,
                                  int max_pts_each_voxel, int out_x, int out_y,
                                  int out_z, const Tensor rois,
                                  const Tensor pts, const Tensor pts_feature,
                                  Tensor argmax, Tensor pts_idx_of_voxels,
                                  Tensor pooled_features, int pool_method) {
  DISPATCH_DEVICE_IMPL(roiaware_pool3d_forward_impl, boxes_num, pts_num,
                       channels, max_pts_each_voxel, out_x, out_y, out_z, rois,
                       pts, pts_feature, argmax, pts_idx_of_voxels,
                       pooled_features, pool_method);
}

// Device-dispatch shim for the RoI-aware 3D pooling backward pass: hands all
// arguments to DISPATCH_DEVICE_IMPL under the key
// roiaware_pool3d_backward_impl.
// NOTE(review): the macro comes from pytorch_device_registry.hpp and presumably
// selects the implementation registered for the tensors' device -- confirm.
void roiaware_pool3d_backward_impl(int boxes_num, int out_x, int out_y,
                                   int out_z, int channels,
                                   int max_pts_each_voxel,
                                   const Tensor pts_idx_of_voxels,
                                   const Tensor argmax, const Tensor grad_out,
                                   Tensor grad_in, int pool_method) {
  DISPATCH_DEVICE_IMPL(roiaware_pool3d_backward_impl, boxes_num, out_x, out_y,
                       out_z, channels, max_pts_each_voxel, pts_idx_of_voxels,
                       argmax, grad_out, grad_in, pool_method);
}

// Public entry point for the RoI-aware 3D pooling forward pass.
// Derives the problem sizes from the tensor shapes, validates the output grid
// size and forwards everything to roiaware_pool3d_forward_impl.
//
// Raises (via TORCH_CHECK) when any of out_x / out_y / out_z is >= 256. The
// original code guarded this with assert(), which is compiled out when NDEBUG
// is defined, so oversized grids were silently accepted in release builds.
// NOTE(review): TORCH_CHECK is expected to be reachable through
// pytorch_cpp_helper.hpp (torch/extension.h) -- confirm for the Parrots build.
void roiaware_pool3d_forward(Tensor rois, Tensor pts, Tensor pts_feature,
                             Tensor argmax, Tensor pts_idx_of_voxels,
                             Tensor pooled_features, int pool_method) {
  // params rois: (N, 7) [x, y, z, x_size, y_size, z_size, ry] in LiDAR
  // coordinate
  // params pts: (npoints, 3) [x, y, z] in LiDAR coordinate
  // params pts_feature: (npoints, C)
  // params argmax: (N, out_x, out_y, out_z, C)
  // params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
  // params pooled_features: (N, out_x, out_y, out_z, C)
  // params pool_method: 0: max_pool 1: avg_pool
  int boxes_num = rois.size(0);
  int pts_num = pts.size(0);
  int channels = pts_feature.size(1);
  int max_pts_each_voxel = pts_idx_of_voxels.size(4);  // index 0 is the counter
  int out_x = pts_idx_of_voxels.size(1);
  int out_y = pts_idx_of_voxels.size(2);
  int out_z = pts_idx_of_voxels.size(3);

  // Voxel indices are encoded with 8 bits each, so every grid dimension must
  // stay below 256. Enforced in all build modes, unlike a plain assert().
  TORCH_CHECK((out_x < 256) && (out_y < 256) && (out_z < 256),
              "roiaware_pool3d_forward: out_x, out_y and out_z must each be "
              "smaller than 256 (indices are encoded with 8 bits), but got (",
              out_x, ", ", out_y, ", ", out_z, ")");

  roiaware_pool3d_forward_impl(boxes_num, pts_num, channels, max_pts_each_voxel,
                               out_x, out_y, out_z, rois, pts, pts_feature,
                               argmax, pts_idx_of_voxels, pooled_features,
                               pool_method);
}

// Public entry point for the RoI-aware 3D pooling backward pass.
// Derives the problem sizes from the tensor shapes and forwards everything to
// roiaware_pool3d_backward_impl.
void roiaware_pool3d_backward(Tensor pts_idx_of_voxels, Tensor argmax,
                              Tensor grad_out, Tensor grad_in,
                              int pool_method) {
  // Expected layouts:
  //   pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
  //   argmax:            (N, out_x, out_y, out_z, C)
  //   grad_out:          (N, out_x, out_y, out_z, C)
  //   grad_in:           (npoints, C), return value
  //   pool_method:       0: max_pool 1: avg_pool
  const int n_boxes = pts_idx_of_voxels.size(0);
  const int grid_x = pts_idx_of_voxels.size(1);
  const int grid_y = pts_idx_of_voxels.size(2);
  const int grid_z = pts_idx_of_voxels.size(3);
  // Slot 0 of the last dimension holds the counter.
  const int pts_per_voxel = pts_idx_of_voxels.size(4);
  const int n_channels = grad_out.size(4);

  roiaware_pool3d_backward_impl(n_boxes, grid_x, grid_y, grid_z, n_channels,
                                pts_per_voxel, pts_idx_of_voxels, argmax,
                                grad_out, grad_in, pool_method);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/roiaware_pool3d_parrots.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include <parrots/compute/aten.hpp>
#include <parrots/extension.hpp>
#include <parrots/foundation/ssattrs.hpp>

#include "roiaware_pool3d_pytorch.h"

using namespace parrots;

#ifdef MMCV_WITH_CUDA
// Parrots CUDA adapter for the roiaware_pool3d forward op.
// Reads the pool_method attribute, wraps the three inputs and three outputs as
// ATen tensors and calls roiaware_pool3d_forward.
void roiaware_pool3d_forward_cuda_parrots(CudaContext& ctx,
                                          const SSElement& attr,
                                          const OperatorBase::in_list_t& ins,
                                          OperatorBase::out_list_t& outs) {
  int method;
  SSAttrs(attr).get<int>("pool_method", method).done();

  // Inputs: boxes, points, per-point features.
  auto boxes = buildATensor(ctx, ins[0]);
  auto points = buildATensor(ctx, ins[1]);
  auto point_feats = buildATensor(ctx, ins[2]);

  // Outputs, in registration order.
  auto max_idx = buildATensor(ctx, outs[0]);
  auto voxel_pt_idx = buildATensor(ctx, outs[1]);
  auto pooled = buildATensor(ctx, outs[2]);

  roiaware_pool3d_forward(boxes, points, point_feats, max_idx, voxel_pt_idx,
                          pooled, method);
}

// Parrots adapter for the RoI-aware 3D pooling backward op.
//
// Reads the integer attribute "pool_method", wraps the three inputs
// (pts_idx_of_voxels, argmax, grad_out) and the single output (grad_in) as
// ATen tensors, then calls roiaware_pool3d_backward, which fills grad_in.
void roiaware_pool3d_backward_cuda_parrots(CudaContext& ctx,
                                           const SSElement& attr,
                                           const OperatorBase::in_list_t& ins,
                                           OperatorBase::out_list_t& outs) {
  int method;
  SSAttrs(attr).get<int>("pool_method", method).done();

  // Inputs, in registration order.
  auto in_pts_idx_of_voxels = buildATensor(ctx, ins[0]);
  auto in_argmax = buildATensor(ctx, ins[1]);
  auto in_grad_out = buildATensor(ctx, ins[2]);

  // Sole output: gradient with respect to the point features.
  auto out_grad_in = buildATensor(ctx, outs[0]);

  roiaware_pool3d_backward(in_pts_idx_of_voxels, in_argmax, in_grad_out,
                           out_grad_in, method);
}

// Registration of roiaware_pool3d_forward with Parrots: one attribute
// ("pool_method"), 3 inputs and 3 outputs, handled by
// roiaware_pool3d_forward_cuda_parrots. The counts must match the ins[]/outs[]
// indices that the adapter reads.
PARROTS_EXTENSION_REGISTER(roiaware_pool3d_forward)
    .attr("pool_method")
    .input(3)
    .output(3)
    .apply(roiaware_pool3d_forward_cuda_parrots)
    .done();

// Registration of roiaware_pool3d_backward with Parrots: one attribute
// ("pool_method"), 3 inputs and 1 output, handled by
// roiaware_pool3d_backward_cuda_parrots.
PARROTS_EXTENSION_REGISTER(roiaware_pool3d_backward)
    .attr("pool_method")
    .input(3)
    .output(1)
    .apply(roiaware_pool3d_backward_cuda_parrots)
    .done();
#endif


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/roiaware_pool3d_pytorch.h
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef ROIAWARE_POOL3D_PYTORCH_H
#define ROIAWARE_POOL3D_PYTORCH_H
#include <torch/extension.h>
using namespace at;

// Forward pass of RoI-aware 3D pooling. argmax, pts_idx_of_voxels and
// pooled_features are passed in by the caller as pre-built tensors (the
// Parrots adapter supplies them from its output list); pool_method selects
// the pooling variant.
void roiaware_pool3d_forward(Tensor rois, Tensor pts, Tensor pts_feature,
                             Tensor argmax, Tensor pts_idx_of_voxels,
                             Tensor pooled_features, int pool_method);

// Backward pass of RoI-aware 3D pooling. grad_in is the caller-provided
// tensor that receives the result; pool_method selects the pooling variant.
void roiaware_pool3d_backward(Tensor pts_idx_of_voxels, Tensor argmax,
                              Tensor grad_out, Tensor grad_in, int pool_method);

#endif  // ROIAWARE_POOL3D_PYTORCH_H


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/roipoint_pool3d.cpp
================================================
/*
Modified from
https://github.com/open-mmlab/OpenPCDet/blob/master/pcdet/ops/roipoint_pool3d/src/roipoint_pool3d.cpp
Point cloud feature pooling
Written by Shaoshuai Shi
All Rights Reserved 2018.
*/

#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Device-dispatch entry for RoI point pooling (forward).
// Passes every argument unchanged to DISPATCH_DEVICE_IMPL, keyed by this
// function's own name. The macro is defined in pytorch_device_registry.hpp
// (not visible here); presumably it selects the implementation registered for
// the tensors' device -- confirm against that header.
void roipoint_pool3d_forward_impl(int batch_size, int pts_num, int boxes_num,
                                  int feature_in_len, int sampled_pts_num,
                                  const Tensor xyz, const Tensor boxes3d,
                                  const Tensor pts_feature,
                                  Tensor pooled_features,
                                  Tensor pooled_empty_flag) {
  DISPATCH_DEVICE_IMPL(roipoint_pool3d_forward_impl, batch_size, pts_num,
                       boxes_num, feature_in_len, sampled_pts_num, xyz, boxes3d,
                       pts_feature, pooled_features, pooled_empty_flag);
}

// Public entry point for RoI point pooling (forward).
//
// Expected layouts (as annotated by the original authors):
//   xyz:               (B, N, 3)
//   boxes3d:           (B, M, 7)
//   pts_feature:       (B, N, C)
//   pooled_features:   (B, M, sampled_pts_num, 3+C)
//   pooled_empty_flag: (B, M)
//
// The scalar sizes handed to the implementation are read directly from the
// tensor dimensions; pooled_features and pooled_empty_flag are written by the
// callee.
void roipoint_pool3d_forward(Tensor xyz, Tensor boxes3d, Tensor pts_feature,
                             Tensor pooled_features, Tensor pooled_empty_flag) {
  const int num_batches = xyz.size(0);
  const int num_points = xyz.size(1);
  const int num_boxes = boxes3d.size(1);
  const int num_feature_channels = pts_feature.size(2);
  // Number of points sampled per box is dictated by the output tensor.
  const int num_sampled_points = pooled_features.size(2);

  roipoint_pool3d_forward_impl(num_batches, num_points, num_boxes,
                               num_feature_channels, num_sampled_points, xyz,
                               boxes3d, pts_feature, pooled_features,
                               pooled_empty_flag);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/roipoint_pool3d_parrots.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include <parrots/compute/aten.hpp>
#include <parrots/extension.hpp>
#include <parrots/foundation/ssattrs.hpp>

#include "roipoint_pool3d_pytorch.h"

using namespace parrots;

#ifdef MMCV_WITH_CUDA
// Parrots adapter for the RoI point pooling forward op.
//
// The op has no attributes. The three inputs (xyz, boxes3d, pts_feature) and
// the two outputs (pooled_features, pooled_empty_flag) are wrapped as ATen
// tensors and passed to roipoint_pool3d_forward.
void roipoint_pool3d_forward_cuda_parrots(CudaContext& ctx,
                                          const SSElement& attr,
                                          const OperatorBase::in_list_t& ins,
                                          OperatorBase::out_list_t& outs) {
  // Inputs, in registration order.
  auto in_xyz = buildATensor(ctx, ins[0]);
  auto in_boxes3d = buildATensor(ctx, ins[1]);
  auto in_pts_feature = buildATensor(ctx, ins[2]);

  // Outputs, in registration order.
  auto out_pooled_features = buildATensor(ctx, outs[0]);
  auto out_pooled_empty_flag = buildATensor(ctx, outs[1]);

  roipoint_pool3d_forward(in_xyz, in_boxes3d, in_pts_feature,
                          out_pooled_features, out_pooled_empty_flag);
}

// Registration of roipoint_pool3d_forward with Parrots: no attributes,
// 3 inputs and 2 outputs, handled by roipoint_pool3d_forward_cuda_parrots.
PARROTS_EXTENSION_REGISTER(roipoint_pool3d_forward)
    .input(3)
    .output(2)
    .apply(roipoint_pool3d_forward_cuda_parrots)
    .done();
#endif


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/roipoint_pool3d_pytorch.h
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef ROIPOINT_POOL3D_PYTORCH_H
#define ROIPOINT_POOL3D_PYTORCH_H
#include <torch/extension.h>
using namespace at;

// Forward pass of RoI point pooling. pooled_features and pooled_empty_flag
// are caller-provided tensors (the Parrots adapter supplies them from its
// output list).
void roipoint_pool3d_forward(Tensor xyz, Tensor boxes3d, Tensor pts_feature,
                             Tensor pooled_features, Tensor pooled_empty_flag);

#endif  // ROIPOINT_POOL3D_PYTORCH_H


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/rotated_feature_align.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved.
// Modified from
// https://github.com/SJTU-Thinklab-Det/r3det-on-mmdetection/blob/master/mmdet/ops/fr/src/feature_refine_cuda.cpp

#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Device-dispatch entry for rotated feature alignment (forward).
// Passes every argument unchanged to DISPATCH_DEVICE_IMPL, keyed by this
// function's own name. The macro comes from pytorch_device_registry.hpp (not
// visible here); presumably it selects the implementation registered for the
// tensors' device -- confirm against that header.
void rotated_feature_align_forward_impl(const Tensor features,
                                        const Tensor best_bboxes,
                                        const float spatial_scale,
                                        const int points, Tensor output) {
  DISPATCH_DEVICE_IMPL(rotated_feature_align_forward_impl, features,
                       best_bboxes, spatial_scale, points, output);
}

// Device-dispatch entry for rotated feature alignment (backward).
// Passes every argument unchanged to DISPATCH_DEVICE_IMPL, keyed by this
// function's own name. The macro comes from pytorch_device_registry.hpp (not
// visible here); presumably it selects the implementation registered for the
// tensors' device -- confirm against that header.
void rotated_feature_align_backward_impl(const Tensor top_grad,
                                         const Tensor best_bboxes,
                                         const float spatial_scale,
                                         const int points, Tensor bottom_grad) {
  DISPATCH_DEVICE_IMPL(rotated_feature_align_backward_impl, top_grad,
                       best_bboxes, spatial_scale, points, bottom_grad);
}

// Public entry point for rotated feature alignment (forward).
// Pure forwarding wrapper: note that the argument order differs from the
// implementation's -- `output` is the third parameter here but the last
// argument of rotated_feature_align_forward_impl.
void rotated_feature_align_forward(const Tensor features,
                                   const Tensor best_bboxes, Tensor output,
                                   const float spatial_scale,
                                   const int points) {
  rotated_feature_align_forward_impl(features, best_bboxes, spatial_scale,
                                     points, output);
}

// Public entry point for rotated feature alignment (backward).
// Pure forwarding wrapper: `bottom_grad` is the third parameter here but is
// passed as the last argument of rotated_feature_align_backward_impl.
void rotated_feature_align_backward(const Tensor top_grad,
                                    const Tensor best_bboxes,
                                    Tensor bottom_grad,
                                    const float spatial_scale,
                                    const int points) {
  rotated_feature_align_backward_impl(top_grad, best_bboxes, spatial_scale,
                                      points, bottom_grad);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/rotated_feature_align_parrots.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include <parrots/compute/aten.hpp>
#include <parrots/extension.hpp>
#include <parrots/foundation/ssattrs.hpp>

#include "rotated_feature_align_pytorch.h"
using namespace parrots;

#ifdef MMCV_WITH_CUDA
// Parrots adapter for the rotated feature alignment forward op.
//
// Reads the attributes "spatial_scale" (float) and "points" (int), wraps the
// two inputs (features, best_bboxes) and the single output as ATen tensors,
// and calls rotated_feature_align_forward.
void rotated_feature_align_forward_cuda_parrots(
    CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins,
    OperatorBase::out_list_t& outs) {
  float scale;
  int num_points;
  SSAttrs(attr)
      .get<float>("spatial_scale", scale)
      .get<int>("points", num_points)
      .done();

  // Inputs, in registration order.
  auto in_features = buildATensor(ctx, ins[0]);
  auto in_best_bboxes = buildATensor(ctx, ins[1]);
  // Sole output: the aligned features.
  auto out_features = buildATensor(ctx, outs[0]);

  rotated_feature_align_forward(in_features, in_best_bboxes, out_features,
                                scale, num_points);
}

// Parrots adapter for the rotated feature alignment backward op.
//
// Reads the attributes "spatial_scale" (float) and "points" (int), wraps the
// two inputs (incoming gradient, best_bboxes) and the single output (gradient
// to propagate) as ATen tensors, and calls rotated_feature_align_backward.
void rotated_feature_align_backward_cuda_parrots(
    CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins,
    OperatorBase::out_list_t& outs) {
  float scale;
  int num_points;
  SSAttrs(attr)
      .get<float>("spatial_scale", scale)
      .get<int>("points", num_points)
      .done();

  // Inputs, in registration order.
  auto top_grad = buildATensor(ctx, ins[0]);
  auto in_best_bboxes = buildATensor(ctx, ins[1]);
  // Sole output: gradient written by the callee.
  auto bottom_grad = buildATensor(ctx, outs[0]);

  rotated_feature_align_backward(top_grad, in_best_bboxes, bottom_grad, scale,
                                 num_points);
}

// Registration of rotated_feature_align_forward with Parrots: attributes
// "spatial_scale" and "points", 2 inputs and 1 output, handled by
// rotated_feature_align_forward_cuda_parrots.
PARROTS_EXTENSION_REGISTER(rotated_feature_align_forward)
    .attr("spatial_scale")
    .attr("points")
    .input(2)
    .output(1)
    .apply(rotated_feature_align_forward_cuda_parrots)
    .done();

// Registration of rotated_feature_align_backward with Parrots: attributes
// "spatial_scale" and "points", 2 inputs and 1 output, handled by
// rotated_feature_align_backward_cuda_parrots.
PARROTS_EXTENSION_REGISTER(rotated_feature_align_backward)
    .attr("spatial_scale")
    .attr("points")
    .input(2)
    .output(1)
    .apply(rotated_feature_align_backward_cuda_parrots)
    .done();

#endif


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/rotated_feature_align_pytorch.h
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef ROTATED_FEATURE_ALIGN_PYTORCH_H
#define ROTATED_FEATURE_ALIGN_PYTORCH_H
#include <torch/extension.h>
using namespace at;

// Forward pass of rotated feature alignment; `output` is a caller-provided
// tensor that receives the result.
void rotated_feature_align_forward(const Tensor features,
                                   const Tensor best_bboxes, Tensor output,
                                   const float spatial_scale, const int points);

// Backward pass of rotated feature alignment; `bottom_grad` is a
// caller-provided tensor that receives the result.
void rotated_feature_align_backward(const Tensor top_grad,
                                    const Tensor best_bboxes,
                                    Tensor bottom_grad,
                                    const float spatial_scale,
                                    const int points);

#endif  // ROTATED_FEATURE_ALIGN_PYTORCH_H


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/sync_bn.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Device-dispatch entry for the SyncBN forward "mean" step.
// Passes its arguments unchanged to DISPATCH_DEVICE_IMPL, keyed by this
// function's own name (macro defined in pytorch_device_registry.hpp, not
// visible here -- presumably dispatches on tensor device; confirm).
void sync_bn_forward_mean_impl(const Tensor input, Tensor mean) {
  DISPATCH_DEVICE_IMPL(sync_bn_forward_mean_impl, input, mean);
}

// Device-dispatch entry for the SyncBN forward "variance" step.
// Passes its arguments unchanged to DISPATCH_DEVICE_IMPL, keyed by this
// function's own name (macro defined in pytorch_device_registry.hpp, not
// visible here -- presumably dispatches on tensor device; confirm).
void sync_bn_forward_var_impl(const Tensor input, const Tensor mean,
                              Tensor var) {
  DISPATCH_DEVICE_IMPL(sync_bn_forward_var_impl, input, mean, var);
}

// Device-dispatch entry for the SyncBN forward "output" step.
// Passes its arguments unchanged to DISPATCH_DEVICE_IMPL, keyed by this
// function's own name (macro defined in pytorch_device_registry.hpp, not
// visible here -- presumably dispatches on tensor device; confirm).
// Note the parameter order: running_mean/running_var come BEFORE weight/bias
// here, unlike in the public sync_bn_forward_output wrapper.
void sync_bn_forward_output_impl(const Tensor input, const Tensor mean,
                                 const Tensor var, Tensor running_mean,
                                 Tensor running_var, const Tensor weight,
                                 const Tensor bias, Tensor norm, Tensor std,
                                 Tensor output, float eps, float momentum,
                                 int group_size) {
  DISPATCH_DEVICE_IMPL(sync_bn_forward_output_impl, input, mean, var,
                       running_mean, running_var, weight, bias, norm, std,
                       output, eps, momentum, group_size);
}

// Device-dispatch entry for the SyncBN backward "parameter gradient" step.
// Passes its arguments unchanged to DISPATCH_DEVICE_IMPL, keyed by this
// function's own name (macro defined in pytorch_device_registry.hpp, not
// visible here -- presumably dispatches on tensor device; confirm).
void sync_bn_backward_param_impl(const Tensor grad_output, const Tensor norm,
                                 Tensor grad_weight, Tensor grad_bias) {
  DISPATCH_DEVICE_IMPL(sync_bn_backward_param_impl, grad_output, norm,
                       grad_weight, grad_bias);
}

// Device-dispatch entry for the SyncBN backward "data gradient" step.
// Passes its arguments unchanged to DISPATCH_DEVICE_IMPL, keyed by this
// function's own name (macro defined in pytorch_device_registry.hpp, not
// visible here -- presumably dispatches on tensor device; confirm).
void sync_bn_backward_data_impl(const Tensor grad_output, const Tensor weight,
                                const Tensor grad_weight,
                                const Tensor grad_bias, const Tensor norm,
                                const Tensor std, Tensor grad_input) {
  DISPATCH_DEVICE_IMPL(sync_bn_backward_data_impl, grad_output, weight,
                       grad_weight, grad_bias, norm, std, grad_input);
}

// Public entry point for the SyncBN forward "mean" step; forwards directly to
// sync_bn_forward_mean_impl. `mean` is the caller-provided result tensor.
void sync_bn_forward_mean(const Tensor input, Tensor mean) {
  sync_bn_forward_mean_impl(input, mean);
}

// Public entry point for the SyncBN forward "variance" step; forwards
// directly to sync_bn_forward_var_impl. `var` is the caller-provided result
// tensor.
void sync_bn_forward_var(const Tensor input, const Tensor mean, Tensor var) {
  sync_bn_forward_var_impl(input, mean, var);
}

// Public entry point for the SyncBN forward "output" step.
// Forwards to sync_bn_forward_output_impl, REORDERING the arguments: this
// wrapper takes (weight, bias) before (running_mean, running_var), while the
// implementation expects (running_mean, running_var) before (weight, bias).
void sync_bn_forward_output(const Tensor input, const Tensor mean,
                            const Tensor var, const Tensor weight,
                            const Tensor bias, Tensor running_mean,
                            Tensor running_var, Tensor norm, Tensor std,
                            Tensor output, float eps, float momentum,
                            int group_size) {
  sync_bn_forward_output_impl(input, mean, var, running_mean, running_var,
                              weight, bias, norm, std, output, eps, momentum,
                              group_size);
}

// Public entry point for the SyncBN backward "parameter gradient" step;
// forwards directly to sync_bn_backward_param_impl. grad_weight and grad_bias
// are the caller-provided result tensors.
void sync_bn_backward_param(const Tensor grad_output, const Tensor norm,
                            Tensor grad_weight, Tensor grad_bias) {
  sync_bn_backward_param_impl(grad_output, norm, grad_weight, grad_bias);
}

// Public entry point for the SyncBN backward "data gradient" step; forwards
// directly to sync_bn_backward_data_impl. grad_input is the caller-provided
// result tensor.
void sync_bn_backward_data(const Tensor grad_output, const Tensor weight,
                           const Tensor grad_weight, const Tensor grad_bias,
                           const Tensor norm, const Tensor std,
                           Tensor grad_input) {
  sync_bn_backward_data_impl(grad_output, weight, grad_weight, grad_bias, norm,
                             std, grad_input);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/sync_bn_parrots.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include <parrots/compute/aten.hpp>
#include <parrots/extension.hpp>
#include <parrots/foundation/ssattrs.hpp>

#include "sync_bn_pytorch.h"
using namespace parrots;

#ifdef MMCV_WITH_CUDA
// Parrots adapter for the SyncBN forward "mean" step.
// Wraps the single input and single output as ATen tensors and calls
// sync_bn_forward_mean_cuda, which writes into the output tensor.
void sync_bn_forward_mean_cuda_parrots(CudaContext& ctx, const SSElement& attr,
                                       const OperatorBase::in_list_t& ins,
                                       OperatorBase::out_list_t& outs) {
  const auto in_data = buildATensor(ctx, ins[0]);
  auto out_mean = buildATensor(ctx, outs[0]);

  sync_bn_forward_mean_cuda(in_data, out_mean);
}

// Parrots adapter for the SyncBN forward "variance" step.
// Wraps the two inputs (input, mean) and the single output (var) as ATen
// tensors and calls sync_bn_forward_var_cuda.
void sync_bn_forward_var_cuda_parrots(CudaContext& ctx, const SSElement& attr,
                                      const OperatorBase::in_list_t& ins,
                                      OperatorBase::out_list_t& outs) {
  const auto in_data = buildATensor(ctx, ins[0]);
  const auto in_mean = buildATensor(ctx, ins[1]);
  auto out_var = buildATensor(ctx, outs[0]);

  sync_bn_forward_var_cuda(in_data, in_mean, out_var);
}

// Parrots adapter for the SyncBN forward "output" step.
//
// Reads the attributes "eps" (float), "momentum" (float) and "group_size"
// (size_t), wraps five inputs and five outputs as ATen tensors, and calls
// sync_bn_forward_output_cuda.
//
// The callee's parameter order interleaves inputs and outputs: the running
// statistics (outs[0], outs[1]) are passed before weight/bias (ins[3],
// ins[4]).
void sync_bn_forward_output_cuda_parrots(CudaContext& ctx,
                                         const SSElement& attr,
                                         const OperatorBase::in_list_t& ins,
                                         OperatorBase::out_list_t& outs) {
  float epsilon, momentum_value;
  size_t group_count;
  SSAttrs(attr)
      .get<float>("eps", epsilon)
      .get<float>("momentum", momentum_value)
      .get<size_t>("group_size", group_count)
      .done();

  // Inputs, in registration order.
  const auto in_data = buildATensor(ctx, ins[0]);
  const auto in_mean = buildATensor(ctx, ins[1]);
  const auto in_var = buildATensor(ctx, ins[2]);
  const auto in_weight = buildATensor(ctx, ins[3]);
  const auto in_bias = buildATensor(ctx, ins[4]);

  // Outputs, in registration order.
  auto out_running_mean = buildATensor(ctx, outs[0]);
  auto out_running_var = buildATensor(ctx, outs[1]);
  auto out_norm = buildATensor(ctx, outs[2]);
  auto out_std = buildATensor(ctx, outs[3]);
  auto out_result = buildATensor(ctx, outs[4]);

  // The callee takes group_size as int; the narrowing from size_t that the
  // call performs is spelled out here.
  sync_bn_forward_output_cuda(in_data, in_mean, in_var, out_running_mean,
                              out_running_var, in_weight, in_bias, out_norm,
                              out_std, out_result, epsilon, momentum_value,
                              static_cast<int>(group_count));
}

// Parrots adapter for the SyncBN backward "parameter gradient" step.
// Wraps the two inputs (grad_output, norm) and the two outputs (grad_weight,
// grad_bias) as ATen tensors and calls sync_bn_backward_param_cuda.
void sync_bn_backward_param_cuda_parrots(CudaContext& ctx,
                                         const SSElement& attr,
                                         const OperatorBase::in_list_t& ins,
                                         OperatorBase::out_list_t& outs) {
  const auto in_grad_output = buildATensor(ctx, ins[0]);
  const auto in_norm = buildATensor(ctx, ins[1]);

  auto out_grad_weight = buildATensor(ctx, outs[0]);
  auto out_grad_bias = buildATensor(ctx, outs[1]);

  sync_bn_backward_param_cuda(in_grad_output, in_norm, out_grad_weight,
                              out_grad_bias);
}

// Parrots adapter for the SyncBN backward "data gradient" step.
// Wraps the six inputs (grad_output, weight, grad_weight, grad_bias, norm,
// std) and the single output (grad_input) as ATen tensors and calls
// sync_bn_backward_data_cuda.
void sync_bn_backward_data_cuda_parrots(CudaContext& ctx, const SSElement& attr,
                                        const OperatorBase::in_list_t& ins,
                                        OperatorBase::out_list_t& outs) {
  // Inputs, in registration order.
  const auto in_grad_output = buildATensor(ctx, ins[0]);
  const auto in_weight = buildATensor(ctx, ins[1]);
  const auto in_grad_weight = buildATensor(ctx, ins[2]);
  const auto in_grad_bias = buildATensor(ctx, ins[3]);
  const auto in_norm = buildATensor(ctx, ins[4]);
  const auto in_std = buildATensor(ctx, ins[5]);

  // Sole output: gradient with respect to the layer input.
  auto out_grad_input = buildATensor(ctx, outs[0]);

  sync_bn_backward_data_cuda(in_grad_output, in_weight, in_grad_weight,
                             in_grad_bias, in_norm, in_std, out_grad_input);
}

// Registration of the five SyncBN ops with Parrots. For each op the
// input/output counts must match the ins[]/outs[] indices read by the adapter
// passed to .apply().

// Forward mean: 1 input, 1 output.
PARROTS_EXTENSION_REGISTER(sync_bn_forward_mean)
    .input(1)
    .output(1)
    .apply(sync_bn_forward_mean_cuda_parrots)
    .done();

// Forward variance: 2 inputs, 1 output.
PARROTS_EXTENSION_REGISTER(sync_bn_forward_var)
    .input(2)
    .output(1)
    .apply(sync_bn_forward_var_cuda_parrots)
    .done();

// Forward output: attributes "eps", "momentum", "group_size"; 5 inputs,
// 5 outputs.
PARROTS_EXTENSION_REGISTER(sync_bn_forward_output)
    .attr("eps")
    .attr("momentum")
    .attr("group_size")
    .input(5)
    .output(5)
    .apply(sync_bn_forward_output_cuda_parrots)
    .done();

// Backward parameter gradients: 2 inputs, 2 outputs.
PARROTS_EXTENSION_REGISTER(sync_bn_backward_param)
    .input(2)
    .output(2)
    .apply(sync_bn_backward_param_cuda_parrots)
    .done();

// Backward data gradient: 6 inputs, 1 output.
PARROTS_EXTENSION_REGISTER(sync_bn_backward_data)
    .input(6)
    .output(1)
    .apply(sync_bn_backward_data_cuda_parrots)
    .done();
#endif


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/sync_bn_pytorch.h
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef SYNC_BN_PYTORCH_H
#define SYNC_BN_PYTORCH_H
#include <torch/extension.h>
using namespace at;

// SyncBN forward "mean" step; `mean` is the caller-provided result tensor.
void sync_bn_forward_mean_cuda(const Tensor input, Tensor mean);

// SyncBN forward "variance" step; `var` is the caller-provided result tensor.
void sync_bn_forward_var_cuda(const Tensor input, const Tensor mean,
                              Tensor var);

// SyncBN forward "output" step. Non-const tensors (running_mean, running_var,
// norm, std, output) are supplied by the Parrots adapter from its output
// list. Note that running_mean/running_var precede weight/bias in this
// signature.
void sync_bn_forward_output_cuda(const Tensor input, const Tensor mean,
                                 const Tensor var, Tensor running_mean,
                                 Tensor running_var, const Tensor weight,
                                 const Tensor bias, Tensor norm, Tensor std,
                                 Tensor output, float eps, float momentum,
                                 int group_size);

// SyncBN backward "parameter gradient" step; grad_weight and grad_bias are
// the caller-provided result tensors.
void sync_bn_backward_param_cuda(const Tensor grad_output, const Tensor norm,
                                 Tensor grad_weight, Tensor grad_bias);

// SyncBN backward "data gradient" step; grad_input is the caller-provided
// result tensor.
void sync_bn_backward_data_cuda(const Tensor grad_output, const Tensor weight,
                                const Tensor grad_weight,
                                const Tensor grad_bias, const Tensor norm,
                                const Tensor std, Tensor grad_input);
#endif  // SYNC_BN_PYTORCH_H


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/three_interpolate.cpp
================================================
// Modified from
// https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/interpolate.cpp

#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Device-dispatch entry for three-point interpolation (forward).
// Passes its arguments unchanged to DISPATCH_DEVICE_IMPL, keyed by this
// function's own name (macro defined in pytorch_device_registry.hpp, not
// visible here -- presumably dispatches on tensor device; confirm).
// Scalar order here is (b, c, m, n); the backward variant uses (b, c, n, m).
void three_interpolate_forward_impl(int b, int c, int m, int n,
                                    const Tensor points, const Tensor idx,
                                    const Tensor weight, Tensor out) {
  DISPATCH_DEVICE_IMPL(three_interpolate_forward_impl, b, c, m, n, points, idx,
                       weight, out);
}

// Device-dispatch entry for three-point interpolation (backward).
// Passes its arguments unchanged to DISPATCH_DEVICE_IMPL, keyed by this
// function's own name (macro defined in pytorch_device_registry.hpp, not
// visible here -- presumably dispatches on tensor device; confirm).
// Scalar order here is (b, c, n, m); the forward variant uses (b, c, m, n).
void three_interpolate_backward_impl(int b, int c, int n, int m,
                                     const Tensor grad_out, const Tensor idx,
                                     const Tensor weight, Tensor grad_points) {
  DISPATCH_DEVICE_IMPL(three_interpolate_backward_impl, b, c, n, m, grad_out,
                       idx, weight, grad_points);
}

// Public entry point for three-point interpolation (forward).
// Pure forwarding wrapper: the tensors come first and the scalars last in
// this signature, whereas three_interpolate_forward_impl takes the scalars
// (b, c, m, n) first. out_tensor is the caller-provided result tensor.
void three_interpolate_forward(Tensor points_tensor, Tensor idx_tensor,
                               Tensor weight_tensor, Tensor out_tensor, int b,
                               int c, int m, int n) {
  three_interpolate_forward_impl(b, c, m, n, points_tensor, idx_tensor,
                                 weight_tensor, out_tensor);
}

// Public entry point for three-point interpolation (backward).
// Pure forwarding wrapper: the tensors come first and the scalars last in
// this signature, whereas three_interpolate_backward_impl takes the scalars
// (b, c, n, m) first. grad_points_tensor is the caller-provided result tensor.
void three_interpolate_backward(Tensor grad_out_tensor, Tensor idx_tensor,
                                Tensor weight_tensor, Tensor grad_points_tensor,
                                int b, int c, int n, int m) {
  three_interpolate_backward_impl(b, c, n, m, grad_out_tensor, idx_tensor,
                                  weight_tensor, grad_points_tensor);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/three_interpolate_parrots.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include <parrots/compute/aten.hpp>
#include <parrots/extension.hpp>
#include <parrots/foundation/ssattrs.hpp>

#include "three_interpolate_pytorch.h"

using namespace parrots;

#ifdef MMCV_WITH_CUDA
// Parrots adapter for the three-point interpolation forward op.
//
// Reads the integer attributes "b", "c", "m" and "n", wraps the three inputs
// (points, idx, weight) and the single output as ATen tensors, and calls
// three_interpolate_forward.
void three_interpolate_forward_cuda_parrots(CudaContext& ctx,
                                            const SSElement& attr,
                                            const OperatorBase::in_list_t& ins,
                                            OperatorBase::out_list_t& outs) {
  int attr_b, attr_c, attr_m, attr_n;
  SSAttrs(attr)
      .get<int>("b", attr_b)
      .get<int>("c", attr_c)
      .get<int>("m", attr_m)
      .get<int>("n", attr_n)
      .done();

  // Inputs, in registration order.
  auto in_points = buildATensor(ctx, ins[0]);
  auto in_idx = buildATensor(ctx, ins[1]);
  auto in_weight = buildATensor(ctx, ins[2]);

  // Sole output: the interpolated features.
  auto out_result = buildATensor(ctx, outs[0]);

  three_interpolate_forward(in_points, in_idx, in_weight, out_result, attr_b,
                            attr_c, attr_m, attr_n);
}

// Parrots adapter for the three-point interpolation backward op.
//
// Reads the integer attributes "b", "c", "n" and "m" (note: n before m,
// unlike the forward op), wraps the three inputs (grad_out, idx, weight) and
// the single output (grad_points) as ATen tensors, and calls
// three_interpolate_backward.
void three_interpolate_backward_cuda_parrots(CudaContext& ctx,
                                             const SSElement& attr,
                                             const OperatorBase::in_list_t& ins,
                                             OperatorBase::out_list_t& outs) {
  int attr_b, attr_c, attr_n, attr_m;
  SSAttrs(attr)
      .get<int>("b", attr_b)
      .get<int>("c", attr_c)
      .get<int>("n", attr_n)
      .get<int>("m", attr_m)
      .done();

  // Inputs, in registration order.
  auto in_grad_out = buildATensor(ctx, ins[0]);
  auto in_idx = buildATensor(ctx, ins[1]);
  auto in_weight = buildATensor(ctx, ins[2]);

  // Sole output: gradient with respect to the source points.
  auto out_grad_points = buildATensor(ctx, outs[0]);

  three_interpolate_backward(in_grad_out, in_idx, in_weight, out_grad_points,
                             attr_b, attr_c, attr_n, attr_m);
}

// Registration of three_interpolate_forward with Parrots: attributes "b",
// "c", "m", "n"; 3 inputs and 1 output; handled by
// three_interpolate_forward_cuda_parrots.
PARROTS_EXTENSION_REGISTER(three_interpolate_forward)
    .attr("b")
    .attr("c")
    .attr("m")
    .attr("n")
    .input(3)
    .output(1)
    .apply(three_interpolate_forward_cuda_parrots)
    .done();

// Registration of three_interpolate_backward with Parrots: attributes "b",
// "c", "n", "m"; 3 inputs and 1 output; handled by
// three_interpolate_backward_cuda_parrots.
PARROTS_EXTENSION_REGISTER(three_interpolate_backward)
    .attr("b")
    .attr("c")
    .attr("n")
    .attr("m")
    .input(3)
    .output(1)
    .apply(three_interpolate_backward_cuda_parrots)
    .done();
#endif


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/three_interpolate_pytorch.h
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef THREE_INTERPOLATE_PYTORCH_H
#define THREE_INTERPOLATE_PYTORCH_H
#include <torch/extension.h>
using namespace at;

// Forward pass of three-point interpolation; out_tensor is the
// caller-provided result tensor. Scalars are ordered (b, c, m, n).
void three_interpolate_forward(Tensor points_tensor, Tensor idx_tensor,
                               Tensor weight_tensor, Tensor out_tensor, int b,
                               int c, int m, int n);

// Backward pass of three-point interpolation; grad_points_tensor is the
// caller-provided result tensor. Scalars are ordered (b, c, n, m).
void three_interpolate_backward(Tensor grad_out_tensor, Tensor idx_tensor,
                                Tensor weight_tensor, Tensor grad_points_tensor,
                                int b, int c, int n, int m);
#endif  // THREE_INTERPOLATE_PYTORCH_H


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/three_nn.cpp
================================================
// Modified from
// https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/interpolate.cpp

#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Device-dispatch entry for the three-nearest-neighbour search (forward).
// Passes its arguments unchanged to DISPATCH_DEVICE_IMPL, keyed by this
// function's own name (macro defined in pytorch_device_registry.hpp, not
// visible here -- presumably dispatches on tensor device; confirm).
void three_nn_forward_impl(int b, int n, int m, const Tensor unknown,
                           const Tensor known, Tensor dist2, Tensor idx) {
  DISPATCH_DEVICE_IMPL(three_nn_forward_impl, b, n, m, unknown, known, dist2,
                       idx);
}

// Public entry point for the three-nearest-neighbour search (forward).
// Pure forwarding wrapper: tensors come first and scalars last in this
// signature, whereas three_nn_forward_impl takes the scalars (b, n, m) first.
// dist2_tensor and idx_tensor are the caller-provided result tensors.
void three_nn_forward(Tensor unknown_tensor, Tensor known_tensor,
                      Tensor dist2_tensor, Tensor idx_tensor, int b, int n,
                      int m) {
  three_nn_forward_impl(b, n, m, unknown_tensor, known_tensor, dist2_tensor,
                        idx_tensor);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/three_nn_parrots.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include <parrots/compute/aten.hpp>
#include <parrots/extension.hpp>
#include <parrots/foundation/ssattrs.hpp>

#include "three_nn_pytorch.h"

using namespace parrots;

#ifdef MMCV_WITH_CUDA
// Parrots adapter for the three-nearest-neighbour forward op.
//
// Reads the integer attributes "b", "n" and "m", wraps the two inputs
// (unknown, known) and the two outputs (dist2, idx) as ATen tensors, and
// calls three_nn_forward.
void three_nn_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr,
                                   const OperatorBase::in_list_t& ins,
                                   OperatorBase::out_list_t& outs) {
  int attr_b, attr_n, attr_m;
  SSAttrs(attr)
      .get<int>("b", attr_b)
      .get<int>("n", attr_n)
      .get<int>("m", attr_m)
      .done();

  // Inputs, in registration order.
  auto in_unknown = buildATensor(ctx, ins[0]);
  auto in_known = buildATensor(ctx, ins[1]);

  // Outputs, in registration order.
  auto out_dist2 = buildATensor(ctx, outs[0]);
  auto out_idx = buildATensor(ctx, outs[1]);

  three_nn_forward(in_unknown, in_known, out_dist2, out_idx, attr_b, attr_n,
                   attr_m);
}

// Registration of three_nn_forward with Parrots: attributes "b", "n", "m";
// 2 inputs and 2 outputs; handled by three_nn_forward_cuda_parrots.
PARROTS_EXTENSION_REGISTER(three_nn_forward)
    .attr("b")
    .attr("n")
    .attr("m")
    .input(2)
    .output(2)
    .apply(three_nn_forward_cuda_parrots)
    .done();
#endif


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/three_nn_pytorch.h
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef THREE_NN_PYTORCH_H
#define THREE_NN_PYTORCH_H
#include <torch/extension.h>
using namespace at;

// Forward pass of the three-nearest-neighbour search; dist2_tensor and
// idx_tensor are the caller-provided result tensors.
void three_nn_forward(Tensor unknown_tensor, Tensor known_tensor,
                      Tensor dist2_tensor, Tensor idx_tensor, int b, int n,
                      int m);
#endif  // THREE_NN_PYTORCH_H


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/tin_shift.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Device-dispatch entry for TIN shift (forward).
// Passes its arguments unchanged to DISPATCH_DEVICE_IMPL, keyed by this
// function's own name (macro defined in pytorch_device_registry.hpp, not
// visible here -- presumably dispatches on tensor device; confirm).
void tin_shift_forward_impl(Tensor input, Tensor shift, Tensor output) {
  DISPATCH_DEVICE_IMPL(tin_shift_forward_impl, input, shift, output);
}

// Device-dispatch entry for TIN shift (backward).
// Passes its arguments unchanged to DISPATCH_DEVICE_IMPL, keyed by this
// function's own name (macro defined in pytorch_device_registry.hpp, not
// visible here -- presumably dispatches on tensor device; confirm).
void tin_shift_backward_impl(Tensor grad_output, Tensor shift,
                             Tensor grad_input) {
  DISPATCH_DEVICE_IMPL(tin_shift_backward_impl, grad_output, shift, grad_input);
}

// Public entry point for TIN shift (forward); forwards directly to
// tin_shift_forward_impl. `output` is the caller-provided result tensor.
void tin_shift_forward(Tensor input, Tensor shift, Tensor output) {
  tin_shift_forward_impl(input, shift, output);
}

// Public entry point for TIN shift (backward); forwards directly to
// tin_shift_backward_impl. `grad_input` is the caller-provided result tensor.
void tin_shift_backward(Tensor grad_output, Tensor shift, Tensor grad_input) {
  tin_shift_backward_impl(grad_output, shift, grad_input);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/tin_shift_parrots.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include <parrots/compute/aten.hpp>
#include <parrots/extension.hpp>
#include <parrots/foundation/ssattrs.hpp>

#include "tin_shift_pytorch.h"
using namespace parrots;

#ifdef MMCV_WITH_CUDA
// Parrots adapter for the TIN shift forward op.
// Wraps the two inputs (input, shift) and the single output as ATen tensors
// and calls tin_shift_forward_cuda, which writes into the output tensor.
void tin_shift_forward_cuda_parrots(CudaContext &ctx, const SSElement &attr,
                                    const OperatorBase::in_list_t &ins,
                                    OperatorBase::out_list_t &outs) {
  const auto in_data = buildATensor(ctx, ins[0]);
  const auto in_shift = buildATensor(ctx, ins[1]);
  auto out_data = buildATensor(ctx, outs[0]);

  tin_shift_forward_cuda(in_data, in_shift, out_data);
}

// Parrots adapter for the TIN shift backward op.
// Wraps the two inputs (grad_output, shift) and the single output
// (grad_input) as ATen tensors and calls tin_shift_backward_cuda.
void tin_shift_backward_cuda_parrots(CudaContext &ctx, const SSElement &attr,
                                     const OperatorBase::in_list_t &ins,
                                     OperatorBase::out_list_t &outs) {
  const auto in_grad_output = buildATensor(ctx, ins[0]);
  const auto in_shift = buildATensor(ctx, ins[1]);
  auto out_grad_input = buildATensor(ctx, outs[0]);

  tin_shift_backward_cuda(in_grad_output, in_shift, out_grad_input);
}

// Registration of tin_shift_forward with Parrots: no attributes, 2 inputs and
// 1 output, handled by tin_shift_forward_cuda_parrots.
PARROTS_EXTENSION_REGISTER(tin_shift_forward)
    .input(2)
    .output(1)
    .apply(tin_shift_forward_cuda_parrots)
    .done();

// Registration of tin_shift_backward with Parrots: no attributes, 2 inputs
// and 1 output, handled by tin_shift_backward_cuda_parrots.
PARROTS_EXTENSION_REGISTER(tin_shift_backward)
    .input(2)
    .output(1)
    .apply(tin_shift_backward_cuda_parrots)
    .done();
#endif


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/tin_shift_pytorch.h
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef TIN_SHIFT_PYTORCH_H
#define TIN_SHIFT_PYTORCH_H
#include <torch/extension.h>
using namespace at;

// TIN shift forward; `output` is the caller-provided result tensor.
void tin_shift_forward_cuda(Tensor input, Tensor shift, Tensor output);

// TIN shift backward; `grad_input` is the caller-provided result tensor.
void tin_shift_backward_cuda(Tensor grad_output, Tensor shift,
                             Tensor grad_input);
#endif  // TIN_SHIFT_PYTORCH_H


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/upfirdn2d.cpp
================================================
// Modified from
// https://github.com/rosinality/stylegan2-pytorch/blob/master/op/upfirdn2d.cpp

/*
Copyright (c) 2021, NVIDIA Corporation. All rights reserved.

NVIDIA Source Code License for StyleGAN2 with Adaptive Discriminator
Augmentation (ADA)
=======================================================================

1. Definitions

"Licensor" means any person or entity that distributes its Work.

"Software" means the original work of authorship made available under
this License.

"Work" means the Software and any additions to or derivative works of
the Software that are made available under this License.

The terms "reproduce," "reproduction," "derivative works," and
"distribution" have the meaning as provided under U.S. copyright law;
provided, however, that for the purposes of this License, derivative
works shall not include works that remain separable from, or merely
link (or bind by name) to the interfaces of, the Work.

Works, including the Software, are "made available" under this License
by including in or with the Work either (a) a copyright notice
referencing the applicability of this License to the Work, or (b) a
copy of this License.

2. License Grants

    2.1 Copyright Grant. Subject to the terms and conditions of this
    License, each Licensor grants to you a perpetual, worldwide,
    non-exclusive, royalty-free, copyright license to reproduce,
    prepare derivative works of, publicly display, publicly perform,
    sublicense and distribute its Work and any resulting derivative
    works in any form.

3. Limitations

    3.1 Redistribution. You may reproduce or distribute the Work only
    if (a) you do so under this License, (b) you include a complete
    copy of this License with your distribution, and (c) you retain
    without modification any copyright, patent, trademark, or
    attribution notices that are present in the Work.

    3.2 Derivative Works. You may specify that additional or different
    terms apply to the use, reproduction, and distribution of your
    derivative works of the Work ("Your Terms") only if (a) Your Terms
    provide that the use limitation in Section 3.3 applies to your
    derivative works, and (b) you identify the specific derivative
    works that are subject to Your Terms. Notwithstanding Your Terms,
    this License (including the redistribution requirements in Section
    3.1) will continue to apply to the Work itself.

    3.3 Use Limitation. The Work and any derivative works thereof only
    may be used or intended for use non-commercially. Notwithstanding
    the foregoing, NVIDIA and its affiliates may use the Work and any
    derivative works commercially. As used herein, "non-commercially"
    means for research or evaluation purposes only.

    3.4 Patent Claims. If you bring or threaten to bring a patent claim
    against any Licensor (including any claim, cross-claim or
    counterclaim in a lawsuit) to enforce any patents that you allege
    are infringed by any Work, then your rights under this License from
    such Licensor (including the grant in Section 2.1) will terminate
    immediately.

    3.5 Trademarks. This License does not grant any rights to use any
    Licensor’s or its affiliates’ names, logos, or trademarks, except
    as necessary to reproduce the notices described in this License.

    3.6 Termination. If you violate any term of this License, then your
    rights under this License (including the grant in Section 2.1) will
    terminate immediately.

4. Disclaimer of Warranty.

THE WORK IS PROVIDED "AS IS" WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR
NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER
THIS LICENSE.

5. Limitation of Liability.

EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL
THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE
SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT,
INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF
OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK
(INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION,
LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER
COMMERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF
THE POSSIBILITY OF SUCH DAMAGES.

=======================================================================
*/

#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Device-dispatch stub for upfirdn2d. Every argument is handed unchanged to
// DISPATCH_DEVICE_IMPL under this function's own name, and the macro's result
// is returned. The macro is defined in pytorch_device_registry.hpp (not shown
// here); presumably it selects the backend registered for the tensors' device
// -- confirm there.
torch::Tensor upfirdn2d_op_impl(const torch::Tensor& input,
                                const torch::Tensor& kernel, int up_x, int up_y,
                                int down_x, int down_y, int pad_x0, int pad_x1,
                                int pad_y0, int pad_y1) {
  return DISPATCH_DEVICE_IMPL(upfirdn2d_op_impl, input, kernel, up_x, up_y,
                              down_x, down_y, pad_x0, pad_x1, pad_y0, pad_y1);
}

// Public upfirdn2d entry point. A thin forwarder: passes all arguments, in the
// same order, to upfirdn2d_op_impl and returns its result.
torch::Tensor upfirdn2d(const torch::Tensor& input, const torch::Tensor& kernel,
                        int up_x, int up_y, int down_x, int down_y, int pad_x0,
                        int pad_x1, int pad_y0, int pad_y1) {
  return upfirdn2d_op_impl(input, kernel, up_x, up_y, down_x, down_y, pad_x0,
                           pad_x1, pad_y0, pad_y1);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/upfirdn2d_parrots.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include <torch/extension.h>

#include <parrots/compute/aten.hpp>
#include <parrots/extension.hpp>
#include <parrots/foundation/ssattrs.hpp>
using namespace at;
using namespace parrots;

// Forward declaration of the upfirdn2d entry point that the Parrots adapter
// below calls; the definition is in a separate translation unit.
torch::Tensor upfirdn2d(const Tensor &input, const Tensor &kernel, int up_x,
                        int up_y, int down_x, int down_y, int pad_x0,
                        int pad_x1, int pad_y0, int pad_y1);

// Parrots adapter for upfirdn2d.
// Operand layout: ins[0] = input, ins[1] = kernel, outs[0] = result.
// The eight integer settings (up/down factors and the four pads) are read from
// `attr` by name; the tensor returned by upfirdn2d is then copied into outs[0]
// through updateDArray.
void upfirdn2d_parrots(CudaContext &ctx, const SSElement &attr,
                       const OperatorBase::in_list_t &ins,
                       OperatorBase::out_list_t &outs) {
  int up_x, up_y;
  int down_x, down_y;
  int pad_x0, pad_x1, pad_y0, pad_y1;

  const auto &src = buildATensor(ctx, ins[0]);
  const auto &kernel_tensor = buildATensor(ctx, ins[1]);

  // Pull every scalar setting out of the attribute bag in one chain.
  SSAttrs(attr)
      .get("up_x", up_x)
      .get("up_y", up_y)
      .get("down_x", down_x)
      .get("down_y", down_y)
      .get("pad_x0", pad_x0)
      .get("pad_x1", pad_x1)
      .get("pad_y0", pad_y0)
      .get("pad_y1", pad_y1)
      .done();

  auto result = upfirdn2d(src, kernel_tensor, up_x, up_y, down_x, down_y,
                          pad_x0, pad_x1, pad_y0, pad_y1);
  updateDArray(ctx, result, outs[0]);
}

// Registers the Parrots op `upfirdn2d`: eight named attributes (the same names
// upfirdn2d_parrots reads), two inputs, one output, implemented by
// upfirdn2d_parrots.
PARROTS_EXTENSION_REGISTER(upfirdn2d)
    .attr("up_x")
    .attr("up_y")
    .attr("down_x")
    .attr("down_y")
    .attr("pad_x0")
    .attr("pad_x1")
    .attr("pad_y0")
    .attr("pad_y1")
    .input(2)
    .output(1)
    .apply(upfirdn2d_parrots)
    .done();


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/voxelization.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved.
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Device-dispatch stub for hard voxelization. Every argument is handed
// unchanged to DISPATCH_DEVICE_IMPL under this function's own name, and the
// macro's int result is returned (the caller in this file stores it as the
// voxel count). The macro is defined in pytorch_device_registry.hpp (not shown
// here); presumably it selects the backend registered for the tensors' device
// -- confirm there.
int hard_voxelize_forward_impl(const at::Tensor &points, at::Tensor &voxels,
                               at::Tensor &coors,
                               at::Tensor &num_points_per_voxel,
                               const std::vector<float> voxel_size,
                               const std::vector<float> coors_range,
                               const int max_points, const int max_voxels,
                               const int NDim = 3) {
  return DISPATCH_DEVICE_IMPL(hard_voxelize_forward_impl, points, voxels, coors,
                              num_points_per_voxel, voxel_size, coors_range,
                              max_points, max_voxels, NDim);
}

// Device-dispatch stub for dynamic voxelization. Every argument is handed
// unchanged to DISPATCH_DEVICE_IMPL under this function's own name. The macro
// is defined in pytorch_device_registry.hpp (not shown here); presumably it
// selects the backend registered for the tensors' device -- confirm there.
void dynamic_voxelize_forward_impl(const at::Tensor &points, at::Tensor &coors,
                                   const std::vector<float> voxel_size,
                                   const std::vector<float> coors_range,
                                   const int NDim = 3) {
  DISPATCH_DEVICE_IMPL(dynamic_voxelize_forward_impl, points, coors, voxel_size,
                       coors_range, NDim);
}

// Public entry point for hard voxelization.
//
// `voxel_size` and `coors_range` arrive as float tensors and are flattened
// into std::vector<float> (numel() elements each, read directly through
// data_ptr<float>()) before being passed to hard_voxelize_forward_impl.
// The int returned by the implementation is stored in the first int64 element
// of `voxel_num`. The remaining arguments are forwarded unchanged.
void hard_voxelize_forward(const at::Tensor &points,
                           const at::Tensor &voxel_size,
                           const at::Tensor &coors_range, at::Tensor &voxels,
                           at::Tensor &coors, at::Tensor &num_points_per_voxel,
                           at::Tensor &voxel_num, const int max_points,
                           const int max_voxels, const int NDim = 3) {
  // Destination for the voxel count reported by the implementation.
  int64_t *voxel_count_out = voxel_num.data_ptr<int64_t>();

  // Copy the raw float buffers of the two small parameter tensors.
  const float *size_begin = voxel_size.data_ptr<float>();
  std::vector<float> size_values(size_begin, size_begin + voxel_size.numel());

  const float *range_begin = coors_range.data_ptr<float>();
  std::vector<float> range_values(range_begin,
                                  range_begin + coors_range.numel());

  const int produced = hard_voxelize_forward_impl(
      points, voxels, coors, num_points_per_voxel, size_values, range_values,
      max_points, max_voxels, NDim);
  *voxel_count_out = produced;
}

// Public entry point for dynamic voxelization.
//
// `voxel_size` and `coors_range` arrive as float tensors and are flattened
// into std::vector<float> (numel() elements each, read directly through
// data_ptr<float>()) before being passed, together with the untouched
// remaining arguments, to dynamic_voxelize_forward_impl.
void dynamic_voxelize_forward(const at::Tensor &points,
                              const at::Tensor &voxel_size,
                              const at::Tensor &coors_range, at::Tensor &coors,
                              const int NDim = 3) {
  // Copy the raw float buffers of the two small parameter tensors.
  const float *size_begin = voxel_size.data_ptr<float>();
  std::vector<float> size_values(size_begin, size_begin + voxel_size.numel());

  const float *range_begin = coors_range.data_ptr<float>();
  std::vector<float> range_values(range_begin,
                                  range_begin + coors_range.numel());

  dynamic_voxelize_forward_impl(points, coors, size_values, range_values,
                                NDim);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/voxelization_parrots.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include <parrots/compute/aten.hpp>
#include <parrots/extension.hpp>
#include <parrots/foundation/ssattrs.hpp>

#include "voxelization_pytorch.h"

using namespace parrots;

#ifdef MMCV_WITH_CUDA
// Parrots CUDA adapter for hard voxelization.
// Attributes: max_points, max_voxels, NDim (all int).
// Inputs:  ins[0] = points, ins[1] = voxel_size, ins[2] = coors_range.
// Outputs: outs[0] = voxels, outs[1] = coors, outs[2] = num_points_per_voxel,
//          outs[3] = voxel_num.
void hard_voxelize_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr,
                                        const OperatorBase::in_list_t& ins,
                                        OperatorBase::out_list_t& outs) {
  int max_points;
  int max_voxels;
  int NDim;
  SSAttrs(attr)
      .get<int>("max_points", max_points)
      .get<int>("max_voxels", max_voxels)
      .get<int>("NDim", NDim)
      .done();

  // Inputs, wrapped as ATen tensors.
  const auto& points_t = buildATensor(ctx, ins[0]);
  const auto& voxel_size_t = buildATensor(ctx, ins[1]);
  const auto& coors_range_t = buildATensor(ctx, ins[2]);

  // Outputs, wrapped as ATen tensors.
  auto voxels_t = buildATensor(ctx, outs[0]);
  auto coors_t = buildATensor(ctx, outs[1]);
  auto per_voxel_count_t = buildATensor(ctx, outs[2]);
  auto voxel_num_t = buildATensor(ctx, outs[3]);

  hard_voxelize_forward(points_t, voxel_size_t, coors_range_t, voxels_t,
                        coors_t, per_voxel_count_t, voxel_num_t, max_points,
                        max_voxels, NDim);
}

// Parrots CUDA adapter for dynamic voxelization.
// Attribute: NDim (int).
// Inputs:  ins[0] = points, ins[1] = voxel_size, ins[2] = coors_range.
// Output:  outs[0] = coors.
void dynamic_voxelize_forward_cuda_parrots(CudaContext& ctx,
                                           const SSElement& attr,
                                           const OperatorBase::in_list_t& ins,
                                           OperatorBase::out_list_t& outs) {
  int NDim;
  SSAttrs(attr).get<int>("NDim", NDim).done();

  // Inputs, wrapped as ATen tensors.
  const auto& points_t = buildATensor(ctx, ins[0]);
  const auto& voxel_size_t = buildATensor(ctx, ins[1]);
  const auto& coors_range_t = buildATensor(ctx, ins[2]);

  // Single output, wrapped as an ATen tensor.
  auto coors_t = buildATensor(ctx, outs[0]);

  dynamic_voxelize_forward(points_t, voxel_size_t, coors_range_t, coors_t,
                           NDim);
}
#endif

// Parrots host (CPU) adapter for hard voxelization.
// Attributes: max_points, max_voxels, NDim (all int).
// Inputs:  ins[0] = points, ins[1] = voxel_size, ins[2] = coors_range.
// Outputs: outs[0] = voxels, outs[1] = coors, outs[2] = num_points_per_voxel,
//          outs[3] = voxel_num.
void hard_voxelize_forward_cpu_parrots(HostContext& ctx, const SSElement& attr,
                                       const OperatorBase::in_list_t& ins,
                                       OperatorBase::out_list_t& outs) {
  int max_points;
  int max_voxels;
  int NDim;
  SSAttrs(attr)
      .get<int>("max_points", max_points)
      .get<int>("max_voxels", max_voxels)
      .get<int>("NDim", NDim)
      .done();

  // Inputs, wrapped as ATen tensors.
  const auto& points_t = buildATensor(ctx, ins[0]);
  const auto& voxel_size_t = buildATensor(ctx, ins[1]);
  const auto& coors_range_t = buildATensor(ctx, ins[2]);

  // Outputs, wrapped as ATen tensors.
  auto voxels_t = buildATensor(ctx, outs[0]);
  auto coors_t = buildATensor(ctx, outs[1]);
  auto per_voxel_count_t = buildATensor(ctx, outs[2]);
  auto voxel_num_t = buildATensor(ctx, outs[3]);

  hard_voxelize_forward(points_t, voxel_size_t, coors_range_t, voxels_t,
                        coors_t, per_voxel_count_t, voxel_num_t, max_points,
                        max_voxels, NDim);
}

// Parrots host (CPU) adapter for dynamic voxelization.
// Attribute: NDim (int).
// Inputs:  ins[0] = points, ins[1] = voxel_size, ins[2] = coors_range.
// Output:  outs[0] = coors.
void dynamic_voxelize_forward_cpu_parrots(HostContext& ctx,
                                          const SSElement& attr,
                                          const OperatorBase::in_list_t& ins,
                                          OperatorBase::out_list_t& outs) {
  int NDim;
  SSAttrs(attr).get<int>("NDim", NDim).done();

  // Inputs, wrapped as ATen tensors.
  const auto& points_t = buildATensor(ctx, ins[0]);
  const auto& voxel_size_t = buildATensor(ctx, ins[1]);
  const auto& coors_range_t = buildATensor(ctx, ins[2]);

  // Single output, wrapped as an ATen tensor.
  auto coors_t = buildATensor(ctx, outs[0]);

  dynamic_voxelize_forward(points_t, voxel_size_t, coors_range_t, coors_t,
                           NDim);
}

// Registers the Parrots op `hard_voxelize_forward`: three attributes, three
// inputs, four outputs. The host adapter is always registered; the CUDA
// adapter is added only when MMCV_WITH_CUDA is defined.
PARROTS_EXTENSION_REGISTER(hard_voxelize_forward)
    .attr("max_points")
    .attr("max_voxels")
    .attr("NDim")
    .input(3)
    .output(4)
    .apply(hard_voxelize_forward_cpu_parrots)
#ifdef MMCV_WITH_CUDA
    .apply(hard_voxelize_forward_cuda_parrots)
#endif
    .done();

// Registers the Parrots op `dynamic_voxelize_forward`: one attribute, three
// inputs, one output. The host adapter is always registered; the CUDA adapter
// is added only when MMCV_WITH_CUDA is defined.
PARROTS_EXTENSION_REGISTER(dynamic_voxelize_forward)
    .attr("NDim")
    .input(3)
    .output(1)
    .apply(dynamic_voxelize_forward_cpu_parrots)
#ifdef MMCV_WITH_CUDA
    .apply(dynamic_voxelize_forward_cuda_parrots)
#endif
    .done();


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/parrots/voxelization_pytorch.h
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef VOXELIZATION_PYTORCH_H
#define VOXELIZATION_PYTORCH_H
#include <torch/extension.h>
using namespace at;

// Hard voxelization entry point used by the Parrots adapters. Only declared
// here; the definition lives in another translation unit. NDim defaults to 3.
void hard_voxelize_forward(const at::Tensor &points,
                           const at::Tensor &voxel_size,
                           const at::Tensor &coors_range, at::Tensor &voxels,
                           at::Tensor &coors, at::Tensor &num_points_per_voxel,
                           at::Tensor &voxel_num, const int max_points,
                           const int max_voxels, const int NDim = 3);

// Dynamic voxelization entry point used by the Parrots adapters. Only declared
// here; the definition lives in another translation unit. NDim defaults to 3.
void dynamic_voxelize_forward(const at::Tensor &points,
                              const at::Tensor &voxel_size,
                              const at::Tensor &coors_range, at::Tensor &coors,
                              const int NDim = 3);

#endif  // VOXELIZATION_PYTORCH_H


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/active_rotated_filter.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved.
// Modified from
// https://github.com/csuhan/s2anet/blob/master/mmdet/ops/orn/src/ActiveRotatingFilter.h

#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Device-dispatch stub for the active rotated filter forward pass. Every
// argument is handed unchanged to DISPATCH_DEVICE_IMPL under this function's
// own name. The macro is defined in pytorch_device_registry.hpp (not shown
// here); presumably it selects the backend registered for the tensors' device
// -- confirm there.
void active_rotated_filter_forward_impl(const Tensor input,
                                        const Tensor indices, Tensor output) {
  DISPATCH_DEVICE_IMPL(active_rotated_filter_forward_impl, input, indices,
                       output);
}

// Device-dispatch stub for the active rotated filter backward pass. Every
// argument is handed unchanged to DISPATCH_DEVICE_IMPL under this function's
// own name. The macro is defined in pytorch_device_registry.hpp (not shown
// here); presumably it selects the backend registered for the tensors' device
// -- confirm there.
void active_rotated_filter_backward_impl(const Tensor grad_out,
                                         const Tensor indices, Tensor grad_in) {
  DISPATCH_DEVICE_IMPL(active_rotated_filter_backward_impl, grad_out, indices,
                       grad_in);
}

// Public forward entry point: forwards its three tensors, in order, to
// active_rotated_filter_forward_impl.
void active_rotated_filter_forward(const Tensor input, const Tensor indices,
                                   Tensor output) {
  active_rotated_filter_forward_impl(input, indices, output);
}

// Public backward entry point: forwards its three tensors, in order, to
// active_rotated_filter_backward_impl.
void active_rotated_filter_backward(const Tensor grad_out, const Tensor indices,
                                    Tensor grad_in) {
  active_rotated_filter_backward_impl(grad_out, indices, grad_in);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/assign_score_withk.cpp
================================================
// Modified from
// https://github.com/CVMI-Lab/PAConv/tree/main/scene_seg/lib/paconv_lib/src/gpu
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Device-dispatch stub for the assign_score_withk forward pass. Every argument
// is handed unchanged to DISPATCH_DEVICE_IMPL under this function's own name.
// The macro is defined in pytorch_device_registry.hpp (not shown here);
// presumably it selects the backend registered for the tensors' device --
// confirm there.
void assign_score_withk_forward_impl(int B, int N0, int N1, int M, int K, int O,
                                     int aggregate, const Tensor& points,
                                     const Tensor& centers,
                                     const Tensor& scores,
                                     const Tensor& knn_idx, Tensor& output) {
  DISPATCH_DEVICE_IMPL(assign_score_withk_forward_impl, B, N0, N1, M, K, O,
                       aggregate, points, centers, scores, knn_idx, output);
}

// Device-dispatch stub for the assign_score_withk backward pass. Every
// argument is handed unchanged to DISPATCH_DEVICE_IMPL under this function's
// own name. The macro is defined in pytorch_device_registry.hpp (not shown
// here); presumably it selects the backend registered for the tensors' device
// -- confirm there.
void assign_score_withk_backward_impl(
    int B, int N0, int N1, int M, int K, int O, int aggregate,
    const Tensor& grad_out, const Tensor& points, const Tensor& centers,
    const Tensor& scores, const Tensor& knn_idx, Tensor& grad_points,
    Tensor& grad_centers, Tensor& grad_scores) {
  DISPATCH_DEVICE_IMPL(assign_score_withk_backward_impl, B, N0, N1, M, K, O,
                       aggregate, grad_out, points, centers, scores, knn_idx,
                       grad_points, grad_centers, grad_scores);
}

// Public forward entry point. Takes tensors first and the integer sizes last;
// the _impl function expects the integers first, so the arguments are
// reordered (not modified) before the call.
void assign_score_withk_forward(const Tensor& points, const Tensor& centers,
                                const Tensor& scores, const Tensor& knn_idx,
                                Tensor& output, int B, int N0, int N1, int M,
                                int K, int O, int aggregate) {
  assign_score_withk_forward_impl(B, N0, N1, M, K, O, aggregate, points,
                                  centers, scores, knn_idx, output);
}

// Public backward entry point. Takes tensors first and the integer sizes last;
// the _impl function expects the integers first, so the arguments are
// reordered (not modified) before the call.
void assign_score_withk_backward(const Tensor& grad_out, const Tensor& points,
                                 const Tensor& centers, const Tensor& scores,
                                 const Tensor& knn_idx, Tensor& grad_points,
                                 Tensor& grad_centers, Tensor& grad_scores,
                                 int B, int N0, int N1, int M, int K, int O,
                                 int aggregate) {
  assign_score_withk_backward_impl(B, N0, N1, M, K, O, aggregate, grad_out,
                                   points, centers, scores, knn_idx,
                                   grad_points, grad_centers, grad_scores);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/ball_query.cpp
================================================
// Modified from
// https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/ball_query.cpp

#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Device-dispatch stub for ball query. Every argument is handed unchanged to
// DISPATCH_DEVICE_IMPL under this function's own name. The macro is defined in
// pytorch_device_registry.hpp (not shown here); presumably it selects the
// backend registered for the tensors' device -- confirm there.
void ball_query_forward_impl(int b, int n, int m, float min_radius,
                             float max_radius, int nsample,
                             const Tensor new_xyz, const Tensor xyz,
                             Tensor idx) {
  DISPATCH_DEVICE_IMPL(ball_query_forward_impl, b, n, m, min_radius, max_radius,
                       nsample, new_xyz, xyz, idx);
}

// Public ball-query entry point. Takes the tensors first and the scalars last;
// the _impl function expects the scalars first, so the arguments are reordered
// (not modified) before the call.
void ball_query_forward(Tensor new_xyz_tensor, Tensor xyz_tensor,
                        Tensor idx_tensor, int b, int n, int m,
                        float min_radius, float max_radius, int nsample) {
  ball_query_forward_impl(b, n, m, min_radius, max_radius, nsample,
                          new_xyz_tensor, xyz_tensor, idx_tensor);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/bbox_overlaps.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Device-dispatch stub for bbox_overlaps. Every argument is handed unchanged
// to DISPATCH_DEVICE_IMPL under this function's own name; the meaning of
// `mode`, `aligned` and `offset` is defined by the backend implementation.
// The macro is defined in pytorch_device_registry.hpp (not shown here);
// presumably it selects the backend registered for the tensors' device --
// confirm there.
void bbox_overlaps_impl(const Tensor bboxes1, const Tensor bboxes2, Tensor ious,
                        const int mode, const bool aligned, const int offset) {
  DISPATCH_DEVICE_IMPL(bbox_overlaps_impl, bboxes1, bboxes2, ious, mode,
                       aligned, offset);
}

// Public bbox_overlaps entry point: forwards all arguments, in order, to
// bbox_overlaps_impl.
void bbox_overlaps(const Tensor bboxes1, const Tensor bboxes2, Tensor ious,
                   const int mode, const bool aligned, const int offset) {
  bbox_overlaps_impl(bboxes1, bboxes2, ious, mode, aligned, offset);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/border_align.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Device-dispatch stub for the border_align forward pass. Every argument is
// handed unchanged to DISPATCH_DEVICE_IMPL under this function's own name.
// The macro is defined in pytorch_device_registry.hpp (not shown here);
// presumably it selects the backend registered for the tensors' device --
// confirm there.
void border_align_forward_impl(const Tensor &input, const Tensor &boxes,
                               Tensor output, Tensor argmax_idx,
                               const int pool_size) {
  DISPATCH_DEVICE_IMPL(border_align_forward_impl, input, boxes, output,
                       argmax_idx, pool_size);
}

// Device-dispatch stub for the border_align backward pass. Every argument is
// handed unchanged to DISPATCH_DEVICE_IMPL under this function's own name.
// The macro is defined in pytorch_device_registry.hpp (not shown here);
// presumably it selects the backend registered for the tensors' device --
// confirm there.
void border_align_backward_impl(const Tensor &grad_output, const Tensor &boxes,
                                const Tensor &argmax_idx, Tensor grad_input,
                                const int pool_size) {
  DISPATCH_DEVICE_IMPL(border_align_backward_impl, grad_output, boxes,
                       argmax_idx, grad_input, pool_size);
}

// Public border_align forward entry point: forwards all arguments, in order,
// to border_align_forward_impl.
void border_align_forward(const Tensor &input, const Tensor &boxes,
                          Tensor output, Tensor argmax_idx,
                          const int pool_size) {
  border_align_forward_impl(input, boxes, output, argmax_idx, pool_size);
}

// Public border_align backward entry point: forwards all arguments, in order,
// to border_align_backward_impl.
void border_align_backward(const Tensor &grad_output, const Tensor &boxes,
                           const Tensor &argmax_idx, Tensor grad_input,
                           const int pool_size) {
  border_align_backward_impl(grad_output, boxes, argmax_idx, grad_input,
                             pool_size);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/box_iou_rotated.cpp
================================================
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
// modified from
// https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated.h
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Device-dispatch stub for box_iou_rotated. Every argument is handed unchanged
// to DISPATCH_DEVICE_IMPL under this function's own name; the meaning of
// `mode_flag` and `aligned` is defined by the backend implementation. The
// macro is defined in pytorch_device_registry.hpp (not shown here); presumably
// it selects the backend registered for the tensors' device -- confirm there.
void box_iou_rotated_impl(const Tensor boxes1, const Tensor boxes2, Tensor ious,
                          const int mode_flag, const bool aligned) {
  DISPATCH_DEVICE_IMPL(box_iou_rotated_impl, boxes1, boxes2, ious, mode_flag,
                       aligned);
}

// Interface for Python.
// Forwards all arguments, in order, to box_iou_rotated_impl.
// NOTE(review): an earlier comment here said `inline` was required because the
// function lived in a header. This is an ordinary (non-inline) definition in a
// .cpp file, so that remark no longer applies.
void box_iou_rotated(const Tensor boxes1, const Tensor boxes2, Tensor ious,
                     const int mode_flag, const bool aligned) {
  box_iou_rotated_impl(boxes1, boxes2, ious, mode_flag, aligned);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/carafe.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Device-dispatch stub for the CARAFE forward pass. Every argument is handed
// unchanged to DISPATCH_DEVICE_IMPL under this function's own name. The macro
// is defined in pytorch_device_registry.hpp (not shown here); presumably it
// selects the backend registered for the tensors' device -- confirm there.
void carafe_forward_impl(Tensor features, Tensor masks, Tensor rfeatures,
                         Tensor routput, Tensor rmasks, Tensor output,
                         int kernel_size, int group_size, int scale_factor) {
  DISPATCH_DEVICE_IMPL(carafe_forward_impl, features, masks, rfeatures, routput,
                       rmasks, output, kernel_size, group_size, scale_factor);
}

// Device-dispatch stub for the CARAFE backward pass. Every argument is handed
// unchanged to DISPATCH_DEVICE_IMPL under this function's own name. The macro
// is defined in pytorch_device_registry.hpp (not shown here); presumably it
// selects the backend registered for the tensors' device -- confirm there.
void carafe_backward_impl(Tensor top_grad, Tensor rfeatures, Tensor masks,
                          Tensor rtop_grad, Tensor rbottom_grad_hs,
                          Tensor rbottom_grad, Tensor rmask_grad,
                          Tensor bottom_grad, Tensor mask_grad, int kernel_size,
                          int group_size, int scale_factor) {
  DISPATCH_DEVICE_IMPL(carafe_backward_impl, top_grad, rfeatures, masks,
                       rtop_grad, rbottom_grad_hs, rbottom_grad, rmask_grad,
                       bottom_grad, mask_grad, kernel_size, group_size,
                       scale_factor);
}

// Public CARAFE forward entry point: forwards all arguments, in order, to
// carafe_forward_impl.
void carafe_forward(Tensor features, Tensor masks, Tensor rfeatures,
                    Tensor routput, Tensor rmasks, Tensor output,
                    int kernel_size, int group_size, int scale_factor) {
  carafe_forward_impl(features, masks, rfeatures, routput, rmasks, output,
                      kernel_size, group_size, scale_factor);
}

// Public CARAFE backward entry point: forwards all arguments, in order, to
// carafe_backward_impl.
void carafe_backward(Tensor top_grad, Tensor rfeatures, Tensor masks,
                     Tensor rtop_grad, Tensor rbottom_grad_hs,
                     Tensor rbottom_grad, Tensor rmask_grad, Tensor bottom_grad,
                     Tensor mask_grad, int kernel_size, int group_size,
                     int scale_factor) {
  carafe_backward_impl(top_grad, rfeatures, masks, rtop_grad, rbottom_grad_hs,
                       rbottom_grad, rmask_grad, bottom_grad, mask_grad,
                       kernel_size, group_size, scale_factor);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/carafe_naive.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Device-dispatch stub for the naive CARAFE forward pass. Every argument is
// handed unchanged to DISPATCH_DEVICE_IMPL under this function's own name.
// The macro is defined in pytorch_device_registry.hpp (not shown here);
// presumably it selects the backend registered for the tensors' device --
// confirm there.
void carafe_naive_forward_impl(Tensor features, Tensor masks, Tensor output,
                               int kernel_size, int group_size,
                               int scale_factor) {
  DISPATCH_DEVICE_IMPL(carafe_naive_forward_impl, features, masks, output,
                       kernel_size, group_size, scale_factor);
}

// Device-dispatch stub for the naive CARAFE backward pass. Every argument is
// handed unchanged to DISPATCH_DEVICE_IMPL under this function's own name.
// The macro is defined in pytorch_device_registry.hpp (not shown here);
// presumably it selects the backend registered for the tensors' device --
// confirm there.
void carafe_naive_backward_impl(Tensor top_grad, Tensor features, Tensor masks,
                                Tensor bottom_grad, Tensor mask_grad,
                                int kernel_size, int group_size,
                                int scale_factor) {
  DISPATCH_DEVICE_IMPL(carafe_naive_backward_impl, top_grad, features, masks,
                       bottom_grad, mask_grad, kernel_size, group_size,
                       scale_factor);
}

// Public naive-CARAFE forward entry point: forwards all arguments, in order,
// to carafe_naive_forward_impl.
void carafe_naive_forward(Tensor features, Tensor masks, Tensor output,
                          int kernel_size, int group_size, int scale_factor) {
  carafe_naive_forward_impl(features, masks, output, kernel_size, group_size,
                            scale_factor);
}

// Public naive-CARAFE backward entry point: forwards all arguments, in order,
// to carafe_naive_backward_impl.
void carafe_naive_backward(Tensor top_grad, Tensor features, Tensor masks,
                           Tensor bottom_grad, Tensor mask_grad,
                           int kernel_size, int group_size, int scale_factor) {
  carafe_naive_backward_impl(top_grad, features, masks, bottom_grad, mask_grad,
                             kernel_size, group_size, scale_factor);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/contour_expand.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
// It is modified from https://github.com/whai362/PSENet
#include <iostream>
#include <queue>

#include "pytorch_cpp_helper.hpp"

using namespace std;

// Plain integer 2-D coordinate with public members. kernel_dilate in this
// file stores the row index in `x` and the column index in `y`.
class Point2d {
 public:
  int x;
  int y;

  // A default-constructed point is the origin (0, 0).
  Point2d() : Point2d(0, 0) {}

  // Builds a point from explicit coordinates.
  Point2d(int _x, int _y) : x(_x), y(_y) {}
};

void kernel_dilate(const uint8_t *data, IntArrayRef data_shape,
                   const int *label_map, int &label_num, int &min_area,
                   vector<vector<int>> &text_line) {
  std::vector<int> area(label_num + 1);
  int kernel_num = data_shape[0];
  int height = data_shape[1];
  int width = data_shape[2];

  for (int x = 0; x < height; ++x) {
    for (int y = 0; y < width; ++y) {
      int label = label_map[x * width + y];
      if (label == 0) continue;
      area[label] += 1;
    }
  }

  queue<Point2d> queue, next_queue;
  for (int x = 0; x < height; ++x) {
    vector<int> row(width);
    for (int y = 0; y < width; ++y) {
      int label = label_map[x * width + y];
      if (label == 0) continue;
      if (area[label] < min_area) continue;

      Point2d point(x, y);
      queue.push(point);
      row[y] = label;
    }
    text_line.emplace_back(row);
  }

  int dx[] = {-1, 1, 0, 0};
  int dy[] = {0, 0, -1, 1};
  vector<int> kernel_step(kernel_num);
  std::for_each(kernel_step.begin(), kernel_step.end(),
                [=](int &k) { return k * height * width; });

  for (int kernel_id = kernel_num - 2; kernel_id >= 0; --kernel_id) {
    while (!queue.empty()) {
      Point2d point = queue.front();
      queue.pop();
      int x = point.x;
      int y = point.y;
      int label = text_line[x][y];

      bool is_edge = true;
      for (int d = 0; d < 4; ++d) {
        int tmp_x = x + dx[d];
        int tmp_y = y + dy[d];

        if (tmp_x < 0 || tmp_x >= height) continue;
        if (tmp_y < 0 || tmp_y >= width) continue;
        int kernel_value = data[kernel_step[kernel_id] + tmp_x * width + tmp_y];
        if (kernel_value == 0) continue;
        if (text_line[tmp_x][tmp_y] > 0) continue;

        Point2d point(tmp_x, tmp_y);
        queue.push(point);
        text_line[tmp_x][tmp_y] = label;
        is_edge = false;
      }

      if (is_edge) {
        next_queue.push(point);
      }
    }
    swap(queue, next_queue);
  }
}

// Python-facing entry point for contour expansion.
//
// Parameters
//   kernel_mask            3-D uint8 CPU tensor; sizes 1 and 2 must match
//                          internal_kernel_label.
//   internal_kernel_label  2-D int32 CPU tensor of seed labels.
//   min_kernel_area        Forwarded to kernel_dilate as `min_area`.
//   kernel_num             Forwarded to kernel_dilate as `label_num`, where it
//                          sizes the per-label area table; it must therefore
//                          be at least the largest label in
//                          internal_kernel_label.
//
// Returns the label grid produced by kernel_dilate, one inner vector per row.
//
// Shape validation uses TORCH_CHECK rather than assert() so that it still runs
// in builds compiled with NDEBUG, where assert() expands to nothing and a
// malformed input would be read out of bounds.
std::vector<std::vector<int>> contour_expand(Tensor kernel_mask,
                                             Tensor internal_kernel_label,
                                             int min_kernel_area,
                                             int kernel_num) {
  kernel_mask = kernel_mask.contiguous();
  internal_kernel_label = internal_kernel_label.contiguous();
  TORCH_CHECK(kernel_mask.dim() == 3,
              "kernel_mask must be a 3-D tensor, but got ", kernel_mask.dim(),
              "-D");
  TORCH_CHECK(internal_kernel_label.dim() == 2,
              "internal_kernel_label must be a 2-D tensor, but got ",
              internal_kernel_label.dim(), "-D");
  TORCH_CHECK(kernel_mask.size(1) == internal_kernel_label.size(0),
              "kernel_mask.size(1) must equal internal_kernel_label.size(0)");
  TORCH_CHECK(kernel_mask.size(2) == internal_kernel_label.size(1),
              "kernel_mask.size(2) must equal internal_kernel_label.size(1)");
  // kernel_num + 1 becomes a vector size inside kernel_dilate.
  TORCH_CHECK(kernel_num >= 0, "kernel_num must be non-negative, but got ",
              kernel_num);
  CHECK_CPU_INPUT(kernel_mask);
  CHECK_CPU_INPUT(internal_kernel_label);
  auto ptr_data = kernel_mask.data_ptr<uint8_t>();
  IntArrayRef data_shape = kernel_mask.sizes();

  auto data_label_map = internal_kernel_label.data_ptr<int32_t>();
  vector<vector<int>> text_line;

  kernel_dilate(ptr_data, data_shape, data_label_map, kernel_num,
                min_kernel_area, text_line);

  return text_line;
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/convex_iou.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
// modified from
// https://github.com/SDL-GuoZonghao/BeyondBoundingBox/tree/main/mmdet/ops/iou/src
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Device-dispatch stub for convex_iou. Every argument is handed unchanged to
// DISPATCH_DEVICE_IMPL under this function's own name. The macro is defined in
// pytorch_device_registry.hpp (not shown here); presumably it selects the
// backend registered for the tensors' device -- confirm there.
void convex_iou_impl(const Tensor pointsets, const Tensor polygons,
                     Tensor ious) {
  DISPATCH_DEVICE_IMPL(convex_iou_impl, pointsets, polygons, ious);
}

// Public convex_iou entry point: forwards its three tensors, in order, to
// convex_iou_impl.
void convex_iou(const Tensor pointsets, const Tensor polygons, Tensor ious) {
  convex_iou_impl(pointsets, polygons, ious);
}

// Device-dispatch stub for convex_giou. Every argument is handed unchanged to
// DISPATCH_DEVICE_IMPL under this function's own name. The macro is defined in
// pytorch_device_registry.hpp (not shown here); presumably it selects the
// backend registered for the tensors' device -- confirm there.
void convex_giou_impl(const Tensor pointsets, const Tensor polygons,
                      Tensor output) {
  DISPATCH_DEVICE_IMPL(convex_giou_impl, pointsets, polygons, output);
}

// Public convex_giou entry point: forwards its three tensors, in order, to
// convex_giou_impl.
void convex_giou(const Tensor pointsets, const Tensor polygons, Tensor output) {
  convex_giou_impl(pointsets, polygons, output);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/corner_pool.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
// Modified from
// https://github.com/princeton-vl/CornerNet-Lite/tree/master/core/models/py_utils/_cpools/src
#include "pytorch_cpp_helper.hpp"

// Running maximum along dimension 2, accumulated from index 0 upwards: after
// the call, result[..., h, :] holds the maximum of input[..., 0..h, :].
//
// The scan uses shift doubling: at each step the region [shift, rows) is
// replaced by the element-wise max of itself and the region shifted back by
// `shift`, so only about log2(rows) passes are needed.
// NOTE(review): assumes `input` has at least 3 dimensions (only size(2) is
// queried here) — confirm the caller always passes N x C x H x W.
Tensor bottom_pool_forward(Tensor input) {
  const int64_t rows = input.size(2);

  // Start from an exact copy of the input.
  Tensor result = at::zeros_like(input);
  result.copy_(input);

  int64_t shift = 1;
  while (shift < rows) {
    // `target` is a view into `result`; the two operands are cloned so that
    // writing through `target` cannot disturb the values being compared.
    Tensor target = at::slice(result, 2, shift, rows);
    Tensor current = target.clone();
    Tensor shifted = at::slice(result, 2, 0, rows - shift).clone();
    at::max_out(target, current, shifted);
    shift <<= 1;
  }

  return result;
}

// Backward pass of bottom pooling (running max along dimension 2, scanned
// from index 0 upwards).
//
// For every (batch, channel, column) the rows are walked in increasing order
// while tracking the value and row index of the running maximum. The gradient
// of each row is added to the row that currently holds that maximum. Only a
// strictly greater value moves the maximum, so ties keep the earliest row.
//
// Args:
//   input:       forward-pass input; indexed as a 4-D tensor (sizes 0..3).
//   grad_output: gradient w.r.t. the forward output; indexed the same way.
// Returns:
//   Gradient w.r.t. `input`, created with at::zeros_like(input).
//
// The scratch tensors take their device and dtype from `input`. They were
// previously hard-coded to CUDA/float, which broke CPU tensors, tensors on a
// non-current GPU and non-float inputs.
Tensor bottom_pool_backward(Tensor input, Tensor grad_output) {
  auto output = at::zeros_like(input);

  int32_t batch = input.size(0);
  int32_t channel = input.size(1);
  int32_t height = input.size(2);
  int32_t width = input.size(3);

  // Follow the input's device/dtype instead of assuming CUDA + float.
  const auto value_options = input.options();
  const auto index_options = input.options().dtype(at::kLong);
  const auto mask_options = input.options().dtype(at::kBool);

  auto max_val = torch::zeros({batch, channel, width}, value_options);
  auto max_ind = torch::zeros({batch, channel, width}, index_options);

  // Row 0 is the initial running maximum.
  auto input_temp = input.select(2, 0);
  max_val.copy_(input_temp);

  max_ind.fill_(0);

  // Row 0 always receives its own gradient.
  auto output_temp = output.select(2, 0);
  auto grad_output_temp = grad_output.select(2, 0);
  output_temp.copy_(grad_output_temp);

  // View of max_ind with a singleton pooled dimension; it shares storage, so
  // in-place updates of max_ind below are visible to scatter_add_.
  auto un_max_ind = max_ind.unsqueeze(2);
  auto gt_mask = torch::zeros({batch, channel, width}, mask_options);
  auto max_temp = torch::zeros({batch, channel, width}, value_options);
  for (int32_t ind = 0; ind < height - 1; ++ind) {
    input_temp = input.select(2, ind + 1);
    at::gt_out(gt_mask, input_temp, max_val);

    // Move the running maximum (value and index) wherever row ind + 1 wins.
    at::masked_select_out(max_temp, input_temp, gt_mask);
    max_val.masked_scatter_(gt_mask, max_temp);
    max_ind.masked_fill_(gt_mask, ind + 1);

    // Route this row's gradient to the current arg-max row.
    grad_output_temp = grad_output.select(2, ind + 1).unsqueeze(2);
    output.scatter_add_(2, un_max_ind, grad_output_temp);
  }

  return output;
}

// Running maximum along dimension 3, accumulated from the last index
// downwards: after the call, result[..., w] holds the maximum of
// input[..., w..cols-1].
//
// The scan uses shift doubling: at each step the region [0, cols - shift) is
// replaced by the element-wise max of itself and the region shifted forward
// by `shift`, so only about log2(cols) passes are needed.
// NOTE(review): assumes `input` has at least 4 dimensions (size(3) is
// queried) — confirm the caller always passes N x C x H x W.
Tensor left_pool_forward(Tensor input) {
  const int64_t cols = input.size(3);

  // Start from an exact copy of the input.
  Tensor result = at::zeros_like(input);
  result.copy_(input);

  int64_t shift = 1;
  while (shift < cols) {
    // `target` is a view into `result`; the two operands are cloned so that
    // writing through `target` cannot disturb the values being compared.
    Tensor target = at::slice(result, 3, 0, cols - shift);
    Tensor current = target.clone();
    Tensor shifted = at::slice(result, 3, shift, cols).clone();
    at::max_out(target, current, shifted);
    shift <<= 1;
  }

  return result;
}

// Backward pass of left pooling (running max along dimension 3, scanned from
// the last index downwards).
//
// For every (batch, channel, row) the columns are walked from width - 1 down
// to 0 while tracking the value and column index of the running maximum. The
// gradient of each column is added to the column that currently holds that
// maximum. Only a strictly greater value moves the maximum, so ties keep the
// column visited first (the larger index).
//
// Args:
//   input:       forward-pass input; indexed as a 4-D tensor (sizes 0..3).
//   grad_output: gradient w.r.t. the forward output; indexed the same way.
// Returns:
//   Gradient w.r.t. `input`, created with at::zeros_like(input).
//
// The scratch tensors take their device and dtype from `input`. They were
// previously hard-coded to CUDA/float, which broke CPU tensors, tensors on a
// non-current GPU and non-float inputs.
Tensor left_pool_backward(Tensor input, Tensor grad_output) {
  auto output = at::zeros_like(input);

  int32_t batch = input.size(0);
  int32_t channel = input.size(1);
  int32_t height = input.size(2);
  int32_t width = input.size(3);

  // Follow the input's device/dtype instead of assuming CUDA + float.
  const auto value_options = input.options();
  const auto index_options = input.options().dtype(at::kLong);
  const auto mask_options = input.options().dtype(at::kBool);

  auto max_val = torch::zeros({batch, channel, height}, value_options);
  auto max_ind = torch::zeros({batch, channel, height}, index_options);

  // The last column is the initial running maximum.
  auto input_temp = input.select(3, width - 1);
  max_val.copy_(input_temp);

  max_ind.fill_(width - 1);

  // The last column always receives its own gradient.
  auto output_temp = output.select(3, width - 1);
  auto grad_output_temp = grad_output.select(3, width - 1);
  output_temp.copy_(grad_output_temp);

  // View of max_ind with a singleton pooled dimension; it shares storage, so
  // in-place updates of max_ind below are visible to scatter_add_.
  auto un_max_ind = max_ind.unsqueeze(3);
  auto gt_mask = torch::zeros({batch, channel, height}, mask_options);
  auto max_temp = torch::zeros({batch, channel, height}, value_options);
  for (int32_t ind = 1; ind < width; ++ind) {
    input_temp = input.select(3, width - ind - 1);
    at::gt_out(gt_mask, input_temp, max_val);

    // Move the running maximum (value and index) wherever this column wins.
    at::masked_select_out(max_temp, input_temp, gt_mask);
    max_val.masked_scatter_(gt_mask, max_temp);
    max_ind.masked_fill_(gt_mask, width - ind - 1);

    // Route this column's gradient to the current arg-max column.
    grad_output_temp = grad_output.select(3, width - ind - 1).unsqueeze(3);
    output.scatter_add_(3, un_max_ind, grad_output_temp);
  }

  return output;
}

// Running maximum along dimension 3, accumulated from index 0 upwards: after
// the call, result[..., w] holds the maximum of input[..., 0..w].
//
// The scan uses shift doubling: at each step the region [shift, cols) is
// replaced by the element-wise max of itself and the region shifted back by
// `shift`, so only about log2(cols) passes are needed.
// NOTE(review): assumes `input` has at least 4 dimensions (size(3) is
// queried) — confirm the caller always passes N x C x H x W.
Tensor right_pool_forward(Tensor input) {
  const int64_t cols = input.size(3);

  // Start from an exact copy of the input.
  Tensor result = at::zeros_like(input);
  result.copy_(input);

  int64_t shift = 1;
  while (shift < cols) {
    // `target` is a view into `result`; the two operands are cloned so that
    // writing through `target` cannot disturb the values being compared.
    Tensor target = at::slice(result, 3, shift, cols);
    Tensor current = target.clone();
    Tensor shifted = at::slice(result, 3, 0, cols - shift).clone();
    at::max_out(target, current, shifted);
    shift <<= 1;
  }

  return result;
}

// Backward pass of right pooling (running max along dimension 3, scanned
// from index 0 upwards).
//
// For every (batch, channel, row) the columns are walked in increasing order
// while tracking the value and column index of the running maximum. The
// gradient of each column is added to the column that currently holds that
// maximum. Only a strictly greater value moves the maximum, so ties keep the
// earliest column.
//
// Args:
//   input:       forward-pass input; indexed as a 4-D tensor (sizes 0..3).
//   grad_output: gradient w.r.t. the forward output; indexed the same way.
// Returns:
//   Gradient w.r.t. `input`, created with at::zeros_like(input).
//
// The scratch tensors take their device and dtype from `input`. They were
// previously hard-coded to CUDA/float, which broke CPU tensors, tensors on a
// non-current GPU and non-float inputs.
Tensor right_pool_backward(Tensor input, Tensor grad_output) {
  Tensor output = at::zeros_like(input);

  int32_t batch = input.size(0);
  int32_t channel = input.size(1);
  int32_t height = input.size(2);
  int32_t width = input.size(3);

  // Follow the input's device/dtype instead of assuming CUDA + float.
  const auto value_options = input.options();
  const auto index_options = input.options().dtype(at::kLong);
  const auto mask_options = input.options().dtype(at::kBool);

  auto max_val = torch::zeros({batch, channel, height}, value_options);
  auto max_ind = torch::zeros({batch, channel, height}, index_options);

  // Column 0 is the initial running maximum.
  auto input_temp = input.select(3, 0);
  max_val.copy_(input_temp);

  max_ind.fill_(0);

  // Column 0 always receives its own gradient.
  auto output_temp = output.select(3, 0);
  auto grad_output_temp = grad_output.select(3, 0);
  output_temp.copy_(grad_output_temp);

  // View of max_ind with a singleton pooled dimension; it shares storage, so
  // in-place updates of max_ind below are visible to scatter_add_.
  auto un_max_ind = max_ind.unsqueeze(3);
  auto gt_mask = torch::zeros({batch, channel, height}, mask_options);
  auto max_temp = torch::zeros({batch, channel, height}, value_options);
  for (int32_t ind = 0; ind < width - 1; ++ind) {
    input_temp = input.select(3, ind + 1);
    at::gt_out(gt_mask, input_temp, max_val);

    // Move the running maximum (value and index) wherever column ind + 1 wins.
    at::masked_select_out(max_temp, input_temp, gt_mask);
    max_val.masked_scatter_(gt_mask, max_temp);
    max_ind.masked_fill_(gt_mask, ind + 1);

    // Route this column's gradient to the current arg-max column.
    grad_output_temp = grad_output.select(3, ind + 1).unsqueeze(3);
    output.scatter_add_(3, un_max_ind, grad_output_temp);
  }

  return output;
}

// Running maximum along dimension 2, accumulated from the last index
// downwards: after the call, result[..., h, :] holds the maximum of
// input[..., h..rows-1, :].
//
// The scan uses shift doubling: at each step the region [0, rows - shift) is
// replaced by the element-wise max of itself and the region shifted forward
// by `shift`, so only about log2(rows) passes are needed.
// NOTE(review): assumes `input` has at least 3 dimensions (only size(2) is
// queried here) — confirm the caller always passes N x C x H x W.
Tensor top_pool_forward(Tensor input) {
  const int64_t rows = input.size(2);

  // Start from an exact copy of the input.
  Tensor result = at::zeros_like(input);
  result.copy_(input);

  int64_t shift = 1;
  while (shift < rows) {
    // `target` is a view into `result`; the two operands are cloned so that
    // writing through `target` cannot disturb the values being compared.
    Tensor target = at::slice(result, 2, 0, rows - shift);
    Tensor current = target.clone();
    Tensor shifted = at::slice(result, 2, shift, rows).clone();
    at::max_out(target, current, shifted);
    shift <<= 1;
  }

  return result;
}

// Backward pass of top pooling (running max along dimension 2, scanned from
// the last index downwards).
//
// For every (batch, channel, column) the rows are walked from height - 1 down
// to 0 while tracking the value and row index of the running maximum. The
// gradient of each row is added to the row that currently holds that maximum.
// Only a strictly greater value moves the maximum, so ties keep the row
// visited first (the larger index).
//
// Args:
//   input:       forward-pass input; indexed as a 4-D tensor (sizes 0..3).
//   grad_output: gradient w.r.t. the forward output; indexed the same way.
// Returns:
//   Gradient w.r.t. `input`, created with at::zeros_like(input).
//
// The scratch tensors take their device and dtype from `input`. They were
// previously hard-coded to CUDA/float, which broke CPU tensors, tensors on a
// non-current GPU and non-float inputs.
Tensor top_pool_backward(Tensor input, Tensor grad_output) {
  auto output = at::zeros_like(input);

  int32_t batch = input.size(0);
  int32_t channel = input.size(1);
  int32_t height = input.size(2);
  int32_t width = input.size(3);

  // Follow the input's device/dtype instead of assuming CUDA + float.
  const auto value_options = input.options();
  const auto index_options = input.options().dtype(at::kLong);
  const auto mask_options = input.options().dtype(at::kBool);

  auto max_val = torch::zeros({batch, channel, width}, value_options);
  auto max_ind = torch::zeros({batch, channel, width}, index_options);

  // The last row is the initial running maximum.
  auto input_temp = input.select(2, height - 1);
  max_val.copy_(input_temp);

  max_ind.fill_(height - 1);

  // The last row always receives its own gradient.
  auto output_temp = output.select(2, height - 1);
  auto grad_output_temp = grad_output.select(2, height - 1);
  output_temp.copy_(grad_output_temp);

  // View of max_ind with a singleton pooled dimension; it shares storage, so
  // in-place updates of max_ind below are visible to scatter_add_.
  auto un_max_ind = max_ind.unsqueeze(2);
  auto gt_mask = torch::zeros({batch, channel, width}, mask_options);
  auto max_temp = torch::zeros({batch, channel, width}, value_options);
  for (int32_t ind = 1; ind < height; ++ind) {
    input_temp = input.select(2, height - ind - 1);
    at::gt_out(gt_mask, input_temp, max_val);

    // Move the running maximum (value and index) wherever this row wins.
    at::masked_select_out(max_temp, input_temp, gt_mask);
    max_val.masked_scatter_(gt_mask, max_temp);
    max_ind.masked_fill_(gt_mask, height - ind - 1);

    // Route this row's gradient to the current arg-max row.
    grad_output_temp = grad_output.select(2, height - ind - 1).unsqueeze(2);
    output.scatter_add_(2, un_max_ind, grad_output_temp);
  }

  return output;
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/correlation.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved.
#include <iostream>

#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Dispatch point for the correlation forward pass. Every argument is handed
// unchanged to DISPATCH_DEVICE_IMPL (from pytorch_device_registry.hpp); the
// backend selection rule is not visible in this file.
// NOTE(review): judging by the names only, kH/kW are the kernel size,
// patchH/patchW the patch size, padH/padW the padding, dilation*/
// dilation_patch* the dilations and dH/dW the strides — confirm against the
// backend kernels.
void correlation_forward_impl(Tensor input1, Tensor input2, Tensor output,
                              int kH, int kW, int patchH, int patchW, int padH,
                              int padW, int dilationH, int dilationW,
                              int dilation_patchH, int dilation_patchW, int dH,
                              int dW) {
  DISPATCH_DEVICE_IMPL(correlation_forward_impl, input1, input2, output, kH, kW,
                       patchH, patchW, padH, padW, dilationH, dilationW,
                       dilation_patchH, dilation_patchW, dH, dW);
}

// Dispatch point for the correlation backward pass. Every argument is handed
// unchanged to DISPATCH_DEVICE_IMPL (from pytorch_device_registry.hpp); the
// backend selection rule is not visible in this file.
// NOTE(review): grad_input1/grad_input2 are presumably output buffers filled
// by the backend, and the integer arguments presumably mirror those of the
// forward pass — confirm against the backend kernels.
void correlation_backward_impl(Tensor grad_output, Tensor input1, Tensor input2,
                               Tensor grad_input1, Tensor grad_input2, int kH,
                               int kW, int patchH, int patchW, int padH,
                               int padW, int dilationH, int dilationW,
                               int dilation_patchH, int dilation_patchW, int dH,
                               int dW) {
  DISPATCH_DEVICE_IMPL(correlation_backward_impl, grad_output, input1, input2,
                       grad_input1, grad_input2, kH, kW, patchH, patchW, padH,
                       padW, dilationH, dilationW, dilation_patchH,
                       dilation_patchW, dH, dW);
}

// Public entry point for the correlation forward pass: forwards every
// argument, in the same order, to correlation_forward_impl. No validation or
// allocation happens here.
void correlation_forward(Tensor input1, Tensor input2, Tensor output, int kH,
                         int kW, int patchH, int patchW, int padH, int padW,
                         int dilationH, int dilationW, int dilation_patchH,
                         int dilation_patchW, int dH, int dW) {
  correlation_forward_impl(input1, input2, output, kH, kW, patchH, patchW, padH,
                           padW, dilationH, dilationW, dilation_patchH,
                           dilation_patchW, dH, dW);
}

// Public entry point for the correlation backward pass: forwards every
// argument, in the same order, to correlation_backward_impl. No validation or
// allocation happens here.
void correlation_backward(Tensor grad_output, Tensor input1, Tensor input2,
                          Tensor grad_input1, Tensor grad_input2, int kH,
                          int kW, int patchH, int patchW, int padH, int padW,
                          int dilationH, int dilationW, int dilation_patchH,
                          int dilation_patchW, int dH, int dW) {
  correlation_backward_impl(grad_output, input1, input2, grad_input1,
                            grad_input2, kH, kW, patchH, patchW, padH, padW,
                            dilationH, dilationW, dilation_patchH,
                            dilation_patchW, dH, dW);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cpu/active_rotated_filter.cpp
================================================
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
// modified from
// https://github.com/csuhan/s2anet/blob/master/mmdet/ops/orn/src/cpu/ActiveRotatingFilter_cpu.cpp
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Expands a bank of filters into its rotated copies.
//
// weightData is read as [num_output_planes][num_input_planes][nEntry] with
// nEntry = num_orientations * kH * kW. For each entry and each rotation,
// indicesData[entry * num_rotations + rot] holds the 1-based destination
// entry; the weight is written to
//   outputData[out_plane][rot][in_plane][destination - 1].
// Output positions not named by the index table are left untouched.
template <typename T>
void active_rotated_filter_forward_cpu_kernel(
    const T* weightData, const int* indicesData, const int num_output_planes,
    const int num_input_planes, const int num_orientations, const int kH,
    const int kW, const int num_rotations, T* outputData) {
  const int nEntry = num_orientations * kH * kW;

  // Loop counters are declared inside the loops, so each thread gets its own.
#pragma omp parallel for
  for (int out_plane = 0; out_plane < num_output_planes; out_plane++) {
    for (int in_plane = 0; in_plane < num_input_planes; in_plane++) {
      for (int entry = 0; entry < nEntry; entry++) {
        const int weightIndex =
            out_plane * num_input_planes * nEntry + in_plane * nEntry + entry;
        const T value = weightData[weightIndex];
        // Row of the index table that belongs to this source entry.
        const int* destinations = indicesData + entry * num_rotations;
        for (int rot = 0; rot < num_rotations; rot++) {
          // The table stores 1-based positions.
          const int index = (int)(destinations[rot]) - 1;
          T* slot = outputData +
                    out_plane * (num_rotations * num_input_planes * nEntry) +
                    rot * (num_input_planes * nEntry) + in_plane * (nEntry) +
                    index;
          *slot = value;
        }
      }
    }
  }
}

// Gathers gradients of the rotated filters back onto the original filters.
//
// gradInputData is written as [num_output_planes][num_input_planes][nEntry]
// with nEntry = num_orientations * kH * kW. Each element is first reset to 0
// and then receives, for every rotation, the value
//   gradOutputData[out_plane][rot][in_plane][destination - 1],
// where destination = indicesData[entry * num_rotations + rot] is 1-based.
template <typename T>
void active_rotated_filter_backward_cpu_kernel(
    const T* gradOutputData, const int* indicesData,
    const int num_output_planes, const int num_input_planes,
    const int num_orientations, const int kH, const int kW,
    const int num_rotations, T* gradInputData) {
  const int nEntry = num_orientations * kH * kW;

  // Loop counters are declared inside the loops, so each thread gets its own.
#pragma omp parallel for
  for (int out_plane = 0; out_plane < num_output_planes; out_plane++) {
    for (int in_plane = 0; in_plane < num_input_planes; in_plane++) {
      for (int entry = 0; entry < nEntry; entry++) {
        const int gradInputIndex =
            out_plane * num_input_planes * nEntry + in_plane * nEntry + entry;
        T* accum = gradInputData + gradInputIndex;
        *accum = 0;
        // Row of the index table that belongs to this entry.
        const int* destinations = indicesData + entry * num_rotations;
        for (int rot = 0; rot < num_rotations; rot++) {
          // The table stores 1-based positions.
          const int index = (int)(destinations[rot]) - 1;
          const T* source =
              gradOutputData +
              out_plane * (num_rotations * num_input_planes * nEntry) +
              rot * (num_input_planes * nEntry) + in_plane * (nEntry) + index;
          *accum = *accum + *source;
        }
      }
    }
  }
}

// Reads the problem sizes off the tensors and runs the forward CPU kernel for
// the scalar type of `input`.
//
// Sizes are taken from input.size(0..4) (output planes, input planes,
// orientations, kH, kW) and indices.size(3) (rotations). `indices` is read
// through data_ptr<int>(), and all tensors are accessed as raw flat buffers.
// NOTE(review): that implies contiguous tensors and an int32 `indices` —
// confirm the caller guarantees both.
void ActiveRotatedFilterForwardCPULauncher(const Tensor input,
                                           const Tensor indices,
                                           Tensor output) {
  const int n_out_planes = input.size(0);
  const int n_in_planes = input.size(1);
  const int n_orientations = input.size(2);
  const int kernel_h = input.size(3);
  const int kernel_w = input.size(4);
  const int n_rotations = indices.size(3);

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      input.scalar_type(), "active_rotated_filter_forward_cpu_kernel", [&] {
        active_rotated_filter_forward_cpu_kernel<scalar_t>(
            input.data_ptr<scalar_t>(), indices.data_ptr<int>(), n_out_planes,
            n_in_planes, n_orientations, kernel_h, kernel_w, n_rotations,
            output.data_ptr<scalar_t>());
      });
}

// Reads the problem sizes off the tensors and runs the backward CPU kernel
// for the scalar type of `grad_out`.
//
// Orientations, kH, kW and rotations come from indices.size(0..3). The plane
// counts are recovered by integer division: grad_out.size(0) / rotations and
// grad_out.size(1) / orientations. `indices` is read through data_ptr<int>(),
// and all tensors are accessed as raw flat buffers.
// NOTE(review): that implies contiguous tensors and an int32 `indices` —
// confirm the caller guarantees both.
void ActiveRotatedFilterBackwardCPULauncher(const Tensor grad_out,
                                            const Tensor indices,
                                            Tensor grad_in) {
  const int n_orientations = indices.size(0);
  const int kernel_h = indices.size(1);
  const int kernel_w = indices.size(2);
  const int n_rotations = indices.size(3);
  const int n_out_planes = grad_out.size(0) / n_rotations;
  const int n_in_planes = grad_out.size(1) / n_orientations;

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      grad_out.scalar_type(), "active_rotated_filter_backward_cpu_kernel", [&] {
        active_rotated_filter_backward_cpu_kernel<scalar_t>(
            grad_out.data_ptr<scalar_t>(), indices.data_ptr<int>(),
            n_out_planes, n_in_planes, n_orientations, kernel_h, kernel_w,
            n_rotations, grad_in.data_ptr<scalar_t>());
      });
}

// CPU backend of the active rotated filter forward pass; forwards directly to
// ActiveRotatedFilterForwardCPULauncher. This is the function registered for
// the CPU device with REGISTER_DEVICE_IMPL further down in this file.
void active_rotated_filter_forward_cpu(const Tensor input, const Tensor indices,
                                       Tensor output) {
  ActiveRotatedFilterForwardCPULauncher(input, indices, output);
}

// CPU backend of the active rotated filter backward pass; forwards directly
// to ActiveRotatedFilterBackwardCPULauncher. This is the function registered
// for the CPU device with REGISTER_DEVICE_IMPL further down in this file.
void active_rotated_filter_backward_cpu(const Tensor grad_out,
                                        const Tensor indices, Tensor grad_in) {
  ActiveRotatedFilterBackwardCPULauncher(grad_out, indices, grad_in);
}

// Forward declarations of the device-dispatched entry points. Their
// definitions are not in this file; they are declared here so the names can
// be passed to REGISTER_DEVICE_IMPL below.
void active_rotated_filter_forward_impl(const Tensor input,
                                        const Tensor indices, Tensor output);

void active_rotated_filter_backward_impl(const Tensor grad_out,
                                         const Tensor indices, Tensor grad_in);

// Register the CPU functions above as the CPU implementations of the two
// dispatched entry points (macro from pytorch_device_registry.hpp).
REGISTER_DEVICE_IMPL(active_rotated_filter_forward_impl, CPU,
                     active_rotated_filter_forward_cpu);
REGISTER_DEVICE_IMPL(active_rotated_filter_backward_impl, CPU,
                     active_rotated_filter_backward_cpu);


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cpu/box_iou_rotated.cpp
================================================
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
// modified from
// https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cpu.cpp
#include "box_iou_rotated_utils.hpp"
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Fills `ious` with rotated-box overlaps computed by single_box_iou_rotated.
//
// aligned == true : ious[k] is computed from boxes1[k] and boxes2[k] for
//                   every k in [0, ious.numel()).
// aligned == false: every pair (r, c) is evaluated and stored at the flat
//                   position r * boxes2.size(0) + c of `ious`.
// `mode_flag` is passed through to single_box_iou_rotated unchanged.
// NOTE(review): each row is read through a raw data_ptr<T>(), so the rows are
// presumably contiguous and of scalar type T — confirm at the call site.
template <typename T>
void box_iou_rotated_cpu_kernel(const Tensor boxes1, const Tensor boxes2,
                                Tensor ious, const int mode_flag,
                                const bool aligned) {
  int total = ious.numel();
  auto rows = boxes1.size(0);
  auto cols = boxes2.size(0);

  if (!aligned) {
    // All-pairs mode: row-major layout over (boxes1 index, boxes2 index).
    for (int r = 0; r < rows; r++) {
      for (int c = 0; c < cols; c++) {
        ious[r * cols + c] = single_box_iou_rotated<T>(
            boxes1[r].data_ptr<T>(), boxes2[c].data_ptr<T>(), mode_flag);
      }
    }
    return;
  }

  // Aligned mode: one overlap per matching pair of rows.
  for (int k = 0; k < total; k++) {
    ious[k] = single_box_iou_rotated<T>(boxes1[k].data_ptr<T>(),
                                        boxes2[k].data_ptr<T>(), mode_flag);
  }
}

// CPU backend of rotated-box IoU. Always instantiates the kernel with
// T = float, regardless of the tensors' actual scalar type.
// NOTE(review): the kernel reads rows through data_ptr<float>(), so inputs
// are presumably expected to be float32 — confirm the caller converts.
void box_iou_rotated_cpu(const Tensor boxes1, const Tensor boxes2, Tensor ious,
                         const int mode_flag, const bool aligned) {
  box_iou_rotated_cpu_kernel<float>(boxes1, boxes2, ious, mode_flag, aligned);
}

// Forward declaration of the device-dispatched entry point (defined outside
// this file), followed by the registration of box_iou_rotated_cpu as its CPU
// implementation.
void box_iou_rotated_impl(const Tensor boxes1, const Tensor boxes2, Tensor ious,
                          const int mode_flag, const bool aligned);
REGISTER_DEVICE_IMPL(box_iou_rotated_impl, CPU, box_iou_rotated_cpu);


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cpu/deform_conv.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Bilinearly samples a single-channel image at the fractional position (h, w).
//
// `input` is indexed as input[row * data_width + col]. `height` and `width`
// bound the valid rows and columns. A sample at or beyond -1 / height /
// width returns 0. Otherwise the four neighbouring pixels are blended, and
// any neighbour that lies outside the image contributes 0.
template <typename T>
T deformable_im2col_bilinear_cpu(const T *input, const int data_width,
                                 const int height, const int width, T h, T w) {
  const bool outside = h <= -1 || height <= h || w <= -1 || width <= w;
  if (outside) {
    return 0;
  }

  // Integer corners surrounding the sample point.
  const int top = floor(h);
  const int left = floor(w);
  const int bottom = top + 1;
  const int right = left + 1;

  // Fractional distance from the top-left corner, and its complement.
  const T frac_h = h - top;
  const T frac_w = w - left;
  const T rest_h = 1 - frac_h, rest_w = 1 - frac_w;

  const bool top_in = top >= 0;
  const bool left_in = left >= 0;
  const bool bottom_in = bottom <= height - 1;
  const bool right_in = right <= width - 1;

  // Corner values; the ternaries keep out-of-image pixels from being read.
  const T top_left =
      (top_in && left_in) ? input[top * data_width + left] : static_cast<T>(0);
  const T top_right = (top_in && right_in) ? input[top * data_width + right]
                                           : static_cast<T>(0);
  const T bottom_left = (bottom_in && left_in)
                            ? input[bottom * data_width + left]
                            : static_cast<T>(0);
  const T bottom_right = (bottom_in && right_in)
                             ? input[bottom * data_width + right]
                             : static_cast<T>(0);

  const T k_tl = rest_h * rest_w;
  const T k_tr = rest_h * frac_w;
  const T k_bl = frac_h * rest_w;
  const T k_br = frac_h * frac_w;

  const T blended = (k_tl * top_left + k_tr * top_right + k_bl * bottom_left +
                     k_br * bottom_right);
  return blended;
}

// Bilinear weight with which the integer pixel (h, w) contributes to a sample
// taken at the fractional position (argmax_h, argmax_w).
//
// Returns 0 when the sample lies at or beyond -1 / height / width, and 0 when
// (h, w) is not one of the four integer corners surrounding the sample.
template <typename T>
T get_gradient_weight_cpu(T argmax_h, T argmax_w, const int h, const int w,
                          const int height, const int width) {
  const bool outside = argmax_h <= -1 || argmax_h >= height ||
                       argmax_w <= -1 || argmax_w >= width;
  if (outside) {
    // empty
    return 0;
  }

  const int row_lo = floor(argmax_h);
  const int col_lo = floor(argmax_w);
  const int row_hi = row_lo + 1;
  const int col_hi = col_lo + 1;

  const bool on_row_lo = (h == row_lo);
  const bool on_row_hi = (h == row_hi);
  const bool on_col_lo = (w == col_lo);
  const bool on_col_hi = (w == col_hi);

  // The four corners are distinct, so at most one branch can match.
  T weight = 0;
  if (on_row_lo && on_col_lo) {
    weight = (h + 1 - argmax_h) * (w + 1 - argmax_w);
  } else if (on_row_lo && on_col_hi) {
    weight = (h + 1 - argmax_h) * (argmax_w + 1 - w);
  } else if (on_row_hi && on_col_lo) {
    weight = (argmax_h + 1 - h) * (w + 1 - argmax_w);
  } else if (on_row_hi && on_col_hi) {
    weight = (argmax_h + 1 - h) * (argmax_w + 1 - w);
  }
  return weight;
}

// Derivative of a bilinear sample at (argmax_h, argmax_w) with respect to one
// of the two coordinates.
//
// `im_data` is indexed as im_data[row * data_width + col]. bp_dir == 0
// differentiates along h (bottom corners minus top corners, blended along w).
// bp_dir == 1 differentiates along w (right corners minus left corners,
// blended along h). Any other bp_dir yields 0. Corners that fall outside the
// image are skipped, and a sample at or beyond -1 / height / width yields 0.
template <typename T>
T get_coordinate_weight_cpu(T argmax_h, T argmax_w, const int height,
                            const int width, const T *im_data,
                            const int data_width, const int bp_dir) {
  const bool outside = argmax_h <= -1 || argmax_h >= height ||
                       argmax_w <= -1 || argmax_w >= width;
  if (outside) {
    // empty
    return 0;
  }

  const int row_lo = floor(argmax_h);
  const int col_lo = floor(argmax_w);
  const int row_hi = row_lo + 1;
  const int col_hi = col_lo + 1;

  // Which of the four surrounding corners are inside the image.
  const bool has_tl = row_lo >= 0 && col_lo >= 0;
  const bool has_tr = row_lo >= 0 && col_hi <= width - 1;
  const bool has_bl = row_hi <= height - 1 && col_lo >= 0;
  const bool has_br = row_hi <= height - 1 && col_hi <= width - 1;

  T weight = 0;

  if (bp_dir == 0) {
    if (has_tl)
      weight += -1 * (col_lo + 1 - argmax_w) *
                im_data[row_lo * data_width + col_lo];
    if (has_tr)
      weight += -1 * (argmax_w - col_lo) *
                im_data[row_lo * data_width + col_hi];
    if (has_bl)
      weight += (col_lo + 1 - argmax_w) *
                im_data[row_hi * data_width + col_lo];
    if (has_br)
      weight += (argmax_w - col_lo) * im_data[row_hi * data_width + col_hi];
  } else if (bp_dir == 1) {
    if (has_tl)
      weight += -1 * (row_lo + 1 - argmax_h) *
                im_data[row_lo * data_width + col_lo];
    if (has_tr)
      weight += (row_lo + 1 - argmax_h) *
                im_data[row_lo * data_width + col_hi];
    if (has_bl)
      weight += -1 * (argmax_h - row_lo) *
                im_data[row_hi * data_width + col_lo];
    if (has_br)
      weight += (argmax_h - row_lo) * im_data[row_hi * data_width + col_hi];
  }

  return weight;
}

// Deformable im2col: for each of the `n` flattened (channel, image, output
// row, output column) positions, samples the input at every kernel tap,
// displaced by the learned offset, and writes the result into the column
// buffer.
//
// Layouts implied by the index arithmetic below:
//   data_im     [batch_size][num_channels][height][width]
//   data_offset [batch_size][deformable_group][2 * kernel_h * kernel_w]
//               [height_col][width_col], with the h offset stored before the
//               w offset for each tap
//   data_col    [num_channels * kernel_h * kernel_w][batch_size]
//               [height_col][width_col]
// Taps whose sampling position is at or beyond -1 / height / width produce 0.
template <typename T>
void deformable_im2col_cpu_kernel(
    const int n, const T *data_im, const T *data_offset, const int height,
    const int width, const int kernel_h, const int kernel_w, const int pad_h,
    const int pad_w, const int stride_h, const int stride_w,
    const int dilation_h, const int dilation_w,
    const int channel_per_deformable_group, const int batch_size,
    const int num_channels, const int deformable_group, const int height_col,
    const int width_col, T *data_col) {
  for (int index = 0; index < n; index++) {
    // Unflatten the position within the output matrix.
    const int row_major = index / width_col;
    const int plane = row_major / height_col;
    const int w_col = index % width_col;
    const int h_col = row_major % height_col;
    const int b_col = plane % batch_size;
    const int c_im = plane / batch_size;
    const int c_col = c_im * kernel_h * kernel_w;

    // Offset group this input channel belongs to.
    const int group = c_im / channel_per_deformable_group;

    // Top-left corner of the receptive field in input coordinates.
    const int h_origin = h_col * stride_h - pad_h;
    const int w_origin = w_col * stride_w - pad_w;

    T *col_out =
        data_col +
        ((c_col * batch_size + b_col) * height_col + h_col) * width_col + w_col;
    const T *image = data_im + (b_col * num_channels + c_im) * height * width;
    const T *offsets =
        data_offset + (b_col * deformable_group + group) * 2 * kernel_h *
                          kernel_w * height_col * width_col;

    for (int ky = 0; ky < kernel_h; ++ky) {
      for (int kx = 0; kx < kernel_w; ++kx) {
        const int tap = ky * kernel_w + kx;
        const int off_h_idx =
            ((2 * tap) * height_col + h_col) * width_col + w_col;
        const int off_w_idx =
            ((2 * tap + 1) * height_col + h_col) * width_col + w_col;
        const T offset_h = offsets[off_h_idx];
        const T offset_w = offsets[off_w_idx];

        // Displaced sampling position for this tap.
        const T h_im = h_origin + ky * dilation_h + offset_h;
        const T w_im = w_origin + kx * dilation_w + offset_w;

        const bool inside =
            h_im > -1 && w_im > -1 && h_im < height && w_im < width;
        *col_out = inside ? deformable_im2col_bilinear_cpu(image, width, height,
                                                           width, h_im, w_im)
                          : static_cast<T>(0);

        // Step to the same spatial position in the next tap's plane.
        col_out += batch_size * height_col * width_col;
      }
    }
  }
}

// Deformable col2im: distributes every column-buffer gradient back onto the
// input pixels that took part in the bilinear sample which produced it.
//
// Each of the `n` flattened positions decodes to (channel, kernel row, kernel
// column, image, output row, output column). The sampling position is rebuilt
// from the stored offsets. The pixels in a 5 x 5 window around its truncated
// integer part are visited, and those within distance < 1 on both axes
// receive weight * gradient, with the weight from get_gradient_weight_cpu.
// Results are accumulated into `grad_im` with +=.
// NOTE(review): `grad_im` is presumably zero-initialised by the caller —
// confirm.
template <typename T>
void deformable_col2im_cpu_kernel(
    const int n, const T *data_col, const T *data_offset, const int channels,
    const int height, const int width, const int kernel_h, const int kernel_w,
    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
    const int dilation_h, const int dilation_w,
    const int channel_per_deformable_group, const int batch_size,
    const int deformable_group, const int height_col, const int width_col,
    T *grad_im) {
  for (int index = 0; index < n; index++) {
    // Unflatten the column-buffer position.
    const int plane = index / width_col / height_col;
    const int tap_channel = plane / batch_size;
    const int j = tap_channel % kernel_w;
    const int i = (tap_channel / kernel_w) % kernel_h;
    const int c = tap_channel / kernel_w / kernel_h;

    const int group = c / channel_per_deformable_group;

    const int w_out = index % width_col;
    const int h_out = (index / width_col) % height_col;
    const int b = plane % batch_size;
    const int w_in = w_out * stride_w - pad_w;
    const int h_in = h_out * stride_h - pad_h;

    // Look up the offsets that were applied to this tap.
    const T *offsets = data_offset + (b * deformable_group + group) * 2 *
                                         kernel_h * kernel_w * height_col *
                                         width_col;
    const int tap = i * kernel_w + j;
    const int off_h_idx = ((2 * tap) * height_col + h_out) * width_col + w_out;
    const int off_w_idx =
        ((2 * tap + 1) * height_col + h_out) * width_col + w_out;
    const T offset_h = offsets[off_h_idx];
    const T offset_w = offsets[off_w_idx];
    const T sample_h = h_in + i * dilation_h + offset_h;
    const T sample_w = w_in + j * dilation_w + offset_w;

    const T top_grad = data_col[index];
    const int base_h = (int)sample_h;
    const int base_w = (int)sample_w;

    for (int dy = -2; dy <= 2; dy++) {
      const int y = base_h + dy;
      // Skip rows outside the image or too far away to carry any weight.
      if (y < 0 || y >= height || !(abs(sample_h - y) < 1)) continue;
      for (int dx = -2; dx <= 2; dx++) {
        const int x = base_w + dx;
        if (x < 0 || x >= width || !(abs(sample_w - x) < 1)) continue;

        const int pos = ((b * channels + c) * height + y) * width + x;
        const T weight =
            get_gradient_weight_cpu(sample_h, sample_w, y, x, height, width);
        grad_im[pos] += weight * top_grad;
      }
    }
  }
}

// Gradient of the deformable convolution with respect to the sampling
// offsets.
//
// Each of the `n` flattened positions decodes to (image b, offset channel c,
// output row h, output column w), and grad_offset[index] receives the sum,
// over the column-buffer channels visited by the inner loop, of
//   get_coordinate_weight_cpu(...) * data_col[col_pos].
// Even offset channels hold h offsets and odd ones hold w offsets (see
// data_offset_h_ptr / data_offset_w_ptr); the parity is forwarded as bp_dir.
//
// NOTE(review): `channel_per_deformable_group` is used both as the bound for
// a column-buffer channel index (col_c) and, divided by kernel_h * kernel_w,
// as an image-channel count, so it presumably already includes the
// kernel_h * kernel_w factor — confirm against the (not visible) caller.
template <typename T>
void deformable_col2im_coord_cpu_kernel(
    const int n, const T *data_col, const T *data_im, const T *data_offset,
    const int channels, const int height, const int width, const int kernel_h,
    const int kernel_w, const int pad_h, const int pad_w, const int stride_h,
    const int stride_w, const int dilation_h, const int dilation_w,
    const int channel_per_deformable_group, const int batch_size,
    const int offset_channels, const int deformable_group, const int height_col,
    const int width_col, T *grad_offset) {
  for (int index = 0; index < n; index++) {
    T val = 0;
    // Unflatten the position within grad_offset.
    int w = index % width_col;
    int h = (index / width_col) % height_col;
    int c = (index / width_col / height_col) % offset_channels;
    int b = (index / width_col / height_col) / offset_channels;
    // compute the start and end of the output

    // Each deformable group owns 2 * kernel_h * kernel_w offset channels.
    const int deformable_group_index = c / (2 * kernel_h * kernel_w);
    const int col_step = kernel_h * kernel_w;
    // Counts loop iterations; used below to advance one image channel per
    // iteration.
    int cnt = 0;
    // Start of this group's channels in the column buffer.
    const T *data_col_ptr = data_col + deformable_group_index *
                                           channel_per_deformable_group *
                                           batch_size * width_col * height_col;
    // Start of this (image, group)'s channels in the input image.
    const T *data_im_ptr =
        data_im + (b * deformable_group + deformable_group_index) *
                      channel_per_deformable_group / kernel_h / kernel_w *
                      height * width;
    // Start of this (image, group)'s block of offsets.
    const T *data_offset_ptr =
        data_offset + (b * deformable_group + deformable_group_index) * 2 *
                          kernel_h * kernel_w * height_col * width_col;

    // Offset channel index relative to the start of its group.
    const int offset_c = c - deformable_group_index * 2 * kernel_h * kernel_w;

    // offset_c / 2 is the kernel tap; stepping by col_step visits that same
    // tap in successive groups of kernel_h * kernel_w column channels.
    for (int col_c = (offset_c / 2); col_c < channel_per_deformable_group;
         col_c += col_step) {
      const int col_pos =
          (((col_c * batch_size + b) * height_col) + h) * width_col + w;
      // 0 for an h-offset channel, 1 for a w-offset channel.
      const int bp_dir = offset_c % 2;

      // Recover the kernel tap (i, j) and output position from col_pos.
      int j = (col_pos / width_col / height_col / batch_size) % kernel_w;
      int i =
          (col_pos / width_col / height_col / batch_size / kernel_w) % kernel_h;
      int w_out = col_pos % width_col;
      int h_out = (col_pos / width_col) % height_col;
      int w_in = w_out * stride_w - pad_w;
      int h_in = h_out * stride_h - pad_h;
      const int data_offset_h_ptr =
          (((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out);
      const int data_offset_w_ptr =
          (((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col +
           w_out);
      const T offset_h = data_offset_ptr[data_offset_h_ptr];
      const T offset_w = data_offset_ptr[data_offset_w_ptr];
      // Displaced sampling position for this tap.
      T inv_h = h_in + i * dilation_h + offset_h;
      T inv_w = w_in + j * dilation_w + offset_w;
      // Out-of-range samples are moved to (-2, -2), which makes
      // get_coordinate_weight_cpu take its early-return path and yield 0.
      if (inv_h <= -1 || inv_w <= -1 || inv_h >= height || inv_w >= width)
        inv_h = inv_w = -2;
      const T weight = get_coordinate_weight_cpu(
          inv_h, inv_w, height, width, data_im_ptr + cnt * height * width,
          width, bp_dir);
      val += weight * data_col_ptr[col_pos];
      cnt += 1;
    }

    grad_offset[index] = val;
  }
}

// Host-side launcher for deformable im2col on the CPU.
//
// Derives the output spatial size from the convolution geometry, computes the
// number of flattened positions (channels * height_col * width_col *
// parallel_imgs) and runs deformable_im2col_cpu_kernel for the scalar type of
// `data_im`. All tensors are accessed through raw data_ptr<scalar_t>().
// NOTE(review): that implies contiguous tensors of matching dtype — confirm
// at the call site.
void deformable_im2col_cpu(Tensor data_im, Tensor data_offset,
                           const int channels, const int height,
                           const int width, const int ksize_h,
                           const int ksize_w, const int pad_h, const int pad_w,
                           const int stride_h, const int stride_w,
                           const int dilation_h, const int dilation_w,
                           const int parallel_imgs, const int deformable_group,
                           Tensor data_col) {
  // Standard convolution output-size formula along one axis.
  const auto conv_out_size = [](int in_size, int pad, int dilation, int ksize,
                                int stride) {
    return (in_size + 2 * pad - (dilation * (ksize - 1) + 1)) / stride + 1;
  };

  const int out_h = conv_out_size(height, pad_h, dilation_h, ksize_h, stride_h);
  const int out_w = conv_out_size(width, pad_w, dilation_w, ksize_w, stride_w);
  const int total_positions = channels * out_h * out_w * parallel_imgs;
  const int group_channels = channels / deformable_group;

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      data_im.scalar_type(), "deformable_im2col_cpu", [&] {
        const scalar_t *im_ptr = data_im.data_ptr<scalar_t>();
        const scalar_t *offset_ptr = data_offset.data_ptr<scalar_t>();
        scalar_t *col_ptr = data_col.data_ptr<scalar_t>();

        deformable_im2col_cpu_kernel<scalar_t>(
            total_positions, im_ptr, offset_ptr, height, width, ksize_h,
            ksize_w, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w,
            group_channels, parallel_imgs, channels, deformable_group, out_h,
            out_w, col_ptr);
      });
}

// CPU entry point for the deformable-convolution col2im step (gradient with
// respect to the input image).
//
// Derives the column height/width from the convolution geometry and forwards
// the raw data pointers of data_col, data_offset and grad_im to
// deformable_col2im_cpu_kernel, instantiated for the scalar type of data_col.
void deformable_col2im_cpu(Tensor data_col, Tensor data_offset,
                           const int channels, const int height,
                           const int width, const int ksize_h,
                           const int ksize_w, const int pad_h, const int pad_w,
                           const int stride_h, const int stride_w,
                           const int dilation_h, const int dilation_w,
                           const int parallel_imgs, const int deformable_group,
                           Tensor grad_im) {
  // todo: make sure parallel_imgs is passed in correctly
  int height_col =
      (height + 2 * pad_h - (dilation_h * (ksize_h - 1) + 1)) / stride_h + 1;
  int width_col =
      (width + 2 * pad_w - (dilation_w * (ksize_w - 1) + 1)) / stride_w + 1;
  // One kernel iteration per element of the column buffer.
  int num_kernels =
      channels * ksize_h * ksize_w * height_col * width_col * parallel_imgs;
  int channel_per_deformable_group = channels / deformable_group;

  // The dispatch label appears in the error raised for unsupported dtypes, so
  // it must name this CPU function rather than a GPU one.
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      data_col.scalar_type(), "deformable_col2im_cpu", ([&] {
        const scalar_t *data_col_ = data_col.data_ptr<scalar_t>();
        const scalar_t *data_offset_ = data_offset.data_ptr<scalar_t>();
        scalar_t *grad_im_ = grad_im.data_ptr<scalar_t>();

        deformable_col2im_cpu_kernel<scalar_t>(
            num_kernels, data_col_, data_offset_, channels, height, width,
            ksize_h, ksize_w, pad_h, pad_w, stride_h, stride_w, dilation_h,
            dilation_w, channel_per_deformable_group, parallel_imgs,
            deformable_group, height_col, width_col, grad_im_);
      }));
}

// CPU entry point for the deformable-convolution offset gradient.
//
// Derives the column height/width from the convolution geometry and forwards
// the raw data pointers of data_col, data_im, data_offset and grad_offset to
// deformable_col2im_coord_cpu_kernel, instantiated for the scalar type of
// data_col.
void deformable_col2im_coord_cpu(
    Tensor data_col, Tensor data_im, Tensor data_offset, const int channels,
    const int height, const int width, const int ksize_h, const int ksize_w,
    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
    const int dilation_h, const int dilation_w, const int parallel_imgs,
    const int deformable_group, Tensor grad_offset) {
  // Extent covered by the dilated kernel along each axis.
  const int span_h = dilation_h * (ksize_h - 1) + 1;
  const int span_w = dilation_w * (ksize_w - 1) + 1;
  const int out_h = (height + 2 * pad_h - span_h) / stride_h + 1;
  const int out_w = (width + 2 * pad_w - span_w) / stride_w + 1;

  // One kernel iteration per offset element: two values (h and w) per kernel
  // tap, per deformable group, per output location, per image.
  const int work_items =
      out_h * out_w * 2 * ksize_h * ksize_w * deformable_group * parallel_imgs;
  // Here the per-group channel count is measured in column-buffer channels,
  // i.e. it includes the ksize_h * ksize_w factor.
  const int group_col_channels =
      channels * ksize_h * ksize_w / deformable_group;

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      data_col.scalar_type(), "deformable_col2im_coord_cpu", ([&] {
        deformable_col2im_coord_cpu_kernel<scalar_t>(
            work_items, data_col.data_ptr<scalar_t>(),
            data_im.data_ptr<scalar_t>(), data_offset.data_ptr<scalar_t>(),
            channels, height, width, ksize_h, ksize_w, pad_h, pad_w, stride_h,
            stride_w, dilation_h, dilation_w, group_col_channels,
            parallel_imgs, 2 * ksize_h * ksize_w * deformable_group,
            deformable_group, out_h, out_w, grad_offset.data_ptr<scalar_t>());
      }));
}

// Forward declarations of the `*_impl` entry points that the CPU functions in
// this file are registered against below. Their definitions are not in this
// file. Each signature matches the corresponding `*_cpu` function
// parameter-for-parameter.

// Declaration paired with deformable_im2col_cpu.
void deformable_im2col_impl(Tensor data_im, Tensor data_offset,
                            const int channels, const int height,
                            const int width, const int ksize_h,
                            const int ksize_w, const int pad_h, const int pad_w,
                            const int stride_h, const int stride_w,
                            const int dilation_h, const int dilation_w,
                            const int parallel_imgs, const int deformable_group,
                            Tensor data_col);

// Declaration paired with deformable_col2im_cpu.
void deformable_col2im_impl(Tensor data_col, Tensor data_offset,
                            const int channels, const int height,
                            const int width, const int ksize_h,
                            const int ksize_w, const int pad_h, const int pad_w,
                            const int stride_h, const int stride_w,
                            const int dilation_h, const int dilation_w,
                            const int parallel_imgs, const int deformable_group,
                            Tensor grad_im);

// Declaration paired with deformable_col2im_coord_cpu.
void deformable_col2im_coord_impl(
    Tensor data_col, Tensor data_im, Tensor data_offset, const int channels,
    const int height, const int width, const int ksize_h, const int ksize_w,
    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
    const int dilation_h, const int dilation_w, const int parallel_imgs,
    const int deformable_group, Tensor grad_offset);

// Associate each `*_impl` entry point with its CPU function.
// NOTE(review): REGISTER_DEVICE_IMPL is defined in pytorch_device_registry.hpp
// (not visible here); presumably it makes the `*_impl` dispatcher route CPU
// tensors to the named function -- confirm against that header.
REGISTER_DEVICE_IMPL(deformable_im2col_impl, CPU, deformable_im2col_cpu);
REGISTER_DEVICE_IMPL(deformable_col2im_impl, CPU, deformable_col2im_cpu);
REGISTER_DEVICE_IMPL(deformable_col2im_coord_impl, CPU,
                     deformable_col2im_coord_cpu);


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cpu/modulated_deform_conv.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Bilinearly samples a single-channel image at the fractional position (h, w).
//
//   input      - pointer to the first element of the image plane
//   data_width - row stride of `input`, in elements
//   height     - number of valid rows
//   width      - number of valid columns
//   h, w       - sampling coordinates (row, column)
//
// Any of the four neighbouring pixels that lies outside the image (row or
// column below 0, or a "high" neighbour beyond the last row/column)
// contributes 0. Returns the weighted sum of the four neighbours.
template <typename T>
T dmcn_im2col_bilinear_cpu(const T *input, const int data_width,
                           const int height, const int width, T h, T w) {
  const int top = floorf(h);
  const int left = floorf(w);
  const int bottom = top + 1;
  const int right = left + 1;

  // Fractional distance from the top-left neighbour and its complement.
  const T frac_h = h - top;
  const T frac_w = w - left;
  const T rest_h = 1 - frac_h;
  const T rest_w = 1 - frac_w;

  const bool top_ok = top >= 0;
  const bool left_ok = left >= 0;
  const bool bottom_ok = bottom <= height - 1;
  const bool right_ok = right <= width - 1;

  const T top_left = (top_ok && left_ok) ? input[top * data_width + left]
                                         : static_cast<T>(0);
  const T top_right = (top_ok && right_ok) ? input[top * data_width + right]
                                           : static_cast<T>(0);
  const T bottom_left = (bottom_ok && left_ok)
                            ? input[bottom * data_width + left]
                            : static_cast<T>(0);
  const T bottom_right = (bottom_ok && right_ok)
                             ? input[bottom * data_width + right]
                             : static_cast<T>(0);

  const T weight_tl = rest_h * rest_w;
  const T weight_tr = rest_h * frac_w;
  const T weight_bl = frac_h * rest_w;
  const T weight_br = frac_h * frac_w;

  return (weight_tl * top_left + weight_tr * top_right +
          weight_bl * bottom_left + weight_br * bottom_right);
}

// Returns the bilinear interpolation weight that the integer pixel (h, w)
// receives from a sample taken at the fractional position
// (argmax_h, argmax_w).
//
// The result is 0 when the sampling position is outside the open range
// (-1, height) x (-1, width), or when (h, w) is not one of the four integer
// neighbours of the sampling position.
template <typename T>
T dmcn_get_gradient_weight_cpu(T argmax_h, T argmax_w, const int h, const int w,
                               const int height, const int width) {
  const bool outside = argmax_h <= -1 || argmax_h >= height ||
                       argmax_w <= -1 || argmax_w >= width;
  if (outside) {
    return 0;
  }

  const int row_low = floorf(argmax_h);
  const int col_low = floorf(argmax_w);
  const int row_high = row_low + 1;
  const int col_high = col_low + 1;

  // (h, w) must coincide with one of the four neighbours to get any weight.
  if (h != row_low && h != row_high) {
    return 0;
  }
  if (w != col_low && w != col_high) {
    return 0;
  }

  // The weight factorises into a row term and a column term.
  const T row_weight =
      (h == row_low) ? (h + 1 - argmax_h) : (argmax_h + 1 - h);
  const T col_weight =
      (w == col_low) ? (w + 1 - argmax_w) : (argmax_w + 1 - w);
  return row_weight * col_weight;
}

// Returns the partial derivative of the bilinear sample of `im_data` at
// (argmax_h, argmax_w) with respect to one sampling coordinate.
//
//   bp_dir == 0 - derivative with respect to the row coordinate (argmax_h)
//   bp_dir == 1 - derivative with respect to the column coordinate (argmax_w)
//   otherwise   - 0
//
// `data_width` is the row stride of `im_data`. Neighbours outside the image
// are skipped, and a sampling position outside the open range
// (-1, height) x (-1, width) yields 0.
template <typename T>
T dmcn_get_coordinate_weight_cpu(T argmax_h, T argmax_w, const int height,
                                 const int width, const T *im_data,
                                 const int data_width, const int bp_dir) {
  const bool outside = argmax_h <= -1 || argmax_h >= height ||
                       argmax_w <= -1 || argmax_w >= width;
  if (outside) {
    return 0;
  }

  const int row_low = floorf(argmax_h);
  const int col_low = floorf(argmax_w);
  const int row_high = row_low + 1;
  const int col_high = col_low + 1;

  // Which of the four neighbours are inside the image.
  const bool has_row_low = row_low >= 0;
  const bool has_col_low = col_low >= 0;
  const bool has_row_high = row_high <= height - 1;
  const bool has_col_high = col_high <= width - 1;

  T grad = 0;

  switch (bp_dir) {
    case 0:
      // Moving down: top-row neighbours lose weight, bottom-row ones gain it.
      if (has_row_low && has_col_low)
        grad += -1 * (col_low + 1 - argmax_w) *
                im_data[row_low * data_width + col_low];
      if (has_row_low && has_col_high)
        grad += -1 * (argmax_w - col_low) *
                im_data[row_low * data_width + col_high];
      if (has_row_high && has_col_low)
        grad += (col_low + 1 - argmax_w) *
                im_data[row_high * data_width + col_low];
      if (has_row_high && has_col_high)
        grad += (argmax_w - col_low) *
                im_data[row_high * data_width + col_high];
      break;
    case 1:
      // Moving right: left-column neighbours lose weight, right ones gain it.
      if (has_row_low && has_col_low)
        grad += -1 * (row_low + 1 - argmax_h) *
                im_data[row_low * data_width + col_low];
      if (has_row_low && has_col_high)
        grad += (row_low + 1 - argmax_h) *
                im_data[row_low * data_width + col_high];
      if (has_row_high && has_col_low)
        grad += -1 * (argmax_h - row_low) *
                im_data[row_high * data_width + col_low];
      if (has_row_high && has_col_high)
        grad += (argmax_h - row_low) *
                im_data[row_high * data_width + col_high];
      break;
    default:
      break;
  }

  return grad;
}

// Builds the column buffer for modulated deformable convolution.
//
// `n` work items are processed sequentially. Each item is decoded (fastest
// index first) into an output column, output row, batch index and input
// channel. For that item every kernel tap is visited: the tap's learned
// (offset_h, offset_w) is added to its regular sampling position, the input
// is sampled bilinearly there (0 if the position falls outside the open range
// (-1, height) x (-1, width)), and the sample is multiplied by the tap's mask
// value before being written to data_col.
template <typename T>
void modulated_deformable_im2col_cpu_kernel(
    const int n, const T *data_im, const T *data_offset, const T *data_mask,
    const int height, const int width, const int kernel_h, const int kernel_w,
    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
    const int dilation_h, const int dilation_w,
    const int channel_per_deformable_group, const int batch_size,
    const int num_channels, const int deformable_group, const int height_col,
    const int width_col, T *data_col) {
  // Distance in data_col between two consecutive kernel taps of one item.
  const int tap_stride = batch_size * height_col * width_col;
  const int tap_count = kernel_h * kernel_w;

  for (int index = 0; index < n; ++index) {
    // Decode the flat work-item index.
    const int w_col = index % width_col;
    const int rows_done = index / width_col;
    const int h_col = rows_done % height_col;
    const int planes_done = rows_done / height_col;
    const int b_col = planes_done % batch_size;
    const int c_im = planes_done / batch_size;
    const int c_col = c_im * kernel_h * kernel_w;

    // Offsets and masks are shared by all channels of a deformable group.
    const int group = c_im / channel_per_deformable_group;
    const int group_slot = b_col * deformable_group + group;

    // Top-left corner of the receptive field in input coordinates.
    const int h_start = h_col * stride_h - pad_h;
    const int w_start = w_col * stride_w - pad_w;

    T *col_out =
        data_col +
        ((c_col * batch_size + b_col) * height_col + h_col) * width_col + w_col;
    const T *im_plane =
        data_im + (b_col * num_channels + c_im) * height * width;
    const T *offset_base = data_offset + group_slot * 2 * kernel_h * kernel_w *
                                             height_col * width_col;
    const T *mask_base =
        data_mask + group_slot * kernel_h * kernel_w * height_col * width_col;

    for (int tap = 0; tap < tap_count; ++tap) {
      const int i = tap / kernel_w;
      const int j = tap % kernel_w;

      // Offsets are stored as interleaved (h, w) channel pairs per tap.
      const int offset_h_idx =
          ((2 * tap) * height_col + h_col) * width_col + w_col;
      const int offset_w_idx =
          ((2 * tap + 1) * height_col + h_col) * width_col + w_col;
      const int mask_idx = (tap * height_col + h_col) * width_col + w_col;

      const T offset_h = offset_base[offset_h_idx];
      const T offset_w = offset_base[offset_w_idx];
      const T mask = mask_base[mask_idx];

      const T h_im = h_start + i * dilation_h + offset_h;
      const T w_im = w_start + j * dilation_w + offset_w;

      const bool inside =
          h_im > -1 && w_im > -1 && h_im < height && w_im < width;
      const T sample = inside ? dmcn_im2col_bilinear_cpu(im_plane, width,
                                                         height, width, h_im,
                                                         w_im)
                              : static_cast<T>(0);

      *col_out = sample * mask;
      col_out += tap_stride;
    }
  }
}

// Scatters column-buffer gradients back onto the input image gradient for
// modulated deformable convolution.
//
// `n` work items are processed sequentially, one per element of data_col.
// Each index is decoded (fastest first) into output column, output row,
// batch index, kernel column j, kernel row i and input channel c. The
// element's gradient, scaled by the tap's mask value, is distributed to the
// integer pixels around the tap's (offset-shifted) sampling position using
// bilinear weights.
//
// grad_im is updated with `+=`, so existing contents are accumulated into.
// NOTE(review): presumably the caller zero-initialises grad_im -- confirm.
template <typename T>
void modulated_deformable_col2im_cpu_kernel(
    const int n, const T *data_col, const T *data_offset, const T *data_mask,
    const int channels, const int height, const int width, const int kernel_h,
    const int kernel_w, const int pad_h, const int pad_w, const int stride_h,
    const int stride_w, const int dilation_h, const int dilation_w,
    const int channel_per_deformable_group, const int batch_size,
    const int deformable_group, const int height_col, const int width_col,
    T *grad_im) {
  for (int index = 0; index < n; index++) {
    // Kernel tap (i, j) and input channel c encoded in the slow part of index.
    const int j = (index / width_col / height_col / batch_size) % kernel_w;
    const int i =
        (index / width_col / height_col / batch_size / kernel_w) % kernel_h;
    const int c =
        index / width_col / height_col / batch_size / kernel_w / kernel_h;

    // Offsets and masks are shared by all channels of a deformable group.
    const int deformable_group_index = c / channel_per_deformable_group;

    // Output location and batch index encoded in the fast part of index.
    int w_out = index % width_col;
    int h_out = (index / width_col) % height_col;
    int b = (index / width_col / height_col) % batch_size;
    // Top-left corner of the receptive field in input coordinates.
    int w_in = w_out * stride_w - pad_w;
    int h_in = h_out * stride_h - pad_h;

    // Start of the offset / mask planes for this (batch, deformable group).
    const T *data_offset_ptr =
        data_offset + (b * deformable_group + deformable_group_index) * 2 *
                          kernel_h * kernel_w * height_col * width_col;
    const T *data_mask_ptr =
        data_mask + (b * deformable_group + deformable_group_index) * kernel_h *
                        kernel_w * height_col * width_col;
    // Offsets are stored as interleaved (h, w) channel pairs per tap.
    const int data_offset_h_ptr =
        ((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out;
    const int data_offset_w_ptr =
        ((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out;
    const int data_mask_hw_ptr =
        ((i * kernel_w + j) * height_col + h_out) * width_col + w_out;
    const T offset_h = data_offset_ptr[data_offset_h_ptr];
    const T offset_w = data_offset_ptr[data_offset_w_ptr];
    const T mask = data_mask_ptr[data_mask_hw_ptr];
    // Fractional sampling position of this tap in the input image.
    const T cur_inv_h_data = h_in + i * dilation_h + offset_h;
    const T cur_inv_w_data = w_in + j * dilation_w + offset_w;

    const T cur_top_grad = data_col[index] * mask;
    // Truncated (toward zero) integer position used as the search centre.
    const int cur_h = (int)cur_inv_h_data;
    const int cur_w = (int)cur_inv_w_data;
    // Scan a 5x5 window around the centre; only in-bounds pixels closer than
    // 1 to the sampling position along both axes receive a contribution.
    for (int dy = -2; dy <= 2; dy++) {
      for (int dx = -2; dx <= 2; dx++) {
        if (cur_h + dy >= 0 && cur_h + dy < height && cur_w + dx >= 0 &&
            cur_w + dx < width && abs(cur_inv_h_data - (cur_h + dy)) < 1 &&
            abs(cur_inv_w_data - (cur_w + dx)) < 1) {
          int cur_bottom_grad_pos =
              ((b * channels + c) * height + cur_h + dy) * width + cur_w + dx;
          T weight = dmcn_get_gradient_weight_cpu(cur_inv_h_data,
                                                  cur_inv_w_data, cur_h + dy,
                                                  cur_w + dx, height, width);
          *(grad_im + cur_bottom_grad_pos) += weight * cur_top_grad;
        }
      }
    }
  }
}

// Computes the gradients with respect to the sampling offsets and the
// modulation mask for modulated deformable convolution.
//
// `n` work items are processed sequentially, one per element of grad_offset.
// Each index is decoded (fastest first) into output column w, output row h,
// offset channel c (in [0, offset_channels)) and batch index b. For that
// offset element the kernel walks every column-buffer channel of the
// deformable group that belongs to the same kernel tap and accumulates:
//   val  - column gradient * d(bilinear sample)/d(coordinate) * mask
//   mval - column gradient * bilinear sample (only for in-range positions)
// grad_offset[index] is always written; grad_mask is written only for even
// offset channels, so each mask element is written once per (h, w) tap pair.
//
// Here channel_per_deformable_group is expected in column-buffer channels
// (the data_im pointer computation divides it by kernel_h * kernel_w).
template <typename T>
void modulated_deformable_col2im_coord_cpu_kernel(
    const int n, const T *data_col, const T *data_im, const T *data_offset,
    const T *data_mask, const int channels, const int height, const int width,
    const int kernel_h, const int kernel_w, const int pad_h, const int pad_w,
    const int stride_h, const int stride_w, const int dilation_h,
    const int dilation_w, const int channel_per_deformable_group,
    const int batch_size, const int offset_channels, const int deformable_group,
    const int height_col, const int width_col, T *grad_offset, T *grad_mask) {
  for (int index = 0; index < n; index++) {
    T val = 0, mval = 0;
    // Decode the flat index into (b, c, h, w) of the offset gradient.
    int w = index % width_col;
    int h = (index / width_col) % height_col;
    int c = (index / width_col / height_col) % offset_channels;
    int b = (index / width_col / height_col) / offset_channels;

    // Each deformable group owns 2 * kernel_h * kernel_w offset channels.
    const int deformable_group_index = c / (2 * kernel_h * kernel_w);
    // Column channels of the same tap are kernel_h * kernel_w apart.
    const int col_step = kernel_h * kernel_w;
    // Counts visited column channels, i.e. the input channel within the group.
    int cnt = 0;
    // First column-buffer channel of this deformable group.
    const T *data_col_ptr = data_col + deformable_group_index *
                                           channel_per_deformable_group *
                                           batch_size * width_col * height_col;
    // First input plane of this (batch, deformable group).
    const T *data_im_ptr =
        data_im + (b * deformable_group + deformable_group_index) *
                      channel_per_deformable_group / kernel_h / kernel_w *
                      height * width;
    // Start of the offset / mask planes for this (batch, deformable group).
    const T *data_offset_ptr =
        data_offset + (b * deformable_group + deformable_group_index) * 2 *
                          kernel_h * kernel_w * height_col * width_col;
    const T *data_mask_ptr =
        data_mask + (b * deformable_group + deformable_group_index) * kernel_h *
                        kernel_w * height_col * width_col;

    // Offset channel relative to the start of its deformable group.
    const int offset_c = c - deformable_group_index * 2 * kernel_h * kernel_w;

    // offset_c / 2 is the kernel-tap index; visit that tap in every input
    // channel of the group.
    for (int col_c = (offset_c / 2); col_c < channel_per_deformable_group;
         col_c += col_step) {
      const int col_pos =
          (((col_c * batch_size + b) * height_col) + h) * width_col + w;
      // 0: gradient for the h offset, 1: gradient for the w offset.
      const int bp_dir = offset_c % 2;

      // Recover the kernel tap and output location from the column position.
      int j = (col_pos / width_col / height_col / batch_size) % kernel_w;
      int i =
          (col_pos / width_col / height_col / batch_size / kernel_w) % kernel_h;
      int w_out = col_pos % width_col;
      int h_out = (col_pos / width_col) % height_col;
      int w_in = w_out * stride_w - pad_w;
      int h_in = h_out * stride_h - pad_h;
      const int data_offset_h_ptr =
          (((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out);
      const int data_offset_w_ptr =
          (((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col +
           w_out);
      const int data_mask_hw_ptr =
          (((i * kernel_w + j) * height_col + h_out) * width_col + w_out);
      const T offset_h = data_offset_ptr[data_offset_h_ptr];
      const T offset_w = data_offset_ptr[data_offset_w_ptr];
      const T mask = data_mask_ptr[data_mask_hw_ptr];
      // Fractional sampling position of this tap in the input image.
      T inv_h = h_in + i * dilation_h + offset_h;
      T inv_w = w_in + j * dilation_w + offset_w;
      if (inv_h <= -1 || inv_w <= -1 || inv_h >= height || inv_w >= width)
        // Out of range: force a position for which the coordinate weight
        // below evaluates to 0; the mask gradient gets no contribution.
        inv_h = inv_w = -2;
      else
        mval += data_col_ptr[col_pos] *
                dmcn_im2col_bilinear_cpu(data_im_ptr + cnt * height * width,
                                         width, height, width, inv_h, inv_w);
      const T weight = dmcn_get_coordinate_weight_cpu(
          inv_h, inv_w, height, width, data_im_ptr + cnt * height * width,
          width, bp_dir);
      val += weight * data_col_ptr[col_pos] * mask;
      cnt += 1;
    }
    // Plain assignment (no accumulation) of the offset gradient.
    grad_offset[index] = val;
    if (offset_c % 2 == 0)
      // Write the mask gradient once per tap, from the even (h) offset
      // channel only; the mask index uses the tap number offset_c / 2.
      grad_mask[(((b * deformable_group + deformable_group_index) * kernel_h *
                      kernel_w +
                  offset_c / 2) *
                     height_col +
                 h) *
                    width_col +
                w] = mval;
  }
}

void modulated_deformable_im2col_cpu(
    const Tensor data_im, const Tensor data_offset, const Tensor data_mask,
    const int batch_size, const int channels, const int height_im,
    const int width_im, const int height_col, const int width_col,
    const int kernel_h, const int kernel_w, const int pad_h, const int pad_w,
    const int stride_h, const int stride_w, const int dilation_h,
    const int dilation_w, const int deformable_group, Tensor data_col) {
  // num_axes should be smaller than block size
  const int channel_per_deformable_group = channels / deformable_group;
  const int num_kernels = channels * batch_size * height_col * width_col;

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      data_im.scalar_type(), "modulated_deformable_im2col_cpu", ([&] {
        const scalar_t *data_im_ = data_im.data_ptr<scalar_t>();
        const scalar_t *data_offset_ = data_offset.data_ptr<scalar_t>();
        const scalar_t *data_mask_ = data_mask.data_ptr<scalar_t>();
        scalar_t *data_col_ = data_col.data_ptr<scalar_t>();

        modulated_deformable_im2col_cpu_kernel(
            num_kernels, data_im_, data_offset_, data_mask_, height_im,
            width_im, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w,
            dilation_h, dilation_w, channel_per_deformable_group, batch_size,
            channels, deformable_group, height_col, width_col, data_col_);
      }));
}

void modulated_deformable_col2im_cpu(
    const Tensor data_col, const Tensor data_offset, const Tensor data_mask,
    const int batch_size, const int channels, const int height_im,
    const int width_im, const int height_col, const int width_col,
    const int kernel_h, const int kernel_w, const int pad_h, const int pad_w,
    const int stride_h, const int stride_w, const int dilation_h,
    const int dilation_w, const int deformable_group, Tensor grad_im) {
  const int channel_per_deformable_group = channels / deformable_group;
  const int num_kernels =
      channels * kernel_h * kernel_w * batch_size * height_col * width_col;

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      data_col.scalar_type(), "modulated_deformable_col2im_cpu", ([&] {
        const scalar_t *data_col_ = data_col.data_ptr<scalar_t>();
        const scalar_t *data_offset_ = data_offset.data_ptr<scalar_t>();
        const scalar_t *data_mask_ = data_mask.data_ptr<scalar_t>();
        scalar_t *grad_im_ = grad_im.data_ptr<scalar_t>();

        modulated_deformable_col2im_cpu_kernel(
            num_kernels, data_col_, data_offset_, data_mask_, channels,
            height_im, width_im, kernel_h, kernel_w, pad_h, pad_w, stride_h,
            stride_w, dilation_h, dilation_w, channel_per_deformable_group,
            batch_size, deformable_group, height_col, width_col, grad_im_);
      }));
}

void modulated_deformable_col2im_coord_cpu(
    const Tensor data_col, const Tensor data_im, const Tensor data_offset,
    const Tensor data_mask, const int batch_size, const int channels,
    const int height_im, const int width_im, const int height_col,
    const int width_col, const int kernel_h, const int kernel_w,
    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
    const int dilation_h, const int dilation_w, const int deformable_group,
    Tensor grad_offset, Tensor grad_mask) {
  const int num_kernels = batch_size * height_col * width_col * 2 * kernel_h *
                          kernel_w * deformable_group;
  const int channel_per_deformable_group =
      channels * kernel_h * kernel_w / deformable_group;

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      data_col.scalar_type(), "modulated_deformable_col2im_coord_cpu", ([&] {
        const scalar_t *data_col_ = data_col.data_ptr<scalar_t>();
        const scalar_t *data_im_ = data_im.data_ptr<scalar_t>();
        const scalar_t *data_offset_ = data_offset.data_ptr<scalar_t>();
        const scalar_t *data_mask_ = data_mask.data_ptr<scalar_t>();
        scalar_t *grad_offset_ = grad_offset.data_ptr<scalar_t>();
        scalar_t *grad_mask_ = grad_mask.data_ptr<scalar_t>();

        modulated_deformable_col2im_coord_cpu_kernel(
            num_kernels, data_col_, data_im_, data_offset_, data_mask_,
            channels, height_im, width_im, kernel_h, kernel_w, pad_h, pad_w,
            stride_h, stride_w, dilation_h, dilation_w,
            channel_per_deformable_group, batch_size,
            2 * kernel_h * kernel_w * deformable_group, deformable_group,
            height_col, width_col, grad_offset_, grad_mask_);
      }));
}

// Forward declarations of the `*_impl` entry points that the CPU functions in
// this file are registered against below. Their definitions are not in this
// file. Each signature matches the corresponding `*_cpu` function
// parameter-for-parameter.

// Declaration paired with modulated_deformable_im2col_cpu.
void modulated_deformable_im2col_impl(
    const Tensor data_im, const Tensor data_offset, const Tensor data_mask,
    const int batch_size, const int channels, const int height_im,
    const int width_im, const int height_col, const int width_col,
    const int kernel_h, const int kernel_w, const int pad_h, const int pad_w,
    const int stride_h, const int stride_w, const int dilation_h,
    const int dilation_w, const int deformable_group, Tensor data_col);

// Declaration paired with modulated_deformable_col2im_cpu.
void modulated_deformable_col2im_impl(
    const Tensor data_col, const Tensor data_offset, const Tensor data_mask,
    const int batch_size, const int channels, const int height_im,
    const int width_im, const int height_col, const int width_col,
    const int kernel_h, const int kernel_w, const int pad_h, const int pad_w,
    const int stride_h, const int stride_w, const int dilation_h,
    const int dilation_w, const int deformable_group, Tensor grad_im);

// Declaration paired with modulated_deformable_col2im_coord_cpu.
void modulated_deformable_col2im_coord_impl(
    const Tensor data_col, const Tensor data_im, const Tensor data_offset,
    const Tensor data_mask, const int batch_size, const int channels,
    const int height_im, const int width_im, const int height_col,
    const int width_col, const int kernel_h, const int kernel_w,
    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
    const int dilation_h, const int dilation_w, const int deformable_group,
    Tensor grad_offset, Tensor grad_mask);

// Associate each `*_impl` entry point with its CPU function.
// NOTE(review): REGISTER_DEVICE_IMPL is defined in pytorch_device_registry.hpp
// (not visible here); presumably it makes the `*_impl` dispatcher route CPU
// tensors to the named function -- confirm against that header.
REGISTER_DEVICE_IMPL(modulated_deformable_im2col_impl, CPU,
                     modulated_deformable_im2col_cpu);
REGISTER_DEVICE_IMPL(modulated_deformable_col2im_impl, CPU,
                     modulated_deformable_col2im_cpu);
REGISTER_DEVICE_IMPL(modulated_deformable_col2im_coord_impl, CPU,
                     modulated_deformable_col2im_coord_cpu);


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cpu/nms.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Greedy non-maximum suppression on the CPU.
//
// Columns 0..3 of `boxes` are read as x1, y1, x2, y2 through float pointers.
// Boxes are visited in descending score order; a box still alive suppresses
// every later-ranked box whose IoU with it is strictly greater than
// `iou_threshold`. `offset` is added to each width/height, both for the box
// areas and for the intersection.
//
// Returns a tensor with the original indices of the surviving boxes in
// descending score order, or an empty int64 tensor when `boxes` has no
// elements.
Tensor nms_cpu(Tensor boxes, Tensor scores, float iou_threshold, int offset) {
  if (boxes.numel() == 0) {
    return at::empty({0}, boxes.options().dtype(at::kLong));
  }

  // Per-coordinate contiguous columns so they can be indexed as flat arrays.
  auto left_t = boxes.select(1, 0).contiguous();
  auto top_t = boxes.select(1, 1).contiguous();
  auto right_t = boxes.select(1, 2).contiguous();
  auto bottom_t = boxes.select(1, 3).contiguous();

  Tensor area_t = (right_t - left_t + offset) * (bottom_t - top_t + offset);

  // ranked[k] is the original index of the k-th highest scoring box.
  auto ranked_t = std::get<1>(scores.sort(0, /* descending=*/true));

  auto count = boxes.size(0);
  // alive[k] refers to the k-th ranked box, not to the original index.
  Tensor alive_t = at::ones({count}, boxes.options().dtype(at::kBool));

  bool *alive = alive_t.data_ptr<bool>();
  int64_t *ranked = ranked_t.data_ptr<int64_t>();
  float *left = left_t.data_ptr<float>();
  float *top = top_t.data_ptr<float>();
  float *right = right_t.data_ptr<float>();
  float *bottom = bottom_t.data_ptr<float>();
  float *area = area_t.data_ptr<float>();

  for (int64_t rank_a = 0; rank_a < count; ++rank_a) {
    if (!alive[rank_a]) continue;

    const int64_t a = ranked[rank_a];
    const float a_left = left[a];
    const float a_top = top[a];
    const float a_right = right[a];
    const float a_bottom = bottom[a];
    const float a_area = area[a];

    for (int64_t rank_b = rank_a + 1; rank_b < count; ++rank_b) {
      if (!alive[rank_b]) continue;

      const int64_t b = ranked[rank_b];
      const float inter_left = std::max(a_left, left[b]);
      const float inter_top = std::max(a_top, top[b]);
      const float inter_right = std::min(a_right, right[b]);
      const float inter_bottom = std::min(a_bottom, bottom[b]);

      const float inter_w = std::max(0.f, inter_right - inter_left + offset);
      const float inter_h = std::max(0.f, inter_bottom - inter_top + offset);
      const float inter = inter_w * inter_h;
      const float iou = inter / (a_area + area[b] - inter);
      if (iou > iou_threshold) {
        alive[rank_b] = false;
      }
    }
  }
  return ranked_t.masked_select(alive_t);
}

// Forward declaration of the nms entry point (defined outside this file),
// followed by the association of nms_cpu with it for the CPU device.
// NOTE(review): REGISTER_DEVICE_IMPL comes from pytorch_device_registry.hpp
// (not visible here); presumably nms_impl dispatches on the tensor's device.
Tensor nms_impl(Tensor boxes, Tensor scores, float iou_threshold, int offset);
REGISTER_DEVICE_IMPL(nms_impl, CPU, nms_cpu);

// Soft non-maximum suppression on the CPU.
//
// Works as an in-place selection sort over local per-coordinate arrays: at
// step i the highest-scoring remaining box is moved to position i and written
// to row i of `dets` as (x1, y1, x2, y2, score); the scores of all remaining
// boxes are then decayed according to their IoU with it, and any box whose
// score falls below `min_score` is dropped by overwriting it with the current
// last box and shrinking the active count.
//
//   method == 0 - weight 0 when IoU >= iou_threshold (hard suppression)
//   method == 1 - weight (1 - IoU) when IoU >= iou_threshold
//   method == 2 - weight exp(-IoU^2 / sigma)
//   otherwise   - weight 1 (scores unchanged)
//
// `offset` is added to each width/height for both areas and intersections.
// Coordinates, scores and `dets` are accessed through float pointers.
// NOTE(review): `dets` is written at [i * 5 + k], so it presumably must be a
// contiguous float tensor with at least boxes.size(0) rows of 5 -- confirm
// against the caller.
//
// Returns the original indices of the kept boxes (in selection order), or an
// empty int64 tensor when `boxes` has no elements.
Tensor softnms_cpu(Tensor boxes, Tensor scores, Tensor dets,
                   float iou_threshold, float sigma, float min_score,
                   int method, int offset) {
  if (boxes.numel() == 0) {
    return at::empty({0}, boxes.options().dtype(at::kLong));
  }

  auto x1_t = boxes.select(1, 0).contiguous();
  auto y1_t = boxes.select(1, 1).contiguous();
  auto x2_t = boxes.select(1, 2).contiguous();
  auto y2_t = boxes.select(1, 3).contiguous();
  // Scores are decayed in place below, so operate on a private copy.
  auto scores_t = scores.clone();

  Tensor areas_t = (x2_t - x1_t + offset) * (y2_t - y1_t + offset);

  // nboxes is the number of still-active boxes; it shrinks as boxes are
  // discarded.
  auto nboxes = boxes.size(0);
  auto x1 = x1_t.data_ptr<float>();
  auto y1 = y1_t.data_ptr<float>();
  auto x2 = x2_t.data_ptr<float>();
  auto y2 = y2_t.data_ptr<float>();
  auto sc = scores_t.data_ptr<float>();
  auto areas = areas_t.data_ptr<float>();
  auto de = dets.data_ptr<float>();

  int64_t pos = 0;
  // inds tracks the original index of whatever box currently sits in each
  // slot; it is permuted together with the coordinate arrays.
  Tensor inds_t = at::arange(nboxes, boxes.options().dtype(at::kLong));
  auto inds = inds_t.data_ptr<int64_t>();

  for (int64_t i = 0; i < nboxes; i++) {
    auto max_score = sc[i];
    auto max_pos = i;

    pos = i + 1;
    // Find the highest-scoring box among slots i .. nboxes-1.
    while (pos < nboxes) {
      if (max_score < sc[pos]) {
        max_score = sc[pos];
        max_pos = pos;
      }
      pos = pos + 1;
    }
    // Record the winner in dets row i and swap it into slot i.
    auto ix1 = de[i * 5 + 0] = x1[max_pos];
    auto iy1 = de[i * 5 + 1] = y1[max_pos];
    auto ix2 = de[i * 5 + 2] = x2[max_pos];
    auto iy2 = de[i * 5 + 3] = y2[max_pos];
    auto iscore = de[i * 5 + 4] = sc[max_pos];
    auto iarea = areas[max_pos];
    auto iind = inds[max_pos];
    x1[max_pos] = x1[i];
    y1[max_pos] = y1[i];
    x2[max_pos] = x2[i];
    y2[max_pos] = y2[i];
    sc[max_pos] = sc[i];
    areas[max_pos] = areas[i];
    inds[max_pos] = inds[i];
    x1[i] = ix1;
    y1[i] = iy1;
    x2[i] = ix2;
    y2[i] = iy2;
    sc[i] = iscore;
    areas[i] = iarea;
    inds[i] = iind;

    // Decay the scores of all remaining boxes against the selected one.
    pos = i + 1;
    while (pos < nboxes) {
      auto xx1 = std::max(ix1, x1[pos]);
      auto yy1 = std::max(iy1, y1[pos]);
      auto xx2 = std::min(ix2, x2[pos]);
      auto yy2 = std::min(iy2, y2[pos]);

      auto w = std::max(0.f, xx2 - xx1 + offset);
      auto h = std::max(0.f, yy2 - yy1 + offset);
      auto inter = w * h;
      auto ovr = inter / (iarea + areas[pos] - inter);

      float weight = 1.;
      if (method == 0) {
        if (ovr >= iou_threshold) weight = 0;
      } else if (method == 1) {
        if (ovr >= iou_threshold) weight = 1 - ovr;
      } else if (method == 2) {
        weight = std::exp(-(ovr * ovr) / sigma);
      }
      sc[pos] *= weight;
      // If the decayed score falls below min_score, discard the box by
      // overwriting it with the last active box and shrinking nboxes.
      if (sc[pos] < min_score) {
        x1[pos] = x1[nboxes - 1];
        y1[pos] = y1[nboxes - 1];
        x2[pos] = x2[nboxes - 1];
        y2[pos] = y2[nboxes - 1];
        sc[pos] = sc[nboxes - 1];
        areas[pos] = areas[nboxes - 1];
        inds[pos] = inds[nboxes - 1];
        nboxes = nboxes - 1;
        // Step back so the box just moved into this slot is examined too.
        pos = pos - 1;
      }
      pos = pos + 1;
    }
  }
  // The first nboxes slots of inds now hold the kept boxes' original indices.
  return inds_t.slice(0, 0, nboxes);
}

// Forward declaration of the soft-NMS entry point (defined outside this
// file), followed by the association of softnms_cpu with it for the CPU
// device.
// NOTE(review): REGISTER_DEVICE_IMPL comes from pytorch_device_registry.hpp
// (not visible here); presumably softnms_impl dispatches on the tensor's
// device.
Tensor softnms_impl(Tensor boxes, Tensor scores, Tensor dets,
                    float iou_threshold, float sigma, float min_score,
                    int method, int offset);
REGISTER_DEVICE_IMPL(softnms_impl, CPU, softnms_cpu);

// Greedy NMS that also reports which boxes each kept box suppressed.
//
// Columns 0..3 of `dets` are read as x1, y1, x2, y2 and column 4 as the
// score, all through float pointers. Boxes are visited in descending score
// order; each not-yet-suppressed box starts a new group and suppresses every
// later-ranked, not-yet-suppressed box whose IoU with it is greater than or
// equal to `iou_threshold`.
//
// Returns one vector per kept box: the kept box's original index first,
// followed by the original indices of the boxes it suppressed, in descending
// score order.
std::vector<std::vector<int> > nms_match_cpu(Tensor dets, float iou_threshold) {
  auto left_t = dets.select(1, 0).contiguous();
  auto top_t = dets.select(1, 1).contiguous();
  auto right_t = dets.select(1, 2).contiguous();
  auto bottom_t = dets.select(1, 3).contiguous();
  auto score_t = dets.select(1, 4).contiguous();

  at::Tensor area_t = (right_t - left_t) * (bottom_t - top_t);

  // ranked[k] is the original index of the k-th highest scoring box.
  auto ranked_t = std::get<1>(score_t.sort(0, /* descending=*/true));

  auto count = dets.size(0);
  // Suppression flags are indexed by original box index.
  at::Tensor dropped_t =
      at::zeros({count}, dets.options().dtype(at::kByte).device(at::kCPU));

  uint8_t *dropped = dropped_t.data_ptr<uint8_t>();
  int64_t *ranked = ranked_t.data_ptr<int64_t>();
  float *left = left_t.data_ptr<float>();
  float *top = top_t.data_ptr<float>();
  float *right = right_t.data_ptr<float>();
  float *bottom = bottom_t.data_ptr<float>();
  float *area = area_t.data_ptr<float>();

  std::vector<std::vector<int> > groups;

  for (int64_t rank_a = 0; rank_a < count; ++rank_a) {
    const int64_t a = ranked[rank_a];
    if (dropped[a] == 1) continue;

    // The group leads with the kept box itself.
    std::vector<int> group(1, static_cast<int>(a));

    const float a_left = left[a];
    const float a_top = top[a];
    const float a_right = right[a];
    const float a_bottom = bottom[a];
    const float a_area = area[a];

    for (int64_t rank_b = rank_a + 1; rank_b < count; ++rank_b) {
      const int64_t b = ranked[rank_b];
      if (dropped[b] == 1) continue;

      const float inter_left = std::max(a_left, left[b]);
      const float inter_top = std::max(a_top, top[b]);
      const float inter_right = std::min(a_right, right[b]);
      const float inter_bottom = std::min(a_bottom, bottom[b]);

      const float inter_w =
          std::max(static_cast<float>(0), inter_right - inter_left);
      const float inter_h =
          std::max(static_cast<float>(0), inter_bottom - inter_top);
      const float inter = inter_w * inter_h;
      const float iou = inter / (a_area + area[b] - inter);
      if (iou >= iou_threshold) {
        dropped[b] = 1;
        group.push_back(static_cast<int>(b));
      }
    }
    groups.push_back(group);
  }
  return groups;
}

// Forward declaration of the nms_match entry point (defined outside this
// file), followed by the association of nms_match_cpu with it for the CPU
// device.
// NOTE(review): REGISTER_DEVICE_IMPL comes from pytorch_device_registry.hpp
// (not visible here); presumably nms_match_impl dispatches on the tensor's
// device.
std::vector<std::vector<int> > nms_match_impl(Tensor dets, float iou_threshold);
REGISTER_DEVICE_IMPL(nms_match_impl, CPU, nms_match_cpu);


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cpu/nms_rotated.cpp
================================================
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
// modified from
// https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/csrc/nms_rotated/nms_rotated_cpu.cpp
#include "box_iou_rotated_utils.hpp"
#include "pytorch_cpp_helper.hpp"

template <typename scalar_t>
Tensor nms_rotated_cpu_kernel(const Tensor dets, const Tensor scores,
                              const float iou_threshold) {
  // Greedy non-maximum suppression for rotated boxes on the CPU.
  //
  // nms_rotated_cpu_kernel is modified from torchvision's nms_cpu_kernel,
  // however, the code in this function is much shorter because
  // we delegate the IoU computation for rotated boxes to
  // the single_box_iou_rotated function in box_iou_rotated_utils.hpp
  //
  // dets:          2-D CPU tensor, one box per row. Only the pointer to the
  //                start of each row is handed to single_box_iou_rotated
  //                (presumably it reads cx, cy, w, h, angle — confirm against
  //                box_iou_rotated_utils.hpp).
  // scores:        CPU tensor of the same dtype as dets, sorted descending
  //                along dim 0 (assumed to hold one score per row of dets).
  // iou_threshold: a box is suppressed when its IoU with an already kept,
  //                higher scoring box is >= this value.
  //
  // Returns a kLong tensor with the row indices of the kept boxes, ordered by
  // descending score.
  AT_ASSERTM(!dets.is_cuda(), "dets must be a CPU tensor");
  AT_ASSERTM(!scores.is_cuda(), "scores must be a CPU tensor");
  AT_ASSERTM(dets.scalar_type() == scores.scalar_type(),
             "dets should have the same type as scores");

  if (dets.numel() == 0) {
    return at::empty({0}, dets.options().dtype(at::kLong));
  }
  AT_ASSERTM(dets.dim() == 2, "dets must be a 2-D tensor");

  auto order_t = std::get<1>(scores.sort(0, /* descending=*/true));

  auto ndets = dets.size(0);
  Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte));
  Tensor keep_t = at::zeros({ndets}, dets.options().dtype(at::kLong));

  auto suppressed = suppressed_t.data_ptr<uint8_t>();
  auto keep = keep_t.data_ptr<int64_t>();
  auto order = order_t.data_ptr<int64_t>();

  // Take one base pointer into a contiguous copy instead of materialising a
  // temporary `dets[i]` / `dets[j]` Tensor for every pair of boxes. This also
  // guarantees that the values of a row are adjacent in memory, which the
  // raw-pointer IoU routine relies on.
  const Tensor dets_c = dets.contiguous();
  scalar_t* boxes = dets_c.data_ptr<scalar_t>();
  const int64_t row_len = dets_c.size(1);

  int64_t num_to_keep = 0;

  for (int64_t _i = 0; _i < ndets; _i++) {
    auto i = order[_i];
    if (suppressed[i] == 1) {
      continue;
    }

    keep[num_to_keep++] = i;
    scalar_t* box_i = boxes + i * row_len;

    // Only lower scoring boxes (later in `order`) can be suppressed by i.
    for (int64_t _j = _i + 1; _j < ndets; _j++) {
      auto j = order[_j];
      if (suppressed[j] == 1) {
        continue;
      }

      auto ovr = single_box_iou_rotated<scalar_t>(box_i, boxes + j * row_len,
                                                  0);
      if (ovr >= iou_threshold) {
        suppressed[j] = 1;
      }
    }
  }
  // keep_t was allocated for the worst case; return only the filled prefix.
  return keep_t.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep);
}

Tensor nms_rotated_cpu(const Tensor dets, const Tensor scores,
                       const float iou_threshold) {
  // Dtype dispatcher: selects the nms_rotated_cpu_kernel instantiation that
  // matches the floating point type of `dets` and forwards all arguments.
  // The empty tensor below is only a placeholder that the dispatched lambda
  // overwrites with the kernel result.
  Tensor kept_indices = at::empty({0}, dets.options());
  AT_DISPATCH_FLOATING_TYPES(dets.scalar_type(), "nms_rotated", [&] {
    kept_indices =
        nms_rotated_cpu_kernel<scalar_t>(dets, scores, iou_threshold);
  });
  return kept_indices;
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cpu/pixel_group.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
// It is modified from https://github.com/WenmuZhou/PAN.pytorch

#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Builds one record per label from a row-major height x width label map and a
// score map of the same layout.
//
// Record layout for label l:
//   [0]   mean score of the pixels carrying label l (0 if there are none)
//   [1]   number of pixels carrying label l
//   [2..] the (x, y) coordinates of those pixels, appended in scan order
//
// Pixels whose label is <= 0 are ignored. Every positive label is used
// directly as an index, so it must be smaller than label_num.
std::vector<std::vector<float>> estimate_confidence(int32_t* label,
                                                    float* score, int label_num,
                                                    int height, int width) {
  std::vector<std::vector<float>> records;
  for (int n = 0; n < label_num; ++n) {
    // slot 0: running score sum (later the mean), slot 1: pixel count
    records.push_back(std::vector<float>{0.0f, 0.0f});
  }

  for (int row = 0; row < height; ++row) {
    for (int col = 0; col < width; ++col) {
      const int offset = row * width + col;
      const auto id = label[offset];
      if (id <= 0) {
        continue;
      }
      std::vector<float>& rec = records[id];
      rec.push_back(static_cast<float>(col));
      rec.push_back(static_cast<float>(row));
      rec[0] += score[offset];
      rec[1] += 1;
    }
  }

  // Turn the accumulated score sums into means; empty labels keep 0.
  for (auto& rec : records) {
    if (rec[1] > 0) {
      rec[0] /= rec[1];
    }
  }
  return records;
}
// Grows every kernel region outwards over the foreground mask and returns one
// record per label, as produced by estimate_confidence.
//
// All tensors are read through raw pointers with row-major indexing:
//   score          (height, width) float
//   mask           (height, width) bool, true where a pixel may be claimed
//   embedding      (height, width, channels) float
//   kernel_label   height * width int32 labels, 0 meaning "no kernel"
//   kernel_contour height * width uint8, non-zero on kernel border pixels
// NOTE(review): the raw-pointer indexing assumes all inputs are contiguous —
// confirm that the caller guarantees this.
//
// kernel_region_num is the number of labels (background included);
// dis_threshold is the maximum Euclidean distance between a pixel embedding
// and the mean kernel embedding for the pixel to join that kernel.
std::vector<std::vector<float>> pixel_group_cpu(
    Tensor score, Tensor mask, Tensor embedding, Tensor kernel_label,
    Tensor kernel_contour, int kernel_region_num, float dis_threshold) {
  // The previous assert()s referenced `embedding_dim` before its declaration
  // and used chained `a == b == c` comparisons, so they neither compiled in
  // debug builds nor validated anything. Check what the indexing below needs.
  TORCH_CHECK(score.dim() == 2, "score must be a 2-D (height, width) tensor");
  TORCH_CHECK(mask.dim() == 2, "mask must be a 2-D (height, width) tensor");
  TORCH_CHECK(embedding.dim() == 3,
              "embedding must be a 3-D (height, width, channels) tensor");
  int height = score.size(0);
  int width = score.size(1);
  TORCH_CHECK(mask.size(0) == height && mask.size(1) == width,
              "mask must have the same shape as score");
  TORCH_CHECK(embedding.size(0) == height && embedding.size(1) == width,
              "the first two dimensions of embedding must match score");
  const int64_t num_pixels = static_cast<int64_t>(height) * width;
  TORCH_CHECK(kernel_label.numel() == num_pixels,
              "kernel_label must hold height * width elements");
  TORCH_CHECK(kernel_contour.numel() == num_pixels,
              "kernel_contour must hold height * width elements");

  auto threshold_square = dis_threshold * dis_threshold;
  auto ptr_score = score.data_ptr<float>();
  auto ptr_mask = mask.data_ptr<bool>();
  auto ptr_kernel_contour = kernel_contour.data_ptr<uint8_t>();
  auto ptr_embedding = embedding.data_ptr<float>();
  auto ptr_kernel_label = kernel_label.data_ptr<int32_t>();
  // BFS frontier of (row, column, label), seeded with kernel contour pixels.
  std::queue<std::tuple<int, int, int32_t>> contour_pixels;
  auto embedding_dim = embedding.size(2);
  // Per label: embedding_dim accumulated channels followed by a pixel count.
  std::vector<std::vector<float>> kernel_vector(
      kernel_region_num, std::vector<float>(embedding_dim + 1, 0));

  // Work on a copy so that the caller's kernel_label is left untouched.
  Tensor text_label;
  text_label = kernel_label.clone();
  auto ptr_text_label = text_label.data_ptr<int32_t>();

  // Pass 1: sum the embeddings of each kernel and collect contour seeds.
  for (int i = 0; i < height; i++) {
    auto ptr_embedding_tmp = ptr_embedding + i * width * embedding_dim;
    auto ptr_kernel_label_tmp = ptr_kernel_label + i * width;
    auto ptr_kernel_contour_tmp = ptr_kernel_contour + i * width;

    for (int j = 0, k = 0; j < width && k < width * embedding_dim;
         j++, k += embedding_dim) {
      int32_t label = ptr_kernel_label_tmp[j];
      if (label > 0) {
        // The label indexes kernel_vector (and later the result vector).
        TORCH_CHECK(label < kernel_region_num, "kernel_label contains label ",
                    label, " but kernel_region_num is ", kernel_region_num);
        for (int d = 0; d < embedding_dim; d++)
          kernel_vector[label][d] += ptr_embedding_tmp[k + d];
        kernel_vector[label][embedding_dim] += 1;
        // kernel pixel number
        if (ptr_kernel_contour_tmp[j]) {
          contour_pixels.push(std::make_tuple(i, j, label));
        }
      }
    }
  }
  // Sums -> means. Labels without pixels yield NaN here, but such labels are
  // never queued and therefore never read below.
  for (int i = 0; i < kernel_region_num; i++) {
    for (int j = 0; j < embedding_dim; j++) {
      kernel_vector[i][j] /= kernel_vector[i][embedding_dim];
    }
  }
  // Pass 2: 4-connected region growing from the contour seeds.
  int dx[4] = {-1, 1, 0, 0};
  int dy[4] = {0, 0, -1, 1};
  while (!contour_pixels.empty()) {
    auto query_pixel = contour_pixels.front();
    contour_pixels.pop();
    int y = std::get<0>(query_pixel);
    int x = std::get<1>(query_pixel);
    int32_t l = std::get<2>(query_pixel);
    // kernel_vector is read-only from here on, so a reference avoids copying
    // the whole mean vector for every popped pixel.
    const auto& kernel_cv = kernel_vector[l];
    for (int idx = 0; idx < 4; idx++) {
      int tmpy = y + dy[idx];
      int tmpx = x + dx[idx];
      if (tmpy < 0 || tmpy >= height || tmpx < 0 || tmpx >= width) continue;
      // Only form the row pointer once the coordinates are known to be valid.
      auto ptr_text_label_tmp = ptr_text_label + tmpy * width;
      // Skip background pixels and pixels that already belong to a label.
      if (!ptr_mask[tmpy * width + tmpx] || ptr_text_label_tmp[tmpx] > 0)
        continue;

      float dis = 0;
      auto ptr_embedding_tmp = ptr_embedding + tmpy * width * embedding_dim;
      for (int64_t i = 0; i < embedding_dim; i++) {
        dis +=
            pow(kernel_cv[i] - ptr_embedding_tmp[tmpx * embedding_dim + i], 2);
        // ignore further computing if dis is big enough
        if (dis >= threshold_square) break;
      }
      if (dis >= threshold_square) continue;
      contour_pixels.push(std::make_tuple(tmpy, tmpx, l));
      ptr_text_label_tmp[tmpx] = l;
    }
  }

  return estimate_confidence(ptr_text_label, ptr_score, kernel_region_num,
                             height, width);
}
// Declaration of pixel_group_impl (same parameter list as pixel_group_cpu) so
// that the name can be referenced by the registration below.
std::vector<std::vector<float>> pixel_group_impl(
    Tensor score, Tensor mask, Tensor embedding, Tensor kernel_label,
    Tensor kernel_contour, int kernel_region_num, float dis_threshold);
// NOTE(review): presumably binds pixel_group_cpu as the CPU backend of
// pixel_group_impl; the macro comes from pytorch_device_registry.hpp — confirm.
REGISTER_DEVICE_IMPL(pixel_group_impl, CPU, pixel_group_cpu);


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cpu/points_in_boxes.cpp
================================================
#include "pytorch_cpp_helper.hpp"

// Rotates the planar offset (shift_x, shift_y) by the angle -rz and stores the
// rotated offset in local_x / local_y.
inline void lidar_to_local_coords_cpu(float shift_x, float shift_y, float rz,
                                      float &local_x, float &local_y) {
  const float angle = -rz;
  const float cos_a = cos(angle);
  const float sin_a = sin(angle);
  // 2-D rotation matrix [[cos, -sin], [sin, cos]] applied to the offset.
  local_x = shift_x * cos_a + shift_y * (-sin_a);
  local_y = shift_x * sin_a + shift_y * cos_a;
}

// Tests whether a point lies strictly inside a rotated 3-D box.
//
// param pt: (x, y, z)
// param box3d: (cx, cy, cz, x_size, y_size, z_size, rz) in LiDAR coordinate,
// cz in the bottom center
// local_x / local_y receive the point's offset from the box centre expressed
// in the box frame; they are left untouched when the z test already fails.
// Returns 1 when the point is inside, 0 otherwise.
inline int check_pt_in_box3d_cpu(const float *pt, const float *box3d,
                                 float &local_x, float &local_y) {
  const float px = pt[0], py = pt[1], pz = pt[2];
  const float x_size = box3d[3], y_size = box3d[4], z_size = box3d[5];
  const float heading = box3d[6];

  // Half extents are kept in double, matching the `/ 2.0` arithmetic.
  const double half_z = z_size / 2.0;
  // cz in box3d is the bottom centre; lift it to the geometric centre.
  float center_z = box3d[2];
  center_z += half_z;

  // Cheap vertical rejection before doing the planar rotation.
  if (fabsf(pz - center_z) > half_z) return 0;

  lidar_to_local_coords_cpu(px - box3d[0], py - box3d[1], heading, local_x,
                            local_y);

  const double half_x = x_size / 2.0;
  const double half_y = y_size / 2.0;
  const bool within_x = (local_x > -half_x) && (local_x < half_x);
  const bool within_y = (local_y > -half_y) && (local_y < half_y);
  return (within_x && within_y) ? 1 : 0;
}

// Fills pts_indices_tensor with a 0/1 flag for every (box, point) pair.
void points_in_boxes_cpu_forward(Tensor boxes_tensor, Tensor pts_tensor,
                                 Tensor pts_indices_tensor) {
  // params boxes: (N, 7) [x, y, z, x_size, y_size, z_size, rz] in LiDAR
  // coordinate, z is the bottom center, boxes are expected not to overlap
  // params pts: (npoints, 3) [x, y, z] in LiDAR coordinate
  // params pts_indices: (N, npoints), written as a flat row-major int buffer

  CHECK_CONTIGUOUS(boxes_tensor);
  CHECK_CONTIGUOUS(pts_tensor);
  CHECK_CONTIGUOUS(pts_indices_tensor);

  const int num_boxes = boxes_tensor.size(0);
  const int num_points = pts_tensor.size(0);

  const float *box_data = boxes_tensor.data_ptr<float>();
  const float *point_data = pts_tensor.data_ptr<float>();
  int *flags = pts_indices_tensor.data_ptr<int>();

  // The box-local coordinates are produced by the helper but not used here.
  float scratch_x = 0, scratch_y = 0;
  for (int b = 0; b < num_boxes; ++b) {
    const float *box = box_data + b * 7;
    for (int p = 0; p < num_points; ++p) {
      const float *point = point_data + p * 3;
      flags[b * num_points + p] =
          check_pt_in_box3d_cpu(point, box, scratch_x, scratch_y);
    }
  }
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cpu/psamask.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
// Modified from
// https://github.com/hszhao/semseg/blob/master/lib/psa/src
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

#ifndef min
#define min(a, b) (((a) < (b)) ? (a) : (b))
#endif
#ifndef max
#define max(a, b) (((a) > (b)) ? (a) : (b))
#endif

void psamask_collect_forward(const int num_, const int h_feature,
                             const int w_feature, const int h_mask,
                             const int w_mask, const int half_h_mask,
                             const int half_w_mask, const Tensor mask_data,
                             Tensor buffer_data) {
  for (int n = 0; n < num_; n++) {
    for (int h = 0; h < h_feature; h++) {
      for (int w = 0; w < w_feature; w++) {
        // effective mask region : [hstart, hend) x [wstart, wend) with
        // mask-indexed
        const int hstart = max(0, half_h_mask - h);
        const int hend = min(h_mask, h_feature + half_h_mask - h);
        const int wstart = max(0, half_w_mask - w);
        const int wend = min(w_mask, w_feature + half_w_mask - w);
        // (hidx,                    widx                   ) with mask-indexed
        // (hidx + h - half_h_mask, widx + w - half_w_mask) with
        // feature-indexed
        for (int hidx = hstart; hidx < hend; hidx++) {
          for (int widx = wstart; widx < wend; widx++) {
            buffer_data.view({-1})[(n * h_feature * w_feature +
                                    (hidx + h - half_h_mask) * w_feature +
                                    (widx + w - half_w_mask)) *
                                       h_feature * w_feature +
                                   h * w_feature + w] =
                mask_data.view(
                    {-1})[((n * h_mask * w_mask + hidx * w_mask + widx) *
                               h_feature +
                           h) *
                              w_feature +
                          w];
          }
        }
      }
    }
  }
}

void psamask_distribute_forward(const int num_, const int h_feature,
                                const int w_feature, const int h_mask,
                                const int w_mask, const int half_h_mask,
                                const int half_w_mask, const Tensor mask_data,
                                Tensor buffer_data) {
  for (int n = 0; n < num_; n++) {
    for (int h = 0; h < h_feature; h++) {
      for (int w = 0; w < w_feature; w++) {
        // effective mask region : [hstart, hend) x [wstart, wend) with
        // mask-indexed
        const int hstart = max(0, half_h_mask - h);
        const int hend = min(h_mask, h_feature + half_h_mask - h);
        const int wstart = max(0, half_w_mask - w);
        const int wend = min(w_mask, w_feature + half_w_mask - w);
        // (hidx,                    widx                   ) with mask-indexed
        // (hidx + h - half_h_mask, widx + w - half_w_mask) with
        // feature-indexed
        for (int hidx = hstart; hidx < hend; hidx++) {
          for (int widx = wstart; widx < wend; widx++) {
            buffer_data.view(
                {-1})[(n * h_feature * w_feature + h * w_feature + w) *
                          h_feature * w_feature +
                      (hidx + h - half_h_mask) * w_feature +
                      (widx + w - half_w_mask)] =
                mask_data.view(
                    {-1})[((n * h_mask * w_mask + hidx * w_mask + widx) *
                               h_feature +
                           h) *
                              w_feature +
                          w];
          }
        }
      }
    }
  }
}

void psamask_collect_backward(const int num_, const int h_feature,
                              const int w_feature, const int h_mask,
                              const int w_mask, const int half_h_mask,
                              const int half_w_mask, const Tensor buffer_diff,
                              Tensor mask_diff) {
  for (int n = 0; n < num_; n++) {
    for (int h = 0; h < h_feature; h++) {
      for (int w = 0; w < w_feature; w++) {
        // effective mask region : [hstart, hend) x [wstart, wend) with
        // mask-indexed
        const int hstart = max(0, half_h_mask - h);
        const int hend = min(h_mask, h_feature + half_h_mask - h);
        const int wstart = max(0, half_w_mask - w);
        const int wend = min(w_mask, w_feature + half_w_mask - w);
        // (hidx,                    widx                   ) with mask-indexed
        // (hidx + h - half_h_mask, widx + w - half_w_mask) with
        // feature-indexed
        for (int hidx = hstart; hidx < hend; hidx++) {
          for (int widx = wstart; widx < wend; widx++) {
            mask_diff.view({-1})[((n * h_mask * w_mask + hidx * w_mask + widx) *
                                      h_feature +
                                  h) *
                                     w_feature +
                                 w] =
                buffer_diff.view({-1})[(n * h_feature * w_feature +
                                        (hidx + h - half_h_mask) * w_feature +
                                        (widx + w - half_w_mask)) *
                                           h_feature * w_feature +
                                       h * w_feature + w];
          }
        }
      }
    }
  }
}

void psamask_distribute_backward(const int num_, const int h_feature,
                                 const int w_feature, const int h_mask,
                                 const int w_mask, const int half_h_mask,
                                 const int half_w_mask,
                                 const Tensor buffer_diff, Tensor mask_diff) {
  for (int n = 0; n < num_; n++) {
    for (int h = 0; h < h_feature; h++) {
      for (int w = 0; w < w_feature; w++) {
        // effective mask region : [hstart, hend) x [wstart, wend) with
        // mask-indexed
        const int hstart = max(0, half_h_mask - h);
        const int hend = min(h_mask, h_feature + half_h_mask - h);
        const int wstart = max(0, half_w_mask - w);
        const int wend = min(w_mask, w_feature + half_w_mask - w);
        // (hidx,                    widx                   ) with mask-indexed
        // (hidx + h - half_h_mask, widx + w - half_w_mask) with
        // feature-indexed
        for (int hidx = hstart; hidx < hend; hidx++) {
          for (int widx = wstart; widx < wend; widx++) {
            mask_diff.view({-1})[((n * h_mask * w_mask + hidx * w_mask + widx) *
                                      h_feature +
                                  h) *
                                     w_feature +
                                 w] =
                buffer_diff.view(
                    {-1})[(n * h_feature * w_feature + h * w_feature + w) *
                              h_feature * w_feature +
                          (hidx + h - half_h_mask) * w_feature +
                          (widx + w - half_w_mask)];
          }
        }
      }
    }
  }
}

// CPU forward entry point: psa_type == 0 runs the "collect" kernel, any other
// value runs the "distribute" kernel. All remaining arguments are forwarded
// unchanged.
void psamask_forward_cpu(const int psa_type, const Tensor input, Tensor output,
                         const int num_, const int h_feature,
                         const int w_feature, const int h_mask,
                         const int w_mask, const int half_h_mask,
                         const int half_w_mask) {
  const bool use_collect = (psa_type == 0);
  if (!use_collect) {
    psamask_distribute_forward(num_, h_feature, w_feature, h_mask, w_mask,
                               half_h_mask, half_w_mask, input, output);
    return;
  }
  psamask_collect_forward(num_, h_feature, w_feature, h_mask, w_mask,
                          half_h_mask, half_w_mask, input, output);
}

// CPU backward entry point: psa_type == 0 runs the "collect" kernel, any other
// value runs the "distribute" kernel. All remaining arguments are forwarded
// unchanged.
void psamask_backward_cpu(const int psa_type, const Tensor grad_output,
                          Tensor grad_input, const int num_,
                          const int h_feature, const int w_feature,
                          const int h_mask, const int w_mask,
                          const int half_h_mask, const int half_w_mask) {
  const bool use_collect = (psa_type == 0);
  if (!use_collect) {
    psamask_distribute_backward(num_, h_feature, w_feature, h_mask, w_mask,
                                half_h_mask, half_w_mask, grad_output,
                                grad_input);
    return;
  }
  psamask_collect_backward(num_, h_feature, w_feature, h_mask, w_mask,
                           half_h_mask, half_w_mask, grad_output, grad_input);
}

// Declaration of psamask_forward_impl (same parameter list as
// psamask_forward_cpu) so that the name can be referenced by the registration
// below.
void psamask_forward_impl(const int psa_type, const Tensor input, Tensor output,
                          const int num_, const int h_feature,
                          const int w_feature, const int h_mask,
                          const int w_mask, const int half_h_mask,
                          const int half_w_mask);

// Declaration of psamask_backward_impl (same parameter list as
// psamask_backward_cpu).
void psamask_backward_impl(const int psa_type, const Tensor grad_output,
                           Tensor grad_input, const int num_,
                           const int h_feature, const int w_feature,
                           const int h_mask, const int w_mask,
                           const int half_h_mask, const int half_w_mask);
// NOTE(review): presumably binds the *_cpu functions as the CPU backends of the
// corresponding *_impl entry points; the macro comes from
// pytorch_device_registry.hpp — confirm.
REGISTER_DEVICE_IMPL(psamask_forward_impl, CPU, psamask_forward_cpu);
REGISTER_DEVICE_IMPL(psamask_backward_impl, CPU, psamask_backward_cpu);


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cpu/roi_align.cpp
================================================
// Modified from
// https://github.com/facebookresearch/detectron2/tree/master/detectron2/layers/csrc/ROIAlign
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
#include <ATen/ATen.h>
#include <ATen/TensorUtils.h>

#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// implementation taken from Caffe2
// Cached bilinear-interpolation data for one sampling point: the flat
// (row * width + column) offsets of the four neighbouring pixels and the
// weight applied to each of them.
template <typename T>
struct PreCalc {
  // pos1: (y_low, x_low)   pos2: (y_low, x_high)
  // pos3: (y_high, x_low)  pos4: (y_high, x_high)
  int pos1, pos2, pos3, pos4;
  // w1..w4 weight the pixel at pos1..pos4 respectively.
  T w1, w2, w3, w4;
};

template <typename T>
void pre_calc_for_bilinear_interpolate(
    const int height, const int width, const int pooled_height,
    const int pooled_width, const int iy_upper, const int ix_upper,
    T roi_start_h, T roi_start_w, T bin_size_h, T bin_size_w,
    int roi_bin_grid_h, int roi_bin_grid_w, std::vector<PreCalc<T>>& pre_calc) {
  int pre_calc_index = 0;
  for (int ph = 0; ph < pooled_height; ph++) {
    for (int pw = 0; pw < pooled_width; pw++) {
      for (int iy = 0; iy < iy_upper; iy++) {
        const T yy = roi_start_h + ph * bin_size_h +
                     static_cast<T>(iy + .5f) * bin_size_h /
                         static_cast<T>(roi_bin_grid_h);  // e.g., 0.5, 1.5
        for (int ix = 0; ix < ix_upper; ix++) {
          const T xx = roi_start_w + pw * bin_size_w +
                       static_cast<T>(ix + .5f) * bin_size_w /
                           static_cast<T>(roi_bin_grid_w);

          T x = xx;
          T y = yy;
          // deal with: inverse elements are out of feature map boundary
          if (y < -1.0 || y > height || x < -1.0 || x > width) {
            // empty
            PreCalc<T> pc;
            pc.pos1 = 0;
            pc.pos2 = 0;
            pc.pos3 = 0;
            pc.pos4 = 0;
            pc.w1 = 0;
            pc.w2 = 0;
            pc.w3 = 0;
            pc.w4 = 0;
            pre_calc[pre_calc_index] = pc;
            pre_calc_index += 1;
            continue;
          }

          if (y <= 0) {
            y = 0;
          }
          if (x <= 0) {
            x = 0;
          }

          int y_low = (int)y;
          int x_low = (int)x;
          int y_high;
          int x_high;

          if (y_low >= height - 1) {
            y_high = y_low = height - 1;
            y = (T)y_low;
          } else {
            y_high = y_low + 1;
          }

          if (x_low >= width - 1) {
            x_high = x_low = width - 1;
            x = (T)x_low;
          } else {
            x_high = x_low + 1;
          }

          T ly = y - y_low;
          T lx = x - x_low;
          T hy = 1. - ly, hx = 1. - lx;
          T w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx;

          // save weights and indices
          PreCalc<T> pc;
          pc.pos1 = y_low * width + x_low;
          pc.pos2 = y_low * width + x_high;
          pc.pos3 = y_high * width + x_low;
          pc.pos4 = y_high * width + x_high;
          pc.w1 = w1;
          pc.w2 = w2;
          pc.w3 = w3;
          pc.w4 = w4;
          pre_calc[pre_calc_index] = pc;

          pre_calc_index += 1;
        }
      }
    }
  }
}

// CPU forward pass of ROIAlign.
//
// nthreads        total number of output elements
//                 (n_rois * channels * pooled_height * pooled_width)
// input           feature maps, indexed as [batch, channels, height, width]
// rois            5 values per ROI: batch index, x1, y1, x2, y2
// output          pooled result, indexed as [roi, channel, ph, pw]
// argmax_y/_x     written only for max pooling: sample coordinates of the
//                 maximum of each bin (-1 when no sample exceeded -10000)
// spatial_scale   factor applied to the ROI coordinates
// sampling_ratio  samples per bin and axis; <= 0 derives it from the ROI size
// pool_mode       0 - max pool, 1 - avg pool
// aligned         subtract half a pixel from the scaled ROI coordinates and
//                 reject ROIs of negative size; otherwise the ROI is forced to
//                 be at least 1x1
template <typename T>
void ROIAlignForward(const int nthreads, const T* input, const T* rois,
                     T* output, T* argmax_y, T* argmax_x,
                     const int pooled_height, const int pooled_width,
                     const T spatial_scale, const int sampling_ratio,
                     const int pool_mode,  // 0 - max pool, 1 - avg pool
                     const bool aligned, const int channels, const int height,
                     const int width) {
  int n_rois = nthreads / channels / pooled_width / pooled_height;
  // (n, c, ph, pw) is an element in the pooled output
  // can be parallelized using omp
  // #pragma omp parallel for num_threads(32)
  for (int n = 0; n < n_rois; n++) {
    int index_n = n * channels * pooled_width * pooled_height;

    const T* offset_rois = rois + n * 5;
    int roi_batch_ind = offset_rois[0];

    // Do not use rounding; this implementation detail is critical
    T offset = aligned ? (T)0.5 : (T)0.0;
    T roi_start_w = offset_rois[1] * spatial_scale - offset;
    T roi_start_h = offset_rois[2] * spatial_scale - offset;
    T roi_end_w = offset_rois[3] * spatial_scale - offset;
    T roi_end_h = offset_rois[4] * spatial_scale - offset;

    T roi_width = roi_end_w - roi_start_w;
    T roi_height = roi_end_h - roi_start_h;
    if (aligned) {
      // The message used to read "cannot have non-negative size", which
      // states the opposite of what is being checked.
      AT_ASSERTM(roi_width >= 0 && roi_height >= 0,
                 "ROIs in ROIAlign do not have non-negative size!");
    } else {  // for backward-compatibility only
      roi_width = std::max(roi_width, (T)1.);
      roi_height = std::max(roi_height, (T)1.);
    }
    T bin_size_h = static_cast<T>(roi_height) / static_cast<T>(pooled_height);
    T bin_size_w = static_cast<T>(roi_width) / static_cast<T>(pooled_width);

    // We use roi_bin_grid to sample the grid and mimic integral
    int roi_bin_grid_h = (sampling_ratio > 0)
                             ? sampling_ratio
                             : ceilf(roi_height / pooled_height);  // e.g., = 2
    int roi_bin_grid_w =
        (sampling_ratio > 0) ? sampling_ratio : ceilf(roi_width / pooled_width);

    // When the grid is empty, output zeros == 0/1, instead of NaN.
    const T count = std::max(roi_bin_grid_h * roi_bin_grid_w, 1);  // e.g. = 4

    // we want to precalculate indices and weights shared by all channels,
    // this is the key point of optimization
    std::vector<PreCalc<T>> pre_calc(roi_bin_grid_h * roi_bin_grid_w *
                                     pooled_width * pooled_height);
    pre_calc_for_bilinear_interpolate(
        height, width, pooled_height, pooled_width, roi_bin_grid_h,
        roi_bin_grid_w, roi_start_h, roi_start_w, bin_size_h, bin_size_w,
        roi_bin_grid_h, roi_bin_grid_w, pre_calc);

    for (int c = 0; c < channels; c++) {
      int index_n_c = index_n + c * pooled_width * pooled_height;
      const T* offset_input =
          input + (roi_batch_ind * channels + c) * height * width;
      // The cache is walked in the same (ph, pw, iy, ix) order it was filled.
      int pre_calc_index = 0;

      for (int ph = 0; ph < pooled_height; ph++) {
        for (int pw = 0; pw < pooled_width; pw++) {
          int index = index_n_c + ph * pooled_width + pw;

          T output_val = 0.;
          // Sentinel start value for the running maximum.
          T maxval = -10000;
          T maxidx_y = -1.f, maxidx_x = -1.f;
          for (int iy = 0; iy < roi_bin_grid_h; iy++) {
            const T y = roi_start_h + ph * bin_size_h +
                        static_cast<T>(iy + .5f) * bin_size_h /
                            static_cast<T>(roi_bin_grid_h);
            for (int ix = 0; ix < roi_bin_grid_w; ix++) {
              const T x = roi_start_w + pw * bin_size_w +
                          static_cast<T>(ix + .5f) * bin_size_w /
                              static_cast<T>(roi_bin_grid_w);
              PreCalc<T> pc = pre_calc[pre_calc_index];
              T val = pc.w1 * offset_input[pc.pos1] +
                      pc.w2 * offset_input[pc.pos2] +
                      pc.w3 * offset_input[pc.pos3] +
                      pc.w4 * offset_input[pc.pos4];
              if (val > maxval) {
                maxval = val;
                maxidx_y = y;
                maxidx_x = x;
              }
              output_val += val;
              pre_calc_index += 1;
            }
          }
          if (pool_mode == 0) {
            // We do max pooling inside a bin
            output[index] = maxval;
            argmax_y[index] = maxidx_y;
            argmax_x[index] = maxidx_x;
          } else if (pool_mode == 1) {
            // We do average (integral) pooling inside a bin
            output[index] = output_val / count;
          }  // if
        }    // for pw
      }      // for ph
    }        // for c
  }          // for n
}

// Computes the four neighbour indices and bilinear weights of the sampling
// point (y, x) on a height x width grid.
//
// Outputs:
//   x_low / x_high / y_low / y_high  integer neighbour coordinates, all -1
//                                    when the point is rejected
//   w1..w4  weights of (y_low, x_low), (y_low, x_high), (y_high, x_low) and
//           (y_high, x_high); all 0 when the point is rejected
// A point is rejected when it lies more than one pixel before the first
// row/column or beyond `height` / `width`. `index` is unused.
template <typename T>
void bilinear_interpolate_gradient(const int height, const int width, T y, T x,
                                   T& w1, T& w2, T& w3, T& w4, int& x_low,
                                   int& x_high, int& y_low, int& y_high,
                                   const int index /* index for debug only*/) {
  const bool outside = y < -1.0 || y > height || x < -1.0 || x > width;
  if (outside) {
    // empty: no gradient flows through this sample
    w1 = 0.;
    w2 = 0.;
    w3 = 0.;
    w4 = 0.;
    x_low = -1;
    x_high = -1;
    y_low = -1;
    y_high = -1;
    return;
  }

  // Samples in the (-1, 0] border band are clamped onto the edge.
  if (y <= 0) {
    y = 0;
  }
  if (x <= 0) {
    x = 0;
  }

  y_low = (int)y;
  x_low = (int)x;
  y_high = y_low + 1;
  x_high = x_low + 1;

  // On the last row/column both neighbours collapse onto the edge pixel.
  if (y_low >= height - 1) {
    y_high = y_low = height - 1;
    y = (T)y_low;
  }
  if (x_low >= width - 1) {
    x_high = x_low = width - 1;
    x = (T)x_low;
  }

  const T ly = y - y_low;
  const T lx = x - x_low;
  const T hy = 1. - ly;
  const T hx = 1. - lx;

  // Forward interpolation for reference:
  //   val = w1 * in[y_low, x_low] + w2 * in[y_low, x_high] +
  //         w3 * in[y_high, x_low] + w4 * in[y_high, x_high]
  w1 = hy * hx;
  w2 = hy * lx;
  w3 = ly * hx;
  w4 = ly * lx;
}

// Adds `val` to the value stored at `address` (plain, non-atomic update).
template <class T>
inline void add(T* address, const T& val) {
  T& target = *address;
  target += val;
}

// CPU backward pass of ROIAlign: scatters every output gradient back onto the
// input pixels that contributed to it in the forward pass.
//
// nthreads             number of output-gradient elements to process
// grad_output          gradient w.r.t. the pooled output, addressed with the
//                      explicit n/c/h/w strides
// rois                 5 values per ROI: batch index, x1, y1, x2, y2
// argmax_y / argmax_x  max pooling only: sample coordinates stored by the
//                      forward pass (-1 marks "no sample")
// grad_input           accumulated into, indexed as
//                      [batch, channels, height, width]
// pool_mode            0 - max pool, 1 - avg pool
// The remaining parameters have the same meaning as in the forward pass.
template <typename T>
void ROIAlignBackward(const int nthreads, const T* grad_output, const T* rois,
                      const T* argmax_y, const T* argmax_x, T* grad_input,
                      const int pooled_height, const int pooled_width,
                      const T spatial_scale, const int sampling_ratio,
                      const int pool_mode,  // 0 - max pool, 1 - avg pool
                      const bool aligned, const int channels, const int height,
                      const int width, const int n_stride, const int c_stride,
                      const int h_stride, const int w_stride) {
  for (int index = 0; index < nthreads; index++) {
    // Decompose the flat index into (n, c, ph, pw) of the pooled output.
    const int pw = index % pooled_width;
    const int ph = (index / pooled_width) % pooled_height;
    const int c = (index / pooled_width / pooled_height) % channels;
    const int n = index / pooled_width / pooled_height / channels;

    const T* roi = rois + n * 5;
    const int batch_idx = roi[0];

    // Do not use rounding; this implementation detail is critical
    const T offset = aligned ? (T)0.5 : (T)0.0;
    const T roi_start_w = roi[1] * spatial_scale - offset;
    const T roi_start_h = roi[2] * spatial_scale - offset;
    const T roi_end_w = roi[3] * spatial_scale - offset;
    const T roi_end_h = roi[4] * spatial_scale - offset;

    T roi_width = roi_end_w - roi_start_w;
    T roi_height = roi_end_h - roi_start_h;
    if (!aligned) {  // for backward-compatibility only
      roi_width = std::max(roi_width, (T)1.);
      roi_height = std::max(roi_height, (T)1.);
    } else {
      AT_ASSERTM(roi_width >= 0 && roi_height >= 0,
                 "ROIs in ROIAlign do not have non-negative size!");
    }
    const T bin_size_h =
        static_cast<T>(roi_height) / static_cast<T>(pooled_height);
    const T bin_size_w =
        static_cast<T>(roi_width) / static_cast<T>(pooled_width);

    // Input-gradient plane of this (batch, channel) pair.
    T* grad_plane = grad_input + ((batch_idx * channels + c) * height * width);

    // Gradient arriving at this output bin.
    const T* grad_out_nc = grad_output + (n * n_stride + c * c_stride);
    const T bin_grad = grad_out_nc[ph * h_stride + pw * w_stride];

    // Adds the four weighted gradients to the four neighbouring pixels,
    // unless the sample was rejected (negative indices).
    // atomic add is not needed for now since it is single threaded
    auto deposit = [&](int y_low, int y_high, int x_low, int x_high, T g1,
                       T g2, T g3, T g4) {
      if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0) {
        add(grad_plane + y_low * width + x_low, static_cast<T>(g1));
        add(grad_plane + y_low * width + x_high, static_cast<T>(g2));
        add(grad_plane + y_high * width + x_low, static_cast<T>(g3));
        add(grad_plane + y_high * width + x_high, static_cast<T>(g4));
      }
    };

    if (pool_mode == 0) {
      // Max pooling: only the sample that produced the maximum gets gradient.
      T y = argmax_y[index], x = argmax_x[index];
      if (y == -1.f) {
        continue;
      }
      T w1, w2, w3, w4;
      int x_low, x_high, y_low, y_high;
      bilinear_interpolate_gradient(height, width, y, x, w1, w2, w3, w4, x_low,
                                    x_high, y_low, y_high, index);

      const T g1 = bin_grad * w1;
      const T g2 = bin_grad * w2;
      const T g3 = bin_grad * w3;
      const T g4 = bin_grad * w4;
      deposit(y_low, y_high, x_low, x_high, g1, g2, g3, g4);
      continue;
    }
    if (pool_mode != 1) {
      continue;
    }

    // Average (integral) pooling: every sample of the bin gets an equal share.
    // We use roi_bin_grid to sample the grid and mimic integral
    const int roi_bin_grid_h =
        (sampling_ratio > 0)
            ? sampling_ratio
            : ceilf(roi_height / pooled_height);  // e.g., = 2
    const int roi_bin_grid_w = (sampling_ratio > 0)
                                   ? sampling_ratio
                                   : ceilf(roi_width / pooled_width);

    const T count = roi_bin_grid_h * roi_bin_grid_w;  // e.g. = 4
    for (int iy = 0; iy < roi_bin_grid_h; iy++) {
      const T y = roi_start_h + ph * bin_size_h +
                  static_cast<T>(iy + .5f) * bin_size_h /
                      static_cast<T>(roi_bin_grid_h);  // e.g., 0.5, 1.5
      for (int ix = 0; ix < roi_bin_grid_w; ix++) {
        const T x = roi_start_w + pw * bin_size_w +
                    static_cast<T>(ix + .5f) * bin_size_w /
                        static_cast<T>(roi_bin_grid_w);

        T w1, w2, w3, w4;
        int x_low, x_high, y_low, y_high;
        bilinear_interpolate_gradient(height, width, y, x, w1, w2, w3, w4,
                                      x_low, x_high, y_low, y_high, index);

        const T g1 = bin_grad * w1 / count;
        const T g2 = bin_grad * w2 / count;
        const T g3 = bin_grad * w3 / count;
        const T g4 = bin_grad * w4 / count;
        deposit(y_low, y_high, x_low, x_high, g1, g2, g3, g4);
      }  // ix
    }    // iy
  }      // for
}  // ROIAlignBackward

// Host-side driver for the ROI Align forward pass on CPU.
//
// Reads the output element count and the input's size along dims 1..3, then
// dispatches on the input's scalar type and runs ROIAlignForward with raw
// data pointers taken from the tensors.
void ROIAlignForwardCPULauncher(Tensor input, Tensor rois, Tensor output,
                                Tensor argmax_y, Tensor argmax_x,
                                int aligned_height, int aligned_width,
                                float spatial_scale, int sampling_ratio,
                                int pool_mode, bool aligned) {
  // Total number of output elements the kernel has to produce.
  int num_outputs = output.numel();

  // Sizes of input dims 1, 2 and 3 (used by the kernel as channels / height /
  // width).
  int num_channels = input.size(1);
  int feat_h = input.size(2);
  int feat_w = input.size(3);

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      input.scalar_type(), "ROIAlign_forward", [&] {
        auto* input_ptr = input.data_ptr<scalar_t>();
        auto* rois_ptr = rois.data_ptr<scalar_t>();
        auto* output_ptr = output.data_ptr<scalar_t>();
        auto* argmax_y_ptr = argmax_y.data_ptr<scalar_t>();
        auto* argmax_x_ptr = argmax_x.data_ptr<scalar_t>();

        ROIAlignForward<scalar_t>(
            num_outputs, input_ptr, rois_ptr, output_ptr, argmax_y_ptr,
            argmax_x_ptr, aligned_height, aligned_width,
            static_cast<scalar_t>(spatial_scale), sampling_ratio, pool_mode,
            aligned, num_channels, feat_h, feat_w);
      });
}

// Host-side driver for the ROI Align backward pass on CPU.
//
// Collects the gradient element count, the size of grad_input along dims
// 1..3 and the four strides of grad_output, then dispatches on the gradient's
// scalar type and runs ROIAlignBackward with raw data pointers.
void ROIAlignBackwardCPULauncher(Tensor grad_output, Tensor rois,
                                 Tensor argmax_y, Tensor argmax_x,
                                 Tensor grad_input, int aligned_height,
                                 int aligned_width, float spatial_scale,
                                 int sampling_ratio, int pool_mode,
                                 bool aligned) {
  int num_grad_elems = grad_output.numel();

  int num_channels = grad_input.size(1);
  int feat_h = grad_input.size(2);
  int feat_w = grad_input.size(3);

  // grad_output is addressed through its strides inside the kernel, so pass
  // them along explicitly instead of assuming a dense layout.
  int stride_n = grad_output.stride(0);
  int stride_c = grad_output.stride(1);
  int stride_h = grad_output.stride(2);
  int stride_w = grad_output.stride(3);

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      grad_output.scalar_type(), "ROIAlign_backward", [&] {
        auto* grad_out_ptr = grad_output.data_ptr<scalar_t>();
        auto* rois_ptr = rois.data_ptr<scalar_t>();
        auto* argmax_y_ptr = argmax_y.data_ptr<scalar_t>();
        auto* argmax_x_ptr = argmax_x.data_ptr<scalar_t>();
        auto* grad_in_ptr = grad_input.data_ptr<scalar_t>();

        ROIAlignBackward<scalar_t>(
            num_grad_elems, grad_out_ptr, rois_ptr, argmax_y_ptr, argmax_x_ptr,
            grad_in_ptr, aligned_height, aligned_width,
            static_cast<scalar_t>(spatial_scale), sampling_ratio, pool_mode,
            aligned, num_channels, feat_h, feat_w, stride_n, stride_c,
            stride_h, stride_w);
      });
}

// CPU implementation of the ROI Align forward op.
// Pure adapter: every argument is handed to ROIAlignForwardCPULauncher
// unchanged and in the same order.
void roi_align_forward_cpu(Tensor input, Tensor rois, Tensor output,
                           Tensor argmax_y, Tensor argmax_x, int aligned_height,
                           int aligned_width, float spatial_scale,
                           int sampling_ratio, int pool_mode, bool aligned) {
  ROIAlignForwardCPULauncher(
      input, rois, output, argmax_y, argmax_x,  // tensors
      aligned_height, aligned_width,            // scalar options
      spatial_scale, sampling_ratio, pool_mode, aligned);
}

// CPU implementation of the ROI Align backward op.
// Pure adapter: every argument is handed to ROIAlignBackwardCPULauncher
// unchanged and in the same order.
void roi_align_backward_cpu(Tensor grad_output, Tensor rois, Tensor argmax_y,
                            Tensor argmax_x, Tensor grad_input,
                            int aligned_height, int aligned_width,
                            float spatial_scale, int sampling_ratio,
                            int pool_mode, bool aligned) {
  ROIAlignBackwardCPULauncher(
      grad_output, rois, argmax_y, argmax_x, grad_input,  // tensors
      aligned_height, aligned_width,                      // scalar options
      spatial_scale, sampling_ratio, pool_mode, aligned);
}

// Forward declaration of the device-generic ROI Align forward entry point.
// Only declared here; its definition is not in this file. Its parameter list
// matches roi_align_forward_cpu above.
void roi_align_forward_impl(Tensor input, Tensor rois, Tensor output,
                            Tensor argmax_y, Tensor argmax_x,
                            int aligned_height, int aligned_width,
                            float spatial_scale, int sampling_ratio,
                            int pool_mode, bool aligned);

// Forward declaration of the device-generic ROI Align backward entry point.
// Its parameter list matches roi_align_backward_cpu above.
void roi_align_backward_impl(Tensor grad_output, Tensor rois, Tensor argmax_y,
                             Tensor argmax_x, Tensor grad_input,
                             int aligned_height, int aligned_width,
                             float spatial_scale, int sampling_ratio,
                             int pool_mode, bool aligned);

// Associate the CPU functions with the generic entry points.
// NOTE(review): presumably REGISTER_DEVICE_IMPL (pytorch_device_registry.hpp)
// makes the *_impl call resolve to the *_cpu function for CPU tensors --
// confirm against the macro definition.
REGISTER_DEVICE_IMPL(roi_align_forward_impl, CPU, roi_align_forward_cpu);
REGISTER_DEVICE_IMPL(roi_align_backward_impl, CPU, roi_align_backward_cpu);


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cpu/roi_align_rotated.cpp
================================================
// Modified from
// https://github.com/facebookresearch/detectron2/tree/master/detectron2/layers/csrc/ROIAlignRotated
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
#include <ATen/ATen.h>
#include <ATen/TensorUtils.h>

#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// implementation taken from Caffe2
// Cached bilinear-interpolation data for a single sampling point.
// Filled by pre_calc_for_bilinear_interpolate and consumed by the forward
// pass, which reuses the same entry for every channel.
template <typename T>
struct PreCalc {
  // Flattened (row * width + column) offsets of the four neighbouring pixels:
  // pos1 = (y_low, x_low),  pos2 = (y_low, x_high),
  // pos3 = (y_high, x_low), pos4 = (y_high, x_high).
  int pos1, pos2, pos3, pos4;

  // Interpolation weights applied to the pixels at pos1..pos4 respectively.
  T w1, w2, w3, w4;
};

// Precomputes, for one rotated ROI, the four neighbouring pixel offsets and
// bilinear weights of every sampling point so they can be reused for all
// channels.
//
// Entries are written to `pre_calc` sequentially in (ph, pw, iy, ix) order.
// No bounds check is done here: the caller must have sized `pre_calc` to hold
// at least pooled_height * pooled_width * iy_upper * ix_upper elements.
//
// Params:
//   height, width                 feature-map size used for bounds/clamping
//   pooled_height, pooled_width   number of output bins along each axis
//   iy_upper, ix_upper            number of samples per bin along y / x
//   roi_start_h, roi_start_w      origin of the bin grid, relative to the ROI
//                                 center (before rotation)
//   bin_size_h, bin_size_w        size of one bin
//   roi_bin_grid_h, roi_bin_grid_w  divisor used to space samples in a bin
//   roi_center_h, roi_center_w    translation added after the rotation
//   cos_theta, sin_theta          rotation coefficients
//   pre_calc                      output buffer (see above)
template <typename T>
void pre_calc_for_bilinear_interpolate(
    const int height, const int width, const int pooled_height,
    const int pooled_width, const int iy_upper, const int ix_upper,
    T roi_start_h, T roi_start_w, T bin_size_h, T bin_size_w,
    int roi_bin_grid_h, int roi_bin_grid_w, T roi_center_h, T roi_center_w,
    T cos_theta, T sin_theta, std::vector<PreCalc<T>>& pre_calc) {
  // Running write position into pre_calc; advanced once per sampling point.
  int pre_calc_index = 0;
  for (int ph = 0; ph < pooled_height; ph++) {
    for (int pw = 0; pw < pooled_width; pw++) {
      for (int iy = 0; iy < iy_upper; iy++) {
        // Sample position along y in the ROI-local (unrotated) frame.
        const T yy = roi_start_h + ph * bin_size_h +
                     static_cast<T>(iy + .5f) * bin_size_h /
                         static_cast<T>(roi_bin_grid_h);  // e.g., 0.5, 1.5
        for (int ix = 0; ix < ix_upper; ix++) {
          // Sample position along x in the ROI-local (unrotated) frame.
          const T xx = roi_start_w + pw * bin_size_w +
                       static_cast<T>(ix + .5f) * bin_size_w /
                           static_cast<T>(roi_bin_grid_w);

          // Rotate by theta around the center and translate
          // In image space, (y, x) is the order for Right Handed System,
          // and this is essentially multiplying the point by a rotation matrix
          // to rotate it counterclockwise through angle theta.
          T y = yy * cos_theta - xx * sin_theta + roi_center_h;
          T x = yy * sin_theta + xx * cos_theta + roi_center_w;
          // deal with: inverse elements are out of feature map boundary
          if (y < -1.0 || y > height || x < -1.0 || x > width) {
            // empty: all weights are zero, so the (valid) offset 0 that the
            // forward pass will still read contributes nothing.
            PreCalc<T> pc;
            pc.pos1 = 0;
            pc.pos2 = 0;
            pc.pos3 = 0;
            pc.pos4 = 0;
            pc.w1 = 0;
            pc.w2 = 0;
            pc.w3 = 0;
            pc.w4 = 0;
            pre_calc[pre_calc_index] = pc;
            pre_calc_index += 1;
            continue;
          }

          // Coordinates in [-1, 0) are snapped onto the first row / column.
          if (y < 0) {
            y = 0;
          }
          if (x < 0) {
            x = 0;
          }

          // Truncation toward zero; y and x are non-negative at this point.
          int y_low = (int)y;
          int x_low = (int)x;
          int y_high;
          int x_high;

          // On or past the last row: collapse both neighbours onto it, which
          // makes the fractional part ly below equal to zero.
          if (y_low >= height - 1) {
            y_high = y_low = height - 1;
            y = (T)y_low;
          } else {
            y_high = y_low + 1;
          }

          // Same handling for the last column.
          if (x_low >= width - 1) {
            x_high = x_low = width - 1;
            x = (T)x_low;
          } else {
            x_high = x_low + 1;
          }

          // Fractional distances to the low neighbour (ly, lx) and their
          // complements (hy, hx) give the four bilinear weights.
          T ly = y - y_low;
          T lx = x - x_low;
          T hy = 1. - ly, hx = 1. - lx;
          T w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx;

          // save weights and indices
          PreCalc<T> pc;
          pc.pos1 = y_low * width + x_low;
          pc.pos2 = y_low * width + x_high;
          pc.pos3 = y_high * width + x_low;
          pc.pos4 = y_high * width + x_high;
          pc.w1 = w1;
          pc.w2 = w2;
          pc.w3 = w3;
          pc.w4 = w4;
          pre_calc[pre_calc_index] = pc;

          pre_calc_index += 1;
        }
      }
    }
  }
}

// Rotated ROI Align forward pass (average pooling) on CPU.
//
// For every ROI, the sampling offsets and bilinear weights are computed once
// with pre_calc_for_bilinear_interpolate and then reused for each channel.
//
// Params:
//   nthreads        total number of output elements; the ROI count is derived
//                   from it as nthreads / channels / pooled_width /
//                   pooled_height
//   input           feature data, indexed as
//                   ((batch * channels + c) * height + y) * width + x
//   spatial_scale   factor applied to the ROI center and size
//   aligned         if true, subtract 0.5 from the scaled center and require
//                   non-negative ROI sizes; if false, clamp sizes to >= 1
//   clockwise       if true, the ROI angle is negated before use
//   channels, height, width       input feature-map dimensions
//   pooled_height, pooled_width   output size per ROI and channel
//   sampling_ratio  samples per bin along each axis; when <= 0 it is derived
//                   as ceil(roi_size / pooled_size)
//   rois            6 values per ROI, read as
//                   [batch_index, center_w, center_h, width, height, theta]
//   output          written densely in (n, c, ph, pw) order
template <typename T>
void ROIAlignRotatedForward(const int nthreads, const T* input,
                            const T& spatial_scale, const bool aligned,
                            const bool clockwise, const int channels,
                            const int height, const int width,
                            const int pooled_height, const int pooled_width,
                            const int sampling_ratio, const T* rois,
                            T* output) {
  int n_rois = nthreads / channels / pooled_width / pooled_height;
  // (n, c, ph, pw) is an element in the pooled output
  // can be parallelized using omp
  // #pragma omp parallel for num_threads(32)
  for (int n = 0; n < n_rois; n++) {
    // Offset of this ROI's first output element.
    int index_n = n * channels * pooled_width * pooled_height;

    // Each ROI occupies 6 consecutive values.
    const T* current_roi = rois + n * 6;
    int roi_batch_ind = current_roi[0];

    // Do not use rounding; this implementation detail is critical
    T offset = aligned ? (T)0.5 : (T)0.0;
    T roi_center_w = current_roi[1] * spatial_scale - offset;
    T roi_center_h = current_roi[2] * spatial_scale - offset;
    T roi_width = current_roi[3] * spatial_scale;
    T roi_height = current_roi[4] * spatial_scale;
    // The angle is passed to cos/sin as-is (only its sign may be flipped).
    T theta = current_roi[5];
    if (clockwise) {
      theta = -theta;  // If clockwise, the angle needs to be reversed.
    }
    T cos_theta = cos(theta);
    T sin_theta = sin(theta);

    if (aligned) {
      AT_ASSERTM(roi_width >= 0 && roi_height >= 0,
                 "ROIs in ROIAlignRotated do not have non-negative size!");
    } else {  // for backward-compatibility only
      roi_width = std::max(roi_width, (T)1.);
      roi_height = std::max(roi_height, (T)1.);
    }

    T bin_size_h = static_cast<T>(roi_height) / static_cast<T>(pooled_height);
    T bin_size_w = static_cast<T>(roi_width) / static_cast<T>(pooled_width);

    // We use roi_bin_grid to sample the grid and mimic integral
    int roi_bin_grid_h = (sampling_ratio > 0)
                             ? sampling_ratio
                             : ceilf(roi_height / pooled_height);  // e.g., = 2
    int roi_bin_grid_w =
        (sampling_ratio > 0) ? sampling_ratio : ceilf(roi_width / pooled_width);

    // We do average (integral) pooling inside a bin
    // The max(..., 1) keeps the divisor non-zero when a grid dimension is 0
    // (possible for a zero-sized ROI with an adaptive sampling ratio).
    const T count = std::max(roi_bin_grid_h * roi_bin_grid_w, 1);  // e.g. = 4

    // we want to precalculate indices and weights shared by all channels,
    // this is the key point of optimization
    std::vector<PreCalc<T>> pre_calc(roi_bin_grid_h * roi_bin_grid_w *
                                     pooled_width * pooled_height);

    // roi_start_h and roi_start_w are computed wrt the center of RoI (x, y).
    // Appropriate translation needs to be applied after.
    T roi_start_h = -roi_height / 2.0;
    T roi_start_w = -roi_width / 2.0;

    // The grid sizes are passed twice: once as the per-bin loop bounds
    // (iy_upper / ix_upper) and once as the sample-spacing divisors.
    pre_calc_for_bilinear_interpolate(
        height, width, pooled_height, pooled_width, roi_bin_grid_h,
        roi_bin_grid_w, roi_start_h, roi_start_w, bin_size_h, bin_size_w,
        roi_bin_grid_h, roi_bin_grid_w, roi_center_h, roi_center_w, cos_theta,
        sin_theta, pre_calc);

    for (int c = 0; c < channels; c++) {
      int index_n_c = index_n + c * pooled_width * pooled_height;
      // Start of the (roi_batch_ind, c) feature plane.
      const T* offset_input =
          input + (roi_batch_ind * channels + c) * height * width;
      // Walks pre_calc in the same (ph, pw, iy, ix) order it was filled in;
      // restarted for every channel.
      int pre_calc_index = 0;

      for (int ph = 0; ph < pooled_height; ph++) {
        for (int pw = 0; pw < pooled_width; pw++) {
          int index = index_n_c + ph * pooled_width + pw;

          // Sum of bilinearly interpolated samples inside this bin.
          T output_val = 0.;
          for (int iy = 0; iy < roi_bin_grid_h; iy++) {
            for (int ix = 0; ix < roi_bin_grid_w; ix++) {
              PreCalc<T> pc = pre_calc[pre_calc_index];
              output_val += pc.w1 * offset_input[pc.pos1] +
                            pc.w2 * offset_input[pc.pos2] +
                            pc.w3 * offset_input[pc.pos3] +
                            pc.w4 * offset_input[pc.pos4];

              pre_calc_index += 1;
            }
          }
          // Turn the sum into an average over the samples of the bin.
          output_val /= count;

          output[index] = output_val;
        }  // for pw
      }    // for ph
    }      // for c
  }        // for n
}

// Computes the bilinear-interpolation weights and the four neighbouring
// integer coordinates for a sample at (y, x) on a height x width grid.
//
// Outputs:
//   w1..w4          weights of (y_low, x_low), (y_low, x_high),
//                   (y_high, x_low), (y_high, x_high)
//   x_low, x_high,
//   y_low, y_high   neighbour coordinates; all set to -1 (and all weights to
//                   0) when the sample is outside [-1, height] x [-1, width]
//
// In the forward pass the interpolated value is
//   w1 * v(y_low, x_low) + w2 * v(y_low, x_high) +
//   w3 * v(y_high, x_low) + w4 * v(y_high, x_high).
template <typename T>
void bilinear_interpolate_gradient(const int height, const int width, T y, T x,
                                   T& w1, T& w2, T& w3, T& w4, int& x_low,
                                   int& x_high, int& y_low, int& y_high) {
  // Samples outside the feature map contribute nothing.
  const bool out_of_bounds = y < -1.0 || y > height || x < -1.0 || x > width;
  if (out_of_bounds) {
    w1 = w2 = w3 = w4 = 0.;
    x_low = x_high = y_low = y_high = -1;
    return;
  }

  // Coordinates in [-1, 0) are snapped onto the first row / column.
  if (y < 0) y = 0;
  if (x < 0) x = 0;

  y_low = (int)y;
  x_low = (int)x;

  // Pick the upper neighbour; on or past the last row both neighbours
  // collapse onto that row and the fractional part becomes zero.
  if (y_low < height - 1) {
    y_high = y_low + 1;
  } else {
    y_low = height - 1;
    y_high = y_low;
    y = (T)y_low;
  }

  // Same handling along x.
  if (x_low < width - 1) {
    x_high = x_low + 1;
  } else {
    x_low = width - 1;
    x_high = x_low;
    x = (T)x_low;
  }

  // Fractional distances to the low neighbour and their complements.
  T ly = y - y_low;
  T lx = x - x_low;
  T hy = 1. - ly;
  T hx = 1. - lx;

  w1 = hy * hx;
  w2 = hy * lx;
  w3 = ly * hx;
  w4 = ly * lx;
}

// Accumulates `val` into the element pointed to by `address`.
// Plain, non-atomic read-modify-write.
template <class T>
inline void add(T* address, const T& val) {
  T& target = *address;
  target += val;
}

// Rotated ROI Align backward pass (average pooling) on CPU.
//
// Iterates over every element of the pooled output gradient and scatters it
// back to the four bilinear neighbours of each sampling point, accumulating
// into grad_input. The ROI geometry is recomputed for every output element.
//
// Params:
//   nthreads        number of pooled output elements to process
//   grad_output     output gradient; may be non-contiguous, so it is indexed
//                   through n_stride / c_stride / h_stride / w_stride
//   spatial_scale   factor applied to the ROI center and size
//   aligned         if true, subtract 0.5 from the scaled center and require
//                   non-negative ROI sizes; if false, clamp sizes to >= 1
//   clockwise       if true, the ROI angle is negated before use
//   channels, height, width       grad_input feature-map dimensions
//   pooled_height, pooled_width   pooled output size per ROI and channel
//   sampling_ratio  samples per bin along each axis; when <= 0 it is derived
//                   as ceil(roi_size / pooled_size)
//   grad_input      accumulated into (+=), indexed densely as
//                   ((batch * channels + c) * height + y) * width + x
//   rois            6 values per ROI, read as
//                   [batch_index, center_w, center_h, width, height, theta]
template <typename T>
void ROIAlignRotatedBackward(
    const int nthreads,
    // may not be contiguous. should index using n_stride, etc
    const T* grad_output, const T& spatial_scale, const bool aligned,
    const bool clockwise, const int channels, const int height, const int width,
    const int pooled_height, const int pooled_width, const int sampling_ratio,
    T* grad_input, const T* rois, const int n_stride, const int c_stride,
    const int h_stride, const int w_stride) {
  for (int index = 0; index < nthreads; index++) {
    // (n, c, ph, pw) is an element in the pooled output
    int pw = index % pooled_width;
    int ph = (index / pooled_width) % pooled_height;
    int c = (index / pooled_width / pooled_height) % channels;
    int n = index / pooled_width / pooled_height / channels;

    // Each ROI occupies 6 consecutive values.
    const T* current_roi = rois + n * 6;
    int roi_batch_ind = current_roi[0];

    // Do not use rounding; this implementation detail is critical
    T offset = aligned ? (T)0.5 : (T)0.0;
    T roi_center_w = current_roi[1] * spatial_scale - offset;
    T roi_center_h = current_roi[2] * spatial_scale - offset;
    T roi_width = current_roi[3] * spatial_scale;
    T roi_height = current_roi[4] * spatial_scale;
    // The angle is passed to cos/sin as-is (only its sign may be flipped).
    T theta = current_roi[5];
    if (clockwise) {
      theta = -theta;  // If clockwise, the angle needs to be reversed.
    }
    T cos_theta = cos(theta);
    T sin_theta = sin(theta);

    if (aligned) {
      AT_ASSERTM(roi_width >= 0 && roi_height >= 0,
                 "ROIs in ROIAlignRotated do not have non-negative size!");
    } else {  // for backward-compatibility only
      roi_width = std::max(roi_width, (T)1.);
      roi_height = std::max(roi_height, (T)1.);
    }

    T bin_size_h = static_cast<T>(roi_height) / static_cast<T>(pooled_height);
    T bin_size_w = static_cast<T>(roi_width) / static_cast<T>(pooled_width);

    // Start of the (roi_batch_ind, c) plane of grad_input.
    T* offset_grad_input =
        grad_input + ((roi_batch_ind * channels + c) * height * width);

    // Gradient of the pooled output element (n, c, ph, pw), fetched through
    // the explicit strides.
    int output_offset = n * n_stride + c * c_stride;
    const T* offset_grad_output = grad_output + output_offset;
    const T grad_output_this_bin =
        offset_grad_output[ph * h_stride + pw * w_stride];

    // We use roi_bin_grid to sample the grid and mimic integral
    int roi_bin_grid_h = (sampling_ratio > 0)
                             ? sampling_ratio
                             : ceilf(roi_height / pooled_height);  // e.g., = 2
    int roi_bin_grid_w =
        (sampling_ratio > 0) ? sampling_ratio : ceilf(roi_width / pooled_width);

    // roi_start_h and roi_start_w are computed wrt the center of RoI (x, y).
    // Appropriate translation needs to be applied after.
    T roi_start_h = -roi_height / 2.0;
    T roi_start_w = -roi_width / 2.0;

    // We do average (integral) pooling inside a bin
    // count is only used as a divisor inside the sampling loops below, which
    // do not execute when either grid dimension is 0.
    const T count = roi_bin_grid_h * roi_bin_grid_w;  // e.g. = 4

    for (int iy = 0; iy < roi_bin_grid_h; iy++) {
      // Sample position along y in the ROI-local (unrotated) frame.
      const T yy = roi_start_h + ph * bin_size_h +
                   static_cast<T>(iy + .5f) * bin_size_h /
                       static_cast<T>(roi_bin_grid_h);  // e.g., 0.5, 1.5
      for (int ix = 0; ix < roi_bin_grid_w; ix++) {
        // Sample position along x in the ROI-local (unrotated) frame.
        const T xx = roi_start_w + pw * bin_size_w +
                     static_cast<T>(ix + .5f) * bin_size_w /
                         static_cast<T>(roi_bin_grid_w);

        // Rotate by theta around the center and translate
        T y = yy * cos_theta - xx * sin_theta + roi_center_h;
        T x = yy * sin_theta + xx * cos_theta + roi_center_w;

        T w1, w2, w3, w4;
        int x_low, x_high, y_low, y_high;

        bilinear_interpolate_gradient(height, width, y, x, w1, w2, w3, w4,
                                      x_low, x_high, y_low, y_high);

        // Share of this bin's gradient that goes to each of the 4 neighbours.
        T g1 = grad_output_this_bin * w1 / count;
        T g2 = grad_output_this_bin * w2 / count;
        T g3 = grad_output_this_bin * w3 / count;
        T g4 = grad_output_this_bin * w4 / count;

        // Out-of-bounds samples are reported with all indices set to -1 and
        // are skipped here.
        if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0) {
          // atomic add is not needed for now since it is single threaded
          add(offset_grad_input + y_low * width + x_low, static_cast<T>(g1));
          add(offset_grad_input + y_low * width + x_high, static_cast<T>(g2));
          add(offset_grad_input + y_high * width + x_low, static_cast<T>(g3));
          add(offset_grad_input + y_high * width + x_high, static_cast<T>(g4));
        }  // if
      }    // ix
    }      // iy
  }        // for
}  // ROIAlignRotatedBackward

// Host-side driver for the rotated ROI Align forward pass on CPU.
//
// Reads the output element count and the input's size along dims 1..3, then
// dispatches on the input's scalar type and runs ROIAlignRotatedForward;
// aligned_height / aligned_width are passed as the pooled output size.
void ROIAlignRotatedForwardCPULauncher(Tensor input, Tensor rois, Tensor output,
                                       int aligned_height, int aligned_width,
                                       float spatial_scale, int sampling_ratio,
                                       bool aligned, bool clockwise) {
  const int total_outputs = output.numel();
  const int num_channels = input.size(1);
  const int feat_height = input.size(2);
  const int feat_width = input.size(3);

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      input.scalar_type(), "ROIAlignRotated_forward", [&] {
        // Convert the scale once to the dispatched element type.
        const scalar_t scale = static_cast<scalar_t>(spatial_scale);

        auto* input_data = input.data_ptr<scalar_t>();
        auto* rois_data = rois.data_ptr<scalar_t>();
        auto* output_data = output.data_ptr<scalar_t>();

        ROIAlignRotatedForward<scalar_t>(
            total_outputs, input_data, scale, aligned, clockwise, num_channels,
            feat_height, feat_width, aligned_height, aligned_width,
            sampling_ratio, rois_data, output_data);
      });
}

// Host-side driver for the rotated ROI Align backward pass on CPU.
//
// Collects the size of grad_input along dims 1..3 and the four strides of
// grad_output, then dispatches on the gradient's scalar type and runs
// ROIAlignRotatedBackward over every element of grad_output.
void ROIAlignRotatedBackwardCPULauncher(Tensor grad_output, Tensor rois,
                                        Tensor grad_input, int aligned_height,
                                        int aligned_width, float spatial_scale,
                                        int sampling_ratio, bool aligned,
                                        bool clockwise) {
  const int num_channels = grad_input.size(1);
  const int feat_height = grad_input.size(2);
  const int feat_width = grad_input.size(3);

  // grad_output is addressed through its strides inside the kernel, so pass
  // them along explicitly instead of assuming a dense layout.
  const int stride_n = grad_output.stride(0);
  const int stride_c = grad_output.stride(1);
  const int stride_h = grad_output.stride(2);
  const int stride_w = grad_output.stride(3);

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      grad_output.scalar_type(), "ROIAlignRotated_backward", [&] {
        const scalar_t scale = static_cast<scalar_t>(spatial_scale);

        auto* grad_out_data = grad_output.data_ptr<scalar_t>();
        auto* grad_in_data = grad_input.data_ptr<scalar_t>();
        auto* rois_data = rois.data_ptr<scalar_t>();

        ROIAlignRotatedBackward<scalar_t>(
            grad_output.numel(), grad_out_data, scale, aligned, clockwise,
            num_channels, feat_height, feat_width, aligned_height,
            aligned_width, sampling_ratio, grad_in_data, rois_data, stride_n,
            stride_c, stride_h, stride_w);
      });
}

// CPU implementation of the rotated ROI Align forward op.
//
// Validates the ROI layout and then delegates to
// ROIAlignRotatedForwardCPULauncher with all arguments unchanged.
//
// Raises (via AT_ERROR) when `rois` does not have exactly 6 values per row.
void roi_align_rotated_forward_cpu(Tensor input, Tensor rois, Tensor output,
                                   int aligned_height, int aligned_width,
                                   float spatial_scale, int sampling_ratio,
                                   bool aligned, bool clockwise) {
  // The CPU kernel reads each ROI as 6 consecutive values (rois + n * 6).
  // Any other row width would make it read misaligned or out-of-bounds data,
  // so reject it up front, exactly as roi_align_rotated_backward_cpu does.
  int size_rois = rois.size(1);
  if (size_rois != 6) {
    AT_ERROR("wrong roi size");
  }
  ROIAlignRotatedForwardCPULauncher(input, rois, output, aligned_height,
                                    aligned_width, spatial_scale,
                                    sampling_ratio, aligned, clockwise);
}

// CPU implementation of the rotated ROI Align backward op.
//
// Rejects ROI tensors whose rows do not hold exactly 6 values, then delegates
// to ROIAlignRotatedBackwardCPULauncher with all arguments unchanged.
void roi_align_rotated_backward_cpu(Tensor top_grad, Tensor rois,
                                    Tensor bottom_grad, int aligned_height,
                                    int aligned_width, float spatial_scale,
                                    int sampling_ratio, bool aligned,
                                    bool clockwise) {
  // The kernel strides through `rois` 6 values at a time.
  const int roi_columns = rois.size(1);
  const bool bad_roi_layout = (roi_columns != 6);
  if (bad_roi_layout) {
    AT_ERROR("wrong roi size");
  }

  ROIAlignRotatedBackwardCPULauncher(top_grad, rois, bottom_grad,
                                     aligned_height, aligned_width,
                                     spatial_scale, sampling_ratio, aligned,
                                     clockwise);
}

// Forward declaration of the device-generic rotated ROI Align forward entry
// point. Only declared here; its definition is not in this file.
void roi_align_rotated_forward_impl(Tensor features, Tensor rois, Tensor output,
                                    int aligned_height, int aligned_width,
                                    float spatial_scale, int sample_ratio,
                                    bool aligned, bool clockwise);

// Forward declaration of the device-generic rotated ROI Align backward entry
// point.
void roi_align_rotated_backward_impl(Tensor top_grad, Tensor rois,
                                     Tensor bottom_grad, int aligned_height,
                                     int aligned_width, float spatial_scale,
                                     int sample_ratio, bool aligned,
                                     bool clockwise);
// Associate the CPU functions with the generic entry points.
// NOTE(review): presumably REGISTER_DEVICE_IMPL (pytorch_device_registry.hpp)
// makes the *_impl call resolve to the *_cpu function for CPU tensors --
// confirm against the macro definition.
REGISTER_DEVICE_IMPL(roi_align_rotated_forward_impl, CPU,
                     roi_align_rotated_forward_cpu);
REGISTER_DEVICE_IMPL(roi_align_rotated_backward_impl, CPU,
                     roi_align_rotated_backward_cpu);


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cpu/voxelization.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved.
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Computes, for every point, the integer voxel coordinate it falls into.
//
// For point i and axis j the coordinate is
//   floor((points[i][j] - coors_range[j]) / voxel_size[j]).
// The result is stored in reversed axis order, i.e. axis j is written to
// coors[i][NDim - 1 - j]. If any coordinate lies outside [0, grid_size[j]),
// the whole row coors[i] is filled with -1 to mark the point as dropped.
//
// Params:
//   points        point data; only the first NDim values of each row are read
//   coors         output, one row of NDim values per point
//   voxel_size    voxel extent per axis
//   coors_range   its first NDim entries are used as the per-axis minimum
//   grid_size     number of voxels per axis
//   num_points    number of rows of `points` / `coors` to process
//   num_features  unused here; kept for a uniform kernel signature
//   NDim          number of spatial dimensions
template <typename T, typename T_int>
void dynamic_voxelize_forward_cpu_kernel(
    const torch::TensorAccessor<T, 2> points,
    torch::TensorAccessor<T_int, 2> coors, const std::vector<float> voxel_size,
    const std::vector<float> coors_range, const std::vector<int> grid_size,
    const int num_points, const int num_features, const int NDim) {
  const int ndim_minus_1 = NDim - 1;

  // Scratch coordinate for the current point. It is owned by a vector (no
  // manual new[]/delete[]) and typed T_int so it always matches the element
  // type of `coors`.
  std::vector<T_int> coor(NDim);

  for (int i = 0; i < num_points; ++i) {
    bool failed = false;
    for (int j = 0; j < NDim; ++j) {
      const int c = floor((points[i][j] - coors_range[j]) / voxel_size[j]);
      // necessary to rm points out of range
      if (c < 0 || c >= grid_size[j]) {
        failed = true;
        break;
      }
      coor[ndim_minus_1 - j] = c;
    }

    // Write element-wise through the accessor so the row's stride is
    // respected; a raw memset/memcpy on &coors[i][0] would silently assume a
    // densely packed row.
    auto coors_row = coors[i];
    for (int k = 0; k < NDim; ++k) {
      coors_row[k] = failed ? static_cast<T_int>(-1) : coor[k];
    }
  }
}

// Groups points into voxels subject to per-voxel and total-voxel limits.
//
// Voxel coordinates are first computed for every point with
// dynamic_voxelize_forward_cpu_kernel; points are then visited in order and
// appended to their voxel.
//
// Params:
//   points                input points, num_features values per row
//   voxels                output, indexed [voxel][slot][feature]
//   coors                 output, coordinate row of every created voxel
//   num_points_per_voxel  in/out, number of points stored in each voxel
//   coor_to_voxelidx      in/out lookup from coordinate to voxel index,
//                         -1 meaning "no voxel yet"
//   voxel_num             in/out, number of voxels created so far
//   max_points            per-voxel capacity, -1 disables the limit
//   max_voxels            voxel budget, -1 disables the limit
//   remaining arguments   forwarded to dynamic_voxelize_forward_cpu_kernel
template <typename T, typename T_int>
void hard_voxelize_forward_cpu_kernel(
    const torch::TensorAccessor<T, 2> points,
    torch::TensorAccessor<T, 3> voxels, torch::TensorAccessor<T_int, 2> coors,
    torch::TensorAccessor<T_int, 1> num_points_per_voxel,
    torch::TensorAccessor<T_int, 3> coor_to_voxelidx, int& voxel_num,
    const std::vector<float> voxel_size, const std::vector<float> coors_range,
    const std::vector<int> grid_size, const int max_points,
    const int max_voxels, const int num_points, const int num_features,
    const int NDim) {
  // Scratch int tensor on CPU holding the voxel coordinate of every point.
  const auto scratch_options =
      at::TensorOptions().dtype(at::kInt).device(at::kCPU);
  at::Tensor temp_coors = at::zeros({num_points, NDim}, scratch_options);

  // Step 1: plain (dynamic) voxelization to get per-point coordinates.
  dynamic_voxelize_forward_cpu_kernel<T, int>(
      points, temp_coors.accessor<int, 2>(), voxel_size, coors_range, grid_size,
      num_points, num_features, NDim);

  // Step 2: apply the max_points / max_voxels constraints.
  auto point_coor = temp_coors.accessor<int, 2>();

  for (int i = 0; i < num_points; ++i) {
    // A leading -1 marks a point that fell outside the grid.
    if (point_coor[i][0] == -1) continue;

    int voxelidx =
        coor_to_voxelidx[point_coor[i][0]][point_coor[i][1]][point_coor[i][2]];

    if (voxelidx == -1) {
      // First point seen at this coordinate: open a new voxel unless the
      // voxel budget is already used up, in which case the point is dropped.
      if (max_voxels != -1 && voxel_num >= max_voxels) continue;
      voxelidx = voxel_num;
      voxel_num += 1;

      coor_to_voxelidx[point_coor[i][0]][point_coor[i][1]][point_coor[i][2]] =
          voxelidx;
      memcpy(&coors[voxelidx][0], &point_coor[i][0], NDim * sizeof(T_int));
    }

    // Append the point to its voxel if there is still room.
    const int filled = num_points_per_voxel[voxelidx];
    const bool has_room = (max_points == -1) || (filled < max_points);
    if (has_room) {
      memcpy(&voxels[voxelidx][filled][0], &points[i][0],
             num_features * sizeof(T));
      num_points_per_voxel[voxelidx] += 1;
    }
  }
}

// CPU implementation of dynamic voxelization.
//
// Derives the voxel grid size from `coors_range` and `voxel_size`, then runs
// dynamic_voxelize_forward_cpu_kernel for the scalar type of `points`,
// writing int coordinates into `coors`.
//
// `coors_range` is read as NDim minimum values followed by NDim maximum
// values. Asserts that `points` lives on the CPU.
void dynamic_voxelize_forward_cpu(const at::Tensor& points, at::Tensor& coors,
                                  const std::vector<float> voxel_size,
                                  const std::vector<float> coors_range,
                                  const int NDim = 3) {
  AT_ASSERTM(points.device().is_cpu(), "points must be a CPU tensor");

  const int num_points = points.size(0);
  const int num_features = points.size(1);

  // Number of voxels along each axis: (max - min) / voxel_size, rounded.
  std::vector<int> grid_size(NDim);
  for (int axis = 0; axis < NDim; ++axis) {
    grid_size[axis] = round((coors_range[NDim + axis] - coors_range[axis]) /
                            voxel_size[axis]);
  }

  // `coors` is accessed as an int tensor regardless of the point dtype.
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      points.scalar_type(), "dynamic_voxelize_forward_cpu_kernel", [&] {
        auto points_acc = points.accessor<scalar_t, 2>();
        auto coors_acc = coors.accessor<int, 2>();
        dynamic_voxelize_forward_cpu_kernel<scalar_t, int>(
            points_acc, coors_acc, voxel_size, coors_range, grid_size,
            num_points, num_features, NDim);
      });
}

// CPU implementation of hard voxelization.
//
// Derives the voxel grid size from `coors_range` and `voxel_size`, builds a
// coordinate -> voxel-index lookup initialised to -1, and runs
// hard_voxelize_forward_cpu_kernel for the scalar type of `points`.
//
// `coors_range` is read as NDim minimum values followed by NDim maximum
// values. Asserts that `points` lives on the CPU.
//
// Returns the number of voxels that were created.
int hard_voxelize_forward_cpu(const at::Tensor& points, at::Tensor& voxels,
                              at::Tensor& coors,
                              at::Tensor& num_points_per_voxel,
                              const std::vector<float> voxel_size,
                              const std::vector<float> coors_range,
                              const int max_points, const int max_voxels,
                              const int NDim = 3) {
  // Original author's note: the current version takes about 0.02s-0.03s for
  // one frame on CPU.
  AT_ASSERTM(points.device().is_cpu(), "points must be a CPU tensor");

  const int num_points = points.size(0);
  const int num_features = points.size(1);

  // Number of voxels along each axis: (max - min) / voxel_size, rounded.
  std::vector<int> grid_size(NDim);
  for (int axis = 0; axis < NDim; ++axis) {
    grid_size[axis] = round((coors_range[NDim + axis] - coors_range[axis]) /
                            voxel_size[axis]);
  }

  // Lookup table in reversed axis order (grid_size[2], [1], [0]), created
  // with the same options as `coors` and filled with -1 ("no voxel yet").
  at::Tensor coor_to_voxelidx =
      -at::ones({grid_size[2], grid_size[1], grid_size[0]}, coors.options());

  // coors, num_points_per_voxel and coor_to_voxelidx are accessed as int
  // tensors regardless of the point dtype.
  int voxel_num = 0;
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      points.scalar_type(), "hard_voxelize_forward_cpu_kernel", [&] {
        auto points_acc = points.accessor<scalar_t, 2>();
        auto voxels_acc = voxels.accessor<scalar_t, 3>();
        auto coors_acc = coors.accessor<int, 2>();
        auto counts_acc = num_points_per_voxel.accessor<int, 1>();
        auto lookup_acc = coor_to_voxelidx.accessor<int, 3>();

        hard_voxelize_forward_cpu_kernel<scalar_t, int>(
            points_acc, voxels_acc, coors_acc, counts_acc, lookup_acc,
            voxel_num, voxel_size, coors_range, grid_size, max_points,
            max_voxels, num_points, num_features, NDim);
      });

  return voxel_num;
}

// Forward declaration of the device-generic hard voxelization entry point.
// Only declared here; its definition is not in this file.
int hard_voxelize_forward_impl(const at::Tensor& points, at::Tensor& voxels,
                               at::Tensor& coors,
                               at::Tensor& num_points_per_voxel,
                               const std::vector<float> voxel_size,
                               const std::vector<float> coors_range,
                               const int max_points, const int max_voxels,
                               const int NDim);

// Forward declaration of the device-generic dynamic voxelization entry point.
void dynamic_voxelize_forward_impl(const at::Tensor& points, at::Tensor& coors,
                                   const std::vector<float> voxel_size,
                                   const std::vector<float> coors_range,
                                   const int NDim);
// Associate the CPU functions with the generic entry points.
// NOTE(review): presumably REGISTER_DEVICE_IMPL (pytorch_device_registry.hpp)
// makes the *_impl call resolve to the *_cpu function for CPU tensors --
// confirm against the macro definition.
REGISTER_DEVICE_IMPL(hard_voxelize_forward_impl, CPU,
                     hard_voxelize_forward_cpu);
REGISTER_DEVICE_IMPL(dynamic_voxelize_forward_impl, CPU,
                     dynamic_voxelize_forward_cpu);


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/active_rotated_filter_cuda.cu
================================================
// Copyright (c) OpenMMLab. All rights reserved.
// Modified from
// https://github.com/csuhan/s2anet/blob/master/mmdet/ops/orn/src/cuda/ActiveRotatingFilter_cuda.cu
#include "active_rotated_filter_cuda_kernel.cuh"
#include "pytorch_cuda_helper.hpp"

// Launches the active-rotated-filter forward CUDA kernel.
//
// Problem sizes are read from the 5 dimensions of `input` and from dim 3 of
// `indices`; one kernel work item is scheduled per element of `input`. The
// launch runs on the current CUDA stream of `input`'s device, and any launch
// error is surfaced through AT_CUDA_CHECK.
void ActiveRotatedFilterForwardCUDAKernelLauncher(const Tensor input,
                                                  const Tensor indices,
                                                  Tensor output) {
  const int num_output_planes = input.size(0);
  const int num_input_planes = input.size(1);
  const int num_orientations = input.size(2);
  const int kernel_h = input.size(3);
  const int kernel_w = input.size(4);
  const int num_rotations = indices.size(3);

  // Number of entries in one (orientations x kH x kW) block.
  const int entry_count = num_orientations * kernel_h * kernel_w;
  const int total_elems = input.numel();

  at::cuda::CUDAGuard device_guard(input.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      input.scalar_type(), "active_rotated_filter_forward_cuda_kernel", [&] {
        active_rotated_filter_forward_cuda_kernel<scalar_t>
            <<<GET_BLOCKS(total_elems), THREADS_PER_BLOCK, 0, stream>>>(
                total_elems, input.data_ptr<scalar_t>(),
                indices.data_ptr<int>(), num_input_planes, num_output_planes,
                num_orientations, num_rotations, entry_count,
                output.data_ptr<scalar_t>());
      });

  AT_CUDA_CHECK(cudaGetLastError());
}

// Launches the active-rotated-filter backward CUDA kernel.
//
// Orientation / kernel / rotation sizes are read from the 4 dimensions of
// `indices`; the plane counts are recovered by dividing the first two
// dimensions of `grad_out` by the rotation and orientation counts. One kernel
// work item is scheduled per element of `grad_in`. The launch runs on the
// current CUDA stream of `indices`' device, and any launch error is surfaced
// through AT_CUDA_CHECK.
void ActiveRotatedFilterBackwardCUDAKernelLauncher(const Tensor grad_out,
                                                   const Tensor indices,
                                                   Tensor grad_in) {
  const int num_orientations = indices.size(0);
  const int kernel_h = indices.size(1);
  const int kernel_w = indices.size(2);
  const int num_rotations = indices.size(3);

  const int num_output_planes = grad_out.size(0) / num_rotations;
  const int num_input_planes = grad_out.size(1) / num_orientations;

  // Number of entries in one (orientations x kH x kW) block.
  const int entry_count = num_orientations * kernel_h * kernel_w;
  const int total_elems = grad_in.numel();

  at::cuda::CUDAGuard device_guard(indices.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      grad_out.scalar_type(), "active_rotated_filter_backward_cuda_kernel",
      [&] {
        active_rotated_filter_backward_cuda_kernel<scalar_t>
            <<<GET_BLOCKS(total_elems), THREADS_PER_BLOCK, 0, stream>>>(
                total_elems, grad_out.data_ptr<scalar_t>(),
                indices.data_ptr<int>(), num_input_planes, num_output_planes,
                num_orientations, num_rotations, entry_count,
                grad_in.data_ptr<scalar_t>());
      });

  AT_CUDA_CHECK(cudaGetLastError());
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/assign_score_withk_cuda.cu
================================================
// Modified from
// https://github.com/CVMI-Lab/PAConv/tree/main/scene_seg/lib/paconv_lib/src/gpu
#include <stdio.h>
#include <stdlib.h>

#include "assign_score_withk_cuda_kernel.cuh"
#include "pytorch_cuda_helper.hpp"

void AssignScoreWithKForwardCUDAKernelLauncher(
    int B, int N0, int N1, int M, int K, int O, int aggregate,
    const Tensor& points, const Tensor& centers, const Tensor& scores,
    const Tensor& knn_idx, Tensor& output) {
  at::cuda::CUDAGuard device_guard(points.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();

  dim3 blocks(GET_BLOCKS(B * O * N1 * K, THREADS_PER_BLOCK));
  dim3 threads(THREADS_PER_BLOCK);

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      points.scalar_type(), "assign_score_withk_forward_cuda_kernel", [&] {
        assign_score_withk_forward_cuda_kernel<scalar_t>
            <<<blocks, threads, 0, stream>>>(
                B, N0, N1, M, K, O, aggregate, points.data_ptr<scalar_t>(),
                centers.data_ptr<scalar_t>(), scores.data_ptr<scalar_t>(),
                knn_idx.data_ptr<int64_t>(), output.data_ptr<scalar_t>());
      });

  AT_CUDA_CHECK(cudaGetLastError());
}

void AssignScoreWithKBackwardCUDAKernelLauncher(
    int B, int N0, int N1, int M, int K, int O, int aggregate,
    const Tensor& grad_out, const Tensor& points, const Tensor& centers,
    const Tensor& scores, const Tensor& knn_idx, Tensor& grad_points,
    Tensor& grad_centers, Tensor& grad_scores) {
  at::cuda::CUDAGuard device_guard(grad_out.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();

  dim3 blocks1(GET_BLOCKS(B * M * O, THREADS_PER_BLOCK));
  dim3 threads1(THREADS_PER_BLOCK);
  dim3 blocks2(GET_BLOCKS(B * N1 * K * M, THREADS_PER_BLOCK));
  dim3 threads2(THREADS_PER_BLOCK);

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      grad_out.scalar_type(), "assign_score_withk_points_backward_cuda_kernel",
      [&] {
        assign_score_withk_points_backward_cuda_kernel<scalar_t>
            <<<blocks1, threads1, 0, stream>>>(
                B, N0, N1, M, K, O, aggregate, grad_out.data_ptr<scalar_t>(),
                scores.data_ptr<scalar_t>(), knn_idx.data_ptr<int64_t>(),
                grad_points.data_ptr<scalar_t>(),
                grad_centers.data_ptr<scalar_t>());
      });

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      grad_out.scalar_type(), "assign_score_withk_scores_backward_cuda_kernel",
      [&] {
        assign_score_withk_scores_backward_cuda_kernel<scalar_t>
            <<<blocks2, threads2, 0, stream>>>(
                B, N0, N1, M, K, O, aggregate, grad_out.data_ptr<scalar_t>(),
                points.data_ptr<scalar_t>(), centers.data_ptr<scalar_t>(),
                knn_idx.data_ptr<int64_t>(), grad_scores.data_ptr<scalar_t>());
      });

  AT_CUDA_CHECK(cudaGetLastError());
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/ball_query_cuda.cu
================================================
// Copyright (c) OpenMMLab. All rights reserved
// Modified from
// https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/ball_query_gpu.cu

#include <math.h>
#include <stdio.h>
#include <stdlib.h>

#include "ball_query_cuda_kernel.cuh"
#include "pytorch_cuda_helper.hpp"

void BallQueryForwardCUDAKernelLauncher(int b, int n, int m, float min_radius,
                                        float max_radius, int nsample,
                                        const Tensor new_xyz, const Tensor xyz,
                                        Tensor idx) {
  // new_xyz: (B, M, 3)
  // xyz: (B, N, 3)
  // output:
  //      idx: (B, M, nsample)

  at::cuda::CUDAGuard device_guard(new_xyz.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();

  // blockIdx.x(col), blockIdx.y(row)
  dim3 blocks(GET_BLOCKS(m, THREADS_PER_BLOCK), b);
  dim3 threads(THREADS_PER_BLOCK);

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      new_xyz.scalar_type(), "ball_query_forward_cuda_kernel", [&] {
        ball_query_forward_cuda_kernel<scalar_t>
            <<<blocks, threads, 0, stream>>>(
                b, n, m, min_radius, max_radius, nsample,
                new_xyz.data_ptr<scalar_t>(), xyz.data_ptr<scalar_t>(),
                idx.data_ptr<int>());
      });

  AT_CUDA_CHECK(cudaGetLastError());
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/bbox_overlaps_cuda.cu
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "bbox_overlaps_cuda_kernel.cuh"
#include "pytorch_cuda_helper.hpp"

void BBoxOverlapsCUDAKernelLauncher(const Tensor bboxes1, const Tensor bboxes2,
                                    Tensor ious, const int mode,
                                    const bool aligned, const int offset) {
  int output_size = ious.numel();
  int num_bbox1 = bboxes1.size(0);
  int num_bbox2 = bboxes2.size(0);

  at::cuda::CUDAGuard device_guard(bboxes1.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      bboxes1.scalar_type(), "bbox_overlaps_cuda_kernel", ([&] {
        bbox_overlaps_cuda_kernel<scalar_t>
            <<<GET_BLOCKS(output_size), THREADS_PER_BLOCK, 0, stream>>>(
                bboxes1.data_ptr<scalar_t>(), bboxes2.data_ptr<scalar_t>(),
                ious.data_ptr<scalar_t>(), num_bbox1, num_bbox2, mode, aligned,
                offset);
      }));
  AT_CUDA_CHECK(cudaGetLastError());
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/border_align_cuda.cu
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "border_align_cuda_kernel.cuh"
#include "pytorch_cuda_helper.hpp"

void BorderAlignForwardCUDAKernelLauncher(const Tensor &input,
                                          const Tensor &boxes, Tensor output,
                                          Tensor argmax_idx,
                                          const int pool_size) {
  // shape assertion
  AT_ASSERTM(input.ndimension() == 4,
             "non-empty 4D(batch mode) tensor expected for input feature");
  AT_ASSERTM(boxes.ndimension() == 3,
             "boxes must be 3D tensor with size of [B, H*W, 4]");

  int batch_size = input.size(0);
  int feat_channels = input.size(1);
  int channels = feat_channels / 4;
  int height = input.size(2);
  int width = input.size(3);
  // shape [N, box_size, 4] for boxes. (x1, y1, x2, y2) format
  int box_size = boxes.size(1);
  // shape [N, channels, box_size, 4] for output
  int nthreads = batch_size * channels * box_size;

  at::cuda::CUDAGuard device_guard(input.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
  dim3 block(128, 4);
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      input.scalar_type(), "border_align_forward_cuda_kernel", [&] {
        border_align_forward_cuda_kernel<scalar_t>
            <<<GET_BLOCKS(nthreads), block, 0, stream>>>(
                nthreads, input.data_ptr<scalar_t>(),
                boxes.data_ptr<scalar_t>(), output.data_ptr<scalar_t>(),
                argmax_idx.data_ptr<int>(), channels, box_size, height, width,
                pool_size);
      });

  AT_CUDA_CHECK(cudaGetLastError());
}

void BorderAlignBackwardCUDAKernelLauncher(const Tensor &grad_output,
                                           const Tensor &boxes,
                                           const Tensor &argmax_idx,
                                           Tensor grad_input,
                                           const int pool_size) {
  int batch_size = grad_input.size(0);
  int feat_channels = grad_input.size(1);
  int channels = feat_channels / 4;
  int height = grad_input.size(2);
  int width = grad_input.size(3);
  int box_size = boxes.size(1);
  int nthreads = batch_size * channels * box_size;

  at::cuda::CUDAGuard device_guard(grad_output.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
  dim3 block(128, 4);
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      grad_output.scalar_type(), "border_align_backward_cuda_kernel", [&] {
        border_align_backward_cuda_kernel<scalar_t>
            <<<GET_BLOCKS(nthreads), block, 0, stream>>>(
                nthreads, grad_output.data_ptr<scalar_t>(),
                boxes.data_ptr<scalar_t>(), argmax_idx.data_ptr<int>(),
                grad_input.data_ptr<scalar_t>(), channels, box_size, height,
                width, pool_size);
      });

  AT_CUDA_CHECK(cudaGetLastError());
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/box_iou_rotated_cuda.cu
================================================
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
// modified from
// https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cuda.cu
#include "box_iou_rotated_cuda.cuh"
#include "pytorch_cuda_helper.hpp"

void box_iou_rotated_cuda(const Tensor boxes1, const Tensor boxes2, Tensor ious,
                          const int mode_flag, const bool aligned) {
  using scalar_t = float;
  AT_ASSERTM(boxes1.is_cuda(), "boxes1 must be a CUDA tensor");
  AT_ASSERTM(boxes2.is_cuda(), "boxes2 must be a CUDA tensor");

  int output_size = ious.numel();
  int num_boxes1 = boxes1.size(0);
  int num_boxes2 = boxes2.size(0);

  at::cuda::CUDAGuard device_guard(boxes1.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
  box_iou_rotated_cuda_kernel<scalar_t>
      <<<GET_BLOCKS(output_size), THREADS_PER_BLOCK, 0, stream>>>(
          num_boxes1, num_boxes2, boxes1.data_ptr<scalar_t>(),
          boxes2.data_ptr<scalar_t>(), (scalar_t*)ious.data_ptr<scalar_t>(),
          mode_flag, aligned);
  AT_CUDA_CHECK(cudaGetLastError());
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/carafe_cuda.cu
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "carafe_cuda_kernel.cuh"
#include "pytorch_cuda_helper.hpp"

void CARAFEForwardCUDAKernelLauncher(const Tensor features, const Tensor masks,
                                     Tensor rfeatures, Tensor routput,
                                     Tensor rmasks, Tensor output,
                                     const int kernel_size,
                                     const int group_size,
                                     const int scale_factor) {
  const int batch_size = output.size(0);
  const int channels = output.size(1);
  const int output_height = output.size(2);
  const int output_width = output.size(3);

  const int input_height = features.size(2);
  const int input_width = features.size(3);

  const int mask_channels = masks.size(1);

  rfeatures.resize_({batch_size, input_height, input_width, channels});
  routput.resize_({batch_size, output_height, output_width, channels});
  rmasks.resize_({batch_size, output_height, output_width, mask_channels});

  // one warp per pixel
  at::cuda::CUDAGuard device_guard(features.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      features.scalar_type(), "NCHW2NHWC_Feature", ([&] {
        const scalar_t *bottom_data = features.data_ptr<scalar_t>();
        scalar_t *top_data = rfeatures.data_ptr<scalar_t>();
        const int dh = divideUP(channels, kTileDim);
        const int dw = divideUP(input_height * input_width, kTileDim);
        BatchTranspose2DCUDAKernel<scalar_t>
            <<<batch_size * dh * dw, dim3(kTileDim, kBlockRows), 0, stream>>>(
                batch_size, channels, input_height * input_width, dh, dw,
                bottom_data, top_data);
      }));
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      features.scalar_type(), "NCHW2NHWC_Masks", ([&] {
        const scalar_t *bottom_data = masks.data_ptr<scalar_t>();
        scalar_t *top_data = rmasks.data_ptr<scalar_t>();
        const int dh = divideUP(mask_channels, kTileDim);
        const int dw = divideUP(output_height * output_width, kTileDim);
        BatchTranspose2DCUDAKernel<scalar_t>
            <<<batch_size * dh * dw, dim3(kTileDim, kBlockRows), 0, stream>>>(
                batch_size, mask_channels, output_height * output_width, dh, dw,
                bottom_data, top_data);
      }));
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      features.scalar_type(), "CARAFELaucherForward", ([&] {
        const int num_kernels =
            batch_size * output_height * output_width * THREADS_PER_PIXEL;
        const scalar_t *bottom_data = rfeatures.data_ptr<scalar_t>();
        const scalar_t *bottom_masks = rmasks.data_ptr<scalar_t>();
        scalar_t *top_data = routput.data_ptr<scalar_t>();

        CARAFEForward<scalar_t><<<divideUP(num_kernels, THREADS_PER_BLOCK),
                                  THREADS_PER_BLOCK, 0, stream>>>(
            num_kernels, bottom_data, bottom_masks, kernel_size, group_size,
            scale_factor, channels, input_height, input_width, output_height,
            output_width, mask_channels, top_data);
      }));
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      features.scalar_type(), "NHWC2NCHW", ([&] {
        const scalar_t *bottom_data = routput.data_ptr<scalar_t>();
        scalar_t *top_data = output.data_ptr<scalar_t>();
        const int dh = divideUP(output_height * output_width, kTileDim);
        const int dw = divideUP(channels, kTileDim);
        BatchTranspose2DCUDAKernel<scalar_t>
            <<<batch_size * dh * dw, dim3(kTileDim, kBlockRows), 0, stream>>>(
                batch_size, output_height * output_width, channels, dh, dw,
                bottom_data, top_data);
      }));

  AT_CUDA_CHECK(cudaGetLastError());
}

void CARAFEBackwardCUDAKernelLauncher(
    const Tensor top_grad, const Tensor rfeatures, const Tensor masks,
    Tensor rtop_grad, Tensor rbottom_grad_hs, Tensor rbottom_grad,
    Tensor rmask_grad, Tensor bottom_grad, Tensor mask_grad,
    const int kernel_size, const int group_size, const int scale_factor) {
  const int batch_size = top_grad.size(0);
  const int channels = top_grad.size(1);
  const int output_height = top_grad.size(2);
  const int output_width = top_grad.size(3);

  const int input_height = bottom_grad.size(2);
  const int input_width = bottom_grad.size(3);

  const int mask_channels = masks.size(1);

  rtop_grad.resize_({batch_size, output_height, output_width, channels});
  rbottom_grad.resize_({batch_size, input_height, input_width, channels});
  rbottom_grad_hs.resize_({batch_size, output_height, output_width, channels});
  rmask_grad.resize_({batch_size, output_height, output_width, mask_channels});

  at::cuda::CUDAGuard device_guard(top_grad.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      top_grad.scalar_type(), "NCHW2NHWC_Top_Grad", ([&] {
        const scalar_t *bottom_data = top_grad.data_ptr<scalar_t>();
        scalar_t *top_data = rtop_grad.data_ptr<scalar_t>();
        const int dh = divideUP(channels, kTileDim);
        const int dw = divideUP(output_height * output_width, kTileDim);
        BatchTranspose2DCUDAKernel<scalar_t>
            <<<batch_size * dh * dw, dim3(kTileDim, kBlockRows), 0, stream>>>(
                batch_size, channels, output_height * output_width, dh, dw,
                bottom_data, top_data);
      }));

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      top_grad.scalar_type(), "CARAFELaucherBackward_Feature", ([&] {
        const int num_kernels =
            batch_size * output_height * output_width * THREADS_PER_PIXEL;
        const scalar_t *top_diff = rtop_grad.data_ptr<scalar_t>();
        const scalar_t *bottom_masks = masks.data_ptr<scalar_t>();
        scalar_t *bottom_diff = rbottom_grad_hs.data_ptr<scalar_t>();

        CARAFEBackward_Feature<scalar_t>
            <<<divideUP(num_kernels, THREADS_PER_BLOCK), THREADS_PER_BLOCK, 0,
               stream>>>(num_kernels, top_diff, bottom_masks, kernel_size,
                         group_size, scale_factor, channels, input_height,
                         input_width, output_height, output_width,
                         mask_channels, bottom_diff);
      }));
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      top_grad.scalar_type(), "FeatureSum", ([&] {
        const int num_kernels =
            batch_size * input_height * input_width * THREADS_PER_PIXEL;
        const scalar_t *bottom_diff_hs = rbottom_grad_hs.data_ptr<scalar_t>();
        scalar_t *bottom_diff = rbottom_grad.data_ptr<scalar_t>();

        FeatureSum<scalar_t>
            <<<divideUP(num_kernels, THREADS_PER_BLOCK), THREADS_PER_BLOCK, 0,
               stream>>>(num_kernels, bottom_diff_hs, scale_factor, channels,
                         input_height, input_width, bottom_diff);
      }));
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      top_grad.scalar_type(), "NHWC2NCHW_Bottom_Grad", ([&] {
        const scalar_t *bottom_data = rbottom_grad.data_ptr<scalar_t>();
        scalar_t *top_data = bottom_grad.data_ptr<scalar_t>();
        const int dh = divideUP(input_height * input_width, kTileDim);
        const int dw = divideUP(channels, kTileDim);
        BatchTranspose2DCUDAKernel<scalar_t>
            <<<batch_size * dh * dw, dim3(kTileDim, kBlockRows), 0, stream>>>(
                batch_size, input_height * input_width, channels, dh, dw,
                bottom_data, top_data);
      }));

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      top_grad.scalar_type(), "CARAFELaucherBackward_Mask", ([&] {
        const int num_kernels = batch_size * output_height * output_width *
                                mask_channels * WARP_SIZE;
        const scalar_t *top_diff = rtop_grad.data_ptr<scalar_t>();
        const scalar_t *bottom_data = rfeatures.data_ptr<scalar_t>();
        scalar_t *mask_diff = rmask_grad.data_ptr<scalar_t>();

        CARAFEBackward_Mask<scalar_t>
            <<<divideUP(num_kernels, THREADS_PER_BLOCK), THREADS_PER_BLOCK, 0,
               stream>>>(num_kernels, top_diff, bottom_data, kernel_size,
                         group_size, scale_factor, channels, input_height,
                         input_width, output_height, output_width,
                         mask_channels, mask_diff);
      }));
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      top_grad.scalar_type(), "NHWC2NCHW_Mask_Grad", ([&] {
        const scalar_t *bottom_data = rmask_grad.data_ptr<scalar_t>();
        scalar_t *top_data = mask_grad.data_ptr<scalar_t>();
        const int dh = divideUP(output_height * output_width, kTileDim);
        const int dw = divideUP(mask_channels, kTileDim);
        BatchTranspose2DCUDAKernel<scalar_t>
            <<<batch_size * dh * dw, dim3(kTileDim, kBlockRows), 0, stream>>>(
                batch_size, output_height * output_width, mask_channels, dh, dw,
                bottom_data, top_data);
      }));

  AT_CUDA_CHECK(cudaGetLastError());
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/carafe_naive_cuda.cu
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "carafe_naive_cuda_kernel.cuh"
#include "pytorch_cuda_helper.hpp"

void CARAFENAIVEForwardCUDAKernelLauncher(const Tensor features,
                                          const Tensor masks, Tensor output,
                                          const int kernel_size,
                                          const int group_size,
                                          const int scale_factor) {
  int output_size = output.numel();
  int channels = output.size(1);
  int height = output.size(2);
  int width = output.size(3);

  at::cuda::CUDAGuard device_guard(features.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      features.scalar_type(), "CARAFENAIVEForward", ([&] {
        carafe_naive_forward_cuda_kernel<scalar_t>
            <<<GET_BLOCKS(output_size), THREADS_PER_BLOCK, 0, stream>>>(
                output_size, features.data_ptr<scalar_t>(),
                masks.data_ptr<scalar_t>(), output.data_ptr<scalar_t>(),
                kernel_size, group_size, scale_factor, channels, height, width);
      }));

  AT_CUDA_CHECK(cudaGetLastError());
}

void CARAFENAIVEBackwardCUDAKernelLauncher(
    const Tensor top_grad, const Tensor features, const Tensor masks,
    Tensor bottom_grad, Tensor mask_grad, const int kernel_size,
    const int group_size, const int scale_factor) {
  int output_size = top_grad.numel();
  int channels = top_grad.size(1);
  int height = top_grad.size(2);
  int width = top_grad.size(3);

  at::cuda::CUDAGuard device_guard(top_grad.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      top_grad.scalar_type(), "CARAFENAIVEBackward", ([&] {
        carafe_naive_backward_cuda_kernel<scalar_t>
            <<<GET_BLOCKS(output_size), THREADS_PER_BLOCK, 0, stream>>>(
                output_size, top_grad.data_ptr<scalar_t>(),
                features.data_ptr<scalar_t>(), masks.data_ptr<scalar_t>(),
                bottom_grad.data_ptr<scalar_t>(),
                mask_grad.data_ptr<scalar_t>(), kernel_size, group_size,
                scale_factor, channels, height, width);
      }));

  AT_CUDA_CHECK(cudaGetLastError());
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/convex_iou.cu
================================================
// Copyright (c) OpenMMLab. All rights reserved
// modified from
// https://github.com/SDL-GuoZonghao/BeyondBoundingBox/blob/main/mmdet/ops/iou/src/convex_iou_kernel.cu
#include "convex_iou_cuda_kernel.cuh"
#include "pytorch_cuda_helper.hpp"

void ConvexIoUCUDAKernelLauncher(const Tensor pointsets, const Tensor polygons,
                                 Tensor ious) {
  int output_size = ious.numel();
  int num_pointsets = pointsets.size(0);
  int num_polygons = polygons.size(0);

  at::cuda::CUDAGuard device_guard(pointsets.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      pointsets.scalar_type(), "convex_iou_cuda_kernel", ([&] {
        convex_iou_cuda_kernel<scalar_t>
            <<<GET_BLOCKS(output_size), THREADS_PER_BLOCK / 2, 0, stream>>>(
                num_pointsets, num_polygons, pointsets.data_ptr<scalar_t>(),
                polygons.data_ptr<scalar_t>(), ious.data_ptr<scalar_t>());
      }));
  AT_CUDA_CHECK(cudaGetLastError());
}

void ConvexGIoUCUDAKernelLauncher(const Tensor pointsets, const Tensor polygons,
                                  Tensor output) {
  int output_size = output.numel();
  int num_pointsets = pointsets.size(0);
  int num_polygons = polygons.size(0);

  at::cuda::CUDAGuard device_guard(pointsets.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      pointsets.scalar_type(), "convex_giou_cuda_kernel", ([&] {
        convex_giou_cuda_kernel<scalar_t>
            <<<GET_BLOCKS(output_size), THREADS_PER_BLOCK / 2, 0, stream>>>(
                num_pointsets, num_polygons, pointsets.data_ptr<scalar_t>(),
                polygons.data_ptr<scalar_t>(), output.data_ptr<scalar_t>());
      }));
  AT_CUDA_CHECK(cudaGetLastError());
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/correlation_cuda.cu
================================================
// Copyright (c) OpenMMLab. All rights reserved.
// Modified from
// https://github.com/ClementPinard/Pytorch-Correlation-extension/blob/master/Correlation_Module/correlation_cuda_kernel.cu
// Original licence: Under MIT License

#include "correlation_cuda.cuh"
#include "pytorch_cuda_helper.hpp"

// Forward launcher for the correlation op.
//
// Both inputs are permuted to (0, 2, 3, 1) and made contiguous before being
// handed to the kernel through 32-bit packed accessors. The output spatial
// size is derived from the input size, padding, dilated kernel extent and
// stride:
//   oH = (iH + 2 * padH - ((kH - 1) * dilationH + 1)) / dH + 1   (same for W)
// The grid is (batch_size, oH, oW) with THREADS_FORWARD threads per block, and
// the kernel runs on the current stream of the device holding `input1`,
// writing into `output` (accessed as a 5-D tensor).
//
// Throws (via AT_CUDA_CHECK) if the kernel launch reports an error.
void CorrelationForwardCUDAKernelLauncher(Tensor input1, Tensor input2,
                                          Tensor output, int kH, int kW,
                                          int patchH, int patchW, int padH,
                                          int padW, int dilationH,
                                          int dilationW, int dilation_patchH,
                                          int dilation_patchW, int dH, int dW) {
  const int batch_size = input1.size(0);
  const int iH = input1.size(2);
  const int iW = input1.size(3);
  const int dilatedKH = (kH - 1) * dilationH + 1;
  const int dilatedKW = (kW - 1) * dilationW + 1;

  const auto oH = (iH + 2 * padH - dilatedKH) / dH + 1;
  const auto oW = (iW + 2 * padW - dilatedKW) / dW + 1;

  auto trInput1 = input1.permute({0, 2, 3, 1}).contiguous();
  auto trInput2 = input2.permute({0, 2, 3, 1}).contiguous();

  const int threads = THREADS_FORWARD;
  const dim3 blocks(batch_size, oH, oW);

  at::cuda::CUDAGuard device_guard(input1.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      input1.scalar_type(), "correlation_forward_cuda", ([&] {
        TensorAcc4R trInput1_acc =
            trInput1.packed_accessor32<scalar_t, 4, RestrictPtrTraits>();
        TensorAcc4R trInput2_acc =
            trInput2.packed_accessor32<scalar_t, 4, RestrictPtrTraits>();
        TensorAcc5R output_acc =
            output.packed_accessor32<scalar_t, 5, RestrictPtrTraits>();

        correlation_forward_cuda_kernel<scalar_t>
            <<<blocks, threads, 0, stream>>>(
                trInput1_acc, trInput2_acc, output_acc, kH, kW, patchH, patchW,
                padH, padW, dilationH, dilationW, dilation_patchH,
                dilation_patchW, dH, dW);
      }));

  // Without this check an invalid launch configuration (for example a grid
  // dimension that is too large) would go unnoticed here.
  AT_CUDA_CHECK(cudaGetLastError());
}

// Backward launcher for the correlation op.
//
// For every batch index n, one kernel computing the gradient for `input1`
// (written to `grad_input1`) is queued; afterwards, again for every n, one
// kernel computing the gradient for `input2` (written to `grad_input2`). The
// grid is (C, iH, iW) taken from `input1`, and blocks are
// THREADS_BACKWARD x THREADS_BACKWARD threads. All launches use the current
// stream of the device holding `input1`.
//
// Throws (via AT_CUDA_CHECK) if any of the kernel launches reports an error.
void CorrelationBackwardCUDAKernelLauncher(
    Tensor grad_output, Tensor input1, Tensor input2, Tensor grad_input1,
    Tensor grad_input2, int kH, int kW, int patchH, int patchW, int padH,
    int padW, int dilationH, int dilationW, int dilation_patchH,
    int dilation_patchW, int dH, int dW) {
  const int batch_size = input1.size(0);
  const int iH = input1.size(2);
  const int iW = input1.size(3);
  const int C = input1.size(1);

  const dim3 blocks(C, iH, iW);
  const dim3 threads(THREADS_BACKWARD, THREADS_BACKWARD);

  at::cuda::CUDAGuard device_guard(input1.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      input1.scalar_type(), "correlation_backward_cuda", ([&] {
        TensorAcc4R input1_acc =
            input1.packed_accessor32<scalar_t, 4, RestrictPtrTraits>();
        TensorAcc4R input2_acc =
            input2.packed_accessor32<scalar_t, 4, RestrictPtrTraits>();
        TensorAcc4R grad_input1_acc =
            grad_input1.packed_accessor32<scalar_t, 4, RestrictPtrTraits>();
        TensorAcc4R grad_input2_acc =
            grad_input2.packed_accessor32<scalar_t, 4, RestrictPtrTraits>();
        TensorAcc5R grad_output_acc =
            grad_output.packed_accessor32<scalar_t, 5, RestrictPtrTraits>();

        // Gradient w.r.t. input1, one launch per batch element.
        for (int n = 0; n < batch_size; ++n) {
          correlation_backward_cuda_kernel_input1<scalar_t>
              <<<blocks, threads, 0, stream>>>(
                  grad_output_acc, input2_acc, grad_input1_acc, kH, kW, patchH,
                  patchW, padH, padW, dilationH, dilationW, dilation_patchH,
                  dilation_patchW, dH, dW, n);
        }

        // Gradient w.r.t. input2, one launch per batch element.
        for (int n = 0; n < batch_size; ++n) {
          correlation_backward_cuda_kernel_input2<scalar_t>
              <<<blocks, threads, 0, stream>>>(
                  grad_output_acc, input1_acc, grad_input2_acc, kH, kW, patchH,
                  patchW, padH, padW, dilationH, dilationW, dilation_patchH,
                  dilation_patchW, dH, dW, n);
        }
      }));

  // Without this check a failed launch would leave the gradients unwritten
  // with no error raised here.
  AT_CUDA_CHECK(cudaGetLastError());
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/cudabind.cpp
================================================
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Forward declarations of the assign_score_withk kernel launchers. Their
// definitions are not in this translation unit; the wrappers below call them.
void AssignScoreWithKForwardCUDAKernelLauncher(
    int B, int N0, int N1, int M, int K, int O, int aggregate,
    const Tensor& points, const Tensor& centers, const Tensor& scores,
    const Tensor& knn_idx, Tensor& output);

void AssignScoreWithKBackwardCUDAKernelLauncher(
    int B, int N0, int N1, int M, int K, int O, int aggregate,
    const Tensor& grad_out, const Tensor& points, const Tensor& centers,
    const Tensor& scores, const Tensor& knn_idx, Tensor& grad_points,
    Tensor& grad_centers, Tensor& grad_scores);

// CUDA entry point for the assign_score_withk forward op. Forwards every
// argument unchanged to AssignScoreWithKForwardCUDAKernelLauncher.
void assign_score_withk_forward_cuda(int B, int N0, int N1, int M, int K, int O,
                                     int aggregate, const Tensor& points,
                                     const Tensor& centers,
                                     const Tensor& scores,
                                     const Tensor& knn_idx, Tensor& output) {
  AssignScoreWithKForwardCUDAKernelLauncher(
      B, N0, N1, M, K, O, aggregate, points, centers, scores, knn_idx, output);
}

// CUDA entry point for the assign_score_withk backward op. Forwards every
// argument unchanged to AssignScoreWithKBackwardCUDAKernelLauncher.
void assign_score_withk_backward_cuda(
    int B, int N0, int N1, int M, int K, int O, int aggregate,
    const Tensor& grad_out, const Tensor& points, const Tensor& centers,
    const Tensor& scores, const Tensor& knn_idx, Tensor& grad_points,
    Tensor& grad_centers, Tensor& grad_scores) {
  AssignScoreWithKBackwardCUDAKernelLauncher(
      B, N0, N1, M, K, O, aggregate, grad_out, points, centers, scores, knn_idx,
      grad_points, grad_centers, grad_scores);
}

// Declarations of the device-generic assign_score_withk entry points
// (defined outside this file).
void assign_score_withk_forward_impl(int B, int N0, int N1, int M, int K, int O,
                                     int aggregate, const Tensor& points,
                                     const Tensor& centers,
                                     const Tensor& scores,
                                     const Tensor& knn_idx, Tensor& output);

void assign_score_withk_backward_impl(
    int B, int N0, int N1, int M, int K, int O, int aggregate,
    const Tensor& grad_out, const Tensor& points, const Tensor& centers,
    const Tensor& scores, const Tensor& knn_idx, Tensor& grad_points,
    Tensor& grad_centers, Tensor& grad_scores);

// Associate the CUDA wrappers with the *_impl entry points for the CUDA
// device. REGISTER_DEVICE_IMPL is defined outside this file (presumably in
// pytorch_device_registry.hpp -- confirm there for exact semantics).
REGISTER_DEVICE_IMPL(assign_score_withk_forward_impl, CUDA,
                     assign_score_withk_forward_cuda);
REGISTER_DEVICE_IMPL(assign_score_withk_backward_impl, CUDA,
                     assign_score_withk_backward_cuda);

// Forward declaration of the ball query kernel launcher (defined outside this
// translation unit).
void BallQueryForwardCUDAKernelLauncher(int b, int n, int m, float min_radius,
                                        float max_radius, int nsample,
                                        const Tensor new_xyz, const Tensor xyz,
                                        Tensor idx);

// CUDA entry point for the ball query forward op. Forwards every argument
// unchanged to BallQueryForwardCUDAKernelLauncher.
void ball_query_forward_cuda(int b, int n, int m, float min_radius,
                             float max_radius, int nsample,
                             const Tensor new_xyz, const Tensor xyz,
                             Tensor idx) {
  BallQueryForwardCUDAKernelLauncher(b, n, m, min_radius, max_radius, nsample,
                                     new_xyz, xyz, idx);
}

// Declaration of the device-generic ball query entry point (defined outside
// this file), followed by the association of the CUDA wrapper with it.
// REGISTER_DEVICE_IMPL is defined outside this file -- confirm its exact
// semantics in the device registry header.
void ball_query_forward_impl(int b, int n, int m, float min_radius,
                             float max_radius, int nsample,
                             const Tensor new_xyz, const Tensor xyz,
                             Tensor idx);
REGISTER_DEVICE_IMPL(ball_query_forward_impl, CUDA, ball_query_forward_cuda);

// Forward declaration of the bbox overlaps kernel launcher (defined outside
// this translation unit).
void BBoxOverlapsCUDAKernelLauncher(const Tensor bboxes1, const Tensor bboxes2,
                                    Tensor ious, const int mode,
                                    const bool aligned, const int offset);

// CUDA entry point for the bbox overlaps op. Forwards every argument
// unchanged to BBoxOverlapsCUDAKernelLauncher.
void bbox_overlaps_cuda(const Tensor bboxes1, const Tensor bboxes2, Tensor ious,
                        const int mode, const bool aligned, const int offset) {
  BBoxOverlapsCUDAKernelLauncher(bboxes1, bboxes2, ious, mode, aligned, offset);
}

// Declaration of the device-generic bbox overlaps entry point (defined outside
// this file), followed by the association of the CUDA wrapper with it.
// REGISTER_DEVICE_IMPL is defined outside this file -- confirm its exact
// semantics in the device registry header.
void bbox_overlaps_impl(const Tensor bboxes1, const Tensor bboxes2, Tensor ious,
                        const int mode, const bool aligned, const int offset);
REGISTER_DEVICE_IMPL(bbox_overlaps_impl, CUDA, bbox_overlaps_cuda);

// Launcher for the border-align forward CUDA kernel. Declaration only; the
// definition is not part of this section of the file.
void BorderAlignForwardCUDAKernelLauncher(const Tensor& input,
                                          const Tensor& boxes, Tensor output,
                                          Tensor argmax_idx,
                                          const int pool_size);

// Launcher for the border-align backward CUDA kernel. Declaration only.
void BorderAlignBackwardCUDAKernelLauncher(const Tensor& grad_output,
                                           const Tensor& boxes,
                                           const Tensor& argmax_idx,
                                           Tensor grad_input,
                                           const int pool_size);

// CUDA entry point for border align (forward): relays its arguments, as-is
// and in order, to BorderAlignForwardCUDAKernelLauncher.
void border_align_forward_cuda(const Tensor& input, const Tensor& boxes,
                               Tensor output, Tensor argmax_idx,
                               const int pool_size) {
  BorderAlignForwardCUDAKernelLauncher(input, boxes,
                                       output, argmax_idx,
                                       pool_size);
}

// CUDA entry point for border align (backward): relays its arguments, as-is
// and in order, to BorderAlignBackwardCUDAKernelLauncher.
void border_align_backward_cuda(const Tensor& grad_output, const Tensor& boxes,
                                const Tensor& argmax_idx, Tensor grad_input,
                                const int pool_size) {
  BorderAlignBackwardCUDAKernelLauncher(grad_output, boxes, argmax_idx,
                                        grad_input,
                                        pool_size);
}

// Dispatcher-side entry points for border align; declared here so they can be
// named in the registrations below.
void border_align_forward_impl(const Tensor& input, const Tensor& boxes,
                               Tensor output, Tensor argmax_idx,
                               const int pool_size);

void border_align_backward_impl(const Tensor& grad_output, const Tensor& boxes,
                                const Tensor& argmax_idx, Tensor grad_input,
                                const int pool_size);

// Pair each *_impl entry with its *_cuda wrapper under the CUDA tag
// (registry mechanics are in REGISTER_DEVICE_IMPL, defined elsewhere).
REGISTER_DEVICE_IMPL(border_align_forward_impl, CUDA,
                     border_align_forward_cuda);
REGISTER_DEVICE_IMPL(border_align_backward_impl, CUDA,
                     border_align_backward_cuda);

// CUDA implementation of rotated-box IoU. Only declared in this section (no
// wrapper body here); it is registered directly below.
void box_iou_rotated_cuda(const Tensor boxes1, const Tensor boxes2, Tensor ious,
                          const int mode_flag, const bool aligned);

// Dispatcher-side entry point with the same signature.
void box_iou_rotated_impl(const Tensor boxes1, const Tensor boxes2, Tensor ious,
                          const int mode_flag, const bool aligned);
// Pair box_iou_rotated_impl with box_iou_rotated_cuda under the CUDA tag
// (registry mechanics are in REGISTER_DEVICE_IMPL, defined elsewhere).
REGISTER_DEVICE_IMPL(box_iou_rotated_impl, CUDA, box_iou_rotated_cuda);

// Launcher for the CARAFE forward CUDA kernel. Declaration only; the
// definition is not part of this section of the file.
void CARAFEForwardCUDAKernelLauncher(const Tensor features, const Tensor masks,
                                     Tensor rfeatures, Tensor routput,
                                     Tensor rmasks, Tensor output,
                                     const int kernel_size,
                                     const int group_size,
                                     const int scale_factor);

// Launcher for the CARAFE backward CUDA kernel. Declaration only.
void CARAFEBackwardCUDAKernelLauncher(
    const Tensor top_grad, const Tensor rfeatures, const Tensor masks,
    Tensor rtop_grad, Tensor rbottom_grad_hs, Tensor rbottom_grad,
    Tensor rmask_grad, Tensor bottom_grad, Tensor mask_grad,
    const int kernel_size, const int group_size, const int scale_factor);

// CUDA entry point for CARAFE (forward): a pass-through that gives every
// argument, in order, to CARAFEForwardCUDAKernelLauncher.
void carafe_forward_cuda(Tensor features, Tensor masks,
                         Tensor rfeatures, Tensor routput, Tensor rmasks,
                         Tensor output,
                         int kernel_size, int group_size, int scale_factor) {
  CARAFEForwardCUDAKernelLauncher(features, masks,
                                  rfeatures, routput, rmasks,
                                  output,
                                  kernel_size, group_size, scale_factor);
}

// CUDA entry point for CARAFE (backward): a pass-through that gives every
// argument, in order, to CARAFEBackwardCUDAKernelLauncher.
void carafe_backward_cuda(Tensor top_grad, Tensor rfeatures, Tensor masks,
                          Tensor rtop_grad, Tensor rbottom_grad_hs,
                          Tensor rbottom_grad, Tensor rmask_grad,
                          Tensor bottom_grad, Tensor mask_grad,
                          int kernel_size, int group_size, int scale_factor) {
  CARAFEBackwardCUDAKernelLauncher(
      top_grad, rfeatures, masks,
      rtop_grad, rbottom_grad_hs, rbottom_grad, rmask_grad,
      bottom_grad, mask_grad,
      kernel_size, group_size, scale_factor);
}

// Dispatcher-side entry points for CARAFE; declared here so they can be named
// in the registrations below.
void carafe_forward_impl(Tensor features, Tensor masks, Tensor rfeatures,
                         Tensor routput, Tensor rmasks, Tensor output,
                         int kernel_size, int group_size, int scale_factor);

void carafe_backward_impl(Tensor top_grad, Tensor rfeatures, Tensor masks,
                          Tensor rtop_grad, Tensor rbottom_grad_hs,
                          Tensor rbottom_grad, Tensor rmask_grad,
                          Tensor bottom_grad, Tensor mask_grad, int kernel_size,
                          int group_size, int scale_factor);

// Pair each *_impl entry with its *_cuda wrapper under the CUDA tag
// (registry mechanics are in REGISTER_DEVICE_IMPL, defined elsewhere).
REGISTER_DEVICE_IMPL(carafe_forward_impl, CUDA, carafe_forward_cuda);
REGISTER_DEVICE_IMPL(carafe_backward_impl, CUDA, carafe_backward_cuda);

// Launcher for the naive CARAFE forward CUDA kernel. Declaration only; the
// definition is not part of this section of the file.
void CARAFENAIVEForwardCUDAKernelLauncher(const Tensor features,
                                          const Tensor masks, Tensor output,
                                          const int kernel_size,
                                          const int group_size,
                                          const int scale_factor);

// Launcher for the naive CARAFE backward CUDA kernel. Declaration only.
void CARAFENAIVEBackwardCUDAKernelLauncher(
    const Tensor top_grad, const Tensor features, const Tensor masks,
    Tensor bottom_grad, Tensor mask_grad, const int kernel_size,
    const int group_size, const int scale_factor);

// CUDA entry point for naive CARAFE (forward): relays all arguments, in
// order, to CARAFENAIVEForwardCUDAKernelLauncher.
void carafe_naive_forward_cuda(Tensor features, Tensor masks, Tensor output,
                               int kernel_size, int group_size,
                               int scale_factor) {
  CARAFENAIVEForwardCUDAKernelLauncher(features, masks, output,
                                       kernel_size, group_size, scale_factor);
}

// CUDA entry point for naive CARAFE (backward): relays all arguments, in
// order, to CARAFENAIVEBackwardCUDAKernelLauncher.
void carafe_naive_backward_cuda(Tensor top_grad, Tensor features, Tensor masks,
                                Tensor bottom_grad, Tensor mask_grad,
                                int kernel_size, int group_size,
                                int scale_factor) {
  CARAFENAIVEBackwardCUDAKernelLauncher(
      top_grad, features, masks,
      bottom_grad, mask_grad,
      kernel_size, group_size, scale_factor);
}
// Dispatcher-side entry points for naive CARAFE; declared here so they can be
// named in the registrations below.
void carafe_naive_forward_impl(Tensor features, Tensor masks, Tensor output,
                               int kernel_size, int group_size,
                               int scale_factor);

void carafe_naive_backward_impl(Tensor top_grad, Tensor features, Tensor masks,
                                Tensor bottom_grad, Tensor mask_grad,
                                int kernel_size, int group_size,
                                int scale_factor);

// Pair each *_impl entry with its *_cuda wrapper under the CUDA tag
// (registry mechanics are in REGISTER_DEVICE_IMPL, defined elsewhere).
REGISTER_DEVICE_IMPL(carafe_naive_forward_impl, CUDA,
                     carafe_naive_forward_cuda);
REGISTER_DEVICE_IMPL(carafe_naive_backward_impl, CUDA,
                     carafe_naive_backward_cuda);

// Launcher for the correlation forward CUDA kernel. Declaration only; the
// definition is not part of this section of the file.
void CorrelationForwardCUDAKernelLauncher(Tensor input1, Tensor input2,
                                          Tensor output, int kH, int kW,
                                          int patchH, int patchW, int padH,
                                          int padW, int dilationH,
                                          int dilationW, int dilation_patchH,
                                          int dilation_patchW, int dH, int dW);

// Launcher for the correlation backward CUDA kernel. Declaration only.
void CorrelationBackwardCUDAKernelLauncher(Tensor grad_output, Tensor input1,
                                           Tensor input2, Tensor grad_input1,
                                           Tensor grad_input2, int kH, int kW,
                                           int patchH, int patchW, int padH,
                                           int padW, int dilationH,
                                           int dilationW, int dilation_patchH,
                                           int dilation_patchW, int dH, int dW);

// CUDA entry point for correlation (forward): a pass-through that gives every
// argument, in order, to CorrelationForwardCUDAKernelLauncher.
void correlation_forward_cuda(Tensor input1, Tensor input2, Tensor output,
                              int kH, int kW,
                              int patchH, int patchW,
                              int padH, int padW,
                              int dilationH, int dilationW,
                              int dilation_patchH, int dilation_patchW,
                              int dH, int dW) {
  CorrelationForwardCUDAKernelLauncher(input1, input2, output,
                                       kH, kW,
                                       patchH, patchW,
                                       padH, padW,
                                       dilationH, dilationW,
                                       dilation_patchH, dilation_patchW,
                                       dH, dW);
}

// CUDA entry point for correlation (backward): a pass-through that gives
// every argument, in order, to CorrelationBackwardCUDAKernelLauncher.
void correlation_backward_cuda(Tensor grad_output,
                               Tensor input1, Tensor input2,
                               Tensor grad_input1, Tensor grad_input2,
                               int kH, int kW,
                               int patchH, int patchW,
                               int padH, int padW,
                               int dilationH, int dilationW,
                               int dilation_patchH, int dilation_patchW,
                               int dH, int dW) {
  CorrelationBackwardCUDAKernelLauncher(grad_output,
                                        input1, input2,
                                        grad_input1, grad_input2,
                                        kH, kW,
                                        patchH, patchW,
                                        padH, padW,
                                        dilationH, dilationW,
                                        dilation_patchH, dilation_patchW,
                                        dH, dW);
}

// Dispatcher-side entry points for correlation; declared here so they can be
// named in the registrations below.
void correlation_forward_impl(Tensor input1, Tensor input2, Tensor output,
                              int kH, int kW, int patchH, int patchW, int padH,
                              int padW, int dilationH, int dilationW,
                              int dilation_patchH, int dilation_patchW, int dH,
                              int dW);

void correlation_backward_impl(Tensor grad_output, Tensor input1, Tensor input2,
                               Tensor grad_input1, Tensor grad_input2, int kH,
                               int kW, int patchH, int patchW, int padH,
                               int padW, int dilationH, int dilationW,
                               int dilation_patchH, int dilation_patchW, int dH,
                               int dW);

// Pair each *_impl entry with its *_cuda wrapper under the CUDA tag
// (registry mechanics are in REGISTER_DEVICE_IMPL, defined elsewhere).
REGISTER_DEVICE_IMPL(correlation_forward_impl, CUDA, correlation_forward_cuda);
REGISTER_DEVICE_IMPL(correlation_backward_impl, CUDA,
                     correlation_backward_cuda);

// CUDA implementations of the deformable im2col / col2im / col2im_coord
// routines. Only declared in this section (no bodies here); they are named
// directly in the REGISTER_DEVICE_IMPL statements for the matching *_impl
// entries.
void deformable_im2col_cuda(Tensor data_im, Tensor data_offset,
                            const int channels, const int height,
                            const int width, const int ksize_h,
                            const int ksize_w, const int pad_h, const int pad_w,
                            const int stride_h, const int stride_w,
                            const int dilation_h, const int dilation_w,
                            const int parallel_imgs, const int deformable_group,
                            Tensor data_col);

// Same parameter list as deformable_im2col_cuda except for the first and
// last tensors (data_col in, grad_im last).
void deformable_col2im_cuda(Tensor data_col, Tensor data_offset,
                            const int channels, const int height,
                            const int width, const int ksize_h,
                            const int ksize_w, const int pad_h, const int pad_w,
                            const int stride_h, const int stride_w,
                            const int dilation_h, const int dilation_w,
                            const int parallel_imgs, const int deformable_group,
                            Tensor grad_im);

// Takes both data_col and data_im plus the offsets; last tensor is
// grad_offset.
void deformable_col2im_coord_cuda(
    Tensor data_col, Tensor data_im, Tensor data_offset, const int channels,
    const int height, const int width, const int ksize_h, const int ksize_w,
    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
    const int dilation_h, const int dilation_w, const int parallel_imgs,
    const int deformable_group, Tensor grad_offset);

// Dispatcher-side entry points for the deformable im2col / col2im /
// col2im_coord routines; declared here so they can be named in the
// registrations below.
void deformable_im2col_impl(Tensor data_im, Tensor data_offset,
                            const int channels, const int height,
                            const int width, const int ksize_h,
                            const int ksize_w, const int pad_h, const int pad_w,
                            const int stride_h, const int stride_w,
                            const int dilation_h, const int dilation_w,
                            const int parallel_imgs, const int deformable_group,
                            Tensor data_col);

void deformable_col2im_impl(Tensor data_col, Tensor data_offset,
                            const int channels, const int height,
                            const int width, const int ksize_h,
                            const int ksize_w, const int pad_h, const int pad_w,
                            const int stride_h, const int stride_w,
                            const int dilation_h, const int dilation_w,
                            const int parallel_imgs, const int deformable_group,
                            Tensor grad_im);

void deformable_col2im_coord_impl(
    Tensor data_col, Tensor data_im, Tensor data_offset, const int channels,
    const int height, const int width, const int ksize_h, const int ksize_w,
    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
    const int dilation_h, const int dilation_w, const int parallel_imgs,
    const int deformable_group, Tensor grad_offset);

// Pair each *_impl entry with its *_cuda function under the CUDA tag
// (registry mechanics are in REGISTER_DEVICE_IMPL, defined elsewhere).
REGISTER_DEVICE_IMPL(deformable_im2col_impl, CUDA, deformable_im2col_cuda);
REGISTER_DEVICE_IMPL(deformable_col2im_impl, CUDA, deformable_col2im_cuda);
REGISTER_DEVICE_IMPL(deformable_col2im_coord_impl, CUDA,
                     deformable_col2im_coord_cuda);

// Launcher for the deformable RoI pool forward CUDA kernel. Declaration only;
// the definition is not part of this section of the file.
void DeformRoIPoolForwardCUDAKernelLauncher(Tensor input, Tensor rois,
                                            Tensor offset, Tensor output,
                                            int pooled_height, int pooled_width,
                                            float spatial_scale,
                                            int sampling_ratio, float gamma);

// Launcher for the deformable RoI pool backward CUDA kernel. Declaration
// only.
void DeformRoIPoolBackwardCUDAKernelLauncher(
    Tensor grad_output, Tensor input, Tensor rois, Tensor offset,
    Tensor grad_input, Tensor grad_offset, int pooled_height, int pooled_width,
    float spatial_scale, int sampling_ratio, float gamma);

// CUDA entry point for deformable RoI pooling (forward): relays all
// arguments, in order, to DeformRoIPoolForwardCUDAKernelLauncher.
void deform_roi_pool_forward_cuda(Tensor input, Tensor rois, Tensor offset,
                                  Tensor output,
                                  int pooled_height, int pooled_width,
                                  float spatial_scale,
                                  int sampling_ratio, float gamma) {
  DeformRoIPoolForwardCUDAKernelLauncher(
      input, rois, offset, output,
      pooled_height, pooled_width,
      spatial_scale, sampling_ratio, gamma);
}

// CUDA entry point for deformable RoI pooling (backward): relays all
// arguments, in order, to DeformRoIPoolBackwardCUDAKernelLauncher.
void deform_roi_pool_backward_cuda(Tensor grad_output, Tensor input,
                                   Tensor rois, Tensor offset,
                                   Tensor grad_input, Tensor grad_offset,
                                   int pooled_height, int pooled_width,
                                   float spatial_scale, int sampling_ratio,
                                   float gamma) {
  DeformRoIPoolBackwardCUDAKernelLauncher(
      grad_output, input, rois, offset,
      grad_input, grad_offset,
      pooled_height, pooled_width,
      spatial_scale, sampling_ratio, gamma);
}

// Dispatcher-side entry points for deformable RoI pooling; declared here so
// they can be named in the registrations below.
void deform_roi_pool_forward_impl(Tensor input, Tensor rois, Tensor offset,
                                  Tensor output, int pooled_height,
                                  int pooled_width, float spatial_scale,
                                  int sampling_ratio, float gamma);

void deform_roi_pool_backward_impl(Tensor grad_output, Tensor input,
                                   Tensor rois, Tensor offset,
                                   Tensor grad_input, Tensor grad_offset,
                                   int pooled_height, int pooled_width,
                                   float spatial_scale, int sampling_ratio,
                                   float gamma);

// Pair each *_impl entry with its *_cuda wrapper under the CUDA tag
// (registry mechanics are in REGISTER_DEVICE_IMPL, defined elsewhere).
REGISTER_DEVICE_IMPL(deform_roi_pool_forward_impl, CUDA,
                     deform_roi_pool_forward_cuda);
REGISTER_DEVICE_IMPL(deform_roi_pool_backward_impl, CUDA,
                     deform_roi_pool_backward_cuda);

// Launchers for the sigmoid / softmax focal-loss CUDA kernels. Declarations
// only; the definitions are not part of this section of the file.
void SigmoidFocalLossForwardCUDAKernelLauncher(Tensor input, Tensor target,
                                               Tensor weight, Tensor output,
                                               const float gamma,
                                               const float alpha);

void SigmoidFocalLossBackwardCUDAKernelLauncher(Tensor input, Tensor target,
                                                Tensor weight,
                                                Tensor grad_input,
                                                const float gamma,
                                                const float alpha);

void SoftmaxFocalLossForwardCUDAKernelLauncher(Tensor softmax, Tensor target,
                                               Tensor weight, Tensor output,
                                               const float gamma,
                                               const float alpha);

// Unlike the sigmoid backward launcher, this one takes an additional `buff`
// tensor ahead of grad_input.
void SoftmaxFocalLossBackwardCUDAKernelLauncher(Tensor softmax, Tensor target,
                                                Tensor weight, Tensor buff,
                                                Tensor grad_input,
                                                const float gamma,
                                                const float alpha);

// CUDA entry point for sigmoid focal loss (forward): relays all arguments, in
// order, to SigmoidFocalLossForwardCUDAKernelLauncher.
void sigmoid_focal_loss_forward_cuda(Tensor input, Tensor target, Tensor weight,
                                     Tensor output,
                                     float gamma, float alpha) {
  SigmoidFocalLossForwardCUDAKernelLauncher(input, target, weight,
                                            output,
                                            gamma, alpha);
}

// CUDA entry point for sigmoid focal loss (backward): relays all arguments,
// in order, to SigmoidFocalLossBackwardCUDAKernelLauncher.
void sigmoid_focal_loss_backward_cuda(Tensor input, Tensor target,
                                      Tensor weight, Tensor grad_input,
                                      float gamma, float alpha) {
  SigmoidFocalLossBackwardCUDAKernelLauncher(input, target, weight,
                                             grad_input,
                                             gamma, alpha);
}

// CUDA entry point for softmax focal loss (forward): relays all arguments, in
// order, to SoftmaxFocalLossForwardCUDAKernelLauncher. Note the launcher
// names its first parameter `softmax`; this wrapper calls it `input`.
void softmax_focal_loss_forward_cuda(Tensor input, Tensor target, Tensor weight,
                                     Tensor output,
                                     float gamma, float alpha) {
  SoftmaxFocalLossForwardCUDAKernelLauncher(input, target, weight,
                                            output,
                                            gamma, alpha);
}

// CUDA entry point for softmax focal loss (backward): relays all arguments,
// including the `buff` tensor, in order to
// SoftmaxFocalLossBackwardCUDAKernelLauncher.
void softmax_focal_loss_backward_cuda(Tensor input, Tensor target,
                                      Tensor weight, Tensor buff,
                                      Tensor grad_input,
                                      float gamma, float alpha) {
  SoftmaxFocalLossBackwardCUDAKernelLauncher(input, target, weight,
                                             buff, grad_input,
                                             gamma, alpha);
}

// Dispatcher-side entry points for sigmoid / softmax focal loss; declared
// here so they can be named in the registrations below.
void sigmoid_focal_loss_forward_impl(Tensor input, Tensor target, Tensor weight,
                                     Tensor output, float gamma, float alpha);

void sigmoid_focal_loss_backward_impl(Tensor input, Tensor target,
                                      Tensor weight, Tensor grad_input,
                                      float gamma, float alpha);

void softmax_focal_loss_forward_impl(Tensor input, Tensor target, Tensor weight,
                                     Tensor output, float gamma, float alpha);

void softmax_focal_loss_backward_impl(Tensor input, Tensor target,
                                      Tensor weight, Tensor buff,
                                      Tensor grad_input, float gamma,
                                      float alpha);

// Pair each *_impl entry with its *_cuda wrapper under the CUDA tag
// (registry mechanics are in REGISTER_DEVICE_IMPL, defined elsewhere).
REGISTER_DEVICE_IMPL(sigmoid_focal_loss_forward_impl, CUDA,
                     sigmoid_focal_loss_forward_cuda);
REGISTER_DEVICE_IMPL(sigmoid_focal_loss_backward_impl, CUDA,
                     sigmoid_focal_loss_backward_cuda);
REGISTER_DEVICE_IMPL(softmax_focal_loss_forward_impl, CUDA,
                     softmax_focal_loss_forward_cuda);
REGISTER_DEVICE_IMPL(softmax_focal_loss_backward_impl, CUDA,
                     softmax_focal_loss_backward_cuda);

// Launchers for the furthest-point-sampling CUDA kernels. Unlike most
// launchers in this file they take raw pointers rather than Tensor objects.
// Declarations only; the definitions are not part of this section.
void FurthestPointSamplingForwardCUDAKernelLauncher(int b, int n, int m,
                                                    const float* dataset,
                                                    float* temp, int* idxs);

void FurthestPointSamplingWithDistForwardCUDAKernelLauncher(
    int b, int n, int m, const float* dataset, float* temp, int* idxs);

// CUDA entry point for furthest point sampling (forward). Pulls the raw
// buffers out of the three tensors (float, float, int respectively) and hands
// them, together with b/n/m, to FurthestPointSamplingForwardCUDAKernelLauncher.
void furthest_point_sampling_forward_cuda(Tensor points_tensor,
                                          Tensor temp_tensor, Tensor idx_tensor,
                                          int b, int n, int m) {
  // Extract in the same order as the launcher consumes them.
  const float* points_ptr = points_tensor.data_ptr<float>();
  float* temp_ptr = temp_tensor.data_ptr<float>();
  int* idx_ptr = idx_tensor.data_ptr<int>();

  FurthestPointSamplingForwardCUDAKernelLauncher(b, n, m, points_ptr, temp_ptr,
                                                 idx_ptr);
}

// CUDA entry point for the "with dist" variant of furthest point sampling
// (forward). Pulls the raw buffers out of the three tensors (float, float,
// int respectively) and hands them, together with b/n/m, to
// FurthestPointSamplingWithDistForwardCUDAKernelLauncher.
void furthest_point_sampling_with_dist_forward_cuda(Tensor points_tensor,
                                                    Tensor temp_tensor,
                                                    Tensor idx_tensor, int b,
                                                    int n, int m) {
  // Extract in the same order as the launcher consumes them.
  const float* points_ptr = points_tensor.data_ptr<float>();
  float* temp_ptr = temp_tensor.data_ptr<float>();
  int* idx_ptr = idx_tensor.data_ptr<int>();

  FurthestPointSamplingWithDistForwardCUDAKernelLauncher(b, n, m, points_ptr,
                                                         temp_ptr, idx_ptr);
}

// Dispatcher-side entry points for furthest point sampling; declared here so
// they can be named in the registrations below.
void furthest_point_sampling_forward_impl(Tensor points_tensor,
                                          Tensor temp_tensor, Tensor idx_tensor,
                                          int b, int n, int m);

void furthest_point_sampling_with_dist_forward_impl(Tensor points_tensor,
                                                    Tensor temp_tensor,
                                                    Tensor idx_tensor, int b,
                                                    int n, int m);

// Pair each *_impl entry with its *_cuda wrapper under the CUDA tag
// (registry mechanics are in REGISTER_DEVICE_IMPL, defined elsewhere).
REGISTER_DEVICE_IMPL(furthest_point_sampling_forward_impl, CUDA,
                     furthest_point_sampling_forward_cuda);
REGISTER_DEVICE_IMPL(furthest_point_sampling_with_dist_forward_impl, CUDA,
                     furthest_point_sampling_with_dist_forward_cuda);

// Fused bias + leaky-ReLU op. Only declared in this section (no wrapper body
// here); this function itself, not a *_cuda wrapper, is what gets registered
// below. Returns a torch::Tensor rather than writing into an output argument.
torch::Tensor fused_bias_leakyrelu_op(const torch::Tensor& input,
                                      const torch::Tensor& bias,
                                      const torch::Tensor& refer, int act,
                                      int grad, float alpha, float scale);

// Dispatcher-side entry point with the same signature.
torch::Tensor fused_bias_leakyrelu_op_impl(const torch::Tensor& input,
                                           const torch::Tensor& bias,
                                           const torch::Tensor& refer, int act,
                                           int grad, float alpha, float scale);
// Pair fused_bias_leakyrelu_op_impl with fused_bias_leakyrelu_op under the
// CUDA tag (registry mechanics are in REGISTER_DEVICE_IMPL, defined
// elsewhere).
REGISTER_DEVICE_IMPL(fused_bias_leakyrelu_op_impl, CUDA,
                     fused_bias_leakyrelu_op);

// Launchers for the gather-points forward / backward CUDA kernels.
// Declarations only; the definitions are not part of this section.
void GatherPointsForwardCUDAKernelLauncher(int b, int c, int n, int npoints,
                                           const Tensor points,
                                           const Tensor idx, Tensor out);

void GatherPointsBackwardCUDAKernelLauncher(int b, int c, int n, int npoints,
                                            const Tensor grad_out,
                                            const Tensor idx,
                                            Tensor grad_points);

// CUDA entry point for gather points (forward). Passes every argument
// through, unchanged and in order, to GatherPointsForwardCUDAKernelLauncher.
void gather_points_forward_cuda(int b, int c, int n, int npoints,
                                const Tensor points, const Tensor idx,
                                Tensor out) {
  GatherPointsForwardCUDAKernelLauncher(b, c, n, npoints, points, idx, out);
}

// CUDA entry point for gather points (backward). Passes every argument
// through, unchanged and in order, to GatherPointsBackwardCUDAKernelLauncher.
void gather_points_backward_cuda(int b, int c, int n, int npoints,
                                 const Tensor grad_out, const Tensor idx,
                                 Tensor grad_points) {
  GatherPointsBackwardCUDAKernelLauncher(b, c, n, npoints, grad_out, idx,
                                         grad_points);
}

// Dispatcher-side entry points for gather points; declared here so they can
// be named in the registrations below.
void gather_points_forward_impl(int b, int c, int n, int npoints,
                                const Tensor points, const Tensor idx,
                                Tensor out);

void gather_points_backward_impl(int b, int c, int n, int npoints,
                                 const Tensor grad_out, const Tensor idx,
                                 Tensor grad_points);

// Pair each *_impl entry with its *_cuda wrapper under the CUDA tag
// (registry mechanics are in REGISTER_DEVICE_IMPL, defined elsewhere).
REGISTER_DEVICE_IMPL(gather_points_forward_impl, CUDA,
                     gather_points_forward_cuda);
REGISTER_DEVICE_IMPL(gather_points_backward_impl, CUDA,
                     gather_points_backward_cuda);

// Launchers for the group-points forward / backward CUDA kernels.
// Declarations only; the definitions are not part of this section.
void GroupPointsForwardCUDAKernelLauncher(int b, int c, int n, int npoints,
                                          int nsample, const Tensor points,
                                          const Tensor idx, Tensor out);

void GroupPointsBackwardCUDAKernelLauncher(int b, int c, int n, int npoints,
                                           int nsample, const Tensor grad_out,
                                           const Tensor idx,
                                           Tensor grad_points);

// CUDA entry point for group points (forward). Passes every argument through,
// unchanged and in order, to GroupPointsForwardCUDAKernelLauncher.
void group_points_forward_cuda(int b, int c, int n, int npoints, int nsample,
                               const Tensor points, const Tensor idx,
                               Tensor out) {
  GroupPointsForwardCUDAKernelLauncher(b, c, n, npoints, nsample, points, idx,
                                       out);
}

// CUDA entry point for group points (backward). Passes every argument
// through, unchanged and in order, to GroupPointsBackwardCUDAKernelLauncher.
void group_points_backward_cuda(int b, int c, int n, int npoints, int nsample,
                                const Tensor grad_out, const Tensor idx,
                                Tensor grad_points) {
  GroupPointsBackwardCUDAKernelLauncher(b, c, n, npoints, nsample, grad_out,
                                        idx, grad_points);
}

// Dispatcher-side entry points for group points; declared here so they can be
// named in the registrations below.
void group_points_forward_impl(int b, int c, int n, int npoints, int nsample,
                               const Tensor points, const Tensor idx,
                               Tensor out);

void group_points_backward_impl(int b, int c, int n, int npoints, int nsample,
                                const Tensor grad_out, const Tensor idx,
                                Tensor grad_points);

// Pair each *_impl entry with its *_cuda wrapper under the CUDA tag
// (registry mechanics are in REGISTER_DEVICE_IMPL, defined elsewhere).
REGISTER_DEVICE_IMPL(group_points_forward_impl, CUDA,
                     group_points_forward_cuda);
REGISTER_DEVICE_IMPL(group_points_backward_impl, CUDA,
                     group_points_backward_cuda);

// Launchers for the IoU3D CUDA kernels (BEV overlap, BEV IoU, and two NMS
// variants). Declarations only; the definitions are not part of this section.
void IoU3DBoxesOverlapBevForwardCUDAKernelLauncher(const int num_a,
                                                   const Tensor boxes_a,
                                                   const int num_b,
                                                   const Tensor boxes_b,
                                                   Tensor ans_overlap);

void IoU3DBoxesIoUBevForwardCUDAKernelLauncher(const int num_a,
                                               const Tensor boxes_a,
                                               const int num_b,
                                               const Tensor boxes_b,
                                               Tensor ans_iou);

// The NMS launchers take the mask as a raw unsigned long long pointer rather
// than a Tensor.
void IoU3DNMSForwardCUDAKernelLauncher(const Tensor boxes,
                                       unsigned long long* mask, int boxes_num,
                                       float nms_overlap_thresh);

void IoU3DNMSNormalForwardCUDAKernelLauncher(const Tensor boxes,
                                             unsigned long long* mask,
                                             int boxes_num,
                                             float nms_overlap_thresh);

// CUDA entry point for IoU3D BEV box overlap (forward). Passes every argument
// through, unchanged and in order, to
// IoU3DBoxesOverlapBevForwardCUDAKernelLauncher.
void iou3d_boxes_overlap_bev_forward_cuda(const int num_a, const Tensor boxes_a,
                                          const int num_b, const Tensor boxes_b,
                                          Tensor ans_overlap) {
  IoU3DBoxesOverlapBevForwardCUDAKernelLauncher(num_a, boxes_a, num_b, boxes_b,
                                                ans_overlap);
}

// CUDA entry point for IoU3D BEV box IoU (forward). Passes every argument
// through, unchanged and in order, to
// IoU3DBoxesIoUBevForwardCUDAKernelLauncher.
void iou3d_boxes_iou_bev_forward_cuda(const int num_a, const Tensor boxes_a,
                                      const int num_b, const Tensor boxes_b,
                                      Tensor ans_iou) {
  IoU3DBoxesIoUBevForwardCUDAKernelLauncher(num_a, boxes_a, num_b, boxes_b,
                                            ans_iou);
}

// CUDA entry point for IoU3D NMS (forward). Passes every argument through,
// unchanged and in order, to IoU3DNMSForwardCUDAKernelLauncher.
void iou3d_nms_forward_cuda(const Tensor boxes, unsigned long long* mask,
                            int boxes_num, float nms_overlap_thresh) {
  IoU3DNMSForwardCUDAKernelLauncher(boxes, mask, boxes_num, nms_overlap_thresh);
}

// CUDA entry point for the "normal" IoU3D NMS variant (forward). Passes every
// argument through, unchanged and in order, to
// IoU3DNMSNormalForwardCUDAKernelLauncher.
void iou3d_nms_normal_forward_cuda(const Tensor boxes, unsigned long long* mask,
                                   int boxes_num, float nms_overlap_thresh) {
  IoU3DNMSNormalForwardCUDAKernelLauncher(boxes, mask, boxes_num,
                                          nms_overlap_thresh);
}

// Dispatcher-side entry points for the IoU3D ops; declared here so they can
// be named in the registrations below.
void iou3d_boxes_overlap_bev_forward_impl(const int num_a, const Tensor boxes_a,
                                          const int num_b, const Tensor boxes_b,
                                          Tensor ans_overlap);

void iou3d_boxes_iou_bev_forward_impl(const int num_a, const Tensor boxes_a,
                                      const int num_b, const Tensor boxes_b,
                                      Tensor ans_iou);

void iou3d_nms_forward_impl(const Tensor boxes, unsigned long long* mask,
                            int boxes_num, float nms_overlap_thresh);

void iou3d_nms_normal_forward_impl(const Tensor boxes, unsigned long long* mask,
                                   int boxes_num, float nms_overlap_thresh);

// Pair each *_impl entry with its *_cuda wrapper under the CUDA tag
// (registry mechanics are in REGISTER_DEVICE_IMPL, defined elsewhere).
REGISTER_DEVICE_IMPL(iou3d_boxes_overlap_bev_forward_impl, CUDA,
                     iou3d_boxes_overlap_bev_forward_cuda);
REGISTER_DEVICE_IMPL(iou3d_boxes_iou_bev_forward_impl, CUDA,
                     iou3d_boxes_iou_bev_forward_cuda);
REGISTER_DEVICE_IMPL(iou3d_nms_forward_impl, CUDA, iou3d_nms_forward_cuda);
REGISTER_DEVICE_IMPL(iou3d_nms_normal_forward_impl, CUDA,
                     iou3d_nms_normal_forward_cuda);

// Launcher for the KNN forward CUDA kernel. Declaration only; the definition
// is not part of this section of the file.
void KNNForwardCUDAKernelLauncher(int b, int n, int m, int nsample,
                                  const Tensor xyz, const Tensor new_xyz,
                                  Tensor idx, Tensor dist2);

// CUDA entry point for KNN (forward): relays all arguments, in order, to
// KNNForwardCUDAKernelLauncher.
void knn_forward_cuda(int b, int n, int m, int nsample,
                      const Tensor xyz, const Tensor new_xyz,
                      Tensor idx, Tensor dist2) {
  KNNForwardCUDAKernelLauncher(b, n, m, nsample,
                               xyz, new_xyz,
                               idx, dist2);
}

// Dispatcher-side entry point; declared here so it can be named in the
// registration below.
void knn_forward_impl(int b, int n, int m, int nsample, const Tensor xyz,
                      const Tensor new_xyz, Tensor idx, Tensor dist2);
// Pair knn_forward_impl with knn_forward_cuda under the CUDA tag (registry
// mechanics are in REGISTER_DEVICE_IMPL, defined elsewhere).
REGISTER_DEVICE_IMPL(knn_forward_impl, CUDA, knn_forward_cuda);

// Launcher for the masked im2col forward CUDA kernel. Declaration only; the
// definition is not part of this section of the file.
void MaskedIm2colForwardCUDAKernelLauncher(const Tensor bottom_data,
                                           const Tensor mask_h_idx,
                                           const Tensor mask_w_idx,
                                           Tensor top_data, const int kernel_h,
                                           const int kernel_w, const int pad_h,
                                           const int pad_w);

// Launcher for the masked col2im forward CUDA kernel. Declaration only.
void MaskedCol2imForwardCUDAKernelLauncher(const Tensor bottom_data,
                                           const Tensor mask_h_idx,
                                           const Tensor mask_w_idx,
                                           Tensor top_data, const int height,
                                           const int width, const int channels);

// CUDA entry point for masked im2col (forward): relays all arguments, in
// order, to MaskedIm2colForwardCUDAKernelLauncher (`im` and `col` map to the
// launcher's bottom_data and top_data parameters).
void masked_im2col_forward_cuda(const Tensor im, const Tensor mask_h_idx,
                                const Tensor mask_w_idx, Tensor col,
                                const int kernel_h, const int kernel_w,
                                const int pad_h, const int pad_w) {
  // Layout notes carried over from the original author:
  //   im:     (n, ic, h, w), kernel size (kh, kw)
  //   kernel: (oc, ic * kh * kw)
  //   col:    (kh * kw * ic, ow * oh)
  MaskedIm2colForwardCUDAKernelLauncher(im, mask_h_idx, mask_w_idx,
                                        col,
                                        kernel_h, kernel_w,
                                        pad_h, pad_w);
}

// CUDA entry point for masked col2im (forward): relays all arguments, in
// order, to MaskedCol2imForwardCUDAKernelLauncher (`col` and `im` map to the
// launcher's bottom_data and top_data parameters).
void masked_col2im_forward_cuda(const Tensor col, const Tensor mask_h_idx,
                                const Tensor mask_w_idx, Tensor im, int height,
                                int width, int channels) {
  // Layout notes carried over from the original author:
  //   im:     (n, ic, h, w), kernel size (kh, kw)
  //   kernel: (oc, ic * kh * kw)
  //   col:    (kh * kw * ic, ow * oh)
  MaskedCol2imForwardCUDAKernelLauncher(col, mask_h_idx, mask_w_idx,
                                        im,
                                        height, width, channels);
}

// Declaration only; same parameter list as masked_im2col_forward_cuda.
void masked_im2col_forward_impl(const Tensor im, const Tensor mask_h_idx,
                                const Tensor mask_w_idx, Tensor col,
                                const int kernel_h, const int kernel_w,
                                const int pad_h, const int pad_w);

// Declaration only; same parameter list as masked_col2im_forward_cuda.
void masked_col2im_forward_impl(const Tensor col, const Tensor mask_h_idx,
                                const Tensor mask_w_idx, Tensor im, int height,
                                int width, int channels);

// Associate the CUDA wrappers above with their *_impl entry points.
// NOTE(review): REGISTER_DEVICE_IMPL is defined outside this section.
REGISTER_DEVICE_IMPL(masked_im2col_forward_impl, CUDA,
                     masked_im2col_forward_cuda);
REGISTER_DEVICE_IMPL(masked_col2im_forward_impl, CUDA,
                     masked_col2im_forward_cuda);

// Forward declarations of the modulated deformable convolution CUDA routines.
// Unlike most ops in this section there is no local wrapper: these *_cuda
// functions are declared here and registered directly further down; their
// definitions are not in this part of the file.

// im2col: writes into data_col.
void modulated_deformable_im2col_cuda(
    const Tensor data_im, const Tensor data_offset, const Tensor data_mask,
    const int batch_size, const int channels, const int height_im,
    const int width_im, const int height_col, const int width_col,
    const int kernel_h, const int kernel_w, const int pad_h, const int pad_w,
    const int stride_h, const int stride_w, const int dilation_h,
    const int dilation_w, const int deformable_group, Tensor data_col);

// col2im: the only non-const tensor argument is grad_im.
void modulated_deformable_col2im_cuda(
    const Tensor data_col, const Tensor data_offset, const Tensor data_mask,
    const int batch_size, const int channels, const int height_im,
    const int width_im, const int height_col, const int width_col,
    const int kernel_h, const int kernel_w, const int pad_h, const int pad_w,
    const int stride_h, const int stride_w, const int dilation_h,
    const int dilation_w, const int deformable_group, Tensor grad_im);

// col2im w.r.t. sampling coordinates: the non-const tensor arguments are
// grad_offset and grad_mask.
void modulated_deformable_col2im_coord_cuda(
    const Tensor data_col, const Tensor data_im, const Tensor data_offset,
    const Tensor data_mask, const int batch_size, const int channels,
    const int height_im, const int width_im, const int height_col,
    const int width_col, const int kernel_h, const int kernel_w,
    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
    const int dilation_h, const int dilation_w, const int deformable_group,
    Tensor grad_offset, Tensor grad_mask);

// Declarations of the *_impl entry points; each has exactly the same parameter
// list as the corresponding *_cuda function declared above.
void modulated_deformable_im2col_impl(
    const Tensor data_im, const Tensor data_offset, const Tensor data_mask,
    const int batch_size, const int channels, const int height_im,
    const int width_im, const int height_col, const int width_col,
    const int kernel_h, const int kernel_w, const int pad_h, const int pad_w,
    const int stride_h, const int stride_w, const int dilation_h,
    const int dilation_w, const int deformable_group, Tensor data_col);

void modulated_deformable_col2im_impl(
    const Tensor data_col, const Tensor data_offset, const Tensor data_mask,
    const int batch_size, const int channels, const int height_im,
    const int width_im, const int height_col, const int width_col,
    const int kernel_h, const int kernel_w, const int pad_h, const int pad_w,
    const int stride_h, const int stride_w, const int dilation_h,
    const int dilation_w, const int deformable_group, Tensor grad_im);

void modulated_deformable_col2im_coord_impl(
    const Tensor data_col, const Tensor data_im, const Tensor data_offset,
    const Tensor data_mask, const int batch_size, const int channels,
    const int height_im, const int width_im, const int height_col,
    const int width_col, const int kernel_h, const int kernel_w,
    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
    const int dilation_h, const int dilation_w, const int deformable_group,
    Tensor grad_offset, Tensor grad_mask);

// Associate each *_cuda routine with its *_impl entry point under the CUDA key.
// NOTE(review): REGISTER_DEVICE_IMPL is defined outside this section.
REGISTER_DEVICE_IMPL(modulated_deformable_im2col_impl, CUDA,
                     modulated_deformable_im2col_cuda);
REGISTER_DEVICE_IMPL(modulated_deformable_col2im_impl, CUDA,
                     modulated_deformable_col2im_cuda);
REGISTER_DEVICE_IMPL(modulated_deformable_col2im_coord_impl, CUDA,
                     modulated_deformable_col2im_coord_cuda);

// Forward declarations for multi-scale deformable attention. The *_cuda_*
// functions are registered directly (no local wrapper); their definitions are
// not in this part of the file.

// Forward pass: returns a Tensor.
Tensor ms_deform_attn_cuda_forward(const Tensor& value,
                                   const Tensor& spatial_shapes,
                                   const Tensor& level_start_index,
                                   const Tensor& sampling_loc,
                                   const Tensor& attn_weight,
                                   const int im2col_step);

// Backward pass: results are delivered through the non-const references
// grad_value, grad_sampling_loc and grad_attn_weight.
void ms_deform_attn_cuda_backward(
    const Tensor& value, const Tensor& spatial_shapes,
    const Tensor& level_start_index, const Tensor& sampling_loc,
    const Tensor& attn_weight, const Tensor& grad_output, Tensor& grad_value,
    Tensor& grad_sampling_loc, Tensor& grad_attn_weight, const int im2col_step);

// *_impl entry points; same signatures as the CUDA functions above.
Tensor ms_deform_attn_impl_forward(const Tensor& value,
                                   const Tensor& spatial_shapes,
                                   const Tensor& level_start_index,
                                   const Tensor& sampling_loc,
                                   const Tensor& attn_weight,
                                   const int im2col_step);

void ms_deform_attn_impl_backward(
    const Tensor& value, const Tensor& spatial_shapes,
    const Tensor& level_start_index, const Tensor& sampling_loc,
    const Tensor& attn_weight, const Tensor& grad_output, Tensor& grad_value,
    Tensor& grad_sampling_loc, Tensor& grad_attn_weight, const int im2col_step);

// Associate the CUDA functions with the *_impl entry points.
// NOTE(review): REGISTER_DEVICE_IMPL is defined outside this section.
REGISTER_DEVICE_IMPL(ms_deform_attn_impl_forward, CUDA,
                     ms_deform_attn_cuda_forward);
REGISTER_DEVICE_IMPL(ms_deform_attn_impl_backward, CUDA,
                     ms_deform_attn_cuda_backward);

// Forward declaration of the NMS kernel launcher (definition is not in this
// part of the file). Returns a Tensor; called by nms_cuda below.
Tensor NMSCUDAKernelLauncher(Tensor boxes, Tensor scores, float iou_threshold,
                             int offset);

// CUDA backend for NMS: hands boxes, scores, iou_threshold and offset, in that
// order and unmodified, to NMSCUDAKernelLauncher and returns its result.
Tensor nms_cuda(Tensor boxes, Tensor scores, float iou_threshold, int offset) {
  Tensor launcher_result =
      NMSCUDAKernelLauncher(boxes, scores, iou_threshold, offset);
  return launcher_result;
}

// Declaration only; same signature as nms_cuda.
Tensor nms_impl(Tensor boxes, Tensor scores, float iou_threshold, int offset);
// Associates nms_cuda with nms_impl under the CUDA key.
// NOTE(review): REGISTER_DEVICE_IMPL is defined outside this section.
REGISTER_DEVICE_IMPL(nms_impl, CUDA, nms_cuda);

// Forward declarations of the two points-in-boxes kernel launchers ("part" and
// "all" variants). Both take the same parameter list; the only non-const
// tensor is box_idx_of_points. Definitions are not in this part of the file.
void PointsInBoxesPartForwardCUDAKernelLauncher(int batch_size, int boxes_num,
                                                int pts_num, const Tensor boxes,
                                                const Tensor pts,
                                                Tensor box_idx_of_points);

void PointsInBoxesAllForwardCUDAKernelLauncher(int batch_size, int boxes_num,
                                               int pts_num, const Tensor boxes,
                                               const Tensor pts,
                                               Tensor box_idx_of_points);

// CUDA wrapper for the "part" points-in-boxes op: forwards every argument
// unchanged to PointsInBoxesPartForwardCUDAKernelLauncher.
// box_idx_of_points is the only non-const tensor handed to the launcher.
void points_in_boxes_part_forward_cuda(int batch_size, int boxes_num,
                                       int pts_num, const Tensor boxes,
                                       const Tensor pts,
                                       Tensor box_idx_of_points) {
  PointsInBoxesPartForwardCUDAKernelLauncher(batch_size, boxes_num, pts_num,
                                             boxes, pts, box_idx_of_points);
}

// CUDA wrapper for the "all" points-in-boxes op: forwards every argument
// unchanged to PointsInBoxesAllForwardCUDAKernelLauncher.
// box_idx_of_points is the only non-const tensor handed to the launcher.
void points_in_boxes_all_forward_cuda(int batch_size, int boxes_num,
                                      int pts_num, const Tensor boxes,
                                      const Tensor pts,
                                      Tensor box_idx_of_points) {
  PointsInBoxesAllForwardCUDAKernelLauncher(batch_size, boxes_num, pts_num,
                                            boxes, pts, box_idx_of_points);
}

// Declarations of the *_impl entry points; same parameter lists as the
// corresponding *_cuda wrappers.
void points_in_boxes_part_forward_impl(int batch_size, int boxes_num,
                                       int pts_num, const Tensor boxes,
                                       const Tensor pts,
                                       Tensor box_idx_of_points);

void points_in_boxes_all_forward_impl(int batch_size, int boxes_num,
                                      int pts_num, const Tensor boxes,
                                      const Tensor pts,
                                      Tensor box_idx_of_points);
// Associate the CUDA wrappers with the *_impl entry points.
// NOTE(review): REGISTER_DEVICE_IMPL is defined outside this section.
REGISTER_DEVICE_IMPL(points_in_boxes_part_forward_impl, CUDA,
                     points_in_boxes_part_forward_cuda);
REGISTER_DEVICE_IMPL(points_in_boxes_all_forward_impl, CUDA,
                     points_in_boxes_all_forward_cuda);

// Forward declarations of the PSAMask kernel launchers (definitions are not in
// this part of the file). The forward launcher's non-const tensor is output;
// the backward launcher's is grad_input.
void PSAMaskForwardCUDAKernelLauncher(const int psa_type, const Tensor input,
                                      Tensor output, const int num_,
                                      const int h_feature, const int w_feature,
                                      const int h_mask, const int w_mask,
                                      const int half_h_mask,
                                      const int half_w_mask);

void PSAMaskBackwardCUDAKernelLauncher(
    const int psa_type, const Tensor grad_output, Tensor grad_input,
    const int num_, const int h_feature, const int w_feature, const int h_mask,
    const int w_mask, const int half_h_mask, const int half_w_mask);

// CUDA wrapper for the PSAMask forward pass: forwards every argument unchanged,
// in the same order, to PSAMaskForwardCUDAKernelLauncher.
void psamask_forward_cuda(const int psa_type, const Tensor input, Tensor output,
                          const int num_, const int h_feature,
                          const int w_feature, const int h_mask,
                          const int w_mask, const int half_h_mask,
                          const int half_w_mask) {
  PSAMaskForwardCUDAKernelLauncher(psa_type, input, output, num_, h_feature,
                                   w_feature, h_mask, w_mask, half_h_mask,
                                   half_w_mask);
}

// CUDA wrapper for the PSAMask backward pass: forwards every argument
// unchanged, in the same order, to PSAMaskBackwardCUDAKernelLauncher.
void psamask_backward_cuda(const int psa_type, const Tensor grad_output,
                           Tensor grad_input, const int num_,
                           const int h_feature, const int w_feature,
                           const int h_mask, const int w_mask,
                           const int half_h_mask, const int half_w_mask) {
  PSAMaskBackwardCUDAKernelLauncher(psa_type, grad_output, grad_input, num_,
                                    h_feature, w_feature, h_mask, w_mask,
                                    half_h_mask, half_w_mask);
}

// Declarations of the *_impl entry points; same parameter lists as
// psamask_forward_cuda / psamask_backward_cuda.
void psamask_forward_impl(const int psa_type, const Tensor input, Tensor output,
                          const int num_, const int h_feature,
                          const int w_feature, const int h_mask,
                          const int w_mask, const int half_h_mask,
                          const int half_w_mask);

void psamask_backward_impl(const int psa_type, const Tensor grad_output,
                           Tensor grad_input, const int num_,
                           const int h_feature, const int w_feature,
                           const int h_mask, const int w_mask,
                           const int half_h_mask, const int half_w_mask);
// Associate the CUDA wrappers with the *_impl entry points.
// NOTE(review): REGISTER_DEVICE_IMPL is defined outside this section.
REGISTER_DEVICE_IMPL(psamask_forward_impl, CUDA, psamask_forward_cuda);
REGISTER_DEVICE_IMPL(psamask_backward_impl, CUDA, psamask_backward_cuda);

// Forward declarations of the RoI Align kernel launchers (definitions are not
// in this part of the file). Note the tensor argument order differs between
// the two: forward takes (input, rois, output, argmax_y, argmax_x), backward
// takes (grad_output, rois, argmax_y, argmax_x, grad_input).
void ROIAlignForwardCUDAKernelLauncher(Tensor input, Tensor rois, Tensor output,
                                       Tensor argmax_y, Tensor argmax_x,
                                       int aligned_height, int aligned_width,
                                       float spatial_scale, int sampling_ratio,
                                       int pool_mode, bool aligned);

void ROIAlignBackwardCUDAKernelLauncher(Tensor grad_output, Tensor rois,
                                        Tensor argmax_y, Tensor argmax_x,
                                        Tensor grad_input, int aligned_height,
                                        int aligned_width, float spatial_scale,
                                        int sampling_ratio, int pool_mode,
                                        bool aligned);

// CUDA wrapper for the RoI Align forward pass: forwards every argument
// unchanged, in the same order, to ROIAlignForwardCUDAKernelLauncher.
void roi_align_forward_cuda(Tensor input, Tensor rois, Tensor output,
                            Tensor argmax_y, Tensor argmax_x,
                            int aligned_height, int aligned_width,
                            float spatial_scale, int sampling_ratio,
                            int pool_mode, bool aligned) {
  ROIAlignForwardCUDAKernelLauncher(
      input, rois, output, argmax_y, argmax_x, aligned_height, aligned_width,
      spatial_scale, sampling_ratio, pool_mode, aligned);
}

// CUDA wrapper for the RoI Align backward pass: forwards every argument
// unchanged, in the same order, to ROIAlignBackwardCUDAKernelLauncher.
void roi_align_backward_cuda(Tensor grad_output, Tensor rois, Tensor argmax_y,
                             Tensor argmax_x, Tensor grad_input,
                             int aligned_height, int aligned_width,
                             float spatial_scale, int sampling_ratio,
                             int pool_mode, bool aligned) {
  ROIAlignBackwardCUDAKernelLauncher(
      grad_output, rois, argmax_y, argmax_x, grad_input, aligned_height,
      aligned_width, spatial_scale, sampling_ratio, pool_mode, aligned);
}

// Declarations of the *_impl entry points; same parameter lists as
// roi_align_forward_cuda / roi_align_backward_cuda.
void roi_align_forward_impl(Tensor input, Tensor rois, Tensor output,
                            Tensor argmax_y, Tensor argmax_x,
                            int aligned_height, int aligned_width,
                            float spatial_scale, int sampling_ratio,
                            int pool_mode, bool aligned);

void roi_align_backward_impl(Tensor grad_output, Tensor rois, Tensor argmax_y,
                             Tensor argmax_x, Tensor grad_input,
                             int aligned_height, int aligned_width,
                             float spatial_scale, int sampling_ratio,
                             int pool_mode, bool aligned);

// Associate the CUDA wrappers with the *_impl entry points.
// NOTE(review): REGISTER_DEVICE_IMPL is defined outside this section.
REGISTER_DEVICE_IMPL(roi_align_forward_impl, CUDA, roi_align_forward_cuda);
REGISTER_DEVICE_IMPL(roi_align_backward_impl, CUDA, roi_align_backward_cuda);

// Forward declarations of the rotated RoI Align kernel launchers (definitions
// are not in this part of the file). Both take explicit channels/height/width
// and num_rois integers, which the wrappers below read from tensor sizes.
// The last argument is the only non-const tensor (output / bottom_grad).
void ROIAlignRotatedForwardCUDAKernelLauncher(
    const at::Tensor features, const at::Tensor rois, const float spatial_scale,
    const int sample_num, const bool aligned, const bool clockwise,
    const int channels, const int height, const int width, const int num_rois,
    const int pooled_height, const int pooled_width, at::Tensor output);

void ROIAlignRotatedBackwardCUDAKernelLauncher(
    const at::Tensor top_grad, const at::Tensor rois, const float spatial_scale,
    const int sample_num, const bool aligned, const bool clockwise,
    const int channels, const int height, const int width, const int num_rois,
    const int pooled_height, const int pooled_width, at::Tensor bottom_grad);

// CUDA entry point for the rotated RoI Align forward pass.
//
// Reads the RoI count from rois.size(0) and the channel/height/width extents
// from features.size(1..3), then calls
// ROIAlignRotatedForwardCUDAKernelLauncher. sample_ratio is passed in the
// launcher's sample_num slot, and aligned_height/aligned_width in its
// pooled_height/pooled_width slots.
//
// Raises via AT_ERROR("wrong roi size") when rois.size(1) is not 6.
void roi_align_rotated_forward_cuda(Tensor features, Tensor rois, Tensor output,
                                    int aligned_height, int aligned_width,
                                    float spatial_scale, int sample_ratio,
                                    bool aligned, bool clockwise) {
  const int roi_count = rois.size(0);
  const int roi_row_len = rois.size(1);
  if (roi_row_len != 6) {
    AT_ERROR("wrong roi size");
  }

  // Extents taken from dimensions 1, 2 and 3 of the feature tensor.
  const int feat_channels = features.size(1);
  const int feat_height = features.size(2);
  const int feat_width = features.size(3);

  ROIAlignRotatedForwardCUDAKernelLauncher(
      features, rois, spatial_scale, sample_ratio, aligned, clockwise,
      feat_channels, feat_height, feat_width, roi_count, aligned_height,
      aligned_width, output);
}

// CUDA entry point for the rotated RoI Align backward pass.
//
// Reads the RoI count from rois.size(0) and the channel/height/width extents
// from bottom_grad.size(1..3), then calls
// ROIAlignRotatedBackwardCUDAKernelLauncher. sample_ratio is passed in the
// launcher's sample_num slot, and aligned_height/aligned_width in its
// pooled_height/pooled_width slots.
//
// Raises via AT_ERROR("wrong roi size") when rois.size(1) is not 6.
void roi_align_rotated_backward_cuda(Tensor top_grad, Tensor rois,
                                     Tensor bottom_grad, int aligned_height,
                                     int aligned_width, float spatial_scale,
                                     int sample_ratio, bool aligned,
                                     bool clockwise) {
  const int roi_count = rois.size(0);
  const int roi_row_len = rois.size(1);
  if (roi_row_len != 6) {
    AT_ERROR("wrong roi size");
  }

  // Extents come from the gradient buffer being filled, not from top_grad.
  const int grad_channels = bottom_grad.size(1);
  const int grad_height = bottom_grad.size(2);
  const int grad_width = bottom_grad.size(3);

  ROIAlignRotatedBackwardCUDAKernelLauncher(
      top_grad, rois, spatial_scale, sample_ratio, aligned, clockwise,
      grad_channels, grad_height, grad_width, roi_count, aligned_height,
      aligned_width, bottom_grad);
}

// Declarations of the *_impl entry points; same parameter lists as
// roi_align_rotated_forward_cuda / roi_align_rotated_backward_cuda.
void roi_align_rotated_forward_impl(Tensor features, Tensor rois, Tensor output,
                                    int aligned_height, int aligned_width,
                                    float spatial_scale, int sample_ratio,
                                    bool aligned, bool clockwise);

void roi_align_rotated_backward_impl(Tensor top_grad, Tensor rois,
                                     Tensor bottom_grad, int aligned_height,
                                     int aligned_width, float spatial_scale,
                                     int sample_ratio, bool aligned,
                                     bool clockwise);
// Associate the CUDA wrappers with the *_impl entry points.
// NOTE(review): REGISTER_DEVICE_IMPL is defined outside this section.
REGISTER_DEVICE_IMPL(roi_align_rotated_forward_impl, CUDA,
                     roi_align_rotated_forward_cuda);
REGISTER_DEVICE_IMPL(roi_align_rotated_backward_impl, CUDA,
                     roi_align_rotated_backward_cuda);

// Forward declarations of the RiRoI Align Rotated kernel launchers (definitions
// are not in this part of the file). Compared with the ROIAlignRotated
// launchers they have no `aligned` flag and take an extra num_orientations.
// The last argument is the only non-const tensor (output / bottom_grad).
void RiROIAlignRotatedForwardCUDAKernelLauncher(
    const at::Tensor features, const at::Tensor rois, const float spatial_scale,
    const int num_samples, const bool clockwise, const int channels,
    const int height, const int width, const int num_rois,
    const int pooled_height, const int pooled_width, const int num_orientations,
    at::Tensor output);

void RiROIAlignRotatedBackwardCUDAKernelLauncher(
    const at::Tensor top_grad, const at::Tensor rois, const float spatial_scale,
    const int num_samples, const bool clockwise, const int channels,
    const int height, const int width, const int num_rois,
    const int pooled_height, const int pooled_width, const int num_orientations,
    at::Tensor bottom_grad);

// CUDA entry point for the RiRoI Align Rotated forward pass.
//
// Validates the inputs, derives the per-orientation channel count as
// features.size(1) / num_orientations, and calls
// RiROIAlignRotatedForwardCUDAKernelLauncher.
//
// Raises (via AT_ERROR / CHECK_CONTIGUOUS) when:
//   - rois.size(1) is not 6,
//   - features or rois fails CHECK_CONTIGUOUS,
//   - num_orientations is not positive.
void riroi_align_rotated_forward_cuda(Tensor features, Tensor rois,
                                      Tensor output, int pooled_height,
                                      int pooled_width, float spatial_scale,
                                      int num_samples, int num_orientations,
                                      bool clockwise) {
  // Number of ROIs
  int num_rois = rois.size(0);
  int size_rois = rois.size(1);
  if (size_rois != 6) {
    AT_ERROR("wrong roi size");
  }
  CHECK_CONTIGUOUS(features);
  CHECK_CONTIGUOUS(rois);
  // Integer division by zero is undefined behaviour (typically a hard crash);
  // turn a bad num_orientations into a regular, catchable error instead.
  if (num_orientations <= 0) {
    AT_ERROR("num_orientations must be positive");
  }
  // Integer division: any remainder channels are dropped.
  int num_channels = features.size(1) / num_orientations;
  int data_height = features.size(2);
  int data_width = features.size(3);
  RiROIAlignRotatedForwardCUDAKernelLauncher(
      features, rois, spatial_scale, num_samples, clockwise, num_channels,
      data_height, data_width, num_rois, pooled_height, pooled_width,
      num_orientations, output);
}

// CUDA entry point for the RiRoI Align Rotated backward pass.
//
// Validates the inputs, derives the per-orientation channel count as
// bottom_grad.size(1) / num_orientations, and calls
// RiROIAlignRotatedBackwardCUDAKernelLauncher.
//
// Raises (via AT_ERROR / CHECK_CONTIGUOUS) when:
//   - rois.size(1) is not 6,
//   - top_grad or rois fails CHECK_CONTIGUOUS,
//   - num_orientations is not positive.
void riroi_align_rotated_backward_cuda(Tensor top_grad, Tensor rois,
                                       Tensor bottom_grad, int pooled_height,
                                       int pooled_width, float spatial_scale,
                                       int num_samples, int num_orientations,
                                       bool clockwise) {
  // Number of ROIs
  int num_rois = rois.size(0);
  int size_rois = rois.size(1);
  if (size_rois != 6) {
    AT_ERROR("wrong roi size");
  }
  CHECK_CONTIGUOUS(top_grad);
  CHECK_CONTIGUOUS(rois);
  // Integer division by zero is undefined behaviour (typically a hard crash);
  // turn a bad num_orientations into a regular, catchable error instead.
  if (num_orientations <= 0) {
    AT_ERROR("num_orientations must be positive");
  }
  // Integer division: any remainder channels are dropped.
  int num_channels = bottom_grad.size(1) / num_orientations;
  int data_height = bottom_grad.size(2);
  int data_width = bottom_grad.size(3);
  RiROIAlignRotatedBackwardCUDAKernelLauncher(
      top_grad, rois, spatial_scale, num_samples, clockwise, num_channels,
      data_height, data_width, num_rois, pooled_height, pooled_width,
      num_orientations, bottom_grad);
}

// Declarations of the *_impl entry points; same parameter lists as
// riroi_align_rotated_forward_cuda / riroi_align_rotated_backward_cuda.
void riroi_align_rotated_forward_impl(Tensor features, Tensor rois,
                                      Tensor output, int pooled_height,
                                      int pooled_width, float spatial_scale,
                                      int num_samples, int num_orientations,
                                      bool clockwise);

void riroi_align_rotated_backward_impl(Tensor top_grad, Tensor rois,
                                       Tensor bottom_grad, int pooled_height,
                                       int pooled_width, float spatial_scale,
                                       int num_samples, int num_orientations,
                                       bool clockwise);

// Associate the CUDA wrappers with the *_impl entry points.
// NOTE(review): REGISTER_DEVICE_IMPL is defined outside this section.
REGISTER_DEVICE_IMPL(riroi_align_rotated_forward_impl, CUDA,
                     riroi_align_rotated_forward_cuda);
REGISTER_DEVICE_IMPL(riroi_align_rotated_backward_impl, CUDA,
                     riroi_align_rotated_backward_cuda);

// Forward declarations of the RoI-aware 3D pooling kernel launchers
// (definitions are not in this part of the file). Note that the integer
// arguments come in a different order in the two signatures: forward is
// (boxes_num, pts_num, channels, max_pts_each_voxel, out_x, out_y, out_z),
// backward is (boxes_num, out_x, out_y, out_z, channels, max_pts_each_voxel).
void RoiawarePool3dForwardCUDAKernelLauncher(
    int boxes_num, int pts_num, int channels, int max_pts_each_voxel, int out_x,
    int out_y, int out_z, const Tensor rois, const Tensor pts,
    const Tensor pts_feature, Tensor argmax, Tensor pts_idx_of_voxels,
    Tensor pooled_features, int pool_method);

void RoiawarePool3dBackwardCUDAKernelLauncher(
    int boxes_num, int out_x, int out_y, int out_z, int channels,
    int max_pts_each_voxel, const Tensor pts_idx_of_voxels, const Tensor argmax,
    const Tensor grad_out, Tensor grad_in, int pool_method);

// CUDA wrapper for the RoI-aware 3D pooling forward pass: forwards every
// argument unchanged, in the same order, to
// RoiawarePool3dForwardCUDAKernelLauncher. The non-const tensors handed over
// are argmax, pts_idx_of_voxels and pooled_features.
void roiaware_pool3d_forward_cuda(int boxes_num, int pts_num, int channels,
                                  int max_pts_each_voxel, int out_x, int out_y,
                                  int out_z, const Tensor rois,
                                  const Tensor pts, const Tensor pts_feature,
                                  Tensor argmax, Tensor pts_idx_of_voxels,
                                  Tensor pooled_features, int pool_method) {
  RoiawarePool3dForwardCUDAKernelLauncher(
      boxes_num, pts_num, channels, max_pts_each_voxel, out_x, out_y, out_z,
      rois, pts, pts_feature, argmax, pts_idx_of_voxels, pooled_features,
      pool_method);
}

// CUDA wrapper for the RoI-aware 3D pooling backward pass: forwards every
// argument unchanged, in the same order, to
// RoiawarePool3dBackwardCUDAKernelLauncher. grad_in is the only non-const
// tensor handed over.
void roiaware_pool3d_backward_cuda(int boxes_num, int out_x, int out_y,
                                   int out_z, int channels,
                                   int max_pts_each_voxel,
                                   const Tensor pts_idx_of_voxels,
                                   const Tensor argmax, const Tensor grad_out,
                                   Tensor grad_in, int pool_method) {
  RoiawarePool3dBackwardCUDAKernelLauncher(
      boxes_num, out_x, out_y, out_z, channels, max_pts_each_voxel,
      pts_idx_of_voxels, argmax, grad_out, grad_in, pool_method);
}

// Declarations of the *_impl entry points; same parameter lists as
// roiaware_pool3d_forward_cuda / roiaware_pool3d_backward_cuda.
void roiaware_pool3d_forward_impl(int boxes_num, int pts_num, int channels,
                                  int max_pts_each_voxel, int out_x, int out_y,
                                  int out_z, const Tensor rois,
                                  const Tensor pts, const Tensor pts_feature,
                                  Tensor argmax, Tensor pts_idx_of_voxels,
                                  Tensor pooled_features, int pool_method);

void roiaware_pool3d_backward_impl(int boxes_num, int out_x, int out_y,
                                   int out_z, int channels,
                                   int max_pts_each_voxel,
                                   const Tensor pts_idx_of_voxels,
                                   const Tensor argmax, const Tensor grad_out,
                                   Tensor grad_in, int pool_method);

// Associate the CUDA wrappers with the *_impl entry points.
// NOTE(review): REGISTER_DEVICE_IMPL is defined outside this section.
REGISTER_DEVICE_IMPL(roiaware_pool3d_forward_impl, CUDA,
                     roiaware_pool3d_forward_cuda);
REGISTER_DEVICE_IMPL(roiaware_pool3d_backward_impl, CUDA,
                     roiaware_pool3d_backward_cuda);

// Forward declaration of the RoI point 3D pooling kernel launcher (definition
// is not in this part of the file). The non-const tensors are pooled_features
// and pooled_empty_flag.
void RoIPointPool3dForwardCUDAKernelLauncher(
    int batch_size, int pts_num, int boxes_num, int feature_in_len,
    int sampled_pts_num, const Tensor xyz, const Tensor boxes3d,
    const Tensor pts_feature, Tensor pooled_features, Tensor pooled_empty_flag);

// CUDA wrapper for the RoI point 3D pooling forward pass: forwards every
// argument unchanged, in the same order, to
// RoIPointPool3dForwardCUDAKernelLauncher. The non-const tensors handed over
// are pooled_features and pooled_empty_flag.
void roipoint_pool3d_forward_cuda(int batch_size, int pts_num, int boxes_num,
                                  int feature_in_len, int sampled_pts_num,
                                  const Tensor xyz, const Tensor boxes3d,
                                  const Tensor pts_feature,
                                  Tensor pooled_features,
                                  Tensor pooled_empty_flag) {
  RoIPointPool3dForwardCUDAKernelLauncher(
      batch_size, pts_num, boxes_num, feature_in_len, sampled_pts_num, xyz,
      boxes3d, pts_feature, pooled_features, pooled_empty_flag);
}

// Declaration only; same parameter list as roipoint_pool3d_forward_cuda.
void roipoint_pool3d_forward_impl(int batch_size, int pts_num, int boxes_num,
                                  int feature_in_len, int sampled_pts_num,
                                  const Tensor xyz, const Tensor boxes3d,
                                  const Tensor pts_feature,
                                  Tensor pooled_features,
                                  Tensor pooled_empty_flag);
// Associates the CUDA wrapper with the *_impl entry point.
// NOTE(review): REGISTER_DEVICE_IMPL is defined outside this section.
REGISTER_DEVICE_IMPL(roipoint_pool3d_forward_impl, CUDA,
                     roipoint_pool3d_forward_cuda);

// Forward declarations of the RoI Pool kernel launchers (definitions are not
// in this part of the file). Tensor order differs between the two: forward is
// (input, rois, output, argmax), backward is (grad_output, rois, argmax,
// grad_input).
void ROIPoolForwardCUDAKernelLauncher(Tensor input, Tensor rois, Tensor output,
                                      Tensor argmax, int pooled_height,
                                      int pooled_width, float spatial_scale);

void ROIPoolBackwardCUDAKernelLauncher(Tensor grad_output, Tensor rois,
                                       Tensor argmax, Tensor grad_input,
                                       int pooled_height, int pooled_width,
                                       float spatial_scale);

// CUDA wrapper for the RoI Pool forward pass: forwards every argument
// unchanged, in the same order, to ROIPoolForwardCUDAKernelLauncher.
void roi_pool_forward_cuda(Tensor input, Tensor rois, Tensor output,
                           Tensor argmax, int pooled_height, int pooled_width,
                           float spatial_scale) {
  ROIPoolForwardCUDAKernelLauncher(input, rois, output, argmax, pooled_height,
                                   pooled_width, spatial_scale);
}

// CUDA wrapper for the RoI Pool backward pass: forwards every argument
// unchanged, in the same order, to ROIPoolBackwardCUDAKernelLauncher.
void roi_pool_backward_cuda(Tensor grad_output, Tensor rois, Tensor argmax,
                            Tensor grad_input, int pooled_height,
                            int pooled_width, float spatial_scale) {
  ROIPoolBackwardCUDAKernelLauncher(grad_output, rois, argmax, grad_input,
                                    pooled_height, pooled_width, spatial_scale);
}

// Declarations of the *_impl entry points; same parameter lists as
// roi_pool_forward_cuda / roi_pool_backward_cuda.
void roi_pool_forward_impl(Tensor input, Tensor rois, Tensor output,
                           Tensor argmax, int pooled_height, int pooled_width,
                           float spatial_scale);
void roi_pool_backward_impl(Tensor grad_output, Tensor rois, Tensor argmax,
                            Tensor grad_input, int pooled_height,
                            int pooled_width, float spatial_scale);
// Associate the CUDA wrappers with the *_impl entry points.
// NOTE(review): REGISTER_DEVICE_IMPL is defined outside this section.
REGISTER_DEVICE_IMPL(roi_pool_forward_impl, CUDA, roi_pool_forward_cuda);
REGISTER_DEVICE_IMPL(roi_pool_backward_impl, CUDA, roi_pool_backward_cuda);

// Reduction selector taken by the dynamic point-to-voxel functions below.
// The numeric values are pinned explicitly.
// NOTE(review): presumably they must match the definition used by the kernel
// sources and callers -- confirm before renumbering.
typedef enum { SUM = 0, MEAN = 1, MAX = 2 } reduce_t;

// Forward declarations of the dynamic point-to-voxel kernel launchers
// (definitions are not in this part of the file).

// Forward: returns a vector of tensors.
std::vector<at::Tensor> DynamicPointToVoxelForwardCUDAKernelLauncher(
    const at::Tensor& feats, const at::Tensor& coors,
    const reduce_t reduce_type);

// Backward: grad_feats is the only non-const reference.
void DynamicPointToVoxelBackwardCUDAKernelLauncher(
    at::Tensor& grad_feats, const at::Tensor& grad_reduced_feats,
    const at::Tensor& feats, const at::Tensor& reduced_feats,
    const at::Tensor& coors_map, const at::Tensor& reduce_count,
    const reduce_t reduce_type);

// CUDA wrapper for the dynamic point-to-voxel forward pass: forwards feats,
// coors and reduce_type unchanged to
// DynamicPointToVoxelForwardCUDAKernelLauncher and returns its result vector.
std::vector<torch::Tensor> dynamic_point_to_voxel_forward_cuda(
    const torch::Tensor& feats, const torch::Tensor& coors,
    const reduce_t reduce_type) {
  return DynamicPointToVoxelForwardCUDAKernelLauncher(feats, coors,
                                                      reduce_type);
}

// CUDA wrapper for the dynamic point-to-voxel backward pass: forwards every
// argument unchanged, in the same order, to
// DynamicPointToVoxelBackwardCUDAKernelLauncher (coors_idx goes into the
// launcher's coors_map slot). grad_feats is the only non-const reference.
void dynamic_point_to_voxel_backward_cuda(
    torch::Tensor& grad_feats, const torch::Tensor& grad_reduced_feats,
    const torch::Tensor& feats, const torch::Tensor& reduced_feats,
    const torch::Tensor& coors_idx, const torch::Tensor& reduce_count,
    const reduce_t reduce_type) {
  DynamicPointToVoxelBackwardCUDAKernelLauncher(grad_feats, grad_reduced_feats,
                                                feats, reduced_feats, coors_idx,
                                                reduce_count, reduce_type);
}

// Declarations of the *_impl entry points; same signatures as
// dynamic_point_to_voxel_forward_cuda / dynamic_point_to_voxel_backward_cuda.
std::vector<torch::Tensor> dynamic_point_to_voxel_forward_impl(
    const torch::Tensor& feats, const torch::Tensor& coors,
    const reduce_t reduce_type);

void dynamic_point_to_voxel_backward_impl(
    torch::Tensor& grad_feats, const torch::Tensor& grad_reduced_feats,
    const torch::Tensor& feats, const torch::Tensor& reduced_feats,
    const torch::Tensor& coors_idx, const torch::Tensor& reduce_count,
    const reduce_t reduce_type);

// Associate the CUDA wrappers with the *_impl entry points.
// NOTE(review): REGISTER_DEVICE_IMPL is defined outside this section.
REGISTER_DEVICE_IMPL(dynamic_point_to_voxel_forward_impl, CUDA,
                     dynamic_point_to_voxel_forward_cuda);
REGISTER_DEVICE_IMPL(dynamic_point_to_voxel_backward_impl, CUDA,
                     dynamic_point_to_voxel_backward_cuda);

// Forward declarations of the five SyncBN kernel launchers (definitions are
// not in this part of the file). Each is wrapped one-to-one by a
// sync_bn_*_cuda function below.

// Non-const tensor: mean.
void SyncBNForwardMeanCUDAKernelLauncher(const Tensor input, Tensor mean);

// Non-const tensor: var.
void SyncBNForwardVarCUDAKernelLauncher(const Tensor input, const Tensor mean,
                                        Tensor var);

// Non-const tensors: running_mean, running_var, norm, std, output.
void SyncBNForwardOutputCUDAKernelLauncher(
    const Tensor input, const Tensor mean, const Tensor var,
    Tensor running_mean, Tensor running_var, const Tensor weight,
    const Tensor bias, Tensor norm, Tensor std, Tensor output, float eps,
    float momentum, int group_size);

// Non-const tensors: grad_weight, grad_bias.
void SyncBNBackwardParamCUDAKernelLauncher(const Tensor grad_output,
                                           const Tensor norm,
                                           Tensor grad_weight,
                                           Tensor grad_bias);

// Non-const tensor: grad_input.
void SyncBNBackwardDataCUDAKernelLauncher(const Tensor grad_output,
                                          const Tensor weight,
                                          const Tensor grad_weight,
                                          const Tensor grad_bias,
                                          const Tensor norm, const Tensor std,
                                          Tensor grad_input);

// CUDA wrapper: forwards input and mean unchanged to
// SyncBNForwardMeanCUDAKernelLauncher.
void sync_bn_forward_mean_cuda(const Tensor input, Tensor mean) {
  SyncBNForwardMeanCUDAKernelLauncher(input, mean);
}

// CUDA wrapper: forwards input, mean and var unchanged to
// SyncBNForwardVarCUDAKernelLauncher.
void sync_bn_forward_var_cuda(const Tensor input, const Tensor mean,
                              Tensor var) {
  SyncBNForwardVarCUDAKernelLauncher(input, mean, var);
}

// CUDA wrapper: forwards every argument unchanged, in the same order, to
// SyncBNForwardOutputCUDAKernelLauncher.
void sync_bn_forward_output_cuda(const Tensor input, const Tensor mean,
                                 const Tensor var, Tensor running_mean,
                                 Tensor running_var, const Tensor weight,
                                 const Tensor bias, Tensor norm, Tensor std,
                                 Tensor output, float eps, float momentum,
                                 int group_size) {
  SyncBNForwardOutputCUDAKernelLauncher(input, mean, var, running_mean,
                                        running_var, weight, bias, norm, std,
                                        output, eps, momentum, group_size);
}

// CUDA wrapper: forwards every argument unchanged, in the same order, to
// SyncBNBackwardParamCUDAKernelLauncher.
void sync_bn_backward_param_cuda(const Tensor grad_output, const Tensor norm,
                                 Tensor grad_weight, Tensor grad_bias) {
  SyncBNBackwardParamCUDAKernelLauncher(grad_output, norm, grad_weight,
                                        grad_bias);
}

// CUDA wrapper: forwards every argument unchanged, in the same order, to
// SyncBNBackwardDataCUDAKernelLauncher.
void sync_bn_backward_data_cuda(const Tensor grad_output, const Tensor weight,
                                const Tensor grad_weight,
                                const Tensor grad_bias, const Tensor norm,
                                const Tensor std, Tensor grad_input) {
  SyncBNBackwardDataCUDAKernelLauncher(grad_output, weight, grad_weight,
                                       grad_bias, norm, std, grad_input);
}

// Declarations of the five SyncBN *_impl entry points; each has the same
// parameter list as the sync_bn_*_cuda wrapper of the same name.
void sync_bn_forward_mean_impl(const Tensor input, Tensor mean);

void sync_bn_forward_var_impl(const Tensor input, const Tensor mean,
                              Tensor var);

void sync_bn_forward_output_impl(const Tensor input, const Tensor mean,
                                 const Tensor var, Tensor running_mean,
                                 Tensor running_var, const Tensor weight,
                                 const Tensor bias, Tensor norm, Tensor std,
                                 Tensor output, float eps, float momentum,
                                 int group_size);

void sync_bn_backward_param_impl(const Tensor grad_output, const Tensor norm,
                                 Tensor grad_weight, Tensor grad_bias);

void sync_bn_backward_data_impl(const Tensor grad_output, const Tensor weight,
                                const Tensor grad_weight,
                                const Tensor grad_bias, const Tensor norm,
                                const Tensor std, Tensor grad_input);

// Associate each CUDA wrapper with its *_impl entry point.
// NOTE(review): REGISTER_DEVICE_IMPL is defined outside this section.
REGISTER_DEVICE_IMPL(sync_bn_forward_mean_impl, CUDA,
                     sync_bn_forward_mean_cuda);
REGISTER_DEVICE_IMPL(sync_bn_forward_var_impl, CUDA, sync_bn_forward_var_cuda);
REGISTER_DEVICE_IMPL(sync_bn_forward_output_impl, CUDA,
                     sync_bn_forward_output_cuda);
REGISTER_DEVICE_IMPL(sync_bn_backward_param_impl, CUDA,
                     sync_bn_backward_param_cuda);
REGISTER_DEVICE_IMPL(sync_bn_backward_data_impl, CUDA,
                     sync_bn_backward_data_cuda);

// Prototypes of the three-interpolate kernel launchers (no definitions in
// this part of the file). Note the differing order of the last two integer
// parameters: forward takes (b, c, m, n), backward takes (b, c, n, m).
void ThreeInterpolateForwardCUDAKernelLauncher(int b, int c, int m, int n,
                                               const Tensor points,
                                               const Tensor idx,
                                               const Tensor weight, Tensor out);

void ThreeInterpolateBackwardCUDAKernelLauncher(int b, int c, int n, int m,
                                                const Tensor grad_out,
                                                const Tensor idx,
                                                const Tensor weight,
                                                Tensor grad_points);

// CUDA-side wrapper for three_interpolate forward: passes every argument,
// unchanged and in the same order (b, c, m, n, ...), to
// ThreeInterpolateForwardCUDAKernelLauncher.
// NOTE(review): `out` is the only non-const tensor and is presumably the
// output -- confirm in the launcher.
void three_interpolate_forward_cuda(int b, int c, int m, int n,
                                    const Tensor points, const Tensor idx,
                                    const Tensor weight, Tensor out) {
  ThreeInterpolateForwardCUDAKernelLauncher(b, c, m, n, points, idx, weight,
                                            out);
};

// CUDA-side wrapper for three_interpolate backward: passes every argument,
// unchanged and in the same order (b, c, n, m, ...), to
// ThreeInterpolateBackwardCUDAKernelLauncher. The integer order (n before m)
// differs from the forward wrapper's (m before n).
// NOTE(review): `grad_points` is the only non-const tensor and is presumably
// the output -- confirm in the launcher.
void three_interpolate_backward_cuda(int b, int c, int n, int m,
                                     const Tensor grad_out, const Tensor idx,
                                     const Tensor weight, Tensor grad_points) {
  ThreeInterpolateBackwardCUDAKernelLauncher(b, c, n, m, grad_out, idx, weight,
                                             grad_points);
};

// Prototypes of the three_interpolate *_impl entry points, needed so the
// names can be used in the registrations below.
void three_interpolate_forward_impl(int b, int c, int m, int n,
                                    const Tensor points, const Tensor idx,
                                    const Tensor weight, Tensor out);

void three_interpolate_backward_impl(int b, int c, int n, int m,
                                     const Tensor grad_out, const Tensor idx,
                                     const Tensor weight, Tensor grad_points);
// Pair each *_impl entry point with its *_cuda wrapper for the CUDA device.
// NOTE(review): REGISTER_DEVICE_IMPL is defined outside this view.
REGISTER_DEVICE_IMPL(three_interpolate_forward_impl, CUDA,
                     three_interpolate_forward_cuda);
REGISTER_DEVICE_IMPL(three_interpolate_backward_impl, CUDA,
                     three_interpolate_backward_cuda);

// Prototype of the three-NN forward kernel launcher (no definition in this
// part of the file).
void ThreeNNForwardCUDAKernelLauncher(int b, int n, int m, const Tensor unknown,
                                      const Tensor known, Tensor dist2,
                                      Tensor idx);

// CUDA-side wrapper for three_nn forward: passes every argument, unchanged
// and in the same order, to ThreeNNForwardCUDAKernelLauncher.
// NOTE(review): `dist2` and `idx` are the non-const tensors and are presumably
// the outputs -- confirm in the launcher.
void three_nn_forward_cuda(int b, int n, int m, const Tensor unknown,
                           const Tensor known, Tensor dist2, Tensor idx) {
  ThreeNNForwardCUDAKernelLauncher(b, n, m, unknown, known, dist2, idx);
};

// Prototype of the three_nn *_impl entry point, followed by the registration
// that pairs it with three_nn_forward_cuda for the CUDA device.
// NOTE(review): REGISTER_DEVICE_IMPL is defined outside this view.
void three_nn_forward_impl(int b, int n, int m, const Tensor unknown,
                           const Tensor known, Tensor dist2, Tensor idx);
REGISTER_DEVICE_IMPL(three_nn_forward_impl, CUDA, three_nn_forward_cuda);

// Prototypes of the TIN-shift kernel launchers (no definitions in this part
// of the file).
void TINShiftForwardCUDAKernelLauncher(Tensor input, Tensor shift,
                                       Tensor output);

void TINShiftBackwardCUDAKernelLauncher(Tensor grad_output, Tensor shift,
                                        Tensor grad_input);

// CUDA-side wrapper for tin_shift forward: passes (input, shift, output)
// unchanged to TINShiftForwardCUDAKernelLauncher.
void tin_shift_forward_cuda(Tensor input, Tensor shift, Tensor output) {
  TINShiftForwardCUDAKernelLauncher(input, shift, output);
}

// CUDA-side wrapper for tin_shift backward: passes
// (grad_output, shift, grad_input) unchanged to
// TINShiftBackwardCUDAKernelLauncher.
void tin_shift_backward_cuda(Tensor grad_output, Tensor shift,
                             Tensor grad_input) {
  TINShiftBackwardCUDAKernelLauncher(grad_output, shift, grad_input);
}

// Prototypes of the tin_shift *_impl entry points, followed by the
// registrations that pair them with the *_cuda wrappers for the CUDA device.
// NOTE(review): REGISTER_DEVICE_IMPL is defined outside this view.
void tin_shift_forward_impl(Tensor input, Tensor shift, Tensor output);
void tin_shift_backward_impl(Tensor grad_output, Tensor shift,
                             Tensor grad_input);
REGISTER_DEVICE_IMPL(tin_shift_forward_impl, CUDA, tin_shift_forward_cuda);
REGISTER_DEVICE_IMPL(tin_shift_backward_impl, CUDA, tin_shift_backward_cuda);

// Prototype of upfirdn2d_op (no definition in this part of the file). Unlike
// most ops in this file there is no separate *_cuda wrapper: upfirdn2d_op
// itself is what gets registered below.
torch::Tensor upfirdn2d_op(const torch::Tensor& input,
                           const torch::Tensor& kernel, int up_x, int up_y,
                           int down_x, int down_y, int pad_x0, int pad_x1,
                           int pad_y0, int pad_y1);

// Prototype of the *_impl entry point with the identical signature, followed
// by the registration pairing it with upfirdn2d_op for the CUDA device.
// NOTE(review): REGISTER_DEVICE_IMPL is defined outside this view.
torch::Tensor upfirdn2d_op_impl(const torch::Tensor& input,
                                const torch::Tensor& kernel, int up_x, int up_y,
                                int down_x, int down_y, int pad_x0, int pad_x1,
                                int pad_y0, int pad_y1);
REGISTER_DEVICE_IMPL(upfirdn2d_op_impl, CUDA, upfirdn2d_op);

// Prototypes of the voxelization kernel launchers (no definitions in this
// part of the file). Both declare NDim with a default of 3; the hard variant
// returns an int, the dynamic variant returns nothing.
int HardVoxelizeForwardCUDAKernelLauncher(
    const at::Tensor& points, at::Tensor& voxels, at::Tensor& coors,
    at::Tensor& num_points_per_voxel, const std::vector<float> voxel_size,
    const std::vector<float> coors_range, const int max_points,
    const int max_voxels, const int NDim = 3);

void DynamicVoxelizeForwardCUDAKernelLauncher(
    const at::Tensor& points, at::Tensor& coors,
    const std::vector<float> voxel_size, const std::vector<float> coors_range,
    const int NDim = 3);

// CUDA-side wrapper for hard voxelization: passes every argument, unchanged
// and in the same order, to HardVoxelizeForwardCUDAKernelLauncher and returns
// that launcher's int result as-is. NDim has no default here (the launcher's
// prototype declares one), so callers of this wrapper must supply it.
// NOTE(review): the meaning of the returned int is defined by the launcher,
// which is not visible here.
int hard_voxelize_forward_cuda(const at::Tensor& points, at::Tensor& voxels,
                               at::Tensor& coors,
                               at::Tensor& num_points_per_voxel,
                               const std::vector<float> voxel_size,
                               const std::vector<float> coors_range,
                               const int max_points, const int max_voxels,
                               const int NDim) {
  return HardVoxelizeForwardCUDAKernelLauncher(
      points, voxels, coors, num_points_per_voxel, voxel_size, coors_range,
      max_points, max_voxels, NDim);
};

// CUDA-side wrapper for dynamic voxelization: passes every argument,
// unchanged and in the same order, to DynamicVoxelizeForwardCUDAKernelLauncher.
// NDim has no default here, so callers of this wrapper must supply it.
void dynamic_voxelize_forward_cuda(const at::Tensor& points, at::Tensor& coors,
                                   const std::vector<float> voxel_size,
                                   const std::vector<float> coors_range,
                                   const int NDim) {
  DynamicVoxelizeForwardCUDAKernelLauncher(points, coors, voxel_size,
                                           coors_range, NDim);
};

// Prototypes of the voxelization *_impl entry points, needed so the names
// can be used in the registrations below.
int hard_voxelize_forward_impl(const at::Tensor& points, at::Tensor& voxels,
                               at::Tensor& coors,
                               at::Tensor& num_points_per_voxel,
                               const std::vector<float> voxel_size,
                               const std::vector<float> coors_range,
                               const int max_points, const int max_voxels,
                               const int NDim);

void dynamic_voxelize_forward_impl(const at::Tensor& points, at::Tensor& coors,
                                   const std::vector<float> voxel_size,
                                   const std::vector<float> coors_range,
                                   const int NDim);

// Pair each *_impl entry point with its *_cuda wrapper for the CUDA device.
// NOTE(review): REGISTER_DEVICE_IMPL is defined outside this view.
REGISTER_DEVICE_IMPL(hard_voxelize_forward_impl, CUDA,
                     hard_voxelize_forward_cuda);
REGISTER_DEVICE_IMPL(dynamic_voxelize_forward_impl, CUDA,
                     dynamic_voxelize_forward_cuda);

// Prototypes of the rotated-feature-align kernel launchers (no definitions
// in this part of the file).
void RotatedFeatureAlignForwardCUDAKernelLauncher(const Tensor features,
                                                  const Tensor best_bboxes,
                                                  const float spatial_scale,
                                                  const int points,
                                                  Tensor output);

void RotatedFeatureAlignBackwardCUDAKernelLauncher(const Tensor top_grad,
                                                   const Tensor best_bboxes,
                                                   const float spatial_scale,
                                                   const int points,
                                                   Tensor bottom_grad);

// CUDA-side wrapper for rotated_feature_align forward: passes every argument,
// unchanged and in the same order, to
// RotatedFeatureAlignForwardCUDAKernelLauncher.
// NOTE(review): `output` is the only non-const tensor and is presumably
// written by the launcher -- confirm there.
void rotated_feature_align_forward_cuda(const Tensor features,
                                        const Tensor best_bboxes,
                                        const float spatial_scale,
                                        const int points, Tensor output) {
  RotatedFeatureAlignForwardCUDAKernelLauncher(features, best_bboxes,
                                               spatial_scale, points, output);
};

// CUDA-side wrapper for rotated_feature_align backward: passes every
// argument, unchanged and in the same order, to
// RotatedFeatureAlignBackwardCUDAKernelLauncher.
// NOTE(review): `bottom_grad` is the only non-const tensor and is presumably
// written by the launcher -- confirm there.
void rotated_feature_align_backward_cuda(const Tensor top_grad,
                                         const Tensor best_bboxes,
                                         const float spatial_scale,
                                         const int points, Tensor bottom_grad) {
  RotatedFeatureAlignBackwardCUDAKernelLauncher(
      top_grad, best_bboxes, spatial_scale, points, bottom_grad);
};

// Prototypes of the rotated_feature_align *_impl entry points, needed so the
// names can be used in the registrations below.
void rotated_feature_align_forward_impl(const Tensor features,
                                        const Tensor best_bboxes,
                                        const float spatial_scale,
                                        const int points, Tensor output);

void rotated_feature_align_backward_impl(const Tensor top_grad,
                                         const Tensor best_bboxes,
                                         const float spatial_scale,
                                         const int points, Tensor bottom_grad);

// Pair each *_impl entry point with its *_cuda wrapper for the CUDA device.
// NOTE(review): REGISTER_DEVICE_IMPL is defined outside this view.
REGISTER_DEVICE_IMPL(rotated_feature_align_forward_impl, CUDA,
                     rotated_feature_align_forward_cuda);
REGISTER_DEVICE_IMPL(rotated_feature_align_backward_impl, CUDA,
                     rotated_feature_align_backward_cuda);

// Prototype of the points-in-polygons kernel launcher (no definition in this
// part of the file). Its parameter order is (points, polygons, rows, cols,
// output), i.e. the output tensor comes last.
void PointsInPolygonsForwardCUDAKernelLauncher(const at::Tensor points,
                                               const at::Tensor polygons,
                                               const int rows, const int cols,
                                               at::Tensor output);

// CUDA-side wrapper for points_in_polygons forward. The wrapper receives
// (points, polygons, output, rows, cols) but the launcher expects
// (points, polygons, rows, cols, output), so `output` is moved to the end of
// the argument list when forwarding; values are otherwise passed unchanged.
void points_in_polygons_forward_cuda(const Tensor points, const Tensor polygons,
                                     Tensor output, const int rows,
                                     const int cols) {
  PointsInPolygonsForwardCUDAKernelLauncher(points, polygons, rows, cols,
                                            output);
};

// Prototype of the points_in_polygons *_impl entry point (same parameter
// order as the *_cuda wrapper), followed by the registration that pairs the
// two for the CUDA device.
// NOTE(review): REGISTER_DEVICE_IMPL is defined outside this view.
void points_in_polygons_forward_impl(const Tensor points, const Tensor polygons,
                                     Tensor output, const int rows,
                                     const int cols);

REGISTER_DEVICE_IMPL(points_in_polygons_forward_impl, CUDA,
                     points_in_polygons_forward_cuda);

// Prototype of the min-area-polygons kernel launcher (no definition in this
// part of the file).
void MinAreaPolygonsCUDAKernelLauncher(const Tensor pointsets, Tensor polygons);

// CUDA-side wrapper for min_area_polygons: passes (pointsets, polygons)
// unchanged to MinAreaPolygonsCUDAKernelLauncher.
// NOTE(review): `polygons` is the non-const tensor and is presumably the
// output -- confirm in the launcher.
void min_area_polygons_cuda(const Tensor pointsets, Tensor polygons) {
  MinAreaPolygonsCUDAKernelLauncher(pointsets, polygons);
}

// Prototype of the min_area_polygons *_impl entry point, followed by the
// registration that pairs it with min_area_polygons_cuda for the CUDA device.
// NOTE(review): REGISTER_DEVICE_IMPL is defined outside this view.
void min_area_polygons_impl(const Tensor pointsets, Tensor polygons);

REGISTER_DEVICE_IMPL(min_area_polygons_impl, CUDA, min_area_polygons_cuda);

// Prototypes of the active-rotated-filter kernel launchers (no definitions
// in this part of the file).
void ActiveRotatedFilterForwardCUDAKernelLauncher(const Tensor input,
                                                  const Tensor indices,
                                                  Tensor output);

void ActiveRotatedFilterBackwardCUDAKernelLauncher(const Tensor grad_out,
                                                   const Tensor indices,
                                                   Tensor grad_in);

// CUDA-side wrapper for active_rotated_filter forward: passes
// (input, indices, output) unchanged to
// ActiveRotatedFilterForwardCUDAKernelLauncher.
void active_rotated_filter_forward_cuda(const Tensor input,
                                        const Tensor indices, Tensor output) {
  ActiveRotatedFilterForwardCUDAKernelLauncher(input, indices, output);
};

// CUDA-side wrapper for active_rotated_filter backward: passes
// (grad_out, indices, grad_in) unchanged to
// ActiveRotatedFilterBackwardCUDAKernelLauncher.
void active_rotated_filter_backward_cuda(const Tensor grad_out,
                                         const Tensor indices, Tensor grad_in) {
  ActiveRotatedFilterBackwardCUDAKernelLauncher(grad_out, indices, grad_in);
};

// Prototypes of the active_rotated_filter *_impl entry points, needed so the
// names can be used in the registrations below.
void active_rotated_filter_forward_impl(const Tensor input,
                                        const Tensor indices, Tensor output);

void active_rotated_filter_backward_impl(const Tensor grad_out,
                                         const Tensor indices, Tensor grad_in);

// Pair each *_impl entry point with its *_cuda wrapper for the CUDA device.
// NOTE(review): REGISTER_DEVICE_IMPL is defined outside this view.
REGISTER_DEVICE_IMPL(active_rotated_filter_forward_impl, CUDA,
                     active_rotated_filter_forward_cuda);
REGISTER_DEVICE_IMPL(active_rotated_filter_backward_impl, CUDA,
                     active_rotated_filter_backward_cuda);

// Prototypes of the convex IoU / GIoU kernel launchers (no definitions in
// this part of the file).
void ConvexIoUCUDAKernelLauncher(const Tensor pointsets, const Tensor polygons,
                                 Tensor ious);

void ConvexGIoUCUDAKernelLauncher(const Tensor pointsets, const Tensor polygons,
                                  Tensor output);

// CUDA-side wrapper for convex_iou: passes (pointsets, polygons, ious)
// unchanged to ConvexIoUCUDAKernelLauncher.
void convex_iou_cuda(const Tensor pointsets, const Tensor polygons,
                     Tensor ious) {
  ConvexIoUCUDAKernelLauncher(pointsets, polygons, ious);
}

// CUDA-side wrapper for convex_giou: passes (pointsets, polygons, output)
// unchanged to ConvexGIoUCUDAKernelLauncher.
void convex_giou_cuda(const Tensor pointsets, const Tensor polygons,
                      Tensor output) {
  ConvexGIoUCUDAKernelLauncher(pointsets, polygons, output);
}

// Prototypes of the convex_iou / convex_giou *_impl entry points, needed so
// the names can be used in the registrations below.
void convex_iou_impl(const Tensor pointsets, const Tensor polygons,
                     Tensor ious);

void convex_giou_impl(const Tensor pointsets, const Tensor polygons,
                      Tensor output);

// Pair each *_impl entry point with its *_cuda wrapper for the CUDA device.
// NOTE(review): REGISTER_DEVICE_IMPL is defined outside this view.
REGISTER_DEVICE_IMPL(convex_iou_impl, CUDA, convex_iou_cuda);
REGISTER_DEVICE_IMPL(convex_giou_impl, CUDA, convex_giou_cuda);


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/deform_conv_cuda.cu
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "deform_conv_cuda_kernel.cuh"
#include "pytorch_cuda_helper.hpp"

// Launches deformable_im2col_gpu_kernel on the current CUDA stream, reading
// from data_im / data_offset and writing into data_col. The element type is
// dispatched on data_im's scalar type (floating types and half).
void deformable_im2col_cuda(Tensor data_im, Tensor data_offset,
                            const int channels, const int height,
                            const int width, const int ksize_h,
                            const int ksize_w, const int pad_h, const int pad_w,
                            const int stride_h, const int stride_w,
                            const int dilation_h, const int dilation_w,
                            const int parallel_imgs, const int deformable_group,
                            Tensor data_col) {
  // Output extent along one axis for the given padding/dilation/stride.
  const auto out_extent = [](int in_size, int pad, int dilation, int ksize,
                             int stride) {
    return (in_size + 2 * pad - (dilation * (ksize - 1) + 1)) / stride + 1;
  };
  const int out_h = out_extent(height, pad_h, dilation_h, ksize_h, stride_h);
  const int out_w = out_extent(width, pad_w, dilation_w, ksize_w, stride_w);

  // One work item per (channel, output row, output column, image).
  // todo (from the original author): check parallel_imgs is correctly
  // passed in.
  const int work_items = channels * out_h * out_w * parallel_imgs;
  const int channels_per_group = channels / deformable_group;

  cudaStream_t stream = at::cuda::getCurrentCUDAStream();

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      data_im.scalar_type(), "deformable_im2col_gpu", ([&] {
        const scalar_t *im_ptr = data_im.data_ptr<scalar_t>();
        const scalar_t *offset_ptr = data_offset.data_ptr<scalar_t>();
        scalar_t *col_ptr = data_col.data_ptr<scalar_t>();

        deformable_im2col_gpu_kernel<<<GET_BLOCKS(work_items),
                                       THREADS_PER_BLOCK, 0, stream>>>(
            work_items, im_ptr, offset_ptr, height, width, ksize_h, ksize_w,
            pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w,
            channels_per_group, parallel_imgs, channels, deformable_group,
            out_h, out_w, col_ptr);
      }));
  // Surface launch-configuration errors immediately.
  AT_CUDA_CHECK(cudaGetLastError());
}

// Launches deformable_col2im_gpu_kernel on the current CUDA stream, reading
// from data_col / data_offset and writing into grad_im. The element type is
// dispatched on data_col's scalar type (floating types and half).
void deformable_col2im_cuda(Tensor data_col, Tensor data_offset,
                            const int channels, const int height,
                            const int width, const int ksize_h,
                            const int ksize_w, const int pad_h, const int pad_w,
                            const int stride_h, const int stride_w,
                            const int dilation_h, const int dilation_w,
                            const int parallel_imgs, const int deformable_group,
                            Tensor grad_im) {
  // Output extent along one axis for the given padding/dilation/stride.
  const auto out_extent = [](int in_size, int pad, int dilation, int ksize,
                             int stride) {
    return (in_size + 2 * pad - (dilation * (ksize - 1) + 1)) / stride + 1;
  };
  const int out_h = out_extent(height, pad_h, dilation_h, ksize_h, stride_h);
  const int out_w = out_extent(width, pad_w, dilation_w, ksize_w, stride_w);

  // One work item per (channel, kernel tap, output position, image).
  // todo (from the original author): make sure parallel_imgs is passed in
  // correctly.
  const int work_items =
      channels * ksize_h * ksize_w * out_h * out_w * parallel_imgs;
  const int channels_per_group = channels / deformable_group;

  cudaStream_t stream = at::cuda::getCurrentCUDAStream();

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      data_col.scalar_type(), "deformable_col2im_gpu", ([&] {
        const scalar_t *col_ptr = data_col.data_ptr<scalar_t>();
        const scalar_t *offset_ptr = data_offset.data_ptr<scalar_t>();
        scalar_t *grad_im_ptr = grad_im.data_ptr<scalar_t>();

        deformable_col2im_gpu_kernel<<<GET_BLOCKS(work_items),
                                       THREADS_PER_BLOCK, 0, stream>>>(
            work_items, col_ptr, offset_ptr, channels, height, width, ksize_h,
            ksize_w, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w,
            channels_per_group, parallel_imgs, deformable_group, out_h, out_w,
            grad_im_ptr);
      }));
  // Surface launch-configuration errors immediately.
  AT_CUDA_CHECK(cudaGetLastError());
}

// Launches deformable_col2im_coord_gpu_kernel on the current CUDA stream,
// reading from data_col / data_im / data_offset and writing into grad_offset.
// The element type is dispatched on data_col's scalar type (floating types
// and half).
void deformable_col2im_coord_cuda(
    Tensor data_col, Tensor data_im, Tensor data_offset, const int channels,
    const int height, const int width, const int ksize_h, const int ksize_w,
    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
    const int dilation_h, const int dilation_w, const int parallel_imgs,
    const int deformable_group, Tensor grad_offset) {
  // Output extent along one axis for the given padding/dilation/stride.
  const auto out_extent = [](int in_size, int pad, int dilation, int ksize,
                             int stride) {
    return (in_size + 2 * pad - (dilation * (ksize - 1) + 1)) / stride + 1;
  };
  const int out_h = out_extent(height, pad_h, dilation_h, ksize_h, stride_h);
  const int out_w = out_extent(width, pad_w, dilation_w, ksize_w, stride_w);

  // One work item per (output position, 2 values per kernel tap, group,
  // image).
  const int work_items =
      out_h * out_w * 2 * ksize_h * ksize_w * deformable_group * parallel_imgs;
  // Unlike the other two launchers in this file, the per-group channel count
  // passed to this kernel includes the ksize_h * ksize_w factor.
  const int channels_per_group =
      channels * ksize_h * ksize_w / deformable_group;

  cudaStream_t stream = at::cuda::getCurrentCUDAStream();

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      data_col.scalar_type(), "deformable_col2im_coord_gpu", ([&] {
        const scalar_t *col_ptr = data_col.data_ptr<scalar_t>();
        const scalar_t *im_ptr = data_im.data_ptr<scalar_t>();
        const scalar_t *offset_ptr = data_offset.data_ptr<scalar_t>();
        scalar_t *grad_offset_ptr = grad_offset.data_ptr<scalar_t>();

        deformable_col2im_coord_gpu_kernel<<<GET_BLOCKS(work_items),
                                             THREADS_PER_BLOCK, 0, stream>>>(
            work_items, col_ptr, im_ptr, offset_ptr, channels, height, width,
            ksize_h, ksize_w, pad_h, pad_w, stride_h, stride_w, dilation_h,
            dilation_w, channels_per_group, parallel_imgs,
            2 * ksize_h * ksize_w * deformable_group, deformable_group, out_h,
            out_w, grad_offset_ptr);
      }));
  // Surface launch-configuration errors immediately.
  AT_CUDA_CHECK(cudaGetLastError());
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/deform_roi_pool_cuda.cu
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "deform_roi_pool_cuda_kernel.cuh"
#include "pytorch_cuda_helper.hpp"

// Launches deform_roi_pool_forward_cuda_kernel with one work item per element
// of `output`, on the current stream of `input`'s device. The element type is
// dispatched on input's scalar type (floating types and half); spatial_scale
// and gamma are cast to that type before being passed to the kernel.
void DeformRoIPoolForwardCUDAKernelLauncher(Tensor input, Tensor rois,
                                            Tensor offset, Tensor output,
                                            int pooled_height, int pooled_width,
                                            float spatial_scale,
                                            int sampling_ratio, float gamma) {
  const int n_outputs = output.numel();
  // Dimensions 1..3 of `input` are forwarded as channels / height / width.
  const int n_channels = input.size(1);
  const int in_height = input.size(2);
  const int in_width = input.size(3);

  at::cuda::CUDAGuard device_guard(input.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      input.scalar_type(), "deform_roi_pool_forward_cuda_kernel", [&] {
        scalar_t *input_ptr = input.data_ptr<scalar_t>();
        scalar_t *rois_ptr = rois.data_ptr<scalar_t>();
        scalar_t *offset_ptr = offset.data_ptr<scalar_t>();
        scalar_t *output_ptr = output.data_ptr<scalar_t>();
        const scalar_t scale = static_cast<scalar_t>(spatial_scale);
        const scalar_t gamma_t = static_cast<scalar_t>(gamma);

        deform_roi_pool_forward_cuda_kernel<scalar_t>
            <<<GET_BLOCKS(n_outputs), THREADS_PER_BLOCK, 0, stream>>>(
                n_outputs, input_ptr, rois_ptr, offset_ptr, output_ptr,
                pooled_height, pooled_width, scale, sampling_ratio, gamma_t,
                n_channels, in_height, in_width);
      });

  // Surface launch-configuration errors immediately.
  AT_CUDA_CHECK(cudaGetLastError());
}

// Launches deform_roi_pool_backward_cuda_kernel with one work item per
// element of `grad_output`, on the current stream of grad_output's device.
// The element type is dispatched on grad_output's scalar type (floating types
// and half); spatial_scale and gamma are cast to that type before being
// passed to the kernel.
void DeformRoIPoolBackwardCUDAKernelLauncher(
    Tensor grad_output, Tensor input, Tensor rois, Tensor offset,
    Tensor grad_input, Tensor grad_offset, int pooled_height, int pooled_width,
    float spatial_scale, int sampling_ratio, float gamma) {
  const int n_outputs = grad_output.numel();
  // Dimensions 1..3 of `grad_input` are forwarded as channels / height /
  // width.
  const int n_channels = grad_input.size(1);
  const int in_height = grad_input.size(2);
  const int in_width = grad_input.size(3);

  at::cuda::CUDAGuard device_guard(grad_output.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      grad_output.scalar_type(), "deform_roi_pool_backward_cuda_kernel", [&] {
        scalar_t *grad_output_ptr = grad_output.data_ptr<scalar_t>();
        scalar_t *input_ptr = input.data_ptr<scalar_t>();
        scalar_t *rois_ptr = rois.data_ptr<scalar_t>();
        scalar_t *offset_ptr = offset.data_ptr<scalar_t>();
        scalar_t *grad_input_ptr = grad_input.data_ptr<scalar_t>();
        scalar_t *grad_offset_ptr = grad_offset.data_ptr<scalar_t>();
        const scalar_t scale = static_cast<scalar_t>(spatial_scale);
        const scalar_t gamma_t = static_cast<scalar_t>(gamma);

        deform_roi_pool_backward_cuda_kernel<scalar_t>
            <<<GET_BLOCKS(n_outputs), THREADS_PER_BLOCK, 0, stream>>>(
                n_outputs, grad_output_ptr, input_ptr, rois_ptr, offset_ptr,
                grad_input_ptr, grad_offset_ptr, pooled_height, pooled_width,
                scale, sampling_ratio, gamma_t, n_channels, in_height,
                in_width);
      });

  // Surface launch-configuration errors immediately.
  AT_CUDA_CHECK(cudaGetLastError());
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/focal_loss_cuda.cu
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cuda_helper.hpp"
#include "sigmoid_focal_loss_cuda_kernel.cuh"
#include "softmax_focal_loss_cuda_kernel.cuh"

// Launches sigmoid_focal_loss_forward_cuda_kernel with one work item per
// element of `output`, on the current stream of `input`'s device. The element
// type is dispatched on input's scalar type (floating types and half);
// `target` is read as int64.
void SigmoidFocalLossForwardCUDAKernelLauncher(Tensor input, Tensor target,
                                               Tensor weight, Tensor output,
                                               const float gamma,
                                               const float alpha) {
  const int n_outputs = output.numel();
  // The size of dimension 1 of `input` is treated as the class count.
  const int n_classes = input.size(1);

  // Reject labels above the class count before launching. Note that a label
  // equal to the class count is accepted, and that reading the maximum with
  // .item() copies a value back to the host.
  AT_ASSERTM(target.max().item<int64_t>() <= (int64_t)n_classes,
             "target label should smaller or equal than num classes");

  at::cuda::CUDAGuard device_guard(input.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      input.scalar_type(), "sigmoid_focal_loss_forward_cuda_kernel", [&] {
        scalar_t *input_ptr = input.data_ptr<scalar_t>();
        int64_t *target_ptr = target.data_ptr<int64_t>();
        scalar_t *weight_ptr = weight.data_ptr<scalar_t>();
        scalar_t *output_ptr = output.data_ptr<scalar_t>();

        sigmoid_focal_loss_forward_cuda_kernel<scalar_t>
            <<<GET_BLOCKS(n_outputs), THREADS_PER_BLOCK, 0, stream>>>(
                n_outputs, input_ptr, target_ptr, weight_ptr, output_ptr,
                gamma, alpha, n_classes);
      });

  // Surface launch-configuration errors immediately.
  AT_CUDA_CHECK(cudaGetLastError());
}

// Launches sigmoid_focal_loss_backward_cuda_kernel with one work item per
// element of `grad_input`, on the current stream of grad_input's device. The
// element type is dispatched on input's scalar type (floating types and
// half); `target` is read as int64. No label range check is performed here.
void SigmoidFocalLossBackwardCUDAKernelLauncher(Tensor input, Tensor target,
                                                Tensor weight,
                                                Tensor grad_input,
                                                const float gamma,
                                                const float alpha) {
  const int n_outputs = grad_input.numel();
  // The size of dimension 1 of `input` is treated as the class count.
  const int n_classes = input.size(1);

  at::cuda::CUDAGuard device_guard(grad_input.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      input.scalar_type(), "sigmoid_focal_loss_backward_cuda_kernel", [&] {
        scalar_t *input_ptr = input.data_ptr<scalar_t>();
        int64_t *target_ptr = target.data_ptr<int64_t>();
        scalar_t *weight_ptr = weight.data_ptr<scalar_t>();
        scalar_t *grad_input_ptr = grad_input.data_ptr<scalar_t>();

        sigmoid_focal_loss_backward_cuda_kernel<scalar_t>
            <<<GET_BLOCKS(n_outputs), THREADS_PER_BLOCK, 0, stream>>>(
                n_outputs, input_ptr, target_ptr, weight_ptr, grad_input_ptr,
                gamma, alpha, n_classes);
      });

  // Surface launch-configuration errors immediately.
  AT_CUDA_CHECK(cudaGetLastError());
}

// Launches softmax_focal_loss_forward_cuda_kernel with one work item per
// element of `output`, on the current stream of `softmax`'s device. The
// element type is dispatched on softmax's scalar type (floating types and
// half); `target` is read as int64.
void SoftmaxFocalLossForwardCUDAKernelLauncher(Tensor softmax, Tensor target,
                                               Tensor weight, Tensor output,
                                               const float gamma,
                                               const float alpha) {
  const int n_outputs = output.numel();
  // The size of dimension 1 of `softmax` is treated as the class count.
  const int n_classes = softmax.size(1);

  // Reject labels above the class count before launching. Note that a label
  // equal to the class count is accepted.
  // NOTE(review): whether the kernel tolerates label == num_classes cannot be
  // seen from here -- confirm against the kernel's indexing.
  AT_ASSERTM(target.max().item<int64_t>() <= (int64_t)n_classes,
             "target label should smaller or equal than num classes");

  at::cuda::CUDAGuard device_guard(softmax.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      softmax.scalar_type(), "softmax_focal_loss_forward_cuda_kernel", [&] {
        scalar_t *softmax_ptr = softmax.data_ptr<scalar_t>();
        int64_t *target_ptr = target.data_ptr<int64_t>();
        scalar_t *weight_ptr = weight.data_ptr<scalar_t>();
        scalar_t *output_ptr = output.data_ptr<scalar_t>();

        softmax_focal_loss_forward_cuda_kernel<scalar_t>
            <<<GET_BLOCKS(n_outputs), THREADS_PER_BLOCK, 0, stream>>>(
                n_outputs, softmax_ptr, target_ptr, weight_ptr, output_ptr,
                gamma, alpha, n_classes);
      });

  // Surface launch-configuration errors immediately.
  AT_CUDA_CHECK(cudaGetLastError());
}

// Runs the softmax focal loss backward pass as two consecutive kernel
// launches on the current stream of grad_input's device:
//   1. softmax_focal_loss_backward_cuda1_kernel, one work item per element of
//      `buff`, taking softmax / target / weight and `buff`;
//   2. softmax_focal_loss_backward_cuda2_kernel, one work item per element of
//      `grad_input`, taking softmax / target / buff and `grad_input`.
// Both launches dispatch on grad_input's scalar type (floating types and
// half); `target` is read as int64.
void SoftmaxFocalLossBackwardCUDAKernelLauncher(Tensor softmax, Tensor target,
                                                Tensor weight, Tensor buff,
                                                Tensor grad_input,
                                                const float gamma,
                                                const float alpha) {
  // The size of dimension 1 of `softmax` is treated as the class count.
  const int n_classes = softmax.size(1);
  const int buff_elems = buff.numel();

  at::cuda::CUDAGuard device_guard(grad_input.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();

  // Stage 1: sized by `buff`.
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      grad_input.scalar_type(), "softmax_focal_loss_backward_cuda1_kernel",
      [&] {
        scalar_t *softmax_ptr = softmax.data_ptr<scalar_t>();
        int64_t *target_ptr = target.data_ptr<int64_t>();
        scalar_t *weight_ptr = weight.data_ptr<scalar_t>();
        scalar_t *buff_ptr = buff.data_ptr<scalar_t>();

        softmax_focal_loss_backward_cuda1_kernel<scalar_t>
            <<<GET_BLOCKS(buff_elems), THREADS_PER_BLOCK, 0, stream>>>(
                buff_elems, softmax_ptr, target_ptr, weight_ptr, buff_ptr,
                gamma, alpha, n_classes);
      });

  AT_CUDA_CHECK(cudaGetLastError());

  // Stage 2: sized by `grad_input`.
  const int grad_elems = grad_input.numel();
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      grad_input.scalar_type(), "softmax_focal_loss_backward_cuda2_kernel",
      [&] {
        scalar_t *softmax_ptr = softmax.data_ptr<scalar_t>();
        int64_t *target_ptr = target.data_ptr<int64_t>();
        scalar_t *buff_ptr = buff.data_ptr<scalar_t>();
        scalar_t *grad_input_ptr = grad_input.data_ptr<scalar_t>();

        softmax_focal_loss_backward_cuda2_kernel<scalar_t>
            <<<GET_BLOCKS(grad_elems), THREADS_PER_BLOCK, 0, stream>>>(
                grad_elems, softmax_ptr, target_ptr, buff_ptr, grad_input_ptr,
                n_classes);
      });

  AT_CUDA_CHECK(cudaGetLastError());
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/furthest_point_sample_cuda.cu
================================================
// Modified from
// https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/sampling_gpu.cu

#include <stdio.h>
#include <stdlib.h>

#include "furthest_point_sample_cuda_kernel.cuh"
#include "pytorch_cuda_helper.hpp"

// Returns the thread-block size to use for `work_size` items: the largest
// power of two that does not exceed work_size, clamped to the range
// [1, 1024].
//
// The result is computed with integer doubling rather than
// log(work_size) / log(2): the floating-point quotient is sensitive to
// rounding, and for work_size <= 0 it yields -inf/NaN whose conversion to int
// (and the subsequent shift) is undefined behaviour. Non-positive input now
// deterministically returns 1.
inline int opt_n_threads(int work_size) {
  constexpr int kMaxThreads = 1024;

  int n_threads = 1;
  // n_threads < 1024 inside the loop, so the doubled value cannot overflow.
  while (n_threads < kMaxThreads && (n_threads << 1) <= work_size) {
    n_threads <<= 1;
  }
  return n_threads;
}

// Launches furthest_point_sampling_forward_cuda_kernel on the current CUDA
// stream with `b` blocks and opt_n_threads(n) threads per block. The kernel
// is templated on the block size, so the runtime thread count is mapped onto
// the matching compile-time instantiation.
//
// Shapes as documented by the original author:
//   dataset: (B, N, 3)
//   temp:    (B, N)
//   idxs:    (B, M)  -- output
void FurthestPointSamplingForwardCUDAKernelLauncher(int b, int n, int m,
                                                    const float* dataset,
                                                    float* temp, int* idxs) {
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
  const unsigned int n_threads = opt_n_threads(n);

// One switch arm: launch the instantiation for a fixed block size.
#define FPS_LAUNCH_CASE(BLOCK_SIZE)                                  \
  case BLOCK_SIZE:                                                   \
    furthest_point_sampling_forward_cuda_kernel<BLOCK_SIZE>          \
        <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); \
    break

  switch (n_threads) {
    FPS_LAUNCH_CASE(1024);
    FPS_LAUNCH_CASE(512);
    FPS_LAUNCH_CASE(256);
    FPS_LAUNCH_CASE(128);
    FPS_LAUNCH_CASE(64);
    FPS_LAUNCH_CASE(32);
    FPS_LAUNCH_CASE(16);
    FPS_LAUNCH_CASE(8);
    FPS_LAUNCH_CASE(4);
    FPS_LAUNCH_CASE(2);
    FPS_LAUNCH_CASE(1);
    default:
      // Fallback for any thread count not listed above: the 512
      // instantiation is launched with the runtime thread count.
      furthest_point_sampling_forward_cuda_kernel<512>
          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
  }

#undef FPS_LAUNCH_CASE

  // Surface launch-configuration errors immediately.
  AT_CUDA_CHECK(cudaGetLastError());
}

// Launches furthest_point_sampling_with_dist_forward_cuda_kernel on the
// current CUDA stream with `b` blocks and opt_n_threads(n) threads per block.
// The kernel is templated on the block size, so the runtime thread count is
// mapped onto the matching compile-time instantiation.
//
// Shapes as documented by the original author:
//   dataset: (B, N, N)
//   temp:    (B, N)
//   idxs:    (B, M)  -- output
void FurthestPointSamplingWithDistForwardCUDAKernelLauncher(
    int b, int n, int m, const float* dataset, float* temp, int* idxs) {
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
  const unsigned int n_threads = opt_n_threads(n);

// One switch arm: launch the instantiation for a fixed block size.
#define FPS_DIST_LAUNCH_CASE(BLOCK_SIZE)                             \
  case BLOCK_SIZE:                                                   \
    furthest_point_sampling_with_dist_forward_cuda_kernel<BLOCK_SIZE> \
        <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); \
    break

  switch (n_threads) {
    FPS_DIST_LAUNCH_CASE(1024);
    FPS_DIST_LAUNCH_CASE(512);
    FPS_DIST_LAUNCH_CASE(256);
    FPS_DIST_LAUNCH_CASE(128);
    FPS_DIST_LAUNCH_CASE(64);
    FPS_DIST_LAUNCH_CASE(32);
    FPS_DIST_LAUNCH_CASE(16);
    FPS_DIST_LAUNCH_CASE(8);
    FPS_DIST_LAUNCH_CASE(4);
    FPS_DIST_LAUNCH_CASE(2);
    FPS_DIST_LAUNCH_CASE(1);
    default:
      // Fallback for any thread count not listed above: the 512
      // instantiation is launched with the runtime thread count.
      furthest_point_sampling_with_dist_forward_cuda_kernel<512>
          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
  }

#undef FPS_DIST_LAUNCH_CASE

  // Surface launch-configuration errors immediately.
  AT_CUDA_CHECK(cudaGetLastError());
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/fused_bias_leakyrelu_cuda.cu
================================================
// Modified from
// https://github.com/rosinality/stylegan2-pytorch/blob/master/op/fused_bias_act_kernel.cu
// Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
//
// This work is made available under the Nvidia Source Code License-NC.
// To view a copy of this license, visit
// https://nvlabs.github.io/stylegan2/license.html

#include <ATen/ATen.h>
#include <ATen/AccumulateType.h>
#include <ATen/cuda/CUDAContext.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <torch/types.h>

#include <ATen/cuda/CUDAApplyUtils.cuh>

// Element-wise kernel: optional bias add, activation (or its gradient form),
// then multiplication by `scale`.
//
// Each thread starts at blockIdx.x * loop_x * blockDim.x + threadIdx.x and
// processes at most `loop_x` elements spaced blockDim.x apart, stopping as
// soon as the index reaches `size_x`.
//
//   out      destination buffer, written at the same index as p_x
//   p_x      input values
//   p_b      bias values, read only when use_bias != 0, indexed by
//            (index / step_b) % size_b
//   p_ref    reference values, read only when use_ref != 0
//   act      1 = linear, 3 = leaky relu
//   grad     0 = forward, 1 = first-order derivative, 2 = second-order
//            derivative
//   alpha    negative slope used by the leaky relu branches
//   scale    factor applied to every result before it is stored
template <typename scalar_t>
static __global__ void fused_bias_act_kernel(
    scalar_t* out, const scalar_t* p_x, const scalar_t* p_b,
    const scalar_t* p_ref, int act, int grad, scalar_t alpha, scalar_t scale,
    int loop_x, int size_x, int step_b, int size_b, int use_bias, int use_ref) {
  // The (act, grad) pair is folded into one selector: act * 10 + grad.
  const int mode = act * 10 + grad;

  scalar_t zero = 0.0;

  int pos = blockIdx.x * loop_x * blockDim.x + threadIdx.x;
  int iter = 0;

  while (iter < loop_x && pos < size_x) {
    scalar_t x = p_x[pos];
    if (use_bias) {
      x += p_b[(pos / step_b) % size_b];
    }

    scalar_t ref = use_ref ? p_ref[pos] : zero;

    scalar_t y;
    switch (mode) {
      case 12:  // linear, second order
      case 32:  // leaky relu, second order
        y = 0.0;
        break;

      case 30:  // leaky relu, forward: slope picked by the sign of x
        y = (x > 0.0) ? x : x * alpha;
        break;

      case 31:  // leaky relu, first order: slope picked by the sign of ref
        y = (ref > 0.0) ? x : x * alpha;
        break;

      case 10:  // linear, forward
      case 11:  // linear, first order
      default:  // any unrecognised combination is passed through unchanged
        y = x;
        break;
    }

    out[pos] = y * scale;

    ++iter;
    pos += blockDim.x;
  }
}

// Host entry point for the fused bias + activation kernel.
//
//   input   tensor to transform; a contiguous copy is used for the launch
//   bias    bias tensor; an empty tensor (numel() == 0) disables the bias add
//   refer   reference tensor; an empty tensor disables the reference read
//   act     activation selector forwarded to the kernel (1 linear, 3 leaky
//           relu)
//   grad    derivative order forwarded to the kernel (0, 1 or 2)
//   alpha   negative slope for the leaky relu branches
//   scale   factor applied to every output element
//
// Returns a new tensor created with torch::empty_like on the contiguous input
// and filled by the kernel. Throws (via AT_CUDA_CHECK) if the device query or
// the kernel launch reports a CUDA error.
torch::Tensor fused_bias_leakyrelu_op(const torch::Tensor& input,
                                      const torch::Tensor& bias,
                                      const torch::Tensor& refer, int act,
                                      int grad, float alpha, float scale) {
  int curDevice = -1;
  // Do not silently ignore a failed device query.
  AT_CUDA_CHECK(cudaGetDevice(&curDevice));
  cudaStream_t stream = at::cuda::getCurrentCUDAStream(curDevice);

  auto x = input.contiguous();
  auto b = bias.contiguous();
  auto ref = refer.contiguous();

  int use_bias = b.numel() ? 1 : 0;
  int use_ref = ref.numel() ? 1 : 0;

  int size_x = x.numel();
  int size_b = b.numel();

  // Number of elements that share one bias entry: the product of every
  // dimension after dim 1.
  int step_b = 1;
  for (int i = 1 + 1; i < x.dim(); i++) {
    step_b *= x.size(i);
  }

  // Each thread handles up to loop_x elements, so one block covers
  // loop_x * block_size elements.
  int loop_x = 4;
  int block_size = 4 * 32;
  int grid_size = (size_x - 1) / (loop_x * block_size) + 1;

  auto y = torch::empty_like(x);

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      x.scalar_type(), "fused_bias_act_kernel", [&] {
        fused_bias_act_kernel<scalar_t><<<grid_size, block_size, 0, stream>>>(
            y.data_ptr<scalar_t>(), x.data_ptr<scalar_t>(),
            b.data_ptr<scalar_t>(), ref.data_ptr<scalar_t>(), act, grad, alpha,
            scale, loop_x, size_x, step_b, size_b, use_bias, use_ref);
      });

  // Report launch failures here instead of returning an unfilled tensor; this
  // matches the other CUDA launchers in the code base.
  AT_CUDA_CHECK(cudaGetLastError());

  return y;
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/gather_points_cuda.cu
================================================
#include <stdio.h>
#include <stdlib.h>

#include "gather_points_cuda_kernel.cuh"
#include "pytorch_cuda_helper.hpp"

void GatherPointsForwardCUDAKernelLauncher(int b, int c, int n, int npoints,
                                           const Tensor points,
                                           const Tensor idx, Tensor out) {
  // points: (B, C, N)
  // idx: (B, npoints)
  // output:
  //      out: (B, C, npoints)

  at::cuda::CUDAGuard device_guard(points.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();

  // blockIdx.x(col), blockIdx.y(row)
  dim3 blocks(GET_BLOCKS(npoints, THREADS_PER_BLOCK), c, b);
  dim3 threads(THREADS_PER_BLOCK);

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      points.scalar_type(), "gather_points_forward_cuda_kernel", [&] {
        gather_points_forward_cuda_kernel<scalar_t>
            <<<blocks, threads, 0, stream>>>(
                b, c, n, npoints, points.data_ptr<scalar_t>(),
                idx.data_ptr<int>(), out.data_ptr<scalar_t>());
      });

  AT_CUDA_CHECK(cudaGetLastError());
}

void GatherPointsBackwardCUDAKernelLauncher(int b, int c, int n, int npoints,
                                            const Tensor grad_out,
                                            const Tensor idx,
                                            Tensor grad_points) {
  // grad_out: (B, C, npoints)
  // idx: (B, npoints)
  // output:
  //      grad_points: (B, C, N)

  at::cuda::CUDAGuard device_guard(grad_out.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();

  // blockIdx.x(col), blockIdx.y(row)
  dim3 blocks(GET_BLOCKS(npoints, THREADS_PER_BLOCK), c, b);
  dim3 threads(THREADS_PER_BLOCK);

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      grad_out.scalar_type(), "gather_points_backward_cuda_kernel", [&] {
        gather_points_backward_cuda_kernel<scalar_t>
            <<<blocks, threads, 0, stream>>>(
                b, c, n, npoints, grad_out.data_ptr<scalar_t>(),
                idx.data_ptr<int>(), grad_points.data_ptr<scalar_t>());
      });

  AT_CUDA_CHECK(cudaGetLastError());
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/group_points_cuda.cu
================================================
// Copyright (c) OpenMMLab. All rights reserved.
// Modified from
// https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/group_points_gpu.cu
#include <stdio.h>
#include <stdlib.h>

#include "group_points_cuda_kernel.cuh"
#include "pytorch_cuda_helper.hpp"

void GroupPointsForwardCUDAKernelLauncher(int b, int c, int n, int npoints,
                                          int nsample, const Tensor points,
                                          const Tensor idx, Tensor out) {
  // points: (B, C, N)
  // idx: (B, npoints, nsample)
  // output:
  //      out: (B, C, npoints, nsample)

  at::cuda::CUDAGuard device_guard(points.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();

  // blockIdx.x(col), blockIdx.y(row)
  dim3 blocks(GET_BLOCKS(npoints * nsample, THREADS_PER_BLOCK), c, b);
  dim3 threads(THREADS_PER_BLOCK);

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      points.scalar_type(), "group_points_forward_cuda_kernel", [&] {
        group_points_forward_cuda_kernel<scalar_t>
            <<<blocks, threads, 0, stream>>>(
                b, c, n, npoints, nsample, points.data_ptr<scalar_t>(),
                idx.data_ptr<int>(), out.data_ptr<scalar_t>());
      });

  AT_CUDA_CHECK(cudaGetLastError());
}

void GroupPointsBackwardCUDAKernelLauncher(int b, int c, int n, int npoints,
                                           int nsample, const Tensor grad_out,
                                           const Tensor idx,
                                           Tensor grad_points) {
  // grad_out: (B, C, npoints, nsample)
  // idx: (B, npoints, nsample)
  // output:
  //      grad_points: (B, C, N)

  at::cuda::CUDAGuard device_guard(grad_out.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();

  // blockIdx.x(col), blockIdx.y(row)
  dim3 blocks(GET_BLOCKS(npoints * nsample, THREADS_PER_BLOCK), c, b);
  dim3 threads(THREADS_PER_BLOCK);

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      grad_out.scalar_type(), "group_points_backward_cuda_kernel", [&] {
        group_points_backward_cuda_kernel<scalar_t>
            <<<blocks, threads, 0, stream>>>(
                b, c, n, npoints, nsample, grad_out.data_ptr<scalar_t>(),
                idx.data_ptr<int>(), grad_points.data_ptr<scalar_t>());
      });

  AT_CUDA_CHECK(cudaGetLastError());
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/iou3d_cuda.cu
================================================
// Modified from
// https://github.com/open-mmlab/OpenPCDet/blob/master/pcdet/ops/iou3d_nms/src/iou3d_nms_kernel.cu

/*
3D IoU Calculation and Rotated NMS(modified from 2D NMS written by others)
Written by Shaoshuai Shi
All Rights Reserved 2019-2020.
*/

#include <stdio.h>

#include "iou3d_cuda_kernel.cuh"
#include "pytorch_cuda_helper.hpp"

void IoU3DBoxesOverlapBevForwardCUDAKernelLauncher(const int num_a,
                                                   const Tensor boxes_a,
                                                   const int num_b,
                                                   const Tensor boxes_b,
                                                   Tensor ans_overlap) {
  at::cuda::CUDAGuard device_guard(boxes_a.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();

  // blockIdx.x(col), blockIdx.y(row)
  dim3 blocks(GET_BLOCKS(num_b, THREADS_PER_BLOCK_IOU3D),
              GET_BLOCKS(num_a, THREADS_PER_BLOCK_IOU3D));
  dim3 threads(THREADS_PER_BLOCK_IOU3D, THREADS_PER_BLOCK_IOU3D);

  iou3d_boxes_overlap_bev_forward_cuda_kernel<<<blocks, threads, 0, stream>>>(
      num_a, boxes_a.data_ptr<float>(), num_b, boxes_b.data_ptr<float>(),
      ans_overlap.data_ptr<float>());

  AT_CUDA_CHECK(cudaGetLastError());
}

void IoU3DBoxesIoUBevForwardCUDAKernelLauncher(const int num_a,
                                               const Tensor boxes_a,
                                               const int num_b,
                                               const Tensor boxes_b,
                                               Tensor ans_iou) {
  at::cuda::CUDAGuard device_guard(boxes_a.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();

  // blockIdx.x(col), blockIdx.y(row)
  dim3 blocks(GET_BLOCKS(num_b, THREADS_PER_BLOCK_IOU3D),
              GET_BLOCKS(num_a, THREADS_PER_BLOCK_IOU3D));
  dim3 threads(THREADS_PER_BLOCK_IOU3D, THREADS_PER_BLOCK_IOU3D);

  iou3d_boxes_iou_bev_forward_cuda_kernel<<<blocks, threads, 0, stream>>>(
      num_a, boxes_a.data_ptr<float>(), num_b, boxes_b.data_ptr<float>(),
      ans_iou.data_ptr<float>());

  AT_CUDA_CHECK(cudaGetLastError());
}

void IoU3DNMSForwardCUDAKernelLauncher(const Tensor boxes,
                                       unsigned long long *mask, int boxes_num,
                                       float nms_overlap_thresh) {
  at::cuda::CUDAGuard device_guard(boxes.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();

  dim3 blocks(GET_BLOCKS(boxes_num, THREADS_PER_BLOCK_NMS),
              GET_BLOCKS(boxes_num, THREADS_PER_BLOCK_NMS));
  dim3 threads(THREADS_PER_BLOCK_NMS);

  nms_forward_cuda_kernel<<<blocks, threads, 0, stream>>>(
      boxes_num, nms_overlap_thresh, boxes.data_ptr<float>(), mask);

  AT_CUDA_CHECK(cudaGetLastError());
}

void IoU3DNMSNormalForwardCUDAKernelLauncher(const Tensor boxes,
                                             unsigned long long *mask,
                                             int boxes_num,
                                             float nms_overlap_thresh) {
  at::cuda::CUDAGuard device_guard(boxes.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();

  dim3 blocks(GET_BLOCKS(boxes_num, THREADS_PER_BLOCK_NMS),
              GET_BLOCKS(boxes_num, THREADS_PER_BLOCK_NMS));
  dim3 threads(THREADS_PER_BLOCK_NMS);

  nms_normal_forward_cuda_kernel<<<blocks, threads, 0, stream>>>(
      boxes_num, nms_overlap_thresh, boxes.data_ptr<float>(), mask);

  AT_CUDA_CHECK(cudaGetLastError());
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/knn_cuda.cu
================================================
// Copyright (c) OpenMMLab. All rights reserved
// Modified from
// https://github.com/CVMI-Lab/PAConv/tree/main/scene_seg/lib/pointops/src/knnquery_heap

#include <cmath>
#include <cstdio>

#include "knn_cuda_kernel.cuh"
#include "pytorch_cuda_helper.hpp"

void KNNForwardCUDAKernelLauncher(int b, int n, int m, int nsample,
                                  const Tensor xyz, const Tensor new_xyz,
                                  Tensor idx, Tensor dist2) {
  // param new_xyz: (B, m, 3)
  // param xyz: (B, n, 3)
  // param idx: (B, m, nsample)

  at::cuda::CUDAGuard device_guard(new_xyz.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();

  // blockIdx.x(col), blockIdx.y(row)
  dim3 blocks(GET_BLOCKS(m, THREADS_PER_BLOCK), b);
  dim3 threads(THREADS_PER_BLOCK);

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      new_xyz.scalar_type(), "knn_forward_cuda_kernel", [&] {
        knn_forward_cuda_kernel<scalar_t><<<blocks, threads, 0, stream>>>(
            b, n, m, nsample, xyz.data_ptr<scalar_t>(),
            new_xyz.data_ptr<scalar_t>(), idx.data_ptr<int>(),
            dist2.data_ptr<scalar_t>());
      });

  AT_CUDA_CHECK(cudaGetLastError());
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/masked_conv2d_cuda.cu
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "masked_conv2d_cuda_kernel.cuh"
#include "pytorch_cuda_helper.hpp"

void MaskedIm2colForwardCUDAKernelLauncher(const Tensor bottom_data,
                                           const Tensor mask_h_idx,
                                           const Tensor mask_w_idx,
                                           Tensor top_data, const int kernel_h,
                                           const int kernel_w, const int pad_h,
                                           const int pad_w) {
  int channels = bottom_data.size(1);
  int height = bottom_data.size(2);
  int width = bottom_data.size(3);
  int mask_cnt = mask_h_idx.size(0);
  int output_size = mask_cnt * channels;

  at::cuda::CUDAGuard device_guard(bottom_data.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      bottom_data.scalar_type(), "MaskedIm2colLaucherForward", ([&] {
        const scalar_t *bottom_data_ = bottom_data.data_ptr<scalar_t>();
        const int64_t *mask_h_idx_ = mask_h_idx.data_ptr<int64_t>();
        const int64_t *mask_w_idx_ = mask_w_idx.data_ptr<int64_t>();
        scalar_t *top_data_ = top_data.data_ptr<scalar_t>();
        MaskedIm2colForward<scalar_t>
            <<<GET_BLOCKS(output_size), THREADS_PER_BLOCK, 0, stream>>>(
                output_size, bottom_data_, height, width, kernel_h, kernel_w,
                pad_h, pad_w, mask_h_idx_, mask_w_idx_, mask_cnt, top_data_);
      }));
  AT_CUDA_CHECK(cudaGetLastError());
}

void MaskedCol2imForwardCUDAKernelLauncher(
    const Tensor bottom_data, const Tensor mask_h_idx, const Tensor mask_w_idx,
    Tensor top_data, const int height, const int width, const int channels) {
  int mask_cnt = mask_h_idx.size(0);
  int output_size = mask_cnt * channels;

  at::cuda::CUDAGuard device_guard(bottom_data.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      bottom_data.scalar_type(), "MaskedCol2imLaucherForward", ([&] {
        const scalar_t *bottom_data_ = bottom_data.data_ptr<scalar_t>();
        const int64_t *mask_h_idx_ = mask_h_idx.data_ptr<int64_t>();
        const int64_t *mask_w_idx_ = mask_w_idx.data_ptr<int64_t>();
        scalar_t *top_data_ = top_data.data_ptr<scalar_t>();

        MaskedCol2imForward<scalar_t>
            <<<GET_BLOCKS(output_size), THREADS_PER_BLOCK, 0, stream>>>(
                output_size, bottom_data_, height, width, channels, mask_h_idx_,
                mask_w_idx_, mask_cnt, top_data_);
      }));
  AT_CUDA_CHECK(cudaGetLastError());
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/min_area_polygons.cu
================================================
// Copyright (c) OpenMMLab. All rights reserved
// modified from
// https://github.com/SDL-GuoZonghao/BeyondBoundingBox/blob/main/mmdet/ops/minareabbox/src/minareabbox_kernel.cu
#include "min_area_polygons_cuda.cuh"
#include "pytorch_cuda_helper.hpp"

// Launches min_area_polygons_cuda_kernel on the device that holds
// `pointsets`, using the current CUDA stream.
//
// The kernel receives the number of point sets (dim 0 of `pointsets`); the
// grid is sized from the total element count of `polygons`.
void MinAreaPolygonsCUDAKernelLauncher(const Tensor pointsets,
                                       Tensor polygons) {
  int set_count = pointsets.size(0);
  const int launch_extent = polygons.numel();

  at::cuda::CUDAGuard device_guard(pointsets.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      pointsets.scalar_type(), "min_area_polygons_cuda_kernel", ([&] {
        auto *points_ptr = pointsets.data_ptr<scalar_t>();
        auto *polygons_ptr = polygons.data_ptr<scalar_t>();
        min_area_polygons_cuda_kernel<scalar_t>
            <<<GET_BLOCKS(launch_extent), THREADS_PER_BLOCK, 0, stream>>>(
                set_count, points_ptr, polygons_ptr);
      }));

  // Raise if the launch reported an error.
  AT_CUDA_CHECK(cudaGetLastError());
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/modulated_deform_conv_cuda.cu
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "modulated_deform_conv_cuda_kernel.cuh"
#include "pytorch_cuda_helper.hpp"

void modulated_deformable_im2col_cuda(
    const Tensor data_im, const Tensor data_offset, const Tensor data_mask,
    const int batch_size, const int channels, const int height_im,
    const int width_im, const int height_col, const int width_col,
    const int kernel_h, const int kernel_w, const int pad_h, const int pad_w,
    const int stride_h, const int stride_w, const int dilation_h,
    const int dilation_w, const int deformable_group, Tensor data_col) {
  // num_axes should be smaller than block size
  const int channel_per_deformable_group = channels / deformable_group;
  const int num_kernels = channels * batch_size * height_col * width_col;

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      data_im.scalar_type(), "modulated_deformable_im2col_gpu", ([&] {
        const scalar_t *data_im_ = data_im.data_ptr<scalar_t>();
        const scalar_t *data_offset_ = data_offset.data_ptr<scalar_t>();
        const scalar_t *data_mask_ = data_mask.data_ptr<scalar_t>();
        scalar_t *data_col_ = data_col.data_ptr<scalar_t>();

        modulated_deformable_im2col_gpu_kernel<<<
            GET_BLOCKS(num_kernels), THREADS_PER_BLOCK, 0,
            at::cuda::getCurrentCUDAStream()>>>(
            num_kernels, data_im_, data_offset_, data_mask_, height_im,
            width_im, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w,
            dilation_h, dilation_w, channel_per_deformable_group, batch_size,
            channels, deformable_group, height_col, width_col, data_col_);
      }));
  AT_CUDA_CHECK(cudaGetLastError());
}

void modulated_deformable_col2im_cuda(
    const Tensor data_col, const Tensor data_offset, const Tensor data_mask,
    const int batch_size, const int channels, const int height_im,
    const int width_im, const int height_col, const int width_col,
    const int kernel_h, const int kernel_w, const int pad_h, const int pad_w,
    const int stride_h, const int stride_w, const int dilation_h,
    const int dilation_w, const int deformable_group, Tensor grad_im) {
  const int channel_per_deformable_group = channels / deformable_group;
  const int num_kernels =
      channels * kernel_h * kernel_w * batch_size * height_col * width_col;

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      data_col.scalar_type(), "modulated_deformable_col2im_gpu", ([&] {
        const scalar_t *data_col_ = data_col.data_ptr<scalar_t>();
        const scalar_t *data_offset_ = data_offset.data_ptr<scalar_t>();
        const scalar_t *data_mask_ = data_mask.data_ptr<scalar_t>();
        scalar_t *grad_im_ = grad_im.data_ptr<scalar_t>();

        modulated_deformable_col2im_gpu_kernel<<<
            GET_BLOCKS(num_kernels), THREADS_PER_BLOCK, 0,
            at::cuda::getCurrentCUDAStream()>>>(
            num_kernels, data_col_, data_offset_, data_mask_, channels,
            height_im, width_im, kernel_h, kernel_w, pad_h, pad_w, stride_h,
            stride_w, dilation_h, dilation_w, channel_per_deformable_group,
            batch_size, deformable_group, height_col, width_col, grad_im_);
      }));
  AT_CUDA_CHECK(cudaGetLastError());
}

void modulated_deformable_col2im_coord_cuda(
    const Tensor data_col, const Tensor data_im, const Tensor data_offset,
    const Tensor data_mask, const int batch_size, const int channels,
    const int height_im, const int width_im, const int height_col,
    const int width_col, const int kernel_h, const int kernel_w,
    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
    const int dilation_h, const int dilation_w, const int deformable_group,
    Tensor grad_offset, Tensor grad_mask) {
  const int num_kernels = batch_size * height_col * width_col * 2 * kernel_h *
                          kernel_w * deformable_group;
  const int channel_per_deformable_group =
      channels * kernel_h * kernel_w / deformable_group;

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      data_col.scalar_type(), "modulated_deformable_col2im_coord_gpu", ([&] {
        const scalar_t *data_col_ = data_col.data_ptr<scalar_t>();
        const scalar_t *data_im_ = data_im.data_ptr<scalar_t>();
        const scalar_t *data_offset_ = data_offset.data_ptr<scalar_t>();
        const scalar_t *data_mask_ = data_mask.data_ptr<scalar_t>();
        scalar_t *grad_offset_ = grad_offset.data_ptr<scalar_t>();
        scalar_t *grad_mask_ = grad_mask.data_ptr<scalar_t>();

        modulated_deformable_col2im_coord_gpu_kernel<<<
            GET_BLOCKS(num_kernels), THREADS_PER_BLOCK, 0,
            at::cuda::getCurrentCUDAStream()>>>(
            num_kernels, data_col_, data_im_, data_offset_, data_mask_,
            channels, height_im, width_im, kernel_h, kernel_w, pad_h, pad_w,
            stride_h, stride_w, dilation_h, dilation_w,
            channel_per_deformable_group, batch_size,
            2 * kernel_h * kernel_w * deformable_group, deformable_group,
            height_col, width_col, grad_offset_, grad_mask_);
      }));
  AT_CUDA_CHECK(cudaGetLastError());
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/ms_deform_attn_cuda.cu
================================================
/*!
**************************************************************************************************
* Deformable DETR
* Copyright (c) 2020 SenseTime. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 [see LICENSE for details]
**************************************************************************************************
* Modified from
*https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
**************************************************************************************************
*/

#include <ATen/ATen.h>
#include <ATen/cuda/CUDAContext.h>
#include <cuda.h>
#include <cuda_runtime.h>

#include <THC/THCAtomics.cuh>
#include <vector>

#include "ms_deform_attn_cuda_kernel.cuh"

// Launches ms_deformable_im2col_gpu_kernel on `stream`.
//
// The kernel is given batch_size * num_query * num_heads * channels as its
// element count, with CUDA_NUM_THREADS threads per block. A launch error is
// only printed to stdout; the function returns normally either way.
template <typename scalar_t>
void ms_deformable_im2col_cuda(cudaStream_t stream, const scalar_t *data_value,
                               const int64_t *data_spatial_shapes,
                               const int64_t *data_level_start_index,
                               const scalar_t *data_sampling_loc,
                               const scalar_t *data_attn_weight,
                               const int batch_size, const int spatial_size,
                               const int num_heads, const int channels,
                               const int num_levels, const int num_query,
                               const int num_point, scalar_t *data_col) {
  // The same count sizes the grid and is passed to the kernel.
  const int total = batch_size * num_query * num_heads * channels;
  const int block_threads = CUDA_NUM_THREADS;

  ms_deformable_im2col_gpu_kernel<scalar_t>
      <<<GET_BLOCKS(total, block_threads), block_threads, 0, stream>>>(
          total, data_value, data_spatial_shapes, data_level_start_index,
          data_sampling_loc, data_attn_weight, batch_size, spatial_size,
          num_heads, channels, num_levels, num_query, num_point, data_col);

  const cudaError_t status = cudaGetLastError();
  if (status == cudaSuccess) {
    return;
  }
  printf("error in ms_deformable_im2col_cuda: %s\n",
         cudaGetErrorString(status));
}

template <typename scalar_t>
void ms_deformable_col2im_cuda(
    cudaStream_t stream, const scalar_t *grad_col, const scalar_t *data_value,
    const int64_t *data_spatial_shapes, const int64_t *data_level_start_index,
    const scalar_t *data_sampling_loc, const scalar_t *data_attn_weight,
    const int batch_size, const int spatial_size, const int num_heads,
    const int channels, const int num_levels, const int num_query,
    const int num_point, scalar_t *grad_value, scalar_t *grad_sampling_loc,
    scalar_t *grad_attn_weight) {
  const int num_threads =
      (channels > CUDA_NUM_THREADS) ? CUDA_NUM_THREADS : channels;
  const int num_kernels = batch_size * num_query * num_heads * channels;
  const int num_actual_kernels = batch_size * num_query * num_heads * channels;
  if (channels > 1024) {
    if ((channels & 1023) == 0) {
      ms_deformable_col2im_gpu_kernel_shm_reduce_v2_multi_blocks<scalar_t>
          <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads,
             num_threads * 3 * sizeof(scalar_t), stream>>>(
              num_kernels, grad_col, data_value, data_spatial_shapes,
              data_level_start_index, data_sampling_loc, data_attn_weight,
              batch_size, spatial_size, num_heads, channels, num_levels,
              num_query, num_point, grad_value, grad_sampling_loc,
              grad_attn_weight);
    } else {
      ms_deformable_col2im_gpu_kernel_gm<scalar_t>
          <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads, 0,
             stream>>>(num_kernels, grad_col, data_value, data_spatial_shapes,
                       data_level_start_index, data_sampling_loc,
                       data_attn_weight, batch_size, spatial_size, num_heads,
                       channels, num_levels, num_query, num_point, grad_value,
                       grad_sampling_loc, grad_attn_weight);
    }
  } else {
    switch (channels) {
      case 1:
        ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1<scalar_t,
                                                                      1>
            <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads, 0,
               stream>>>(num_kernels, grad_col, data_value, data_spatial_shapes,
                         data_level_start_index, data_sampling_loc,
                         data_attn_weight, batch_size, spatial_size, num_heads,
                         channels, num_levels, num_query, num_point, grad_value,
                         grad_sampling_loc, grad_attn_weight);
        break;
      case 2:
        ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1<scalar_t,
                                                                      2>
            <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads, 0,
               stream>>>(num_kernels, grad_col, data_value, data_spatial_shapes,
                         data_level_start_index, data_sampling_loc,
                         data_attn_weight, batch_size, spatial_size, num_heads,
                         channels, num_levels, num_query, num_point, grad_value,
                         grad_sampling_loc, grad_attn_weight);
        break;
      case 4:
        ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1<scalar_t,
                                                                      4>
            <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads, 0,
               stream>>>(num_kernels, grad_col, data_value, data_spatial_shapes,
                         data_level_start_index, data_sampling_loc,
                         data_attn_weight, batch_size, spatial_size, num_heads,
                         channels, num_levels, num_query, num_point, grad_value,
                         grad_sampling_loc, grad_attn_weight);
        break;
      case 8:
        ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1<scalar_t,
                                                                      8>
            <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads, 0,
               stream>>>(num_kernels, grad_col, data_value, data_spatial_shapes,
                         data_level_start_index, data_sampling_loc,
                         data_attn_weight, batch_size, spatial_size, num_heads,
                         channels, num_levels, num_query, num_point, grad_value,
                         grad_sampling_loc, grad_attn_weight);
        break;
      case 16:
        ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1<scalar_t,
                                                                      16>
            <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads, 0,
               stream>>>(num_kernels, grad_col, data_value, data_spatial_shapes,
                         data_level_start_index, data_sampling_loc,
                         data_attn_weight, batch_size, spatial_size, num_heads,
                         channels, num_levels, num_query, num_point, grad_value,
                         grad_sampling_loc, grad_attn_weight);
        break;
      case 32:
        ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1<scalar_t,
                                                                      32>
            <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads, 0,
               stream>>>(num_kernels, grad_col, data_value, data_spatial_shapes,
                         data_level_start_index, data_sampling_loc,
                         data_attn_weight, batch_size, spatial_size, num_heads,
                         channels, num_levels, num_query, num_point, grad_value,
                         grad_sampling_loc, grad_attn_weight);
        break;
      case 64:
        ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v2<scalar_t,
                                                                      64>
            <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads, 0,
               stream>>>(num_kernels, grad_col, data_value, data_spatial_shapes,
                         data_level_start_index, data_sampling_loc,
                         data_attn_weight, batch_size, spatial_size, num_heads,
                         channels, num_levels, num_query, num_point, grad_value,
                         grad_sampling_loc, grad_attn_weight);
        break;
      case 128:
        ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v2<scalar_t,
                                                                      128>
            <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads, 0,
               stream>>>(num_kernels, grad_col, data_value, data_spatial_shapes,
                         data_level_start_index, data_sampling_loc,
                         data_attn_weight, batch_size, spatial_size, num_heads,
                         channels, num_levels, num_query, num_point, grad_value,
                         grad_sampling_loc, grad_attn_weight);
        break;
      case 256:
        ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v2<scalar_t,
                                                                      256>
            <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads, 0,
               stream>>>(num_kernels, grad_col, data_value, data_spatial_shapes,
                         data_level_start_index, data_sampling_loc,
                         data_attn_weight, batch_size, spatial_size, num_heads,
                         channels, num_levels, num_query, num_point, grad_value,
                         grad_sampling_loc, grad_attn_weight);
        break;
      case 512:
        ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v2<scalar_t,
                                                                      512>
            <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads, 0,
               stream>>>(num_kernels, grad_col, data_value, data_spatial_shapes,
                         data_level_start_index, data_sampling_loc,
                         data_attn_weight, batch_size, spatial_size, num_heads,
                         channels, num_levels, num_query, num_point, grad_value,
                         grad_sampling_loc, grad_attn_weight);
        break;
      case 1024:
        ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v2<scalar_t,
                                                                      1024>
            <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads, 0,
               stream>>>(num_kernels, grad_col, data_value, data_spatial_shapes,
                         data_level_start_index, data_sampling_loc,
                         data_attn_weight, batch_size, spatial_size, num_heads,
                         channels, num_levels, num_query, num_point, grad_value,
                         grad_sampling_loc, grad_attn_weight);
        break;
      default:
        if (channels < 64) {
          ms_deformable_col2im_gpu_kernel_shm_reduce_v1<scalar_t>
              <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads,
                 num_threads * 3 * sizeof(scalar_t), stream>>>(
                  num_kernels, grad_col, data_value, data_spatial_shapes,
                  data_level_start_index, data_sampling_loc, data_attn_weight,
                  batch_size, spatial_size, num_heads, channels, num_levels,
                  num_query, num_point, grad_value, grad_sampling_loc,
                  grad_attn_weight);
        } else {
          ms_deformable_col2im_gpu_kernel_shm_reduce_v2<scalar_t>
              <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads,
                 num_threads * 3 * sizeof(scalar_t), stream>>>(
                  num_kernels, grad_col, data_value, data_spatial_shapes,
                  data_level_start_index, data_sampling_loc, data_attn_weight,
                  batch_size, spatial_size, num_heads, channels, num_levels,
                  num_query, num_point, grad_value, grad_sampling_loc,
                  grad_attn_weight);
        }
    }
  }
  cudaError_t err = cudaGetLastError();
  if (err != cudaSuccess) {
    printf("error in ms_deformable_col2im_cuda: %s\n", cudaGetErrorString(err));
  }
}

// Forward pass of multi-scale deformable attention on CUDA.
//
// The batch is processed in groups of min(batch, im2col_step) samples; for each
// group ms_deformable_im2col_cuda is invoked on the current CUDA stream and
// writes directly into the matching slice of the output tensor.
//
// Sizes are read from the inputs as follows:
//   value:          size(0)=batch, size(1)=spatial_size, size(2)=num_heads,
//                   size(3)=channels
//   spatial_shapes: size(0)=num_levels (accessed as int64)
//   sampling_loc:   size(1)=num_query, size(4)=num_point
// level_start_index is accessed as int64 as well. All five input tensors must
// be contiguous CUDA tensors; value's dtype selects the float/double kernel.
//
// im2col_step must be positive and min(batch, im2col_step) must divide batch.
//
// Returns a tensor of shape (batch, num_query, num_heads * channels) created
// with value's options. An empty batch yields an empty result without
// launching any kernel.
at::Tensor ms_deform_attn_cuda_forward(const at::Tensor &value,
                                       const at::Tensor &spatial_shapes,
                                       const at::Tensor &level_start_index,
                                       const at::Tensor &sampling_loc,
                                       const at::Tensor &attn_weight,
                                       const int im2col_step) {
  AT_ASSERTM(value.is_contiguous(), "value tensor has to be contiguous");
  AT_ASSERTM(spatial_shapes.is_contiguous(),
             "spatial_shapes tensor has to be contiguous");
  AT_ASSERTM(level_start_index.is_contiguous(),
             "level_start_index tensor has to be contiguous");
  AT_ASSERTM(sampling_loc.is_contiguous(),
             "sampling_loc tensor has to be contiguous");
  AT_ASSERTM(attn_weight.is_contiguous(),
             "attn_weight tensor has to be contiguous");

  AT_ASSERTM(value.is_cuda(), "value must be a CUDA tensor");
  AT_ASSERTM(spatial_shapes.is_cuda(), "spatial_shapes must be a CUDA tensor");
  AT_ASSERTM(level_start_index.is_cuda(),
             "level_start_index must be a CUDA tensor");
  AT_ASSERTM(sampling_loc.is_cuda(), "sampling_loc must be a CUDA tensor");
  AT_ASSERTM(attn_weight.is_cuda(), "attn_weight must be a CUDA tensor");

  const int batch = value.size(0);
  const int spatial_size = value.size(1);
  const int num_heads = value.size(2);
  const int channels = value.size(3);

  const int num_levels = spatial_shapes.size(0);

  const int num_query = sampling_loc.size(1);
  const int num_point = sampling_loc.size(4);

  // A non-positive step would make the modulo/division below undefined.
  AT_ASSERTM(im2col_step > 0, "im2col_step must be positive, but got ",
             im2col_step);

  auto output =
      at::zeros({batch, num_query, num_heads, channels}, value.options());

  // Empty batch: min(batch, im2col_step) would be 0 and the divisibility check
  // below would divide by zero. There is nothing to compute anyway.
  if (batch == 0) {
    return output.view({batch, num_query, num_heads * channels});
  }

  const int im2col_step_ = std::min(batch, im2col_step);

  // The assertion macro concatenates its message arguments; it does not
  // interpret printf-style placeholders.
  AT_ASSERTM(batch % im2col_step_ == 0, "batch(", batch,
             ") must be divisible by im2col_step(", im2col_step_, ")");

  const int batch_n = im2col_step_;
  const int num_groups = batch / im2col_step_;
  auto output_n =
      output.view({num_groups, batch_n, num_query, num_heads, channels});

  // Per-sample element counts, kept in 64 bits so that the pointer offsets
  // below cannot overflow for large inputs.
  const int64_t per_value_size =
      static_cast<int64_t>(spatial_size) * num_heads * channels;
  const int64_t per_attn_weight_size =
      static_cast<int64_t>(num_query) * num_heads * num_levels * num_point;
  const int64_t per_sample_loc_size = per_attn_weight_size * 2;

  for (int n = 0; n < num_groups; ++n) {
    auto columns = output_n.select(0, n);
    // Index of the first sample of this group within the full batch.
    const int64_t first_sample = static_cast<int64_t>(n) * im2col_step_;
    AT_DISPATCH_FLOATING_TYPES(
        value.scalar_type(), "ms_deform_attn_forward_cuda", ([&] {
          ms_deformable_im2col_cuda(
              at::cuda::getCurrentCUDAStream(),
              value.data_ptr<scalar_t>() + first_sample * per_value_size,
              spatial_shapes.data_ptr<int64_t>(),
              level_start_index.data_ptr<int64_t>(),
              sampling_loc.data_ptr<scalar_t>() +
                  first_sample * per_sample_loc_size,
              attn_weight.data_ptr<scalar_t>() +
                  first_sample * per_attn_weight_size,
              batch_n, spatial_size, num_heads, channels, num_levels, num_query,
              num_point, columns.data_ptr<scalar_t>());
        }));
  }

  output = output.view({batch, num_query, num_heads * channels});

  return output;
}

// Backward pass of multi-scale deformable attention on CUDA.
//
// The batch is processed in groups of min(batch, im2col_step) samples; for each
// group ms_deformable_col2im_cuda is invoked on the current CUDA stream with
// the matching slices of the inputs and of the three gradient outputs.
//
// Sizes are read from the inputs as follows:
//   value:          size(0)=batch, size(1)=spatial_size, size(2)=num_heads,
//                   size(3)=channels
//   spatial_shapes: size(0)=num_levels (accessed as int64)
//   sampling_loc:   size(1)=num_query, size(4)=num_point
// grad_output is viewed as (batch, num_query, num_heads, channels).
//
// value, spatial_shapes, level_start_index, sampling_loc, attn_weight and
// grad_output must be contiguous CUDA tensors. grad_value, grad_sampling_loc
// and grad_attn_weight are written through raw pointers using the same
// per-sample offsets as value, sampling_loc and attn_weight respectively.
// NOTE(review): this presumes they are contiguous and shaped like their
// forward counterparts; that is not verified here.
//
// im2col_step must be positive and min(batch, im2col_step) must divide batch.
// An empty batch returns without launching any kernel.
void ms_deform_attn_cuda_backward(
    const at::Tensor &value, const at::Tensor &spatial_shapes,
    const at::Tensor &level_start_index, const at::Tensor &sampling_loc,
    const at::Tensor &attn_weight, const at::Tensor &grad_output,
    at::Tensor &grad_value, at::Tensor &grad_sampling_loc,
    at::Tensor &grad_attn_weight, const int im2col_step) {
  AT_ASSERTM(value.is_contiguous(), "value tensor has to be contiguous");
  AT_ASSERTM(spatial_shapes.is_contiguous(),
             "spatial_shapes tensor has to be contiguous");
  AT_ASSERTM(level_start_index.is_contiguous(),
             "level_start_index tensor has to be contiguous");
  AT_ASSERTM(sampling_loc.is_contiguous(),
             "sampling_loc tensor has to be contiguous");
  AT_ASSERTM(attn_weight.is_contiguous(),
             "attn_weight tensor has to be contiguous");
  AT_ASSERTM(grad_output.is_contiguous(),
             "grad_output tensor has to be contiguous");

  AT_ASSERTM(value.is_cuda(), "value must be a CUDA tensor");
  AT_ASSERTM(spatial_shapes.is_cuda(), "spatial_shapes must be a CUDA tensor");
  AT_ASSERTM(level_start_index.is_cuda(),
             "level_start_index must be a CUDA tensor");
  AT_ASSERTM(sampling_loc.is_cuda(), "sampling_loc must be a CUDA tensor");
  AT_ASSERTM(attn_weight.is_cuda(), "attn_weight must be a CUDA tensor");
  AT_ASSERTM(grad_output.is_cuda(), "grad_output must be a CUDA tensor");

  const int batch = value.size(0);
  const int spatial_size = value.size(1);
  const int num_heads = value.size(2);
  const int channels = value.size(3);

  const int num_levels = spatial_shapes.size(0);

  const int num_query = sampling_loc.size(1);
  const int num_point = sampling_loc.size(4);

  // A non-positive step would make the modulo/division below undefined.
  AT_ASSERTM(im2col_step > 0, "im2col_step must be positive, but got ",
             im2col_step);

  // Empty batch: min(batch, im2col_step) would be 0 and the divisibility check
  // below would divide by zero. There is nothing to compute anyway.
  if (batch == 0) {
    return;
  }

  const int im2col_step_ = std::min(batch, im2col_step);

  // The assertion macro concatenates its message arguments; it does not
  // interpret printf-style placeholders.
  AT_ASSERTM(batch % im2col_step_ == 0, "batch(", batch,
             ") must be divisible by im2col_step(", im2col_step_, ")");

  const int batch_n = im2col_step_;
  const int num_groups = batch / im2col_step_;

  // Per-sample element counts, kept in 64 bits so that the pointer offsets
  // below cannot overflow for large inputs.
  const int64_t per_value_size =
      static_cast<int64_t>(spatial_size) * num_heads * channels;
  const int64_t per_attn_weight_size =
      static_cast<int64_t>(num_query) * num_heads * num_levels * num_point;
  const int64_t per_sample_loc_size = per_attn_weight_size * 2;

  auto grad_output_n = grad_output.view(
      {num_groups, batch_n, num_query, num_heads, channels});

  for (int n = 0; n < num_groups; ++n) {
    auto grad_output_g = grad_output_n.select(0, n);
    // Index of the first sample of this group within the full batch.
    const int64_t first_sample = static_cast<int64_t>(n) * im2col_step_;
    AT_DISPATCH_FLOATING_TYPES(
        value.scalar_type(), "ms_deform_attn_backward_cuda", ([&] {
          ms_deformable_col2im_cuda(
              at::cuda::getCurrentCUDAStream(),
              grad_output_g.data_ptr<scalar_t>(),
              value.data_ptr<scalar_t>() + first_sample * per_value_size,
              spatial_shapes.data_ptr<int64_t>(),
              level_start_index.data_ptr<int64_t>(),
              sampling_loc.data_ptr<scalar_t>() +
                  first_sample * per_sample_loc_size,
              attn_weight.data_ptr<scalar_t>() +
                  first_sample * per_attn_weight_size,
              batch_n, spatial_size, num_heads, channels, num_levels, num_query,
              num_point,
              grad_value.data_ptr<scalar_t>() + first_sample * per_value_size,
              grad_sampling_loc.data_ptr<scalar_t>() +
                  first_sample * per_sample_loc_size,
              grad_attn_weight.data_ptr<scalar_t>() +
                  first_sample * per_attn_weight_size);
        }));
  }
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/nms_cuda.cu
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "nms_cuda_kernel.cuh"
#include "pytorch_cuda_helper.hpp"

// Runs non-maximum suppression on the GPU and returns the indices (into the
// original `boxes`) of the boxes that survive.
//
// Steps:
//   1. sort the boxes by descending score;
//   2. launch nms_cuda, which fills a bit mask with one row per box and one
//      64-bit word per group of threadsPerBlock boxes;
//   3. copy the mask to the host and sweep it sequentially in score order,
//      keeping every box that has not been suppressed by an earlier kept box.
//
// boxes is read as float data whose first dimension is the number of boxes;
// iou_threshold and offset are forwarded unchanged to the kernel. An empty
// `boxes` yields an empty int64 tensor.
Tensor NMSCUDAKernelLauncher(Tensor boxes, Tensor scores, float iou_threshold,
                             int offset) {
  at::cuda::CUDAGuard device_guard(boxes.device());

  if (boxes.numel() == 0) {
    return at::empty({0}, boxes.options().dtype(at::kLong));
  }
  auto order_t = std::get<1>(scores.sort(0, /*descending=*/true));
  auto boxes_sorted = boxes.index_select(0, order_t);

  int boxes_num = boxes.size(0);
  // Number of mask words per box (one bit per candidate box).
  const int col_blocks = (boxes_num + threadsPerBlock - 1) / threadsPerBlock;
  // Grid extent actually launched; GET_BLOCKS may return fewer blocks than
  // col_blocks.
  const int col_blocks_alloc = GET_BLOCKS(boxes_num, threadsPerBlock);
  Tensor mask =
      at::empty({boxes_num, col_blocks}, boxes.options().dtype(at::kLong));
  dim3 blocks(col_blocks_alloc, col_blocks_alloc);
  dim3 threads(threadsPerBlock);
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
  nms_cuda<<<blocks, threads, 0, stream>>>(
      boxes_num, iou_threshold, offset, boxes_sorted.data_ptr<float>(),
      (unsigned long long*)mask.data_ptr<int64_t>());
  // Report launch failures before the host consumes the mask.
  AT_CUDA_CHECK(cudaGetLastError());

  at::Tensor mask_cpu = mask.to(at::kCPU);
  unsigned long long* mask_host =
      (unsigned long long*)mask_cpu.data_ptr<int64_t>();

  // Accumulated "removed" bits; the vector constructor zero-initializes it.
  std::vector<unsigned long long> remv(col_blocks, 0ULL);

  at::Tensor keep_t =
      at::zeros({boxes_num}, boxes.options().dtype(at::kBool).device(at::kCPU));
  bool* keep = keep_t.data_ptr<bool>();

  for (int i = 0; i < boxes_num; i++) {
    int nblock = i / threadsPerBlock;
    int inblock = i % threadsPerBlock;

    if (!(remv[nblock] & (1ULL << inblock))) {
      keep[i] = true;
      // set every overlap box with bit 1 in remv
      // (row offset computed in 64 bits: i * col_blocks can exceed INT_MAX)
      unsigned long long* p = mask_host + static_cast<int64_t>(i) * col_blocks;
      for (int j = nblock; j < col_blocks; j++) {
        remv[j] |= p[j];
      }
    }
  }

  return order_t.masked_select(keep_t.to(at::kCUDA));
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/nms_rotated_cuda.cu
================================================
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
// modified from
// https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/csrc/nms_rotated/nms_rotated_cuda.cu
#include "nms_rotated_cuda.cuh"
#include "pytorch_cuda_helper.hpp"

// Non-maximum suppression for rotated boxes on the GPU.
//
// dets_sorted holds the detections already ordered by the caller and order_t
// maps sorted positions back to original indices; the returned tensor is
// order_t indexed by the sorted positions that survive suppression.
// NOTE(review): presumably dets_sorted is sorted by descending score - confirm
// against the caller.
//
// nms_rotated_cuda_kernel fills a bit mask (col_blocks 64-bit words per
// detection); the mask is then copied to the host and swept sequentially.
// iou_threshold and multi_label are forwarded unchanged to the kernel.
//
// An empty `dets` yields an empty int64 tensor without launching the kernel.
Tensor nms_rotated_cuda(const Tensor dets, const Tensor scores,
                        const Tensor order_t, const Tensor dets_sorted,
                        float iou_threshold, const int multi_label) {
  // using scalar_t = float;
  AT_ASSERTM(dets.is_cuda(), "dets must be a CUDA tensor");
  AT_ASSERTM(scores.is_cuda(), "scores must be a CUDA tensor");
  at::cuda::CUDAGuard device_guard(dets.device());

  int dets_num = dets.size(0);

  // With no detections the grid below would be 0 x 0, which is not a valid
  // launch configuration, so answer directly.
  if (dets_num == 0) {
    return at::empty({0}, dets.options().dtype(at::kLong));
  }

  const int col_blocks = at::cuda::ATenCeilDiv(dets_num, threadsPerBlock);

  // Element count computed in 64 bits: dets_num * col_blocks can exceed
  // INT_MAX for large inputs.
  Tensor mask = at::empty({static_cast<int64_t>(dets_num) * col_blocks},
                          dets.options().dtype(at::kLong));

  dim3 blocks(col_blocks, col_blocks);
  dim3 threads(threadsPerBlock);
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      dets_sorted.scalar_type(), "nms_rotated_kernel_cuda", [&] {
        nms_rotated_cuda_kernel<scalar_t><<<blocks, threads, 0, stream>>>(
            dets_num, iou_threshold, dets_sorted.data_ptr<scalar_t>(),
            (unsigned long long*)mask.data_ptr<int64_t>(), multi_label);
      });
  // Report launch failures before the host consumes the mask.
  AT_CUDA_CHECK(cudaGetLastError());

  Tensor mask_cpu = mask.to(at::kCPU);
  unsigned long long* mask_host =
      (unsigned long long*)mask_cpu.data_ptr<int64_t>();

  // Accumulated "removed" bits; the vector constructor zero-initializes it.
  std::vector<unsigned long long> remv(col_blocks, 0ULL);

  Tensor keep =
      at::empty({dets_num}, dets.options().dtype(at::kLong).device(at::kCPU));
  int64_t* keep_out = keep.data_ptr<int64_t>();

  int num_to_keep = 0;
  for (int i = 0; i < dets_num; i++) {
    int nblock = i / threadsPerBlock;
    int inblock = i % threadsPerBlock;

    if (!(remv[nblock] & (1ULL << inblock))) {
      keep_out[num_to_keep++] = i;
      // Mark everything this detection suppresses (64-bit row offset).
      unsigned long long* p = mask_host + static_cast<int64_t>(i) * col_blocks;
      for (int j = nblock; j < col_blocks; j++) {
        remv[j] |= p[j];
      }
    }
  }

  return order_t.index(
      {keep.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep)
           .to(order_t.device(), keep.scalar_type())});
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/points_in_boxes_cuda.cu
================================================
// Modified from
// https://github.com/sshaoshuai/PCDet/blob/master/pcdet/ops/roiaware_pool3d/src/roiaware_pool3d_kernel.cu
// Written by Shaoshuai Shi
// All Rights Reserved 2019.

#include <stdio.h>

#include "points_in_boxes_cuda_kernel.cuh"
#include "pytorch_cuda_helper.hpp"

void PointsInBoxesPartForwardCUDAKernelLauncher(int batch_size, int boxes_num,
                                                int pts_num, const Tensor boxes,
                                                const Tensor pts,
                                                Tensor box_idx_of_points) {
  // params boxes: (B, N, 7) [x, y, z, x_size, y_size, z_size, rz] in LiDAR
  // coordinate, z is
  // the bottom center, each box DO NOT overlaps params pts: (B, npoints, 3) [x,
  // y, z] in LiDAR coordinate params boxes_idx_of_points: (B, npoints), default
  // -1

  at::cuda::CUDAGuard device_guard(boxes.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();

  dim3 blocks(GET_BLOCKS(pts_num, THREADS_PER_BLOCK), batch_size);
  dim3 threads(THREADS_PER_BLOCK);

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      boxes.scalar_type(), "points_in_boxes_part_forward_cuda_kernel", [&] {
        points_in_boxes_part_forward_cuda_kernel<scalar_t>
            <<<blocks, threads, 0, stream>>>(
                batch_size, boxes_num, pts_num, boxes.data_ptr<scalar_t>(),
                pts.data_ptr<scalar_t>(), box_idx_of_points.data_ptr<int>());
      });

  AT_CUDA_CHECK(cudaGetLastError());
}

void PointsInBoxesAllForwardCUDAKernelLauncher(int batch_size, int boxes_num,
                                               int pts_num, const Tensor boxes,
                                               const Tensor pts,
                                               Tensor box_idx_of_points) {
  // params boxes: (B, N, 7) [x, y, z, x_size, y_size, z_size, rz] in LiDAR
  // coordinate, z is the bottom center, each box params pts: (B, npoints, 3)
  // [x, y, z] in LiDAR coordinate params boxes_idx_of_points: (B, npoints),
  // default -1

  at::cuda::CUDAGuard device_guard(boxes.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();

  dim3 blocks(GET_BLOCKS(pts_num, THREADS_PER_BLOCK), batch_size);
  dim3 threads(THREADS_PER_BLOCK);

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      boxes.scalar_type(), "points_in_boxes_all_forward_cuda_kernel", [&] {
        points_in_boxes_all_forward_cuda_kernel<scalar_t>
            <<<blocks, threads, 0, stream>>>(
                batch_size, boxes_num, pts_num, boxes.data_ptr<scalar_t>(),
                pts.data_ptr<scalar_t>(), box_idx_of_points.data_ptr<int>());
      });

  AT_CUDA_CHECK(cudaGetLastError());
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/points_in_polygons_cuda.cu
================================================
// Copyright (c) OpenMMLab. All rights reserved
// Modified from
// https://github.com/ming71/CUDA/blob/master/point_justify/points_justify_kernel.cu

#include <stdio.h>

#include "points_in_polygons_cuda_kernel.cuh"
#include "pytorch_cuda_helper.hpp"

// Launches points_in_polygons_forward_cuda_kernel over rows * cols work items
// on the current stream of the device that holds `points`.
//
// points, polygons and output are all accessed with the element type of
// `points`; the kernel receives the raw pointers together with rows and cols.
void PointsInPolygonsForwardCUDAKernelLauncher(const at::Tensor points,
                                               const at::Tensor polygons,
                                               const int rows, const int cols,
                                               at::Tensor output) {
  at::cuda::CUDAGuard device_guard(points.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();

  // One work item per (row, col) pair.
  const int num_items = rows * cols;

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      points.scalar_type(), "points_in_polygons_forward_cuda_kernel", ([&] {
        const scalar_t *points_ptr = points.data_ptr<scalar_t>();
        const scalar_t *polygons_ptr = polygons.data_ptr<scalar_t>();
        scalar_t *flags_ptr = output.data_ptr<scalar_t>();

        points_in_polygons_forward_cuda_kernel<scalar_t>
            <<<GET_BLOCKS(num_items), THREADS_PER_BLOCK, 0, stream>>>(
                num_items, points_ptr, polygons_ptr, rows, cols, flags_ptr);
      }));

  AT_CUDA_CHECK(cudaGetLastError());
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/psamask_cuda.cu
================================================
// Copyright (c) OpenMMLab. All rights reserved
// Modified from
// https://github.com/hszhao/semseg/blob/master/lib/psa/src

#include <THC/THC.h>
#include <torch/serialize/tensor.h>

#include <THC/THCDeviceUtils.cuh>

#include "psamask_cuda_kernel.cuh"
#include "pytorch_cuda_helper.hpp"

void PSAMaskForwardCUDAKernelLauncher(const int psa_type, const Tensor input,
                                      Tensor output, const int num_,
                                      const int h_feature, const int w_feature,
                                      const int h_mask, const int w_mask,
                                      const int half_h_mask,
                                      const int half_w_mask) {
  int nthreads = num_ * h_feature * w_feature;
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
  if (psa_type == 0)
    AT_DISPATCH_FLOATING_TYPES(
        input.scalar_type(), "psamask_collect_forward_cuda", [&] {
          psamask_collect_forward_cuda<scalar_t><<<nthreads, 512, 0, stream>>>(
              nthreads, h_feature, w_feature, h_mask, w_mask, half_h_mask,
              half_w_mask, input.data_ptr<scalar_t>(),
              output.data_ptr<scalar_t>());
        });
  else
    AT_DISPATCH_FLOATING_TYPES(
        input.scalar_type(), "psamask_distribute_forward_cuda", [&] {
          psamask_distribute_forward_cuda<scalar_t>
              <<<nthreads, 512, 0, stream>>>(
                  nthreads, h_feature, w_feature, h_mask, w_mask, half_h_mask,
                  half_w_mask, input.data_ptr<scalar_t>(),
                  output.data_ptr<scalar_t>());
        });
}

void PSAMaskBackwardCUDAKernelLauncher(
    const int psa_type, const Tensor grad_output, Tensor grad_input,
    const int num_, const int h_feature, const int w_feature, const int h_mask,
    const int w_mask, const int half_h_mask, const int half_w_mask) {
  int nthreads = num_ * h_feature * w_feature;
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
  if (psa_type == 0)
    AT_DISPATCH_FLOATING_TYPES(
        grad_input.scalar_type(), "psamask_collect_backward_cuda", [&] {
          psamask_collect_backward_cuda<scalar_t><<<nthreads, 512, 0, stream>>>(
              nthreads, h_feature, w_feature, h_mask, w_mask, half_h_mask,
              half_w_mask, grad_output.data_ptr<scalar_t>(),
              grad_input.data_ptr<scalar_t>());
        });
  else
    AT_DISPATCH_FLOATING_TYPES(
        grad_input.scalar_type(), "psamask_distribute_backward_cuda", [&] {
          psamask_distribute_backward_cuda<scalar_t>
              <<<nthreads, 512, 0, stream>>>(
                  nthreads, h_feature, w_feature, h_mask, w_mask, half_h_mask,
                  half_w_mask, grad_output.data_ptr<scalar_t>(),
                  grad_input.data_ptr<scalar_t>());
        });
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/riroi_align_rotated_cuda.cu
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cuda_helper.hpp"
#include "riroi_align_rotated_cuda_kernel.cuh"

// Launches riroi_align_rotated_forward_cuda_kernel on the current stream of
// the device that holds `features`.
//
// The number of work items is
//   num_rois * pooled_height * pooled_width * channels * num_orientations.
// features, rois and output are accessed with the element type of `features`;
// spatial_scale is converted to that type before being handed to the kernel.
// All remaining arguments are forwarded unchanged.
void RiROIAlignRotatedForwardCUDAKernelLauncher(
    const at::Tensor features, const at::Tensor rois, const float spatial_scale,
    const int num_samples, const bool clockwise, const int channels,
    const int height, const int width, const int num_rois,
    const int pooled_height, const int pooled_width, const int num_orientations,
    at::Tensor output) {
  at::cuda::CUDAGuard device_guard(features.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();

  const int total_items =
      num_rois * pooled_height * pooled_width * channels * num_orientations;

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      features.scalar_type(), "riroi_align_rotated_forward_cuda_kernel", ([&] {
        const scalar_t *feat_ptr = features.data_ptr<scalar_t>();
        const scalar_t *rois_ptr = rois.data_ptr<scalar_t>();
        scalar_t *out_ptr = output.data_ptr<scalar_t>();
        const scalar_t scale = scalar_t(spatial_scale);

        riroi_align_rotated_forward_cuda_kernel<scalar_t>
            <<<GET_BLOCKS(total_items), THREADS_PER_BLOCK, 0, stream>>>(
                total_items, feat_ptr, rois_ptr, scale, num_samples, clockwise,
                channels, height, width, pooled_height, pooled_width,
                num_orientations, out_ptr);
      }));

  AT_CUDA_CHECK(cudaGetLastError());
}

// Launches riroi_align_rotated_backward_cuda_kernel on the current stream of
// the device that holds `top_grad`.
//
// The number of work items is
//   num_rois * pooled_height * pooled_width * channels * num_orientations.
// top_grad, rois and bottom_grad are accessed with the element type of
// `top_grad`; spatial_scale is passed to the kernel as the float it arrives
// as. All remaining arguments are forwarded unchanged.
void RiROIAlignRotatedBackwardCUDAKernelLauncher(
    const at::Tensor top_grad, const at::Tensor rois, const float spatial_scale,
    const int num_samples, const bool clockwise, const int channels,
    const int height, const int width, const int num_rois,
    const int pooled_height, const int pooled_width, const int num_orientations,
    at::Tensor bottom_grad) {
  at::cuda::CUDAGuard device_guard(top_grad.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();

  const int total_items =
      num_rois * pooled_height * pooled_width * channels * num_orientations;

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      top_grad.scalar_type(), "riroi_align_rotated_backward_cuda_kernel", ([&] {
        const scalar_t *grad_out_ptr = top_grad.data_ptr<scalar_t>();
        const scalar_t *rois_ptr = rois.data_ptr<scalar_t>();
        scalar_t *grad_in_ptr = bottom_grad.data_ptr<scalar_t>();

        riroi_align_rotated_backward_cuda_kernel<scalar_t>
            <<<GET_BLOCKS(total_items), THREADS_PER_BLOCK, 0, stream>>>(
                total_items, grad_out_ptr, rois_ptr, spatial_scale, num_samples,
                clockwise, channels, height, width, pooled_height, pooled_width,
                num_orientations, grad_in_ptr);
      }));

  AT_CUDA_CHECK(cudaGetLastError());
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/roi_align_cuda.cu
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cuda_helper.hpp"
#include "roi_align_cuda_kernel.cuh"

// Launches roi_align_forward_cuda_kernel with one work item per element of
// `output`, on the current stream of the device that holds `input`.
//
// channels, height and width are taken from input.size(1), input.size(2) and
// input.size(3). input, rois, output, argmax_y and argmax_x are all accessed
// with the element type of `input`; spatial_scale is converted to that type.
// aligned_height, aligned_width, sampling_ratio, pool_mode and aligned are
// forwarded unchanged.
void ROIAlignForwardCUDAKernelLauncher(Tensor input, Tensor rois, Tensor output,
                                       Tensor argmax_y, Tensor argmax_x,
                                       int aligned_height, int aligned_width,
                                       float spatial_scale, int sampling_ratio,
                                       int pool_mode, bool aligned) {
  const int num_outputs = output.numel();
  const int num_channels = input.size(1);
  const int in_height = input.size(2);
  const int in_width = input.size(3);

  at::cuda::CUDAGuard device_guard(input.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      input.scalar_type(), "roi_align_forward_cuda_kernel", ([&] {
        auto *input_ptr = input.data_ptr<scalar_t>();
        auto *rois_ptr = rois.data_ptr<scalar_t>();
        auto *output_ptr = output.data_ptr<scalar_t>();
        auto *argmax_y_ptr = argmax_y.data_ptr<scalar_t>();
        auto *argmax_x_ptr = argmax_x.data_ptr<scalar_t>();
        const scalar_t scale = static_cast<scalar_t>(spatial_scale);

        roi_align_forward_cuda_kernel<scalar_t>
            <<<GET_BLOCKS(num_outputs), THREADS_PER_BLOCK, 0, stream>>>(
                num_outputs, input_ptr, rois_ptr, output_ptr, argmax_y_ptr,
                argmax_x_ptr, aligned_height, aligned_width, scale,
                sampling_ratio, pool_mode, aligned, num_channels, in_height,
                in_width);
      }));

  AT_CUDA_CHECK(cudaGetLastError());
}

// Launches roi_align_backward_cuda_kernel with one work item per element of
// `grad_output`, on the current stream of the device that holds `grad_output`.
//
// channels, height and width are taken from grad_input.size(1),
// grad_input.size(2) and grad_input.size(3). grad_output, rois, argmax_y,
// argmax_x and grad_input are all accessed with the element type of
// `grad_output`; spatial_scale is converted to that type. aligned_height,
// aligned_width, sampling_ratio, pool_mode and aligned are forwarded unchanged.
void ROIAlignBackwardCUDAKernelLauncher(Tensor grad_output, Tensor rois,
                                        Tensor argmax_y, Tensor argmax_x,
                                        Tensor grad_input, int aligned_height,
                                        int aligned_width, float spatial_scale,
                                        int sampling_ratio, int pool_mode,
                                        bool aligned) {
  const int num_outputs = grad_output.numel();
  const int num_channels = grad_input.size(1);
  const int in_height = grad_input.size(2);
  const int in_width = grad_input.size(3);

  at::cuda::CUDAGuard device_guard(grad_output.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      grad_output.scalar_type(), "roi_align_backward_cuda_kernel", ([&] {
        auto *grad_out_ptr = grad_output.data_ptr<scalar_t>();
        auto *rois_ptr = rois.data_ptr<scalar_t>();
        auto *argmax_y_ptr = argmax_y.data_ptr<scalar_t>();
        auto *argmax_x_ptr = argmax_x.data_ptr<scalar_t>();
        auto *grad_in_ptr = grad_input.data_ptr<scalar_t>();
        const scalar_t scale = static_cast<scalar_t>(spatial_scale);

        roi_align_backward_cuda_kernel<scalar_t>
            <<<GET_BLOCKS(num_outputs), THREADS_PER_BLOCK, 0, stream>>>(
                num_outputs, grad_out_ptr, rois_ptr, argmax_y_ptr, argmax_x_ptr,
                grad_in_ptr, aligned_height, aligned_width, scale,
                sampling_ratio, pool_mode, aligned, num_channels, in_height,
                in_width);
      }));

  AT_CUDA_CHECK(cudaGetLastError());
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/roi_align_rotated_cuda.cu
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cuda_helper.hpp"
#include "roi_align_rotated_cuda_kernel.cuh"

// Launches roi_align_rotated_forward_cuda_kernel over
// num_rois * pooled_height * pooled_width * channels work items.
//
// The launch is issued on PyTorch's current CUDA stream of the device that
// holds `features`, so it is ordered with the surrounding tensor operations
// and targets the GPU that owns the data.
//
// features, rois and output are accessed with the element type of `features`;
// spatial_scale is converted to that type before being handed to the kernel.
// sample_num, aligned, clockwise and the spatial extents are forwarded
// unchanged.
void ROIAlignRotatedForwardCUDAKernelLauncher(
    const at::Tensor features, const at::Tensor rois, const float spatial_scale,
    const int sample_num, const bool aligned, const bool clockwise,
    const int channels, const int height, const int width, const int num_rois,
    const int pooled_height, const int pooled_width, at::Tensor output) {
  const int output_size = num_rois * pooled_height * pooled_width * channels;
  // Select the tensor's device before querying its current stream.
  at::cuda::CUDAGuard device_guard(features.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      features.scalar_type(), "ROIAlignRotatedLaucherForward", ([&] {
        const scalar_t *bottom_data = features.data_ptr<scalar_t>();
        const scalar_t *rois_data = rois.data_ptr<scalar_t>();
        scalar_t *top_data = output.data_ptr<scalar_t>();

        roi_align_rotated_forward_cuda_kernel<scalar_t>
            <<<GET_BLOCKS(output_size), THREADS_PER_BLOCK, 0, stream>>>(
                output_size, bottom_data, rois_data, scalar_t(spatial_scale),
                sample_num, aligned, clockwise, channels, height, width,
                pooled_height, pooled_width, top_data);
      }));

  AT_CUDA_CHECK(cudaGetLastError());
}

// Launches roi_align_rotated_backward_cuda_kernel over
// num_rois * pooled_height * pooled_width * channels work items.
//
// The launch is issued on PyTorch's current CUDA stream of the device that
// holds `top_grad`, so it is ordered with the surrounding tensor operations
// and targets the GPU that owns the data.
//
// top_grad, rois and bottom_grad are accessed with the element type of
// `top_grad`; spatial_scale is passed to the kernel as the float it arrives
// as. sample_num, aligned, clockwise and the spatial extents are forwarded
// unchanged.
void ROIAlignRotatedBackwardCUDAKernelLauncher(
    const at::Tensor top_grad, const at::Tensor rois, const float spatial_scale,
    const int sample_num, const bool aligned, const bool clockwise,
    const int channels, const int height, const int width, const int num_rois,
    const int pooled_height, const int pooled_width, at::Tensor bottom_grad) {
  const int output_size = num_rois * pooled_height * pooled_width * channels;
  // Select the tensor's device before querying its current stream.
  at::cuda::CUDAGuard device_guard(top_grad.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      top_grad.scalar_type(), "ROIAlignLaucherBackward", ([&] {
        const scalar_t *top_diff = top_grad.data_ptr<scalar_t>();
        const scalar_t *rois_data = rois.data_ptr<scalar_t>();
        scalar_t *bottom_diff = bottom_grad.data_ptr<scalar_t>();
        roi_align_rotated_backward_cuda_kernel<scalar_t>
            <<<GET_BLOCKS(output_size), THREADS_PER_BLOCK, 0, stream>>>(
                output_size, top_diff, rois_data, spatial_scale, sample_num,
                aligned, clockwise, channels, height, width, pooled_height,
                pooled_width, bottom_diff);
      }));
  AT_CUDA_CHECK(cudaGetLastError());
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/roi_pool_cuda.cu
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cuda_helper.hpp"
#include "roi_pool_cuda_kernel.cuh"

// Launches roi_pool_forward_cuda_kernel with one work item per element of
// `output`, on the current stream of the device that holds `input`.
//
// channels, height and width are taken from input.size(1), input.size(2) and
// input.size(3). input, rois and output are accessed with the element type of
// `input`, argmax is accessed as int, and spatial_scale is converted to the
// element type of `input`.
void ROIPoolForwardCUDAKernelLauncher(Tensor input, Tensor rois, Tensor output,
                                      Tensor argmax, int pooled_height,
                                      int pooled_width, float spatial_scale) {
  const int num_outputs = output.numel();
  const int num_channels = input.size(1);
  const int in_height = input.size(2);
  const int in_width = input.size(3);

  at::cuda::CUDAGuard device_guard(input.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      input.scalar_type(), "roi_pool_forward_cuda_kernel", ([&] {
        auto *input_ptr = input.data_ptr<scalar_t>();
        auto *rois_ptr = rois.data_ptr<scalar_t>();
        auto *output_ptr = output.data_ptr<scalar_t>();
        auto *argmax_ptr = argmax.data_ptr<int>();
        const scalar_t scale = static_cast<scalar_t>(spatial_scale);

        roi_pool_forward_cuda_kernel<scalar_t>
            <<<GET_BLOCKS(num_outputs), THREADS_PER_BLOCK, 0, stream>>>(
                num_outputs, input_ptr, rois_ptr, output_ptr, argmax_ptr,
                pooled_height, pooled_width, scale, num_channels, in_height,
                in_width);
      }));

  AT_CUDA_CHECK(cudaGetLastError());
}

// Launches roi_pool_backward_cuda_kernel with one work item per element of
// `grad_output`, on the current stream of the device that holds `grad_output`.
//
// channels, height and width are taken from grad_input.size(1),
// grad_input.size(2) and grad_input.size(3). grad_output, rois and grad_input
// are accessed with the element type of `grad_output`; argmax is accessed as
// int. spatial_scale is accepted but not passed to the kernel.
void ROIPoolBackwardCUDAKernelLauncher(Tensor grad_output, Tensor rois,
                                       Tensor argmax, Tensor grad_input,
                                       int pooled_height, int pooled_width,
                                       float spatial_scale) {
  const int num_outputs = grad_output.numel();
  const int num_channels = grad_input.size(1);
  const int in_height = grad_input.size(2);
  const int in_width = grad_input.size(3);

  at::cuda::CUDAGuard device_guard(grad_output.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      grad_output.scalar_type(), "roi_pool_backward_cuda_kernel", ([&] {
        auto *grad_out_ptr = grad_output.data_ptr<scalar_t>();
        auto *rois_ptr = rois.data_ptr<scalar_t>();
        auto *argmax_ptr = argmax.data_ptr<int>();
        auto *grad_in_ptr = grad_input.data_ptr<scalar_t>();

        roi_pool_backward_cuda_kernel<scalar_t>
            <<<GET_BLOCKS(num_outputs), THREADS_PER_BLOCK, 0, stream>>>(
                num_outputs, grad_out_ptr, rois_ptr, argmax_ptr, grad_in_ptr,
                pooled_height, pooled_width, num_channels, in_height, in_width);
      }));

  AT_CUDA_CHECK(cudaGetLastError());
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/roiaware_pool3d_cuda.cu
================================================
// Modified from
// https://github.com/sshaoshuai/PCDet/blob/master/pcdet/ops/roiaware_pool3d/src/roiaware_pool3d_kernel.cu
// Written by Shaoshuai Shi
// All Rights Reserved 2019.

#include <stdio.h>

#include "pytorch_cuda_helper.hpp"
#include "roiaware_pool3d_cuda_kernel.cuh"

// Runs the RoI-aware 3D pooling forward pass as three consecutive kernel
// launches on the current CUDA stream of pts_feature's device.
//
// Tensor layouts, as documented by the original authors:
//   rois:              (N, 7) [x, y, z, x_size, y_size, z_size, rz] in LiDAR
//                      coordinates
//   pts:               (npoints, 3) [x, y, z] in LiDAR coordinates
//   pts_feature:       (npoints, C)
//   argmax:            (N, out_x, out_y, out_z, C)
//   pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
//   pooled_features:   (N, out_x, out_y, out_z, C)
//   pool_method:       0 = max pool, 1 = average pool
//
// Any other pool_method launches no pooling kernel; only the mask and collect
// stages run in that case.
void RoiawarePool3dForwardCUDAKernelLauncher(
    int boxes_num, int pts_num, int channels, int max_pts_each_voxel, int out_x,
    int out_y, int out_z, const Tensor rois, const Tensor pts,
    const Tensor pts_feature, Tensor argmax, Tensor pts_idx_of_voxels,
    Tensor pooled_features, int pool_method) {
  at::cuda::CUDAGuard device_guard(pts_feature.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();

  const dim3 block_dim(THREADS_PER_BLOCK);

  // Stage 1: fill a (boxes_num, pts_num) int mask, initialised to -1.
  Tensor pts_mask =
      -at::ones({boxes_num, pts_num}, pts_feature.options().dtype(at::kInt));
  const dim3 mask_grid(GET_BLOCKS(pts_num, THREADS_PER_BLOCK), boxes_num);

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      rois.scalar_type(), "generate_pts_mask_for_box3d", [&] {
        scalar_t* rois_ptr = rois.data_ptr<scalar_t>();
        scalar_t* pts_ptr = pts.data_ptr<scalar_t>();
        generate_pts_mask_for_box3d<scalar_t>
            <<<mask_grid, block_dim, 0, stream>>>(
                boxes_num, pts_num, out_x, out_y, out_z, rois_ptr, pts_ptr,
                pts_mask.data_ptr<int>());
      });

  AT_CUDA_CHECK(cudaGetLastError());

  // TODO: Merge the collect and pool functions, SS

  // Stage 2: turn the mask into per-voxel point index lists.
  const dim3 collect_grid(GET_BLOCKS(boxes_num, THREADS_PER_BLOCK));

  AT_DISPATCH_INTEGRAL_TYPES(
      pts_idx_of_voxels.scalar_type(), "collect_inside_pts_for_box3d", [&] {
        collect_inside_pts_for_box3d<scalar_t>
            <<<collect_grid, block_dim, 0, stream>>>(
                boxes_num, pts_num, max_pts_each_voxel, out_x, out_y, out_z,
                pts_mask.data_ptr<int>(),
                pts_idx_of_voxels.data_ptr<scalar_t>());
      });

  AT_CUDA_CHECK(cudaGetLastError());

  // Stage 3: pool features; grid is (voxel blocks, channels, boxes).
  const dim3 pool_grid(GET_BLOCKS(out_x * out_y * out_z, THREADS_PER_BLOCK),
                       channels, boxes_num);

  switch (pool_method) {
    case 0: {
      AT_DISPATCH_FLOATING_TYPES_AND_HALF(
          pts_feature.scalar_type(), "roiaware_maxpool3d", [&] {
            roiaware_maxpool3d<scalar_t><<<pool_grid, block_dim, 0, stream>>>(
                boxes_num, pts_num, channels, max_pts_each_voxel, out_x, out_y,
                out_z, pts_feature.data_ptr<scalar_t>(),
                pts_idx_of_voxels.data_ptr<int>(),
                pooled_features.data_ptr<scalar_t>(), argmax.data_ptr<int>());
          });
      break;
    }
    case 1: {
      AT_DISPATCH_FLOATING_TYPES_AND_HALF(
          pts_feature.scalar_type(), "roiaware_avgpool3d", [&] {
            roiaware_avgpool3d<scalar_t><<<pool_grid, block_dim, 0, stream>>>(
                boxes_num, pts_num, channels, max_pts_each_voxel, out_x, out_y,
                out_z, pts_feature.data_ptr<scalar_t>(),
                pts_idx_of_voxels.data_ptr<int>(),
                pooled_features.data_ptr<scalar_t>());
          });
      break;
    }
    default:
      break;
  }

  AT_CUDA_CHECK(cudaGetLastError());
}

// Launches the backward kernel matching the chosen RoI-aware pooling mode on
// the current CUDA stream of grad_out's device.
//
// Tensor layouts, as documented by the original authors:
//   pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
//   argmax:            (N, out_x, out_y, out_z, C)
//   grad_out:          (N, out_x, out_y, out_z, C)
//   grad_in:           (npoints, C), written by the kernel
//   pool_method:       0 = max pool, 1 = average pool
//
// Any other pool_method launches nothing.
void RoiawarePool3dBackwardCUDAKernelLauncher(
    int boxes_num, int out_x, int out_y, int out_z, int channels,
    int max_pts_each_voxel, const Tensor pts_idx_of_voxels, const Tensor argmax,
    const Tensor grad_out, Tensor grad_in, int pool_method) {
  at::cuda::CUDAGuard device_guard(grad_out.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();

  // Grid is (voxel blocks, channels, boxes).
  const dim3 grid_dim(GET_BLOCKS(out_x * out_y * out_z, THREADS_PER_BLOCK),
                      channels, boxes_num);
  const dim3 block_dim(THREADS_PER_BLOCK);

  switch (pool_method) {
    case 0: {
      AT_DISPATCH_FLOATING_TYPES_AND_HALF(
          grad_in.scalar_type(), "roiaware_maxpool3d_backward", [&] {
            roiaware_maxpool3d_backward<scalar_t>
                <<<grid_dim, block_dim, 0, stream>>>(
                    boxes_num, channels, out_x, out_y, out_z,
                    argmax.data_ptr<int>(), grad_out.data_ptr<scalar_t>(),
                    grad_in.data_ptr<scalar_t>());
          });
      break;
    }
    case 1: {
      AT_DISPATCH_FLOATING_TYPES_AND_HALF(
          grad_in.scalar_type(), "roiaware_avgpool3d_backward", [&] {
            roiaware_avgpool3d_backward<scalar_t>
                <<<grid_dim, block_dim, 0, stream>>>(
                    boxes_num, channels, out_x, out_y, out_z,
                    max_pts_each_voxel, pts_idx_of_voxels.data_ptr<int>(),
                    grad_out.data_ptr<scalar_t>(),
                    grad_in.data_ptr<scalar_t>());
          });
      break;
    }
    default:
      break;
  }

  AT_CUDA_CHECK(cudaGetLastError());
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/roipoint_pool3d_cuda.cu
================================================
/*
Modified from
https://github.com/open-mmlab/OpenPCDet/blob/master/pcdet/ops/roipoint_pool3d/src/roipoint_pool3d_kernel.cu
Point cloud feature pooling
Written by Shaoshuai Shi
All Rights Reserved 2018.
*/

#include <math.h>
#include <stdio.h>

#include "pytorch_cuda_helper.hpp"
#include "roipoint_pool3d_cuda_kernel.cuh"

// Runs the RoI point pooling forward pass as three consecutive kernel launches
// on the current CUDA stream of xyz's device:
//   1. assign_pts_to_box3d fills a (batch_size, pts_num, boxes_num) int scratch
//      tensor from xyz and boxes3d;
//   2. get_pooled_idx fills a (batch_size, boxes_num, sampled_pts_num) int
//      index tensor and pooled_empty_flag from that scratch tensor;
//   3. roipoint_pool3d_forward writes pooled_features from xyz, pts_feature
//      and the index tensor.
//
// Each launch is followed by AT_CUDA_CHECK(cudaGetLastError()) so that a
// failed launch is reported at the stage that caused it instead of being
// silently dropped or blamed on a later, unrelated CUDA call.
void RoIPointPool3dForwardCUDAKernelLauncher(
    int batch_size, int pts_num, int boxes_num, int feature_in_len,
    int sampled_pts_num, const Tensor xyz, const Tensor boxes3d,
    const Tensor pts_feature, Tensor pooled_features,
    Tensor pooled_empty_flag) {
  // Select the device before any allocation, like the other launchers do.
  at::cuda::CUDAGuard device_guard(xyz.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();

  Tensor pts_assign = at::empty({batch_size, pts_num, boxes_num},
                                boxes3d.options().dtype(at::kInt));

  // blockIdx.x(col), blockIdx.y(row)
  dim3 blocks(GET_BLOCKS(pts_num, THREADS_PER_BLOCK), boxes_num, batch_size);
  dim3 threads(THREADS_PER_BLOCK);

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      xyz.scalar_type(), "assign_pts_to_box3d", [&] {
        assign_pts_to_box3d<scalar_t><<<blocks, threads, 0, stream>>>(
            batch_size, pts_num, boxes_num, xyz.data_ptr<scalar_t>(),
            boxes3d.data_ptr<scalar_t>(), pts_assign.data_ptr<int>());
      });

  AT_CUDA_CHECK(cudaGetLastError());

  Tensor pts_idx = at::empty({batch_size, boxes_num, sampled_pts_num},
                             boxes3d.options().dtype(at::kInt));

  // blockIdx.x(col), blockIdx.y(row)
  dim3 blocks2(GET_BLOCKS(boxes_num, THREADS_PER_BLOCK), batch_size);

  get_pooled_idx<<<blocks2, threads, 0, stream>>>(
      batch_size, pts_num, boxes_num, sampled_pts_num,
      pts_assign.data_ptr<int>(), pts_idx.data_ptr<int>(),
      pooled_empty_flag.data_ptr<int>());

  AT_CUDA_CHECK(cudaGetLastError());

  dim3 blocks_pool(GET_BLOCKS(sampled_pts_num, THREADS_PER_BLOCK), boxes_num,
                   batch_size);

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      xyz.scalar_type(), "roipoint_pool3d_forward", [&] {
        roipoint_pool3d_forward<scalar_t><<<blocks_pool, threads, 0, stream>>>(
            batch_size, pts_num, boxes_num, feature_in_len, sampled_pts_num,
            xyz.data_ptr<scalar_t>(), pts_idx.data_ptr<int>(),
            pts_feature.data_ptr<scalar_t>(),
            pooled_features.data_ptr<scalar_t>(),
            pooled_empty_flag.data_ptr<int>());
      });

  AT_CUDA_CHECK(cudaGetLastError());
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/rotated_feature_align_cuda.cu
================================================
// Copyright (c) OpenMMLab. All rights reserved.
// Modified from
// https://github.com/SJTU-Thinklab-Det/r3det-on-mmdetection/blob/master/mmdet/ops/fr/src/feature_refine_kernel.cu
#include "pytorch_cuda_helper.hpp"
#include "rotated_feature_align_cuda_kernel.cuh"

// Launches rotated_feature_align_forward_kernel on the current CUDA stream of
// features' device.
//
// The grid is sized from features.numel(); dimensions 1..3 of features are
// forwarded to the kernel unchanged and spatial_scale is converted to the
// dispatched scalar type before the launch.
void RotatedFeatureAlignForwardCUDAKernelLauncher(const Tensor features,
                                                  const Tensor best_bboxes,
                                                  const float spatial_scale,
                                                  const int points,
                                                  Tensor output) {
  const int num_elems = features.numel();

  at::cuda::CUDAGuard device_guard(features.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      features.scalar_type(), "rotated_feature_align_forward_cuda_kernel",
      ([&] {
        rotated_feature_align_forward_kernel<scalar_t>
            <<<GET_BLOCKS(num_elems), THREADS_PER_BLOCK, 0, stream>>>(
                num_elems, points, features.data_ptr<scalar_t>(),
                best_bboxes.data_ptr<scalar_t>(), scalar_t(spatial_scale),
                features.size(1), features.size(2), features.size(3),
                output.data_ptr<scalar_t>());
      }));

  AT_CUDA_CHECK(cudaGetLastError());
}

// Launches rotated_feature_align_backward_kernel on the current CUDA stream of
// top_grad's device.
//
// The grid is sized from top_grad.numel(); dimensions 1..3 of top_grad are
// forwarded to the kernel unchanged and spatial_scale is converted to the
// dispatched scalar type before the launch. The kernel writes bottom_grad.
void RotatedFeatureAlignBackwardCUDAKernelLauncher(const Tensor top_grad,
                                                   const Tensor best_bboxes,
                                                   const float spatial_scale,
                                                   const int points,
                                                   Tensor bottom_grad) {
  const int num_elems = top_grad.numel();

  at::cuda::CUDAGuard device_guard(top_grad.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      top_grad.scalar_type(), "rotated_feature_align_backward_cuda_kernel",
      ([&] {
        rotated_feature_align_backward_kernel<scalar_t>
            <<<GET_BLOCKS(num_elems), THREADS_PER_BLOCK, 0, stream>>>(
                num_elems, points, top_grad.data_ptr<scalar_t>(),
                best_bboxes.data_ptr<scalar_t>(), scalar_t(spatial_scale),
                top_grad.size(1), top_grad.size(2), top_grad.size(3),
                bottom_grad.data_ptr<scalar_t>());
      }));

  AT_CUDA_CHECK(cudaGetLastError());
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/scatter_points_cuda.cu
================================================
// Copyright (c) OpenMMLab. All rights reserved.
#include <stdio.h>
#include <stdlib.h>
#include <torch/types.h>

#include "pytorch_cuda_helper.hpp"
#include "scatter_points_cuda_kernel.cuh"

// Reduces per-point features into per-voxel features on the GPU.
//
// feats is read as (num_input, num_feats) and coors holds one coordinate row
// per point. A point with any negative coordinate is treated as invalid: its
// whole row is replaced by -1, it gets voxel index -1 in the returned map and
// it does not appear in the returned voxel list.
//
// Returns {reduced_feats, out_coors, coors_map, reduce_count}:
//   reduced_feats: (num_voxels, num_feats), same options as feats
//   out_coors:     unique valid coordinate rows, in sorted order
//   coors_map:     int32, per-point index into out_coors (-1 for invalid)
//   reduce_count:  int32, number of points per returned voxel
// For empty input it returns detached clones of feats/coors and two empty
// int32 tensors.
std::vector<at::Tensor> DynamicPointToVoxelForwardCUDAKernelLauncher(
    const at::Tensor &feats, const at::Tensor &coors,
    const reduce_t reduce_type) {
  const int num_input = feats.size(0);
  const int num_feats = feats.size(1);

  if (num_input == 0)
    return {feats.clone().detach(), coors.clone().detach(),
            coors.new_empty({0}, torch::kInt32),
            coors.new_empty({0}, torch::kInt32)};

  at::Tensor out_coors;
  at::Tensor coors_map;
  at::Tensor reduce_count;

  // Collapse every row that contains a negative entry to an all -1 row.
  auto coors_clean = coors.masked_fill(coors.lt(0).any(-1, true), -1);

  std::tie(out_coors, coors_map, reduce_count) =
      at::unique_dim(coors_clean, 0, true, true, true);

  // unique_dim returns sorted rows, so the all -1 row -- if at least one point
  // was invalid -- is the first one. Only then drop it and shift the map;
  // dropping unconditionally would discard a real voxel (and mark its points
  // as invalid) whenever every input point is valid.
  if (out_coors[0].lt(0).any().item<bool>()) {
    out_coors = out_coors.slice(0, 1);
    reduce_count = reduce_count.slice(0, 1);
    coors_map = coors_map - 1;
  }
  coors_map = coors_map.to(torch::kInt32);
  reduce_count = reduce_count.to(torch::kInt32);

  auto reduced_feats =
      at::empty({out_coors.size(0), num_feats}, feats.options());

  at::cuda::CUDAGuard device_guard(feats.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();

  AT_DISPATCH_FLOATING_TYPES(
      feats.scalar_type(), "feats_reduce_kernel", ([&] {
        // Seed the accumulator: -inf for MAX, 0 for the other reductions.
        if (reduce_type == reduce_t::MAX)
          reduced_feats.fill_(-std::numeric_limits<scalar_t>::infinity());
        else
          reduced_feats.fill_(static_cast<scalar_t>(0));

        dim3 blocks(std::min(
            at::cuda::ATenCeilDiv(num_input, THREADS_PER_BLOCK), maxGridDim));
        dim3 threads(THREADS_PER_BLOCK);
        feats_reduce_kernel<<<blocks, threads, 0, stream>>>(
            feats.data_ptr<scalar_t>(), coors_map.data_ptr<int32_t>(),
            reduced_feats.data_ptr<scalar_t>(), num_input, num_feats,
            reduce_type);
        // MEAN: divide the accumulated values by the per-voxel point count.
        if (reduce_type == reduce_t::MEAN)
          reduced_feats /= reduce_count.unsqueeze(-1).to(reduced_feats.dtype());
      }));

  AT_CUDA_CHECK(cudaGetLastError());

  return {reduced_feats, out_coors, coors_map, reduce_count};
}

// Propagates voxel gradients back to the points that were reduced into them.
//
// grad_feats is zero-filled first and is the only tensor written. Nothing is
// launched when there are no input points or no reduced voxels.
//   MEAN / SUM: a single add_reduce_traceback_grad_kernel launch.
//   otherwise (MAX): two launches sharing an int32 (num_reduced, num_feats)
//     scratch tensor initialised to num_input;
//     max_reduce_traceback_scatter_idx_kernel fills it from feats,
//     reduced_feats and coors_map, then max_reduce_scatter_grad_kernel uses it
//     to write grad_feats. (Presumably the scratch holds, per reduced element,
//     the index of the point that produced the maximum -- confirm against the
//     kernel header.)
void DynamicPointToVoxelBackwardCUDAKernelLauncher(
    at::Tensor &grad_feats, const at::Tensor &grad_reduced_feats,
    const at::Tensor &feats, const at::Tensor &reduced_feats,
    const at::Tensor &coors_map, const at::Tensor &reduce_count,
    const reduce_t reduce_type) {
  const int num_input = feats.size(0);
  const int num_reduced = reduced_feats.size(0);
  const int num_feats = feats.size(1);

  grad_feats.fill_(0);
  // copy voxel grad to points

  if (num_input == 0 || num_reduced == 0) return;
  at::cuda::CUDAGuard device_guard(feats.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();

  if (reduce_type == reduce_t::MEAN || reduce_type == reduce_t::SUM) {
    AT_DISPATCH_FLOATING_TYPES(
        grad_reduced_feats.scalar_type(), "add_reduce_traceback_grad_kernel",
        ([&] {
          dim3 blocks(std::min(
              at::cuda::ATenCeilDiv(num_input, THREADS_PER_BLOCK), maxGridDim));
          dim3 threads(THREADS_PER_BLOCK);
          add_reduce_traceback_grad_kernel<<<blocks, threads, 0, stream>>>(
              grad_feats.data_ptr<scalar_t>(),
              grad_reduced_feats.data_ptr<scalar_t>(),
              coors_map.data_ptr<int32_t>(), reduce_count.data_ptr<int32_t>(),
              num_input, num_feats, reduce_type);
        }));

    AT_CUDA_CHECK(cudaGetLastError());
  } else {
    auto reduce_from = at::full({num_reduced, num_feats}, num_input,
                                coors_map.options().dtype(torch::kInt32));
    AT_DISPATCH_FLOATING_TYPES(
        grad_reduced_feats.scalar_type(),
        "max_reduce_traceback_scatter_idx_kernel", ([&] {
          dim3 blocks(std::min(
              at::cuda::ATenCeilDiv(num_input, THREADS_PER_BLOCK), maxGridDim));
          dim3 threads(THREADS_PER_BLOCK);
          max_reduce_traceback_scatter_idx_kernel<<<blocks, threads, 0,
                                                    stream>>>(
              feats.data_ptr<scalar_t>(), reduced_feats.data_ptr<scalar_t>(),
              reduce_from.data_ptr<int32_t>(), coors_map.data_ptr<int32_t>(),
              num_input, num_feats);
        }));

    AT_CUDA_CHECK(cudaGetLastError());

    // The dispatch label names the kernel actually launched, so an
    // unsupported-dtype error points at the right stage.
    AT_DISPATCH_FLOATING_TYPES(
        grad_reduced_feats.scalar_type(), "max_reduce_scatter_grad_kernel",
        ([&] {
          dim3 blocks(
              std::min(at::cuda::ATenCeilDiv(num_reduced, THREADS_PER_BLOCK),
                       maxGridDim));
          dim3 threads(THREADS_PER_BLOCK);
          max_reduce_scatter_grad_kernel<<<blocks, threads, 0, stream>>>(
              grad_feats.data_ptr<scalar_t>(),
              grad_reduced_feats.data_ptr<scalar_t>(),
              reduce_from.data_ptr<int32_t>(), num_reduced, num_feats);
        }));

    AT_CUDA_CHECK(cudaGetLastError());
  }
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/sync_bn_cuda.cu
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cuda_helper.hpp"
#include "sync_bn_cuda_kernel.cuh"

// Launches sync_bn_forward_mean_cuda_kernel.
//
// Dimensions 0..2 of input are read as (num, channels, spatial). The grid has
// `channels` blocks of THREADS_PER_BLOCK threads and runs on the current CUDA
// stream of input's device. mean is accessed as float whatever input's dtype.
void SyncBNForwardMeanCUDAKernelLauncher(const Tensor input, Tensor mean) {
  at::cuda::CUDAGuard device_guard(input.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();

  const int batch = input.size(0);
  const int num_channels = input.size(1);
  const int spatial_size = input.size(2);

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      input.scalar_type(), "sync_bn_forward_mean_cuda_kernel", [&] {
        scalar_t* in_ptr = input.data_ptr<scalar_t>();
        float* mean_ptr = mean.data_ptr<float>();

        sync_bn_forward_mean_cuda_kernel<scalar_t>
            <<<num_channels, THREADS_PER_BLOCK, 0, stream>>>(
                in_ptr, mean_ptr, batch, num_channels, spatial_size);
      });

  AT_CUDA_CHECK(cudaGetLastError());
}

// Launches sync_bn_forward_var_cuda_kernel.
//
// Dimensions 0..2 of input are read as (num, channels, spatial). The grid has
// `channels` blocks of THREADS_PER_BLOCK threads and runs on the current CUDA
// stream of input's device. mean and var are accessed as float whatever
// input's dtype.
void SyncBNForwardVarCUDAKernelLauncher(const Tensor input, const Tensor mean,
                                        Tensor var) {
  int num = input.size(0);
  int channels = input.size(1);
  int spatial = input.size(2);

  at::cuda::CUDAGuard device_guard(input.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
  // The dispatch label must name this kernel: it is the text shown when the
  // input dtype is unsupported.
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      input.scalar_type(), "sync_bn_forward_var_cuda_kernel", [&] {
        sync_bn_forward_var_cuda_kernel<scalar_t>
            <<<channels, THREADS_PER_BLOCK, 0, stream>>>(
                input.data_ptr<scalar_t>(), mean.data_ptr<float>(),
                var.data_ptr<float>(), num, channels, spatial);
      });
  AT_CUDA_CHECK(cudaGetLastError());
}

// Launches sync_bn_forward_output_cuda_kernel.
//
// Dimensions 0..2 of input are read as (num, channels, spatial). The grid has
// `channels` blocks of THREADS_PER_BLOCK threads and runs on the current CUDA
// stream of input's device. Only input and output use the dispatched scalar
// type; mean, var, running_mean, running_var, weight, bias, norm and std are
// all accessed as float. eps, momentum and group_size are forwarded to the
// kernel unchanged.
void SyncBNForwardOutputCUDAKernelLauncher(
    const Tensor input, const Tensor mean, const Tensor var,
    Tensor running_mean, Tensor running_var, const Tensor weight,
    const Tensor bias, Tensor norm, Tensor std, Tensor output, float eps,
    float momentum, int group_size) {
  int num = input.size(0);
  int channels = input.size(1);
  int spatial = input.size(2);

  at::cuda::CUDAGuard device_guard(input.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
  // The dispatch label must name this kernel: it is the text shown when the
  // input dtype is unsupported.
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      input.scalar_type(), "sync_bn_forward_output_cuda_kernel", [&] {
        sync_bn_forward_output_cuda_kernel<scalar_t>
            <<<channels, THREADS_PER_BLOCK, 0, stream>>>(
                input.data_ptr<scalar_t>(), mean.data_ptr<float>(),
                var.data_ptr<float>(), running_mean.data_ptr<float>(),
                running_var.data_ptr<float>(), weight.data_ptr<float>(),
                bias.data_ptr<float>(), norm.data_ptr<float>(),
                std.data_ptr<float>(), output.data_ptr<scalar_t>(), num,
                channels, spatial, eps, momentum, group_size);
      });
  AT_CUDA_CHECK(cudaGetLastError());
}

// Launches sync_bn_backward_param_cuda_kernel.
//
// Dimensions 0..2 of grad_output are read as (num, channels, spatial). The
// grid has `channels` blocks of THREADS_PER_BLOCK threads and runs on the
// current CUDA stream of grad_output's device. norm, grad_weight and grad_bias
// are accessed as float whatever grad_output's dtype.
void SyncBNBackwardParamCUDAKernelLauncher(const Tensor grad_output,
                                           const Tensor norm,
                                           Tensor grad_weight,
                                           Tensor grad_bias) {
  at::cuda::CUDAGuard device_guard(grad_output.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();

  const int batch = grad_output.size(0);
  const int num_channels = grad_output.size(1);
  const int spatial_size = grad_output.size(2);

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      grad_output.scalar_type(), "sync_bn_backward_param_cuda_kernel", [&] {
        scalar_t* grad_out_ptr = grad_output.data_ptr<scalar_t>();
        float* norm_ptr = norm.data_ptr<float>();
        float* grad_weight_ptr = grad_weight.data_ptr<float>();
        float* grad_bias_ptr = grad_bias.data_ptr<float>();

        sync_bn_backward_param_cuda_kernel<scalar_t>
            <<<num_channels, THREADS_PER_BLOCK, 0, stream>>>(
                grad_out_ptr, norm_ptr, grad_weight_ptr, grad_bias_ptr, batch,
                num_channels, spatial_size);
      });

  AT_CUDA_CHECK(cudaGetLastError());
}

// Launches sync_bn_backward_data_cuda_kernel.
//
// Sizes come from grad_input: numel() sizes the grid and dimensions 0..2 are
// read as (num, channels, spatial). The kernel runs on the current CUDA stream
// of grad_input's device, while the scalar type is dispatched on grad_output.
// weight, grad_weight, grad_bias, norm and std are accessed as float.
void SyncBNBackwardDataCUDAKernelLauncher(const Tensor grad_output,
                                          const Tensor weight,
                                          const Tensor grad_weight,
                                          const Tensor grad_bias,
                                          const Tensor norm, const Tensor std,
                                          Tensor grad_input) {
  at::cuda::CUDAGuard device_guard(grad_input.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();

  const int total_elems = grad_input.numel();
  const int batch = grad_input.size(0);
  const int num_channels = grad_input.size(1);
  const int spatial_size = grad_input.size(2);

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      grad_output.scalar_type(), "sync_bn_backward_data_cuda_kernel", [&] {
        scalar_t* grad_out_ptr = grad_output.data_ptr<scalar_t>();
        scalar_t* grad_in_ptr = grad_input.data_ptr<scalar_t>();

        sync_bn_backward_data_cuda_kernel<scalar_t>
            <<<GET_BLOCKS(total_elems), THREADS_PER_BLOCK, 0, stream>>>(
                total_elems, grad_out_ptr, weight.data_ptr<float>(),
                grad_weight.data_ptr<float>(), grad_bias.data_ptr<float>(),
                norm.data_ptr<float>(), std.data_ptr<float>(), grad_in_ptr,
                batch, num_channels, spatial_size);
      });

  AT_CUDA_CHECK(cudaGetLastError());
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/three_interpolate_cuda.cu
================================================
// Modified from
// https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/interpolate_gpu.cu

#include <math.h>
#include <stdio.h>
#include <stdlib.h>

#include "pytorch_cuda_helper.hpp"
#include "three_interpolate_cuda_kernel.cuh"

// Launches three_interpolate_forward_cuda_kernel on the current CUDA stream of
// points' device.
//
// Layouts, as documented by the original authors:
//   points: (B, C, M)
//   idx:    (B, N, 3), accessed as int
//   weight: (B, N, 3)
//   out:    (B, C, N), written by the kernel
// The grid is (blocks covering n, c, b).
void ThreeInterpolateForwardCUDAKernelLauncher(int b, int c, int m, int n,
                                               const Tensor points,
                                               const Tensor idx,
                                               const Tensor weight,
                                               Tensor out) {
  at::cuda::CUDAGuard device_guard(points.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();

  // blockIdx.x(col), blockIdx.y(row)
  const dim3 grid_dim(GET_BLOCKS(n, THREADS_PER_BLOCK), c, b);
  const dim3 block_dim(THREADS_PER_BLOCK);

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      points.scalar_type(), "three_interpolate_forward_cuda_kernel", [&] {
        scalar_t* points_ptr = points.data_ptr<scalar_t>();
        int* idx_ptr = idx.data_ptr<int>();
        scalar_t* weight_ptr = weight.data_ptr<scalar_t>();
        scalar_t* out_ptr = out.data_ptr<scalar_t>();

        three_interpolate_forward_cuda_kernel<scalar_t>
            <<<grid_dim, block_dim, 0, stream>>>(b, c, m, n, points_ptr,
                                                 idx_ptr, weight_ptr, out_ptr);
      });

  AT_CUDA_CHECK(cudaGetLastError());
}

// Launches three_interpolate_backward_cuda_kernel on the current CUDA stream
// of grad_out's device.
//
// Layouts, as documented by the original authors:
//   grad_out:    (B, C, N)
//   weight:      (B, N, 3)
//   grad_points: (B, C, M), written by the kernel
// idx is accessed as int. Note the size arguments are ordered (b, c, n, m)
// here. The grid is (blocks covering n, c, b).
void ThreeInterpolateBackwardCUDAKernelLauncher(int b, int c, int n, int m,
                                                const Tensor grad_out,
                                                const Tensor idx,
                                                const Tensor weight,
                                                Tensor grad_points) {
  at::cuda::CUDAGuard device_guard(grad_out.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();

  // blockIdx.x(col), blockIdx.y(row)
  const dim3 grid_dim(GET_BLOCKS(n, THREADS_PER_BLOCK), c, b);
  const dim3 block_dim(THREADS_PER_BLOCK);

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      grad_out.scalar_type(), "three_interpolate_backward_cuda_kernel", [&] {
        scalar_t* grad_out_ptr = grad_out.data_ptr<scalar_t>();
        int* idx_ptr = idx.data_ptr<int>();
        scalar_t* weight_ptr = weight.data_ptr<scalar_t>();
        scalar_t* grad_points_ptr = grad_points.data_ptr<scalar_t>();

        three_interpolate_backward_cuda_kernel<scalar_t>
            <<<grid_dim, block_dim, 0, stream>>>(b, c, n, m, grad_out_ptr,
                                                 idx_ptr, weight_ptr,
                                                 grad_points_ptr);
      });

  AT_CUDA_CHECK(cudaGetLastError());
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/three_nn_cuda.cu
================================================
// Modified from
// https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/interpolate_gpu.cu

#include <math.h>
#include <stdio.h>
#include <stdlib.h>

#include "pytorch_cuda_helper.hpp"
#include "three_nn_cuda_kernel.cuh"

// Launches three_nn_forward_cuda_kernel on the current CUDA stream of
// unknown's device.
//
// Layouts, as documented by the original authors:
//   unknown: (B, N, 3)
//   known:   (B, M, 3)
//   dist2:   (B, N, 3), written by the kernel
//   idx:     (B, N, 3), written by the kernel, accessed as int
// The grid is (blocks covering n, b).
void ThreeNNForwardCUDAKernelLauncher(int b, int n, int m, const Tensor unknown,
                                      const Tensor known, Tensor dist2,
                                      Tensor idx) {
  at::cuda::CUDAGuard device_guard(unknown.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();

  // blockIdx.x(col), blockIdx.y(row)
  const dim3 grid_dim(GET_BLOCKS(n, THREADS_PER_BLOCK), b);
  const dim3 block_dim(THREADS_PER_BLOCK);

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      unknown.scalar_type(), "three_nn_forward_cuda_kernel", [&] {
        scalar_t* unknown_ptr = unknown.data_ptr<scalar_t>();
        scalar_t* known_ptr = known.data_ptr<scalar_t>();
        scalar_t* dist2_ptr = dist2.data_ptr<scalar_t>();
        int* idx_ptr = idx.data_ptr<int>();

        three_nn_forward_cuda_kernel<scalar_t>
            <<<grid_dim, block_dim, 0, stream>>>(b, n, m, unknown_ptr,
                                                 known_ptr, dist2_ptr, idx_ptr);
      });

  AT_CUDA_CHECK(cudaGetLastError());
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/tin_shift_cuda.cu
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cuda_helper.hpp"
#include "pytorch_device_registry.hpp"
#include "tin_shift_cuda_kernel.cuh"

// Launches tin_shift_forward_cuda_kernel on the current CUDA stream of input's
// device.
//
// Dimensions 0..3 of input are read as (batch, t, channels, hw) and
// shift.size(1) as the number of groups; channels per group is the integer
// quotient channels / groups. The grid is sized from batch * hw * channels,
// while the element count handed to the kernel is output.numel(). shift is
// accessed as int.
void TINShiftForwardCUDAKernelLauncher(Tensor input, Tensor shift,
                                       Tensor output) {
  const int total_out = output.numel();
  const int n_batch = input.size(0);
  const int n_time = input.size(1);
  const int n_channels = input.size(2);
  const int n_hw = input.size(3);
  const int n_groups = shift.size(1);
  const int channels_per_group = n_channels / n_groups;
  const int launch_count = n_batch * n_hw * n_channels;

  at::cuda::CUDAGuard device_guard(input.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      input.scalar_type(), "tin_shift_forward_cuda_kernel", [&] {
        scalar_t* in_ptr = input.data_ptr<scalar_t>();
        int* shift_ptr = shift.data_ptr<int>();
        scalar_t* out_ptr = output.data_ptr<scalar_t>();

        tin_shift_forward_cuda_kernel<scalar_t>
            <<<GET_BLOCKS(launch_count), THREADS_PER_BLOCK, 0, stream>>>(
                total_out, in_ptr, shift_ptr, out_ptr, n_batch, n_channels,
                n_time, n_hw, n_groups, channels_per_group);
      });

  AT_CUDA_CHECK(cudaGetLastError());
}

// Launches tin_shift_backward_cuda_kernel on the current CUDA stream of
// grad_output's device.
//
// Dimensions 0..3 of grad_output are read as (batch, t, channels, hw) and
// shift.size(1) as the number of groups; channels per group is the integer
// quotient channels / groups. The grid is sized from batch * hw * channels,
// while the element count handed to the kernel is grad_output.numel(). shift
// is accessed as int and grad_input is written by the kernel.
void TINShiftBackwardCUDAKernelLauncher(Tensor grad_output, Tensor shift,
                                        Tensor grad_input) {
  const int total_out = grad_output.numel();
  const int n_batch = grad_output.size(0);
  const int n_time = grad_output.size(1);
  const int n_channels = grad_output.size(2);
  const int n_hw = grad_output.size(3);
  const int n_groups = shift.size(1);
  const int channels_per_group = n_channels / n_groups;
  const int launch_count = n_batch * n_hw * n_channels;

  at::cuda::CUDAGuard device_guard(grad_output.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      grad_output.scalar_type(), "tin_shift_backward_cuda_kernel", [&] {
        scalar_t* grad_out_ptr = grad_output.data_ptr<scalar_t>();
        int* shift_ptr = shift.data_ptr<int>();
        scalar_t* grad_in_ptr = grad_input.data_ptr<scalar_t>();

        tin_shift_backward_cuda_kernel<scalar_t>
            <<<GET_BLOCKS(launch_count), THREADS_PER_BLOCK, 0, stream>>>(
                total_out, grad_out_ptr, shift_ptr, grad_in_ptr, n_batch,
                n_channels, n_time, n_hw, n_groups, channels_per_group);
      });

  AT_CUDA_CHECK(cudaGetLastError());
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/upfirdn2d_kernel.cu
================================================
// Modified from
// https://github.com/rosinality/stylegan2-pytorch/blob/master/op/upfirdn2d_kernel.cu
// Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
//
// This work is made available under the Nvidia Source Code License-NC.
// To view a copy of this license, visit
// https://nvlabs.github.io/stylegan2/license.html

#include <ATen/ATen.h>
#include <ATen/AccumulateType.h>
#include <ATen/cuda/CUDAContext.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <torch/types.h>

#include <ATen/cuda/CUDAApplyUtils.cuh>

// Returns a / b, lowered by one whenever the truncated quotient multiplied by
// b exceeds a. For a positive divisor this is floor(a / b), i.e. division that
// rounds toward negative infinity instead of toward zero.
static __host__ __device__ __forceinline__ int floor_div(int a, int b) {
  const int quotient = a / b;
  return (quotient * b > a) ? quotient - 1 : quotient;
}

// Parameter bundle passed by value to the upfirdn2d kernels.
// Field notes below describe how upfirdn2d_kernel_large reads them.
struct UpFirDn2DKernelParams {
  // Upsampling factors: divisors when mapping an intermediate coordinate back
  // to an input coordinate.
  int up_x;
  int up_y;
  // Downsampling factors: multipliers applied to the output coordinate.
  int down_x;
  int down_y;
  // Padding offsets subtracted when computing the intermediate coordinate.
  int pad_x0;
  // NOTE(review): pad_x1 / pad_y1 are not read by upfirdn2d_kernel_large --
  // presumably the trailing-edge padding; confirm against the host code.
  int pad_x1;
  int pad_y0;
  int pad_y1;

  // Extent of the outermost (slowest-varying) index of input and output.
  int major_dim;
  // Input height and width.
  int in_h;
  int in_w;
  // Extent of the innermost (fastest-varying) index of input and output.
  int minor_dim;
  // Filter height and width.
  int kernel_h;
  int kernel_w;
  // Output height and width.
  int out_h;
  int out_w;
  // Number of major indices handled per blockIdx.z.
  int loop_major;
  // Number of output columns each thread steps through.
  int loop_x;
};

// Generic upfirdn2d kernel that reads all sizes from the runtime parameter
// struct p.
//
// Each thread handles one (out_y, minor_idx) pair and, for up to p.loop_major
// major indices and p.loop_x output columns, accumulates the products of input
// samples and filter taps over a clamped input window, then writes one output
// element per (major_idx, out_y, out_x, minor_idx).
template <typename scalar_t>
__global__ void upfirdn2d_kernel_large(scalar_t *out, const scalar_t *input,
                                       const scalar_t *kernel,
                                       const UpFirDn2DKernelParams p) {
  // The x thread index packs (out_y, minor_idx): quotient and remainder by
  // p.minor_dim.
  int minor_idx = blockIdx.x * blockDim.x + threadIdx.x;
  int out_y = minor_idx / p.minor_dim;
  minor_idx -= out_y * p.minor_dim;
  // First output column and first major index this thread works on.
  int out_x_base = blockIdx.y * p.loop_x * blockDim.y + threadIdx.y;
  int major_idx_base = blockIdx.z * p.loop_major;

  // Threads whose starting position is already out of range do nothing.
  if (out_x_base >= p.out_w || out_y >= p.out_h ||
      major_idx_base >= p.major_dim) {
    return;
  }

  // Vertical window: in_y is the first input row (clamped to [0, in_h]), h the
  // number of rows, kernel_y the filter row paired with in_y.
  int mid_y = out_y * p.down_y + p.up_y - 1 - p.pad_y0;
  int in_y = min(max(floor_div(mid_y, p.up_y), 0), p.in_h);
  int h = min(max(floor_div(mid_y + p.kernel_h, p.up_y), 0), p.in_h) - in_y;
  int kernel_y = mid_y + p.kernel_h - (in_y + 1) * p.up_y;

  for (int loop_major = 0, major_idx = major_idx_base;
       loop_major < p.loop_major && major_idx < p.major_dim;
       loop_major++, major_idx++) {
    // Successive columns for one thread are blockDim.y apart.
    for (int loop_x = 0, out_x = out_x_base;
         loop_x < p.loop_x && out_x < p.out_w; loop_x++, out_x += blockDim.y) {
      // Horizontal window, computed the same way as the vertical one.
      int mid_x = out_x * p.down_x + p.up_x - 1 - p.pad_x0;
      int in_x = min(max(floor_div(mid_x, p.up_x), 0), p.in_w);
      int w = min(max(floor_div(mid_x + p.kernel_w, p.up_x), 0), p.in_w) - in_x;
      int kernel_x = mid_x + p.kernel_w - (in_x + 1) * p.up_x;

      // Start pointers plus per-step strides. The input pointer moves forward
      // while the filter pointer moves backward (negative strides).
      const scalar_t *x_p =
          &input[((major_idx * p.in_h + in_y) * p.in_w + in_x) * p.minor_dim +
                 minor_idx];
      const scalar_t *k_p = &kernel[kernel_y * p.kernel_w + kernel_x];
      int x_px = p.minor_dim;
      int k_px = -p.up_x;
      int x_py = p.in_w * p.minor_dim;
      int k_py = -p.up_y * p.kernel_w;

      scalar_t v = 0.0f;

      for (int y = 0; y < h; y++) {
        for (int x = 0; x < w; x++) {
          v += static_cast<scalar_t>(*x_p) * static_cast<scalar_t>(*k_p);
          x_p += x_px;
          k_p += k_px;
        }

        // Rewind the w horizontal steps and advance one row.
        x_p += x_py - w * x_px;
        k_p += k_py - w * k_px;
      }

      out[((major_idx * p.out_h + out_y) * p.out_w + out_x) * p.minor_dim +
          minor_idx] = v;
    }
  }
}

// Tiled upfirdn2d kernel whose resampling factors, filter-size bound and
// output tile size are fixed at compile time.
//
// Template parameters:
//   up_x, up_y, down_x, down_y : up-/down-sampling factors used in the index
//                                arithmetic below.
//   kernel_h, kernel_w         : extent of the shared tap array `sk`; the
//                                runtime filter (p.kernel_h x p.kernel_w) is
//                                copied into it and the remainder is zeroed.
//   tile_out_h, tile_out_w     : size of the output tile produced per
//                                iteration of the inner loop_x loop.
//
// The indexing treats `input` as [major_dim, in_h, in_w, minor_dim] and `out`
// as [major_dim, out_h, out_w, minor_dim]. Only threadIdx.x is used to
// distribute work inside a block.
template <typename scalar_t, int up_x, int up_y, int down_x, int down_y,
          int kernel_h, int kernel_w, int tile_out_h, int tile_out_w>
__global__ void upfirdn2d_kernel(scalar_t *out, const scalar_t *input,
                                 const scalar_t *kernel,
                                 const UpFirDn2DKernelParams p) {
  // Number of input rows/columns that can contribute to one output tile.
  const int tile_in_h = ((tile_out_h - 1) * down_y + kernel_h - 1) / up_y + 1;
  const int tile_in_w = ((tile_out_w - 1) * down_x + kernel_w - 1) / up_x + 1;

  // Shared staging buffers for the filter taps and the input tile. Both are
  // declared as float regardless of scalar_t, so values are converted to
  // float when stored.
  __shared__ volatile float sk[kernel_h][kernel_w];
  __shared__ volatile float sx[tile_in_h][tile_in_w];

  // blockIdx.x encodes (output tile row, minor index):
  //   minor_idx  = blockIdx.x % p.minor_dim
  //   tile_out_y = (blockIdx.x / p.minor_dim) * tile_out_h
  int minor_idx = blockIdx.x;
  int tile_out_y = minor_idx / p.minor_dim;
  minor_idx -= tile_out_y * p.minor_dim;
  tile_out_y *= tile_out_h;
  int tile_out_x_base = blockIdx.y * p.loop_x * tile_out_w;
  int major_idx_base = blockIdx.z * p.loop_major;

  // Whole block is out of range: nothing to do. Note the bitwise `|` on the
  // comparison results (here and for `&` below): every operand is evaluated,
  // there is no short-circuiting. The conditions depend only on block-level
  // values, so all threads of a block take the same path.
  if (tile_out_x_base >= p.out_w | tile_out_y >= p.out_h |
      major_idx_base >= p.major_dim) {
    return;
  }

  // Cooperatively load the filter into shared memory, reversed along both
  // axes; taps outside the runtime filter size are stored as zero.
  for (int tap_idx = threadIdx.x; tap_idx < kernel_h * kernel_w;
       tap_idx += blockDim.x) {
    int ky = tap_idx / kernel_w;
    int kx = tap_idx - ky * kernel_w;
    scalar_t v = 0.0;

    if (kx < p.kernel_w & ky < p.kernel_h) {
      v = kernel[(p.kernel_h - 1 - ky) * p.kernel_w + (p.kernel_w - 1 - kx)];
    }

    sk[ky][kx] = v;
  }

  // Each block processes up to p.loop_major consecutive major indices and up
  // to p.loop_x consecutive output tiles along x.
  for (int loop_major = 0, major_idx = major_idx_base;
       loop_major < p.loop_major & major_idx < p.major_dim;
       loop_major++, major_idx++) {
    for (int loop_x = 0, tile_out_x = tile_out_x_base;
         loop_x < p.loop_x & tile_out_x < p.out_w;
         loop_x++, tile_out_x += tile_out_w) {
      // Position of the tile's first output sample on the upsampled grid and
      // the corresponding first input sample.
      int tile_mid_x = tile_out_x * down_x + up_x - 1 - p.pad_x0;
      int tile_mid_y = tile_out_y * down_y + up_y - 1 - p.pad_y0;
      int tile_in_x = floor_div(tile_mid_x, up_x);
      int tile_in_y = floor_div(tile_mid_y, up_y);

      // Barrier before `sx` is overwritten; on the first iteration it also
      // separates the `sk` stores above from their later reads.
      __syncthreads();

      // Cooperatively load the input tile; samples outside the input bounds
      // are stored as zero.
      for (int in_idx = threadIdx.x; in_idx < tile_in_h * tile_in_w;
           in_idx += blockDim.x) {
        int rel_in_y = in_idx / tile_in_w;
        int rel_in_x = in_idx - rel_in_y * tile_in_w;
        int in_x = rel_in_x + tile_in_x;
        int in_y = rel_in_y + tile_in_y;

        scalar_t v = 0.0;

        if (in_x >= 0 & in_y >= 0 & in_x < p.in_w & in_y < p.in_h) {
          v = input[((major_idx * p.in_h + in_y) * p.in_w + in_x) *
                        p.minor_dim +
                    minor_idx];
        }

        sx[rel_in_y][rel_in_x] = v;
      }

      // Barrier between filling `sx` and reading it in the loop below.
      __syncthreads();
      // Each thread computes the outputs out_idx, out_idx + blockDim.x, ...
      // of the current tile.
      for (int out_idx = threadIdx.x; out_idx < tile_out_h * tile_out_w;
           out_idx += blockDim.x) {
        int rel_out_y = out_idx / tile_out_w;
        int rel_out_x = out_idx - rel_out_y * tile_out_w;
        int out_x = rel_out_x + tile_out_x;
        int out_y = rel_out_y + tile_out_y;

        // First contributing input sample (relative to the tile) and the
        // first tap index for this output sample.
        int mid_x = tile_mid_x + rel_out_x * down_x;
        int mid_y = tile_mid_y + rel_out_y * down_y;
        int in_x = floor_div(mid_x, up_x);
        int in_y = floor_div(mid_y, up_y);
        int rel_in_x = in_x - tile_in_x;
        int rel_in_y = in_y - tile_in_y;
        int kernel_x = (in_x + 1) * up_x - mid_x - 1;
        int kernel_y = (in_y + 1) * up_y - mid_y - 1;

        scalar_t v = 0.0;

        // Accumulate over every up_y-th / up_x-th tap; the trip counts are
        // compile-time constants.
#pragma unroll
        for (int y = 0; y < kernel_h / up_y; y++)
#pragma unroll
          for (int x = 0; x < kernel_w / up_x; x++)
            v += sx[rel_in_y + y][rel_in_x + x] *
                 sk[kernel_y + y * up_y][kernel_x + x * up_x];

        // Tiles may overhang the output; only in-range samples are written.
        if (out_x < p.out_w & out_y < p.out_h) {
          out[((major_idx * p.out_h + out_y) * p.out_w + out_x) * p.minor_dim +
              minor_idx] = v;
        }
      }
    }
  }
}

// Host-side launcher for upfirdn2d.
//
// `input` is read as [major_dim, in_h, in_w, minor_dim] and `kernel` as
// [kernel_h, kernel_w]; both are made contiguous before use. The result has
// shape [major_dim, out_h, out_w, minor_dim] with
//   out_h = (in_h * up_y + pad_y0 + pad_y1 - kernel_h + down_y) / down_y
//   out_w = (in_w * up_x + pad_x0 + pad_x1 - kernel_w + down_x) / down_x
//
// For a handful of common (up, down, filter size) combinations a tiled,
// compile-time specialised kernel is launched; everything else falls back to
// the generic upfirdn2d_kernel_large.
torch::Tensor upfirdn2d_op(const torch::Tensor &input,
                           const torch::Tensor &kernel, int up_x, int up_y,
                           int down_x, int down_y, int pad_x0, int pad_x1,
                           int pad_y0, int pad_y1) {
  int curDevice = -1;
  cudaGetDevice(&curDevice);
  cudaStream_t stream = at::cuda::getCurrentCUDAStream(curDevice);

  UpFirDn2DKernelParams p;

  auto x = input.contiguous();
  auto k = kernel.contiguous();

  p.major_dim = x.size(0);
  p.in_h = x.size(1);
  p.in_w = x.size(2);
  p.minor_dim = x.size(3);
  p.kernel_h = k.size(0);
  p.kernel_w = k.size(1);
  p.up_x = up_x;
  p.up_y = up_y;
  p.down_x = down_x;
  p.down_y = down_y;
  p.pad_x0 = pad_x0;
  p.pad_x1 = pad_x1;
  p.pad_y0 = pad_y0;
  p.pad_y1 = pad_y1;

  p.out_h = (p.in_h * p.up_y + p.pad_y0 + p.pad_y1 - p.kernel_h + p.down_y) /
            p.down_y;
  p.out_w = (p.in_w * p.up_x + p.pad_x0 + p.pad_x1 - p.kernel_w + p.down_x) /
            p.down_x;

  auto out =
      at::empty({p.major_dim, p.out_h, p.out_w, p.minor_dim}, x.options());

  // Pick a specialised kernel. The checks are evaluated in order and a later
  // match overrides an earlier one, so the tighter filter-size bound wins
  // (e.g. a 3x3 filter matches mode 1 and then mode 2).
  int mode = -1;

  int tile_out_h = -1;
  int tile_out_w = -1;

  if (p.up_x == 1 && p.up_y == 1 && p.down_x == 1 && p.down_y == 1 &&
      p.kernel_h <= 4 && p.kernel_w <= 4) {
    mode = 1;
    tile_out_h = 16;
    tile_out_w = 64;
  }

  if (p.up_x == 1 && p.up_y == 1 && p.down_x == 1 && p.down_y == 1 &&
      p.kernel_h <= 3 && p.kernel_w <= 3) {
    mode = 2;
    tile_out_h = 16;
    tile_out_w = 64;
  }

  if (p.up_x == 2 && p.up_y == 2 && p.down_x == 1 && p.down_y == 1 &&
      p.kernel_h <= 4 && p.kernel_w <= 4) {
    mode = 3;
    tile_out_h = 16;
    tile_out_w = 64;
  }

  if (p.up_x == 2 && p.up_y == 2 && p.down_x == 1 && p.down_y == 1 &&
      p.kernel_h <= 2 && p.kernel_w <= 2) {
    mode = 4;
    tile_out_h = 16;
    tile_out_w = 64;
  }

  if (p.up_x == 1 && p.up_y == 1 && p.down_x == 2 && p.down_y == 2 &&
      p.kernel_h <= 4 && p.kernel_w <= 4) {
    mode = 5;
    tile_out_h = 8;
    tile_out_w = 32;
  }

  if (p.up_x == 1 && p.up_y == 1 && p.down_x == 2 && p.down_y == 2 &&
      p.kernel_h <= 2 && p.kernel_w <= 2) {
    mode = 6;
    tile_out_h = 8;
    tile_out_w = 32;
  }

  dim3 block_size;
  dim3 grid_size;

  if (tile_out_h > 0 && tile_out_w > 0) {
    // Tiled kernel: one block per (output tile row, minor index) in x, per
    // output tile column in y, and per group of loop_major major indices in z.
    p.loop_major = (p.major_dim - 1) / 16384 + 1;
    p.loop_x = 1;
    block_size = dim3(32 * 8, 1, 1);
    grid_size = dim3(((p.out_h - 1) / tile_out_h + 1) * p.minor_dim,
                     (p.out_w - 1) / (p.loop_x * tile_out_w) + 1,
                     (p.major_dim - 1) / p.loop_major + 1);
  } else {
    // Generic kernel: threads in x cover (out_y, minor) pairs, threads in y
    // cover output columns, each handling up to loop_x of them.
    p.loop_major = (p.major_dim - 1) / 16384 + 1;
    p.loop_x = 4;
    block_size = dim3(4, 32, 1);
    grid_size = dim3((p.out_h * p.minor_dim - 1) / block_size.x + 1,
                     (p.out_w - 1) / (p.loop_x * block_size.y) + 1,
                     (p.major_dim - 1) / p.loop_major + 1);
  }

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(x.scalar_type(), "upfirdn2d_cuda", [&] {
    switch (mode) {
      case 1:
        upfirdn2d_kernel<scalar_t, 1, 1, 1, 1, 4, 4, 16, 64>
            <<<grid_size, block_size, 0, stream>>>(out.data_ptr<scalar_t>(),
                                                   x.data_ptr<scalar_t>(),
                                                   k.data_ptr<scalar_t>(), p);

        break;

      case 2:
        upfirdn2d_kernel<scalar_t, 1, 1, 1, 1, 3, 3, 16, 64>
            <<<grid_size, block_size, 0, stream>>>(out.data_ptr<scalar_t>(),
                                                   x.data_ptr<scalar_t>(),
                                                   k.data_ptr<scalar_t>(), p);

        break;

      case 3:
        upfirdn2d_kernel<scalar_t, 2, 2, 1, 1, 4, 4, 16, 64>
            <<<grid_size, block_size, 0, stream>>>(out.data_ptr<scalar_t>(),
                                                   x.data_ptr<scalar_t>(),
                                                   k.data_ptr<scalar_t>(), p);

        break;

      case 4:
        upfirdn2d_kernel<scalar_t, 2, 2, 1, 1, 2, 2, 16, 64>
            <<<grid_size, block_size, 0, stream>>>(out.data_ptr<scalar_t>(),
                                                   x.data_ptr<scalar_t>(),
                                                   k.data_ptr<scalar_t>(), p);

        break;

      case 5:
        upfirdn2d_kernel<scalar_t, 1, 1, 2, 2, 4, 4, 8, 32>
            <<<grid_size, block_size, 0, stream>>>(out.data_ptr<scalar_t>(),
                                                   x.data_ptr<scalar_t>(),
                                                   k.data_ptr<scalar_t>(), p);

        break;

      case 6:
        // Mode 6 is only selected for filters up to 2x2, so use the 2x2
        // specialisation. The 4x4 instantiation also works on a zero-padded
        // filter, but it multiplies extra input samples by zero taps (which
        // turns inf/NaN neighbours into NaN) and loads a larger shared tile.
        upfirdn2d_kernel<scalar_t, 1, 1, 2, 2, 2, 2, 8, 32>
            <<<grid_size, block_size, 0, stream>>>(out.data_ptr<scalar_t>(),
                                                   x.data_ptr<scalar_t>(),
                                                   k.data_ptr<scalar_t>(), p);

        break;

      default:
        upfirdn2d_kernel_large<scalar_t><<<grid_size, block_size, 0, stream>>>(
            out.data_ptr<scalar_t>(), x.data_ptr<scalar_t>(),
            k.data_ptr<scalar_t>(), p);
    }
  });

  return out;
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/cuda/voxelization_cuda.cu
================================================
// Copyright (c) OpenMMLab. All rights reserved.
#include <stdio.h>
#include <stdlib.h>

#include "pytorch_cuda_helper.hpp"
#include "voxelization_cuda_kernel.cuh"

// Hard voxelization on the GPU.
//
// Runs five kernels on the current stream of `points`' device:
//   1. compute an integer voxel coordinate for every point,
//   2. find, per point, the first point sharing its coordinate and its slot
//      inside that voxel,
//   3. assign voxel indices and count voxels (single-thread kernel),
//   4. copy point features into `voxels`,
//   5. copy voxel coordinates into `coors`.
//
// Parameters:
//   points               : [num_points, num_features] input points.
//   voxels, coors,
//   num_points_per_voxel : pre-allocated outputs written through their data
//                          pointers. NOTE(review): `.contiguous()` is applied
//                          before taking the pointer, so a non-contiguous
//                          output would receive the writes in a temporary
//                          copy - callers are expected to pass contiguous
//                          tensors.
//   voxel_size           : at least 3 values (x, y, z).
//   coors_range          : at least 6 values (x/y/z min followed by x/y/z max).
//   max_points, max_voxels, NDim : forwarded to the kernels.
//
// Returns the number of voxels produced (read back from the device).
int HardVoxelizeForwardCUDAKernelLauncher(
    const at::Tensor &points, at::Tensor &voxels, at::Tensor &coors,
    at::Tensor &num_points_per_voxel, const std::vector<float> voxel_size,
    const std::vector<float> coors_range, const int max_points,
    const int max_voxels, const int NDim = 3) {
  // The vectors are indexed with fixed indices below; fail loudly instead of
  // reading out of bounds.
  TORCH_CHECK(voxel_size.size() >= 3,
              "voxel_size must have at least 3 elements, but got: ",
              voxel_size.size());
  TORCH_CHECK(coors_range.size() >= 6,
              "coors_range must have at least 6 elements, but got: ",
              coors_range.size());

  at::cuda::CUDAGuard device_guard(points.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();

  const int num_points = points.size(0);
  const int num_features = points.size(1);

  // With no points every grid below would have zero blocks, which is an
  // invalid launch configuration. No voxel can be produced either.
  if (num_points == 0) {
    return 0;
  }

  const float voxel_x = voxel_size[0];
  const float voxel_y = voxel_size[1];
  const float voxel_z = voxel_size[2];
  const float coors_x_min = coors_range[0];
  const float coors_y_min = coors_range[1];
  const float coors_z_min = coors_range[2];
  const float coors_x_max = coors_range[3];
  const float coors_y_max = coors_range[4];
  const float coors_z_max = coors_range[5];

  const int grid_x = round((coors_x_max - coors_x_min) / voxel_x);
  const int grid_y = round((coors_y_max - coors_y_min) / voxel_y);
  const int grid_z = round((coors_z_max - coors_z_min) / voxel_z);

  // map points to voxel coors
  at::Tensor temp_coors =
      at::zeros({num_points, NDim}, points.options().dtype(at::kInt));

  dim3 grid(std::min(at::cuda::ATenCeilDiv(num_points, 512), 4096));
  dim3 block(512);

  // 1. link point to corresponding voxel coors
  AT_DISPATCH_ALL_TYPES(
      points.scalar_type(), "hard_voxelize_kernel", ([&] {
        dynamic_voxelize_kernel<scalar_t, int><<<grid, block, 0, stream>>>(
            points.contiguous().data_ptr<scalar_t>(),
            temp_coors.contiguous().data_ptr<int>(), voxel_x, voxel_y, voxel_z,
            coors_x_min, coors_y_min, coors_z_min, coors_x_max, coors_y_max,
            coors_z_max, grid_x, grid_y, grid_z, num_points, num_features,
            NDim);
      }));

  AT_CUDA_CHECK(cudaGetLastError());

  // 2. map point to the idx of the corresponding voxel, find duplicate coor
  // create some temporary variables (initialised to -1)
  auto point_to_pointidx =
      -at::ones({num_points}, points.options().dtype(at::kInt));
  auto point_to_voxelidx =
      -at::ones({num_points}, points.options().dtype(at::kInt));

  dim3 map_grid(std::min(at::cuda::ATenCeilDiv(num_points, 512), 4096));
  dim3 map_block(512);

  AT_DISPATCH_ALL_TYPES(
      temp_coors.scalar_type(), "determin_duplicate", ([&] {
        point_to_voxelidx_kernel<int><<<map_grid, map_block, 0, stream>>>(
            temp_coors.contiguous().data_ptr<int>(),
            point_to_voxelidx.contiguous().data_ptr<int>(),
            point_to_pointidx.contiguous().data_ptr<int>(), max_points,
            max_voxels, num_points, NDim);
      }));

  AT_CUDA_CHECK(cudaGetLastError());

  // 3. determine voxel num and voxel's coor index
  // keeping this logic on the CUDA device is about 10 times faster
  auto coor_to_voxelidx =
      -at::ones({num_points}, points.options().dtype(at::kInt));
  auto voxel_num = at::zeros(
      {1}, points.options().dtype(at::kInt));  // must be zero from the beginning

  AT_DISPATCH_ALL_TYPES(temp_coors.scalar_type(), "determin_duplicate", ([&] {
                          determin_voxel_num<int><<<1, 1, 0, stream>>>(
                              num_points_per_voxel.contiguous().data_ptr<int>(),
                              point_to_voxelidx.contiguous().data_ptr<int>(),
                              point_to_pointidx.contiguous().data_ptr<int>(),
                              coor_to_voxelidx.contiguous().data_ptr<int>(),
                              voxel_num.contiguous().data_ptr<int>(),
                              max_points, max_voxels, num_points);
                        }));

  AT_CUDA_CHECK(cudaGetLastError());

  // 4. copy point features to voxels
  // Step 4 & 5 could be parallel
  auto pts_output_size = num_points * num_features;
  dim3 cp_grid(std::min(at::cuda::ATenCeilDiv(pts_output_size, 512), 4096));
  dim3 cp_block(512);
  // Use the dispatched scalar_t (as step 1 does) instead of a hard-coded
  // float, so non-float point tensors are copied with their own type rather
  // than failing inside data_ptr<float>().
  AT_DISPATCH_ALL_TYPES(
      points.scalar_type(), "assign_point_to_voxel", ([&] {
        assign_point_to_voxel<scalar_t, int>
            <<<cp_grid, cp_block, 0, stream>>>(
                pts_output_size, points.contiguous().data_ptr<scalar_t>(),
                point_to_voxelidx.contiguous().data_ptr<int>(),
                coor_to_voxelidx.contiguous().data_ptr<int>(),
                voxels.contiguous().data_ptr<scalar_t>(), max_points,
                num_features, num_points, NDim);
      }));

  AT_CUDA_CHECK(cudaGetLastError());

  // 5. copy coors of each voxels
  auto coors_output_size = num_points * NDim;
  dim3 coors_cp_grid(
      std::min(at::cuda::ATenCeilDiv(coors_output_size, 512), 4096));
  dim3 coors_cp_block(512);
  AT_DISPATCH_ALL_TYPES(
      points.scalar_type(), "assign_point_to_voxel", ([&] {
        assign_voxel_coors<float, int>
            <<<coors_cp_grid, coors_cp_block, 0, stream>>>(
                coors_output_size, temp_coors.contiguous().data_ptr<int>(),
                point_to_voxelidx.contiguous().data_ptr<int>(),
                coor_to_voxelidx.contiguous().data_ptr<int>(),
                coors.contiguous().data_ptr<int>(), num_points, NDim);
      }));

  AT_CUDA_CHECK(cudaGetLastError());

  // Device-to-host copy of the voxel counter written in step 3.
  auto voxel_num_cpu = voxel_num.to(at::kCPU);
  int voxel_num_int = voxel_num_cpu.data_ptr<int>()[0];

  return voxel_num_int;
}

// Dynamic voxelization on the GPU: computes one integer voxel coordinate per
// point and writes it to `coors`.
//
// Parameters:
//   points      : [num_points, num_features] input points.
//   coors       : pre-allocated int output, written through its data pointer.
//   voxel_size  : at least 3 values (x, y, z).
//   coors_range : at least 6 values (x/y/z min followed by x/y/z max).
//   NDim        : forwarded to the kernel.
void DynamicVoxelizeForwardCUDAKernelLauncher(
    const at::Tensor &points, at::Tensor &coors,
    const std::vector<float> voxel_size, const std::vector<float> coors_range,
    const int NDim = 3) {
  // The vectors are indexed with fixed indices below; fail loudly instead of
  // reading out of bounds.
  TORCH_CHECK(voxel_size.size() >= 3,
              "voxel_size must have at least 3 elements, but got: ",
              voxel_size.size());
  TORCH_CHECK(coors_range.size() >= 6,
              "coors_range must have at least 6 elements, but got: ",
              coors_range.size());

  at::cuda::CUDAGuard device_guard(points.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();

  const int num_points = points.size(0);
  const int num_features = points.size(1);

  // A launch with zero blocks is an invalid configuration; with no points
  // there is nothing to compute.
  if (num_points == 0) {
    return;
  }

  const float voxel_x = voxel_size[0];
  const float voxel_y = voxel_size[1];
  const float voxel_z = voxel_size[2];
  const float coors_x_min = coors_range[0];
  const float coors_y_min = coors_range[1];
  const float coors_z_min = coors_range[2];
  const float coors_x_max = coors_range[3];
  const float coors_y_max = coors_range[4];
  const float coors_z_max = coors_range[5];

  const int grid_x = round((coors_x_max - coors_x_min) / voxel_x);
  const int grid_y = round((coors_y_max - coors_y_min) / voxel_y);
  const int grid_z = round((coors_z_max - coors_z_min) / voxel_z);

  // One thread per point, THREADS_PER_BLOCK threads per block.
  const int col_blocks = at::cuda::ATenCeilDiv(num_points, THREADS_PER_BLOCK);
  dim3 blocks(col_blocks);
  dim3 threads(THREADS_PER_BLOCK);

  AT_DISPATCH_ALL_TYPES(points.scalar_type(), "dynamic_voxelize_kernel", [&] {
    dynamic_voxelize_kernel<scalar_t, int><<<blocks, threads, 0, stream>>>(
        points.contiguous().data_ptr<scalar_t>(),
        coors.contiguous().data_ptr<int>(), voxel_x, voxel_y, voxel_z,
        coors_x_min, coors_y_min, coors_z_min, coors_x_max, coors_y_max,
        coors_z_max, grid_x, grid_y, grid_z, num_points, num_features, NDim);
  });

  AT_CUDA_CHECK(cudaGetLastError());
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/deform_conv.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Device-dispatching entry point for deformable im2col.
//
// Forwards every argument unchanged to DISPATCH_DEVICE_IMPL under the key
// `deformable_im2col_impl`. The macro comes from pytorch_device_registry.hpp
// (not shown here); presumably it selects the backend implementation from the
// devices of the tensor arguments - confirm against that header.
// `data_col` is the last argument and, judging by the callers in this file,
// receives the result.
void deformable_im2col_impl(Tensor data_im, Tensor data_offset,
                            const int channels, const int height,
                            const int width, const int ksize_h,
                            const int ksize_w, const int pad_h, const int pad_w,
                            const int stride_h, const int stride_w,
                            const int dilation_h, const int dilation_w,
                            const int parallel_imgs, const int deformable_group,
                            Tensor data_col) {
  DISPATCH_DEVICE_IMPL(deformable_im2col_impl, data_im, data_offset, channels,
                       height, width, ksize_h, ksize_w, pad_h, pad_w, stride_h,
                       stride_w, dilation_h, dilation_w, parallel_imgs,
                       deformable_group, data_col);
}

// Device-dispatching entry point for deformable col2im (gradient with respect
// to the input image, going by the `grad_im` parameter name).
//
// Forwards every argument unchanged to DISPATCH_DEVICE_IMPL under the key
// `deformable_col2im_impl`. The macro comes from pytorch_device_registry.hpp
// (not shown here); presumably it selects the backend implementation from the
// devices of the tensor arguments - confirm against that header.
void deformable_col2im_impl(Tensor data_col, Tensor data_offset,
                            const int channels, const int height,
                            const int width, const int ksize_h,
                            const int ksize_w, const int pad_h, const int pad_w,
                            const int stride_h, const int stride_w,
                            const int dilation_h, const int dilation_w,
                            const int parallel_imgs, const int deformable_group,
                            Tensor grad_im) {
  DISPATCH_DEVICE_IMPL(deformable_col2im_impl, data_col, data_offset, channels,
                       height, width, ksize_h, ksize_w, pad_h, pad_w, stride_h,
                       stride_w, dilation_h, dilation_w, parallel_imgs,
                       deformable_group, grad_im);
}

// Device-dispatching entry point for the coordinate variant of deformable
// col2im (gradient with respect to the offsets, going by the `grad_offset`
// parameter name).
//
// Forwards every argument unchanged to DISPATCH_DEVICE_IMPL under the key
// `deformable_col2im_coord_impl`. The macro comes from
// pytorch_device_registry.hpp (not shown here); presumably it selects the
// backend implementation from the devices of the tensor arguments - confirm
// against that header.
void deformable_col2im_coord_impl(
    Tensor data_col, Tensor data_im, Tensor data_offset, const int channels,
    const int height, const int width, const int ksize_h, const int ksize_w,
    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
    const int dilation_h, const int dilation_w, const int parallel_imgs,
    const int deformable_group, Tensor grad_offset) {
  DISPATCH_DEVICE_IMPL(deformable_col2im_coord_impl, data_col, data_im,
                       data_offset, channels, height, width, ksize_h, ksize_w,
                       pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w,
                       parallel_imgs, deformable_group, grad_offset);
}

// Validates the argument shapes of a deformable convolution and throws (via
// TORCH_CHECK) with a descriptive message on the first violation.
//
// Parameters:
//   input      : 3D (C, H, W) or 4D (N, C, H, W) tensor.
//   offset     : tensor of the same rank as `input`; its channel dimension
//                must equal deformable_group * 2 * kH * kW and its spatial
//                size must equal the computed output size.
//   gradOutput : optional (may be NULL); when given, its channel and spatial
//                sizes must match the computed output.
//   weight     : contiguous 4D tensor (nOutputPlane, nInputPlane/group, kH, kW).
//   kH, kW / dH, dW / padH, padW / dilationH, dilationW : kernel size,
//                stride, padding and dilation.
//   group, deformable_group : must be positive.
//
// TORCH_CHECK concatenates its message arguments (it does not interpret
// printf-style format specifiers), so values are passed as separate
// arguments.
void deform_conv_shape_check(at::Tensor input, at::Tensor offset,
                             at::Tensor *gradOutput, at::Tensor weight, int kH,
                             int kW, int dH, int dW, int padH, int padW,
                             int dilationH, int dilationW, int group,
                             int deformable_group) {
  TORCH_CHECK(weight.ndimension() == 4,
              "4D weight tensor (nOutputPlane,nInputPlane,kH,kW) expected, "
              "but got: ",
              weight.ndimension());

  TORCH_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous");

  TORCH_CHECK(kW > 0 && kH > 0,
              "kernel size should be greater than zero, but got kH: ", kH,
              " kW: ", kW);

  TORCH_CHECK((weight.size(2) == kH && weight.size(3) == kW),
              "kernel size should be consistent with weight, but got kH: ", kH,
              " kW: ", kW, " weight.size(2): ", weight.size(2),
              ", weight.size(3): ", weight.size(3));

  TORCH_CHECK(dW > 0 && dH > 0,
              "stride should be greater than zero, but got dH: ", dH,
              " dW: ", dW);

  TORCH_CHECK(dilationW > 0 && dilationH > 0,
              "dilation should be greater than 0, but got dilationH: ",
              dilationH, " dilationW: ", dilationW);

  // Both are used as divisors / multipliers below.
  TORCH_CHECK(group > 0 && deformable_group > 0,
              "group and deformable_group should be greater than zero, but "
              "got group: ",
              group, " deformable_group: ", deformable_group);

  // Validate the rank before any dimension index derived from it is used.
  const int ndim = input.ndimension();
  TORCH_CHECK(ndim == 3 || ndim == 4,
              "3D or 4D input tensor expected but got: ", ndim);
  TORCH_CHECK(offset.ndimension() == ndim,
              "offset must have the same number of dimensions as input, "
              "expected: ",
              ndim, ", but got: ", offset.ndimension());

  // Feature / height / width dimension indices: shifted by one when a batch
  // dimension is present.
  const int dimf = (ndim == 4) ? 1 : 0;
  const int dimh = dimf + 1;
  const int dimw = dimf + 2;

  long nInputPlane = weight.size(1) * group;
  long inputHeight = input.size(dimh);
  long inputWidth = input.size(dimw);
  long nOutputPlane = weight.size(0);
  long outputHeight =
      (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1;
  long outputWidth =
      (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1;

  TORCH_CHECK(nInputPlane % deformable_group == 0,
              "input channels must be divisible by deformable group size");

  TORCH_CHECK(outputWidth >= 1 && outputHeight >= 1, "Given input size: (",
              nInputPlane, " x ", inputHeight, " x ", inputWidth,
              "). Calculated output size: (", nOutputPlane, " x ",
              outputHeight, " x ", outputWidth,
              "). Output size is too small");

  // Use dimf (not a fixed index) so that unbatched 3D input is checked on its
  // channel dimension, consistent with the gradOutput checks below.
  TORCH_CHECK(input.size(dimf) == nInputPlane,
              "invalid number of input planes, expected: ", nInputPlane,
              ", but got: ", input.size(dimf));

  TORCH_CHECK((inputHeight >= kH && inputWidth >= kW),
              "input image is smaller than kernel");

  TORCH_CHECK(
      (offset.size(dimh) == outputHeight && offset.size(dimw) == outputWidth),
      "invalid spatial size of offset, expected height: ", outputHeight,
      " width: ", outputWidth, ", but got height: ", offset.size(dimh),
      " width: ", offset.size(dimw));

  TORCH_CHECK((offset.size(dimf) == deformable_group * 2 * kH * kW),
              "invalid number of channels of offset");

  if (gradOutput != NULL) {
    TORCH_CHECK(gradOutput->size(dimf) == nOutputPlane,
                "invalid number of gradOutput planes, expected: ",
                nOutputPlane, ", but got: ", gradOutput->size(dimf));

    TORCH_CHECK((gradOutput->size(dimh) == outputHeight &&
                 gradOutput->size(dimw) == outputWidth),
                "invalid size of gradOutput, expected height: ", outputHeight,
                " width: ", outputWidth,
                " , but got height: ", gradOutput->size(dimh),
                " width: ", gradOutput->size(dimw));
  }
}

// Forward pass of deformable convolution.
//
// The batch is processed in chunks of `im2col_step` images: each chunk is
// unfolded with deformable_im2col_impl into a column buffer, multiplied per
// group with the weights, and the result is copied into `output`.
//
// Parameters:
//   input   : (N, C, H, W) or unbatched (C, H, W).
//   weight  : contiguous (nOutputPlane, C / group, kH, kW).
//   offset  : same rank as `input`.
//   output  : pre-allocated result; written via copy_ into its storage.
//   columns, ones : only device-checked here. `columns` is replaced by a
//             freshly allocated buffer and `ones` is not used by the
//             computation; both are kept for interface compatibility.
//   im2col_step : positive chunk size that must divide the batch size.
//
// All Tensor parameters are by-value handles: re-binding them below does not
// affect the caller's handles, while writes into their storage do.
void deform_conv_forward(Tensor input, Tensor weight, Tensor offset,
                         Tensor output, Tensor columns, Tensor ones, int kW,
                         int kH, int dW, int dH, int padW, int padH,
                         int dilationW, int dilationH, int group,
                         int deformable_group, int im2col_step) {
  if (input.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
    CHECK_CUDA_INPUT(input);
    CHECK_CUDA_INPUT(offset);
    CHECK_CUDA_INPUT(weight);
    CHECK_CUDA_INPUT(output);
    CHECK_CUDA_INPUT(columns);
    CHECK_CUDA_INPUT(ones);
#else
    AT_ERROR("DeformConv is not compiled with GPU support");
#endif
  } else {
    CHECK_CPU_INPUT(input);
    CHECK_CPU_INPUT(offset);
    CHECK_CPU_INPUT(weight);
    CHECK_CPU_INPUT(output);
    CHECK_CPU_INPUT(columns);
    CHECK_CPU_INPUT(ones);
  }

  deform_conv_shape_check(input, offset, NULL, weight, kH, kW, dH, dW, padH,
                          padW, dilationH, dilationW, group, deformable_group);
  at::DeviceGuard guard(input.device());

  if (input.ndimension() == 3) {
    // Force batch. Use the out-of-place unsqueeze so that only the local
    // handles change; the in-place variant would alter the shape of the
    // caller's tensors, which share the same TensorImpl.
    input = input.unsqueeze(0);
    offset = offset.unsqueeze(0);
  }

  long batchSize = input.size(0);
  long nInputPlane = input.size(1);
  long inputHeight = input.size(2);
  long inputWidth = input.size(3);

  long nOutputPlane = weight.size(0);

  long outputWidth =
      (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1;
  long outputHeight =
      (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1;

  TORCH_CHECK((offset.size(0) == batchSize), "invalid batch size of offset");

  // im2col_step is used as a divisor and as a view dimension below.
  TORCH_CHECK(im2col_step > 0,
              "im2col_step should be greater than zero, but got: ",
              im2col_step);
  TORCH_CHECK(batchSize % im2col_step == 0, "batch size (", batchSize,
              ") must be divisible by im2col_step (", im2col_step, ")");

  const long numChunks = batchSize / im2col_step;

  output = output.view(
      {numChunks, im2col_step, nOutputPlane, outputHeight, outputWidth});
  columns = at::zeros(
      {nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth},
      input.options());

  input = input.view(
      {numChunks, im2col_step, nInputPlane, inputHeight, inputWidth});
  offset = offset.view({numChunks, im2col_step,
                        deformable_group * 2 * kH * kW, outputHeight,
                        outputWidth});

  Tensor output_buffer = at::zeros(
      {numChunks, nOutputPlane, im2col_step * outputHeight, outputWidth},
      output.options());

  output_buffer = output_buffer.view(
      {output_buffer.size(0), group, output_buffer.size(1) / group,
       output_buffer.size(2), output_buffer.size(3)});

  // Grouped views. Both share storage with the tensors they are taken from,
  // so they can be created once instead of being re-viewed every iteration.
  const Tensor weight_grouped =
      weight.view({group, weight.size(0) / group, weight.size(1),
                   weight.size(2), weight.size(3)});
  const Tensor columns_grouped =
      columns.view({group, columns.size(0) / group, columns.size(1)});

  for (int elt = 0; elt < numChunks; elt++) {
    // Fills the 2D `columns` buffer (and thereby columns_grouped).
    deformable_im2col_impl(input[elt], offset[elt], nInputPlane, inputHeight,
                           inputWidth, kH, kW, padH, padW, dH, dW, dilationH,
                           dilationW, im2col_step, deformable_group, columns);

    for (int g = 0; g < group; g++) {
      // output_buffer[elt][g] += weight_g (flattened) x columns_g.
      // Assigning to the temporary returned by operator[] copies the values
      // into the indexed slice.
      output_buffer[elt][g] =
          output_buffer[elt][g]
              .flatten(1)
              .addmm_(weight_grouped[g].flatten(1), columns_grouped[g])
              .view_as(output_buffer[elt][g]);
    }
  }

  output_buffer = output_buffer.view(
      {output_buffer.size(0), output_buffer.size(1) * output_buffer.size(2),
       output_buffer.size(3), output_buffer.size(4)});

  // Reorder from (chunk, plane, step, h, w) to (chunk, step, plane, h, w) and
  // write the result into the caller's output storage.
  output_buffer = output_buffer.view(
      {numChunks, nOutputPlane, im2col_step, outputHeight, outputWidth});
  output_buffer.transpose_(1, 2);
  output.copy_(output_buffer);
}

// Backward pass of deformable convolution with respect to the input and the
// offsets.
//
// The batch is processed in chunks of `im2col_step` images. For each chunk
// the column gradient is computed per group as weight^T x gradOutput, then
// scattered into `gradOffset` (deformable_col2im_coord_impl) and `gradInput`
// (deformable_col2im_impl).
//
// Parameters:
//   input, offset : forward inputs, (N, C, H, W) or unbatched 3D.
//   gradOutput    : gradient of the forward output.
//   gradInput, gradOffset : pre-allocated results; written through views
//                   that share their storage.
//   weight        : contiguous (nOutputPlane, C / group, kH, kW).
//   columns       : only device-checked; replaced by a freshly allocated
//                   buffer.
//   im2col_step   : positive chunk size that must divide the batch size.
//
// All Tensor parameters are by-value handles: re-binding them below does not
// affect the caller's handles, while writes into their storage do.
void deform_conv_backward_input(Tensor input, Tensor offset, Tensor gradOutput,
                                Tensor gradInput, Tensor gradOffset,
                                Tensor weight, Tensor columns, int kW, int kH,
                                int dW, int dH, int padW, int padH,
                                int dilationW, int dilationH, int group,
                                int deformable_group, int im2col_step) {
  if (input.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
    CHECK_CUDA_INPUT(input);
    CHECK_CUDA_INPUT(offset);
    CHECK_CUDA_INPUT(gradOutput);
    CHECK_CUDA_INPUT(gradInput);
    CHECK_CUDA_INPUT(gradOffset);
    CHECK_CUDA_INPUT(weight);
    CHECK_CUDA_INPUT(columns);
#else
    AT_ERROR("DeformConv is not compiled with GPU support");
#endif
  } else {
    CHECK_CPU_INPUT(input);
    CHECK_CPU_INPUT(offset);
    CHECK_CPU_INPUT(gradOutput);
    CHECK_CPU_INPUT(gradInput);
    CHECK_CPU_INPUT(gradOffset);
    CHECK_CPU_INPUT(weight);
    CHECK_CPU_INPUT(columns);
  }
  deform_conv_shape_check(input, offset, &gradOutput, weight, kH, kW, dH, dW,
                          padH, padW, dilationH, dilationW, group,
                          deformable_group);

  at::DeviceGuard guard(input.device());

  if (input.ndimension() == 3) {
    // Force batch (local views only; the caller's tensors keep their shape).
    input = input.view({1, input.size(0), input.size(1), input.size(2)});
    offset = offset.view({1, offset.size(0), offset.size(1), offset.size(2)});
    gradOutput = gradOutput.view(
        {1, gradOutput.size(0), gradOutput.size(1), gradOutput.size(2)});
  }

  long batchSize = input.size(0);
  long nInputPlane = input.size(1);
  long inputHeight = input.size(2);
  long inputWidth = input.size(3);

  long nOutputPlane = weight.size(0);

  long outputWidth =
      (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1;
  long outputHeight =
      (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1;

  TORCH_CHECK((offset.size(0) == batchSize), "invalid batch size of offset");

  // im2col_step is used as a divisor and as a view dimension below.
  TORCH_CHECK(im2col_step > 0,
              "im2col_step should be greater than zero, but got: ",
              im2col_step);
  TORCH_CHECK(batchSize % im2col_step == 0, "batch size (", batchSize,
              ") must be divisible by im2col_step (", im2col_step, ")");

  const long numChunks = batchSize / im2col_step;

  columns = at::zeros(
      {nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth},
      input.options());

  // change order of grad output: (chunk, step, plane, h, w) ->
  // (chunk, plane, step, h, w). transpose_ acts on the local view only.
  gradOutput = gradOutput.view(
      {numChunks, im2col_step, nOutputPlane, outputHeight, outputWidth});
  gradOutput.transpose_(1, 2);

  gradInput = gradInput.view(
      {numChunks, im2col_step, nInputPlane, inputHeight, inputWidth});
  input = input.view(
      {numChunks, im2col_step, nInputPlane, inputHeight, inputWidth});
  gradOffset = gradOffset.view({numChunks, im2col_step,
                                deformable_group * 2 * kH * kW, outputHeight,
                                outputWidth});
  offset = offset.view({numChunks, im2col_step,
                        deformable_group * 2 * kH * kW, outputHeight,
                        outputWidth});

  // divide into groups. These views share storage with their sources, so they
  // can be created once instead of being re-viewed every iteration.
  Tensor columns_grouped =
      columns.view({group, columns.size(0) / group, columns.size(1)});
  const Tensor weight_grouped =
      weight.view({group, weight.size(0) / group, weight.size(1),
                   weight.size(2), weight.size(3)});
  const Tensor gradOutput_grouped = gradOutput.view(
      {gradOutput.size(0), group, gradOutput.size(1) / group,
       gradOutput.size(2), gradOutput.size(3), gradOutput.size(4)});

  for (int elt = 0; elt < numChunks; elt++) {
    for (int g = 0; g < group; g++) {
      // beta = 0, alpha = 1: columns_g = weight_g^T x gradOutput_g,
      // overwriting the previous chunk's contents.
      columns_grouped[g] = columns_grouped[g].addmm_(
          weight_grouped[g].flatten(1).transpose(0, 1),
          gradOutput_grouped[elt][g].flatten(1), 0.0f, 1.0f);
    }

    // `columns` is the 2D view of the buffer just filled through
    // columns_grouped.
    deformable_col2im_coord_impl(columns, input[elt], offset[elt], nInputPlane,
                                 inputHeight, inputWidth, kH, kW, padH, padW,
                                 dH, dW, dilationH, dilationW, im2col_step,
                                 deformable_group, gradOffset[elt]);

    deformable_col2im_impl(columns, offset[elt], nInputPlane, inputHeight,
                           inputWidth, kH, kW, padH, padW, dH, dW, dilationH,
                           dilationW, im2col_step, deformable_group,
                           gradInput[elt]);
  }
}

// Accumulates the weight gradient of a deformable convolution into gradWeight.
//
// The batch is processed in chunks of im2col_step samples. For every chunk the
// column matrix is produced by deformable_im2col_impl, and then for every
// group g:
//     gradWeight[g] += scale * gradOutputChunk[g] * columns[g]^T
// (addmm_ called with beta = 1.0 and alpha = scale).
//
// Arguments as they are used in this function:
//   input, offset, gradOutput  read only; reshaped through local views.
//   gradWeight                 receives the accumulated result.
//   columns                    the incoming handle is only validated; the
//                              local is then rebound to a fresh zero tensor.
//   ones                       only validated by CHECK_*_INPUT, otherwise
//                              unused here.
//   kW/kH, dW/dH, padW/padH, dilationW/dilationH
//                              used for the output-size formula and forwarded
//                              to deformable_im2col_impl.
//   group                      number of groups the channel dims are split in.
//   deformable_group           enters the offset channel count
//                              (deformable_group * 2 * kH * kW).
//   scale                      multiplier applied to each accumulated product.
//   im2col_step                samples handled per deformable_im2col_impl call.
//
// All Tensor parameters are taken by value, so every `x = x.view(...)` below
// only rebinds the local handle.
// NOTE(review): the views assume batchSize is a multiple of im2col_step
// (batchSize / im2col_step is integer division) — confirm the caller
// guarantees this.
void deform_conv_backward_parameters(Tensor input, Tensor offset,
                                     Tensor gradOutput, Tensor gradWeight,
                                     Tensor columns, Tensor ones, int kW,
                                     int kH, int dW, int dH, int padW, int padH,
                                     int dilationW, int dilationH, int group,
                                     int deformable_group, float scale,
                                     int im2col_step) {
  // Validate every tensor argument with the check macro matching the device
  // of `input`; a CUDA input in a build without MMCV_WITH_CUDA is an error.
  if (input.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
    CHECK_CUDA_INPUT(input);
    CHECK_CUDA_INPUT(offset);
    CHECK_CUDA_INPUT(gradOutput);
    CHECK_CUDA_INPUT(gradWeight);
    CHECK_CUDA_INPUT(columns);
    CHECK_CUDA_INPUT(ones);
#else
    AT_ERROR("DeformConv is not compiled with GPU support");
#endif
  } else {
    CHECK_CPU_INPUT(input);
    CHECK_CPU_INPUT(offset);
    CHECK_CPU_INPUT(gradOutput);
    CHECK_CPU_INPUT(gradWeight);
    CHECK_CPU_INPUT(columns);
    CHECK_CPU_INPUT(ones);
  }

  // Shape validation is delegated; gradWeight is passed in the weight slot.
  deform_conv_shape_check(input, offset, &gradOutput, gradWeight, kH, kW, dH,
                          dW, padH, padW, dilationH, dilationW, group,
                          deformable_group);
  at::DeviceGuard guard(input.device());

  // batch == 0 records that the input arrived without a batch dimension.
  int batch = 1;

  if (input.ndimension() == 3) {
    // Force batch: view the 3-D input and gradOutput as a batch of one.
    // NOTE(review): offset is not given a leading batch dimension here, yet
    // offset.size(0) is compared with batchSize below — confirm 3-D inputs
    // are actually supported on this path.
    batch = 0;
    input = input.view(
        at::IntList({1, input.size(0), input.size(1), input.size(2)}));
    gradOutput = gradOutput.view(
        {1, gradOutput.size(0), gradOutput.size(1), gradOutput.size(2)});
  }

  long batchSize = input.size(0);
  long nInputPlane = input.size(1);
  long inputHeight = input.size(2);
  long inputWidth = input.size(3);

  long nOutputPlane = gradWeight.size(0);

  // Convolution output-size formula (integer division).
  long outputWidth =
      (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1;
  long outputHeight =
      (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1;

  TORCH_CHECK((offset.size(0) == batchSize), "invalid batch size of offset");

  // Fresh zero-filled column buffer for one chunk of im2col_step samples.
  columns = at::zeros(
      {nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth},
      input.options());

  // View gradOutput as (chunks, im2col_step, C_out, H_out, W_out) and swap
  // dims 1 and 2 in place so the channel dim comes before the step dim.
  gradOutput = gradOutput.view({batchSize / im2col_step, im2col_step,
                                nOutputPlane, outputHeight, outputWidth});
  gradOutput.transpose_(1, 2);

  // Copy the transposed gradOutput into a separate buffer so it can be viewed
  // as (chunks, C_out, im2col_step * H_out, W_out).
  Tensor gradOutputBuffer = at::zeros_like(gradOutput);
  gradOutputBuffer =
      gradOutputBuffer.view({batchSize / im2col_step, nOutputPlane, im2col_step,
                             outputHeight, outputWidth});
  gradOutputBuffer = gradOutputBuffer.contiguous();
  gradOutputBuffer.copy_(gradOutput);
  gradOutputBuffer =
      gradOutputBuffer.view({batchSize / im2col_step, nOutputPlane,
                             im2col_step * outputHeight, outputWidth});

  // Undo the in-place transpose and return gradOutput to its 4-D shape.
  gradOutput.transpose_(1, 2);
  gradOutput =
      gradOutput.view({batchSize, nOutputPlane, outputHeight, outputWidth});

  // Chunked views of input and offset, indexed by `elt` in the loop below.
  input = input.view({batchSize / im2col_step, im2col_step, nInputPlane,
                      inputHeight, inputWidth});
  offset =
      offset.view({batchSize / im2col_step, im2col_step,
                   deformable_group * 2 * kH * kW, outputHeight, outputWidth});

  for (int elt = 0; elt < batchSize / im2col_step; elt++) {
    // Fill `columns` for the current chunk.
    deformable_im2col_impl(input[elt], offset[elt], nInputPlane, inputHeight,
                           inputWidth, kH, kW, padH, padW, dH, dW, dilationH,
                           dilationW, im2col_step, deformable_group, columns);

    // divide into group: expose an explicit group dimension on the gradient
    // buffer, the columns and gradWeight.
    gradOutputBuffer = gradOutputBuffer.view(
        {gradOutputBuffer.size(0), group, gradOutputBuffer.size(1) / group,
         gradOutputBuffer.size(2), gradOutputBuffer.size(3)});
    columns = columns.view({group, columns.size(0) / group, columns.size(1)});
    gradWeight =
        gradWeight.view({group, gradWeight.size(0) / group, gradWeight.size(1),
                         gradWeight.size(2), gradWeight.size(3)});

    for (int g = 0; g < group; g++) {
      // gradWeight[g] = 1.0 * gradWeight[g] + scale * gradOut[g] x columns[g]^T,
      // computed on the flattened (2-D) weight and stored back in its shape.
      gradWeight[g] = gradWeight[g]
                          .flatten(1)
                          .addmm_(gradOutputBuffer[elt][g].flatten(1),
                                  columns[g].transpose(1, 0), 1.0, scale)
                          .view_as(gradWeight[g]);
    }
    // Merge the group dimension back so the next iteration starts from the
    // same shapes.
    gradOutputBuffer = gradOutputBuffer.view(
        {gradOutputBuffer.size(0),
         gradOutputBuffer.size(1) * gradOutputBuffer.size(2),
         gradOutputBuffer.size(3), gradOutputBuffer.size(4)});
    columns =
        columns.view({columns.size(0) * columns.size(1), columns.size(2)});
    gradWeight = gradWeight.view({gradWeight.size(0) * gradWeight.size(1),
                                  gradWeight.size(2), gradWeight.size(3),
                                  gradWeight.size(4)});
  }

  // Restore the un-chunked 4-D views of input and offset.
  input = input.view({batchSize, nInputPlane, inputHeight, inputWidth});
  offset = offset.view(
      {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth});

  if (batch == 0) {
    // Input arrived without a batch dimension: drop the one added above.
    gradOutput = gradOutput.view({nOutputPlane, outputHeight, outputWidth});
    input = input.view({nInputPlane, inputHeight, inputWidth});
  }
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/deform_roi_pool.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Dispatch stub for the deformable RoI pooling forward pass: hands every
// argument, unchanged and in order, to DISPATCH_DEVICE_IMPL under the key
// deform_roi_pool_forward_impl.
// NOTE(review): the macro is defined outside this file (presumably in
// pytorch_device_registry.hpp) and presumably selects the implementation
// registered for the tensors' device — confirm there.
void deform_roi_pool_forward_impl(Tensor input, Tensor rois, Tensor offset,
                                  Tensor output, int pooled_height,
                                  int pooled_width, float spatial_scale,
                                  int sampling_ratio, float gamma) {
  DISPATCH_DEVICE_IMPL(deform_roi_pool_forward_impl, input, rois, offset,
                       output, pooled_height, pooled_width, spatial_scale,
                       sampling_ratio, gamma);
}

// Dispatch stub for the deformable RoI pooling backward pass: hands every
// argument, unchanged and in order, to DISPATCH_DEVICE_IMPL under the key
// deform_roi_pool_backward_impl.
// NOTE(review): grad_input and grad_offset are presumably the buffers the
// device implementation fills — confirm in the registered implementations.
void deform_roi_pool_backward_impl(Tensor grad_output, Tensor input,
                                   Tensor rois, Tensor offset,
                                   Tensor grad_input, Tensor grad_offset,
                                   int pooled_height, int pooled_width,
                                   float spatial_scale, int sampling_ratio,
                                   float gamma) {
  DISPATCH_DEVICE_IMPL(deform_roi_pool_backward_impl, grad_output, input, rois,
                       offset, grad_input, grad_offset, pooled_height,
                       pooled_width, spatial_scale, sampling_ratio, gamma);
}

// Public entry point for the deformable RoI pooling forward pass; a thin
// pass-through that calls deform_roi_pool_forward_impl with the same
// arguments in the same order.
void deform_roi_pool_forward(Tensor input, Tensor rois, Tensor offset,
                             Tensor output, int pooled_height, int pooled_width,
                             float spatial_scale, int sampling_ratio,
                             float gamma) {
  deform_roi_pool_forward_impl(input, rois, offset, output, pooled_height,
                               pooled_width, spatial_scale, sampling_ratio,
                               gamma);
}

// Public entry point for the deformable RoI pooling backward pass; a thin
// pass-through that calls deform_roi_pool_backward_impl with the same
// arguments in the same order.
void deform_roi_pool_backward(Tensor grad_output, Tensor input, Tensor rois,
                              Tensor offset, Tensor grad_input,
                              Tensor grad_offset, int pooled_height,
                              int pooled_width, float spatial_scale,
                              int sampling_ratio, float gamma) {
  deform_roi_pool_backward_impl(grad_output, input, rois, offset, grad_input,
                                grad_offset, pooled_height, pooled_width,
                                spatial_scale, sampling_ratio, gamma);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/focal_loss.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Dispatch stub for the sigmoid focal loss forward pass: forwards all
// arguments unchanged to DISPATCH_DEVICE_IMPL under the key
// sigmoid_focal_loss_forward_impl.
// NOTE(review): the macro is defined outside this file and presumably selects
// a per-device implementation — confirm in pytorch_device_registry.hpp.
void sigmoid_focal_loss_forward_impl(Tensor input, Tensor target, Tensor weight,
                                     Tensor output, float gamma, float alpha) {
  DISPATCH_DEVICE_IMPL(sigmoid_focal_loss_forward_impl, input, target, weight,
                       output, gamma, alpha);
}

// Dispatch stub for the sigmoid focal loss backward pass: forwards all
// arguments unchanged to DISPATCH_DEVICE_IMPL under the key
// sigmoid_focal_loss_backward_impl.
// NOTE(review): grad_input is presumably the buffer the device
// implementation fills — confirm in the registered implementations.
void sigmoid_focal_loss_backward_impl(Tensor input, Tensor target,
                                      Tensor weight, Tensor grad_input,
                                      float gamma, float alpha) {
  DISPATCH_DEVICE_IMPL(sigmoid_focal_loss_backward_impl, input, target, weight,
                       grad_input, gamma, alpha);
}

// Dispatch stub for the softmax focal loss forward pass: forwards all
// arguments unchanged to DISPATCH_DEVICE_IMPL under the key
// softmax_focal_loss_forward_impl.
// NOTE(review): the macro is defined outside this file and presumably selects
// a per-device implementation — confirm in pytorch_device_registry.hpp.
void softmax_focal_loss_forward_impl(Tensor input, Tensor target, Tensor weight,
                                     Tensor output, float gamma, float alpha) {
  DISPATCH_DEVICE_IMPL(softmax_focal_loss_forward_impl, input, target, weight,
                       output, gamma, alpha);
}

// Dispatch stub for the softmax focal loss backward pass: forwards all
// arguments unchanged to DISPATCH_DEVICE_IMPL under the key
// softmax_focal_loss_backward_impl. Unlike the sigmoid variant it takes an
// extra `buff` tensor.
// NOTE(review): the role of `buff` (presumably scratch space) is not visible
// here — confirm in the registered implementations.
void softmax_focal_loss_backward_impl(Tensor input, Tensor target,
                                      Tensor weight, Tensor buff,
                                      Tensor grad_input, float gamma,
                                      float alpha) {
  DISPATCH_DEVICE_IMPL(softmax_focal_loss_backward_impl, input, target, weight,
                       buff, grad_input, gamma, alpha);
}

// Public entry point for the sigmoid focal loss forward pass; calls
// sigmoid_focal_loss_forward_impl with the same arguments in the same order.
void sigmoid_focal_loss_forward(Tensor input, Tensor target, Tensor weight,
                                Tensor output, float gamma, float alpha) {
  sigmoid_focal_loss_forward_impl(input, target, weight, output, gamma, alpha);
}

// Public entry point for the sigmoid focal loss backward pass; calls
// sigmoid_focal_loss_backward_impl with the same arguments in the same order.
void sigmoid_focal_loss_backward(Tensor input, Tensor target, Tensor weight,
                                 Tensor grad_input, float gamma, float alpha) {
  sigmoid_focal_loss_backward_impl(input, target, weight, grad_input, gamma,
                                   alpha);
}

// Public entry point for the softmax focal loss forward pass; calls
// softmax_focal_loss_forward_impl with the same arguments in the same order.
void softmax_focal_loss_forward(Tensor input, Tensor target, Tensor weight,
                                Tensor output, float gamma, float alpha) {
  softmax_focal_loss_forward_impl(input, target, weight, output, gamma, alpha);
}

// Public entry point for the softmax focal loss backward pass; calls
// softmax_focal_loss_backward_impl with the same arguments in the same order.
void softmax_focal_loss_backward(Tensor input, Tensor target, Tensor weight,
                                 Tensor buff, Tensor grad_input, float gamma,
                                 float alpha) {
  softmax_focal_loss_backward_impl(input, target, weight, buff, grad_input,
                                   gamma, alpha);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/furthest_point_sample.cpp
================================================
// Modified from
// https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/sampling.cpp

#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Dispatch stub for furthest point sampling: forwards all arguments unchanged
// to DISPATCH_DEVICE_IMPL under the key furthest_point_sampling_forward_impl.
// NOTE(review): b, n and m are presumably batch size, number of input points
// and number of sampled points, with idx_tensor receiving the result —
// confirm against the registered implementations.
void furthest_point_sampling_forward_impl(Tensor points_tensor,
                                          Tensor temp_tensor, Tensor idx_tensor,
                                          int b, int n, int m) {
  DISPATCH_DEVICE_IMPL(furthest_point_sampling_forward_impl, points_tensor,
                       temp_tensor, idx_tensor, b, n, m);
}

// Dispatch stub for the "with_dist" variant of furthest point sampling:
// forwards all arguments unchanged to DISPATCH_DEVICE_IMPL under the key
// furthest_point_sampling_with_dist_forward_impl.
// NOTE(review): points_tensor presumably holds pairwise distances rather than
// coordinates in this variant — confirm against the registered
// implementations.
void furthest_point_sampling_with_dist_forward_impl(Tensor points_tensor,
                                                    Tensor temp_tensor,
                                                    Tensor idx_tensor, int b,
                                                    int n, int m) {
  DISPATCH_DEVICE_IMPL(furthest_point_sampling_with_dist_forward_impl,
                       points_tensor, temp_tensor, idx_tensor, b, n, m);
}

// Public entry point for furthest point sampling; calls
// furthest_point_sampling_forward_impl with the same arguments in the same
// order.
void furthest_point_sampling_forward(Tensor points_tensor, Tensor temp_tensor,
                                     Tensor idx_tensor, int b, int n, int m) {
  furthest_point_sampling_forward_impl(points_tensor, temp_tensor, idx_tensor,
                                       b, n, m);
}

// Public entry point for the "with_dist" furthest point sampling variant;
// calls furthest_point_sampling_with_dist_forward_impl with the same
// arguments in the same order.
void furthest_point_sampling_with_dist_forward(Tensor points_tensor,
                                               Tensor temp_tensor,
                                               Tensor idx_tensor, int b, int n,
                                               int m) {
  furthest_point_sampling_with_dist_forward_impl(points_tensor, temp_tensor,
                                                 idx_tensor, b, n, m);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/fused_bias_leakyrelu.cpp
================================================
// Modified from
// https://github.com/rosinality/stylegan2-pytorch/blob/master/op/fused_bias_act.cpp

/*
Copyright (c) 2021, NVIDIA Corporation. All rights reserved.

NVIDIA Source Code License for StyleGAN2 with Adaptive Discriminator
Augmentation (ADA)
=======================================================================

1. Definitions

"Licensor" means any person or entity that distributes its Work.

"Software" means the original work of authorship made available under
this License.

"Work" means the Software and any additions to or derivative works of
the Software that are made available under this License.

The terms "reproduce," "reproduction," "derivative works," and
"distribution" have the meaning as provided under U.S. copyright law;
provided, however, that for the purposes of this License, derivative
works shall not include works that remain separable from, or merely
link (or bind by name) to the interfaces of, the Work.

Works, including the Software, are "made available" under this License
by including in or with the Work either (a) a copyright notice
referencing the applicability of this License to the Work, or (b) a
copy of this License.

2. License Grants

    2.1 Copyright Grant. Subject to the terms and conditions of this
    License, each Licensor grants to you a perpetual, worldwide,
    non-exclusive, royalty-free, copyright license to reproduce,
    prepare derivative works of, publicly display, publicly perform,
    sublicense and distribute its Work and any resulting derivative
    works in any form.

3. Limitations

    3.1 Redistribution. You may reproduce or distribute the Work only
    if (a) you do so under this License, (b) you include a complete
    copy of this License with your distribution, and (c) you retain
    without modification any copyright, patent, trademark, or
    attribution notices that are present in the Work.

    3.2 Derivative Works. You may specify that additional or different
    terms apply to the use, reproduction, and distribution of your
    derivative works of the Work ("Your Terms") only if (a) Your Terms
    provide that the use limitation in Section 3.3 applies to your
    derivative works, and (b) you identify the specific derivative
    works that are subject to Your Terms. Notwithstanding Your Terms,
    this License (including the redistribution requirements in Section
    3.1) will continue to apply to the Work itself.

    3.3 Use Limitation. The Work and any derivative works thereof only
    may be used or intended for use non-commercially. Notwithstanding
    the foregoing, NVIDIA and its affiliates may use the Work and any
    derivative works commercially. As used herein, "non-commercially"
    means for research or evaluation purposes only.

    3.4 Patent Claims. If you bring or threaten to bring a patent claim
    against any Licensor (including any claim, cross-claim or
    counterclaim in a lawsuit) to enforce any patents that you allege
    are infringed by any Work, then your rights under this License from
    such Licensor (including the grant in Section 2.1) will terminate
    immediately.

    3.5 Trademarks. This License does not grant any rights to use any
    Licensor’s or its affiliates’ names, logos, or trademarks, except
    as necessary to reproduce the notices described in this License.

    3.6 Termination. If you violate any term of this License, then your
    rights under this License (including the grant in Section 2.1) will
    terminate immediately.

4. Disclaimer of Warranty.

THE WORK IS PROVIDED "AS IS" WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR
NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER
THIS LICENSE.

5. Limitation of Liability.

EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL
THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE
SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT,
INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF
OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK
(INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION,
LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER
COMMERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF
THE POSSIBILITY OF SUCH DAMAGES.

=======================================================================
*/

#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Dispatch stub for the fused bias + leaky-ReLU op: forwards all arguments
// unchanged to DISPATCH_DEVICE_IMPL under the key
// fused_bias_leakyrelu_op_impl and returns whatever tensor the selected
// implementation returns.
// NOTE(review): the meaning of `act`, `grad` and `refer` is defined by the
// registered implementations, not visible here — confirm there.
torch::Tensor fused_bias_leakyrelu_op_impl(const torch::Tensor& input,
                                           const torch::Tensor& bias,
                                           const torch::Tensor& refer, int act,
                                           int grad, float alpha, float scale) {
  return DISPATCH_DEVICE_IMPL(fused_bias_leakyrelu_op_impl, input, bias, refer,
                              act, grad, alpha, scale);
}

// Public entry point for the fused bias + leaky-ReLU op; returns the result
// of fused_bias_leakyrelu_op_impl called with the same arguments in the same
// order.
torch::Tensor fused_bias_leakyrelu(const torch::Tensor& input,
                                   const torch::Tensor& bias,
                                   const torch::Tensor& refer, int act,
                                   int grad, float alpha, float scale) {
  return fused_bias_leakyrelu_op_impl(input, bias, refer, act, grad, alpha,
                                      scale);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/gather_points.cpp
================================================
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Dispatch stub for the gather-points forward pass: forwards all arguments
// unchanged to DISPATCH_DEVICE_IMPL under the key gather_points_forward_impl.
// NOTE(review): b/c/n/npoints are presumably batch, channels, source points
// and gathered points, with `out` receiving the result — confirm against the
// registered implementations.
void gather_points_forward_impl(int b, int c, int n, int npoints,
                                const Tensor points, const Tensor idx,
                                Tensor out) {
  DISPATCH_DEVICE_IMPL(gather_points_forward_impl, b, c, n, npoints, points,
                       idx, out);
}

// Dispatch stub for the gather-points backward pass: forwards all arguments
// unchanged to DISPATCH_DEVICE_IMPL under the key
// gather_points_backward_impl.
// NOTE(review): grad_points is presumably the buffer the device
// implementation fills — confirm in the registered implementations.
void gather_points_backward_impl(int b, int c, int n, int npoints,
                                 const Tensor grad_out, const Tensor idx,
                                 Tensor grad_points) {
  DISPATCH_DEVICE_IMPL(gather_points_backward_impl, b, c, n, npoints, grad_out,
                       idx, grad_points);
}

// Public entry point for the gather-points forward pass. Takes tensors first
// and sizes last, and reorders them to the (sizes..., tensors...) order that
// gather_points_forward_impl expects.
void gather_points_forward(Tensor points_tensor, Tensor idx_tensor,
                           Tensor out_tensor, int b, int c, int n,
                           int npoints) {
  gather_points_forward_impl(b, c, n, npoints, points_tensor, idx_tensor,
                             out_tensor);
}

// Public entry point for the gather-points backward pass. Takes tensors first
// and sizes last, and reorders them to the (sizes..., tensors...) order that
// gather_points_backward_impl expects.
void gather_points_backward(Tensor grad_out_tensor, Tensor idx_tensor,
                            Tensor grad_points_tensor, int b, int c, int n,
                            int npoints) {
  gather_points_backward_impl(b, c, n, npoints, grad_out_tensor, idx_tensor,
                              grad_points_tensor);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/group_points.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved.
// Modified from
// https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/group_points.cpp

#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Dispatch stub for the group-points forward pass: forwards all arguments
// unchanged to DISPATCH_DEVICE_IMPL under the key group_points_forward_impl.
// NOTE(review): b/c/n/npoints/nsample are presumably batch, channels, source
// points, group centres and samples per group — confirm against the
// registered implementations.
void group_points_forward_impl(int b, int c, int n, int npoints, int nsample,
                               const Tensor points, const Tensor idx,
                               Tensor out) {
  DISPATCH_DEVICE_IMPL(group_points_forward_impl, b, c, n, npoints, nsample,
                       points, idx, out);
}

// Dispatch stub for the group-points backward pass: forwards all arguments
// unchanged to DISPATCH_DEVICE_IMPL under the key group_points_backward_impl.
// NOTE(review): grad_points is presumably the buffer the device
// implementation fills — confirm in the registered implementations.
void group_points_backward_impl(int b, int c, int n, int npoints, int nsample,
                                const Tensor grad_out, const Tensor idx,
                                Tensor grad_points) {
  DISPATCH_DEVICE_IMPL(group_points_backward_impl, b, c, n, npoints, nsample,
                       grad_out, idx, grad_points);
}

// Public entry point for the group-points forward pass. Takes tensors first
// and sizes last, and reorders them to the (sizes..., tensors...) order that
// group_points_forward_impl expects.
//
// The call goes through group_points_forward_impl (which performs the device
// dispatch under its own key) rather than invoking DISPATCH_DEVICE_IMPL
// directly, matching group_points_backward and the other public wrappers.
void group_points_forward(Tensor points_tensor, Tensor idx_tensor,
                          Tensor out_tensor, int b, int c, int n, int npoints,
                          int nsample) {
  group_points_forward_impl(b, c, n, npoints, nsample, points_tensor,
                            idx_tensor, out_tensor);
}

// Public entry point for the group-points backward pass. Takes tensors first
// and sizes last, and reorders them to the (sizes..., tensors...) order that
// group_points_backward_impl expects.
void group_points_backward(Tensor grad_out_tensor, Tensor idx_tensor,
                           Tensor grad_points_tensor, int b, int c, int n,
                           int npoints, int nsample) {
  group_points_backward_impl(b, c, n, npoints, nsample, grad_out_tensor,
                             idx_tensor, grad_points_tensor);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/info.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
// modified from
// https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/csrc/vision.cpp
#include "pytorch_cpp_helper.hpp"

#ifdef MMCV_WITH_CUDA
#ifndef HIP_DIFF
#include <cuda_runtime_api.h>
// Returns the value of the CUDART_VERSION macro seen at compile time. Only
// defined when MMCV_WITH_CUDA is set and HIP_DIFF is not, which is the same
// condition under which get_compiling_cuda_version() calls it.
int get_cudart_version() { return CUDART_VERSION; }
#endif
#endif

// Reports the CUDA runtime version this extension was compiled against.
//
// - CUDA build (MMCV_WITH_CUDA without HIP_DIFF): the integer from
//   get_cudart_version() rendered as "major.minor", with ".patch" appended
//   only when the last decimal digit is non-zero. The formatting mirrors
//   https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/cuda/detail/CUDAHooks.cpp#L231
// - MMCV_WITH_CUDA together with HIP_DIFF: "rocm not available".
// - No MMCV_WITH_CUDA: "not available".
std::string get_compiling_cuda_version() {
#if defined(MMCV_WITH_CUDA) && !defined(HIP_DIFF)
  const int version = get_cudart_version();
  std::ostringstream formatted;
  formatted << (version / 1000) << "." << (version / 10 % 100);
  const int patch_digit = version % 10;
  if (patch_digit != 0) {
    formatted << "." << patch_digit;
  }
  return formatted.str();
#elif defined(MMCV_WITH_CUDA)
  return std::string("rocm not available");
#else
  return std::string("not available");
#endif
}

// similar to
// https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/Version.cpp
// Describes the compiler that built this translation unit, similar to
// https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/Version.cpp
//
// Each section below appends its text when its predefined macros are present:
//   "GCC <major>.<minor>"            - __GNUC__ defined and __clang__ not
//   "clang <major>.<minor>.<patch>"  - __clang_major__ defined
//   "MSVC <_MSC_FULL_VER>"           - _MSC_VER defined
// The sections are independent, so more than one may contribute; the pieces
// are concatenated without a separator.
std::string get_compiler_version() {
  std::ostringstream description;

#if defined(__GNUC__) && !defined(__clang__)
  description << "GCC " << __GNUC__ << "." << __GNUC_MINOR__;
#endif

#if defined(__clang_major__)
  description << "clang " << __clang_major__ << "." << __clang_minor__ << "."
              << __clang_patchlevel__;
#endif

#if defined(_MSC_VER)
  description << "MSVC " << _MSC_FULL_VER;
#endif

  return description.str();
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/iou3d.cpp
================================================
// Modified from
// https://github.com/open-mmlab/OpenPCDet/blob/master/pcdet/ops/iou3d_nms/src/iou3d_nms.cpp

/*
3D IoU Calculation and Rotated NMS(modified from 2D NMS written by others)
Written by Shaoshuai Shi
All Rights Reserved 2019-2020.
*/

#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Number of bits in one `unsigned long long` mask word. The NMS wrappers
// below use it to size the suppression mask (col_blocks words per box) and to
// split a box index into a word index and a bit position.
const int THREADS_PER_BLOCK_NMS = sizeof(unsigned long long) * 8;

// Dispatch stub for the BEV box-overlap computation: forwards all arguments
// unchanged to DISPATCH_DEVICE_IMPL under the key
// iou3d_boxes_overlap_bev_forward_impl.
// NOTE(review): the macro is defined outside this file and presumably selects
// a per-device implementation — confirm in pytorch_device_registry.hpp.
void iou3d_boxes_overlap_bev_forward_impl(const int num_a, const Tensor boxes_a,
                                          const int num_b, const Tensor boxes_b,
                                          Tensor ans_overlap) {
  DISPATCH_DEVICE_IMPL(iou3d_boxes_overlap_bev_forward_impl, num_a, boxes_a,
                       num_b, boxes_b, ans_overlap);
}

// Dispatch stub for the BEV box-IoU computation: forwards all arguments
// unchanged to DISPATCH_DEVICE_IMPL under the key
// iou3d_boxes_iou_bev_forward_impl.
// NOTE(review): the macro is defined outside this file and presumably selects
// a per-device implementation — confirm in pytorch_device_registry.hpp.
void iou3d_boxes_iou_bev_forward_impl(const int num_a, const Tensor boxes_a,
                                      const int num_b, const Tensor boxes_b,
                                      Tensor ans_iou) {
  DISPATCH_DEVICE_IMPL(iou3d_boxes_iou_bev_forward_impl, num_a, boxes_a, num_b,
                       boxes_b, ans_iou);
}

// Dispatch stub for the rotated-NMS mask computation: forwards all arguments
// unchanged to DISPATCH_DEVICE_IMPL under the key iou3d_nms_forward_impl.
// `mask` is a raw pointer; the caller in this file passes the storage of a
// (boxes_num, col_blocks) int64 tensor allocated with the options of `boxes`.
// NOTE(review): the bit layout written into `mask` is defined by the
// registered implementations — confirm there.
void iou3d_nms_forward_impl(const Tensor boxes, unsigned long long *mask,
                            int boxes_num, float nms_overlap_thresh) {
  DISPATCH_DEVICE_IMPL(iou3d_nms_forward_impl, boxes, mask, boxes_num,
                       nms_overlap_thresh);
}

// Dispatch stub for the "normal" NMS mask computation: forwards all arguments
// unchanged to DISPATCH_DEVICE_IMPL under the key
// iou3d_nms_normal_forward_impl.
// `mask` is a raw pointer; the caller in this file passes the storage of a
// (boxes_num, col_blocks) int64 tensor allocated with the options of `boxes`.
// NOTE(review): how this variant differs from iou3d_nms_forward_impl
// (presumably axis-aligned overlap instead of rotated) is not visible here —
// confirm in the registered implementations.
void iou3d_nms_normal_forward_impl(const Tensor boxes, unsigned long long *mask,
                                   int boxes_num, float nms_overlap_thresh) {
  DISPATCH_DEVICE_IMPL(iou3d_nms_normal_forward_impl, boxes, mask, boxes_num,
                       nms_overlap_thresh);
}

// Public entry point for the BEV box-overlap computation.
//   boxes_a:     (N, 5) [x1, y1, x2, y2, ry]
//   boxes_b:     (M, 5)
//   ans_overlap: (N, M)
// The leading dimension of each box tensor is read and handed, together with
// the tensors, to iou3d_boxes_overlap_bev_forward_impl.
void iou3d_boxes_overlap_bev_forward(Tensor boxes_a, Tensor boxes_b,
                                     Tensor ans_overlap) {
  const int rows_a = boxes_a.size(0);
  const int rows_b = boxes_b.size(0);
  iou3d_boxes_overlap_bev_forward_impl(rows_a, boxes_a, rows_b, boxes_b,
                                       ans_overlap);
}

// Public entry point for the BEV box-IoU computation.
//   boxes_a: (N, 5) [x1, y1, x2, y2, ry]
//   boxes_b: (M, 5)
//   ans_iou: (N, M)
// The leading dimension of each box tensor is read and handed, together with
// the tensors, to iou3d_boxes_iou_bev_forward_impl.
void iou3d_boxes_iou_bev_forward(Tensor boxes_a, Tensor boxes_b,
                                 Tensor ans_iou) {
  const int rows_a = boxes_a.size(0);
  const int rows_b = boxes_b.size(0);
  iou3d_boxes_iou_bev_forward_impl(rows_a, boxes_a, rows_b, boxes_b, ans_iou);
}

// Rotated NMS driver: asks the device implementation for the pairwise
// suppression mask, then reduces it on the host to the list of kept boxes.
//
//   boxes:    (N, 5) [x1, y1, x2, y2, ry]; must be contiguous.
//   keep:     (N) int64, contiguous; the first *keep_num entries receive the
//             indices of the kept boxes, in increasing order. It is accessed
//             through a host pointer here.
//   keep_num: int64 tensor whose first element receives the number of kept
//             boxes. It is always written, including when N == 0.
//   nms_overlap_thresh: forwarded to iou3d_nms_forward_impl.
void iou3d_nms_forward(Tensor boxes, Tensor keep, Tensor keep_num,
                       float nms_overlap_thresh) {
  CHECK_CONTIGUOUS(boxes);
  CHECK_CONTIGUOUS(keep);

  int boxes_num = boxes.size(0);
  int64_t *keep_data = keep.data_ptr<int64_t>();
  int64_t *keep_num_data = keep_num.data_ptr<int64_t>();

  // Number of 64-bit mask words needed to hold one bit per box.
  const int col_blocks =
      (boxes_num + THREADS_PER_BLOCK_NMS - 1) / THREADS_PER_BLOCK_NMS;

  // One row of col_blocks mask words per box, allocated on the device of
  // `boxes` and reinterpreted as unsigned 64-bit words.
  Tensor mask =
      at::empty({boxes_num, col_blocks}, boxes.options().dtype(at::kLong));
  unsigned long long *mask_data =
      (unsigned long long *)mask.data_ptr<int64_t>();
  iou3d_nms_forward_impl(boxes, mask_data, boxes_num, nms_overlap_thresh);

  // Bring the mask to the host for the sequential reduction.
  at::Tensor mask_cpu = mask.to(at::kCPU);
  unsigned long long *mask_host =
      (unsigned long long *)mask_cpu.data_ptr<int64_t>();

  // Running set of suppressed boxes, one bit per box. The vector constructor
  // value-initialises every word to zero, so no memset is needed (and an
  // empty vector is never indexed when boxes_num == 0).
  std::vector<unsigned long long> remv_cpu(col_blocks);

  int num_to_keep = 0;

  for (int i = 0; i < boxes_num; i++) {
    int nblock = i / THREADS_PER_BLOCK_NMS;
    int inblock = i % THREADS_PER_BLOCK_NMS;

    if (!(remv_cpu[nblock] & (1ULL << inblock))) {
      keep_data[num_to_keep++] = i;
      // Row i of the mask; the offset is computed in 64 bits so that
      // i * col_blocks cannot overflow int for very large N.
      unsigned long long *p = mask_host + static_cast<int64_t>(i) * col_blocks;
      for (int j = nblock; j < col_blocks; j++) {
        remv_cpu[j] |= p[j];
      }
    }
  }

  // Written once after the loop so the count is also reported (as 0) when
  // there are no boxes at all.
  *keep_num_data = num_to_keep;
}

// "Normal" NMS driver: asks the device implementation for the pairwise
// suppression mask, then reduces it on the host to the list of kept boxes.
//
//   boxes:    (N, 5) [x1, y1, x2, y2, ry]; must be contiguous.
//   keep:     (N) int64, contiguous; the first *keep_num entries receive the
//             indices of the kept boxes, in increasing order. It is accessed
//             through a host pointer here.
//   keep_num: int64 tensor whose first element receives the number of kept
//             boxes.
//   nms_overlap_thresh: forwarded to iou3d_nms_normal_forward_impl.
void iou3d_nms_normal_forward(Tensor boxes, Tensor keep, Tensor keep_num,
                              float nms_overlap_thresh) {
  CHECK_CONTIGUOUS(boxes);
  CHECK_CONTIGUOUS(keep);

  int boxes_num = boxes.size(0);
  int64_t *keep_data = keep.data_ptr<int64_t>();
  int64_t *keep_num_data = keep_num.data_ptr<int64_t>();

  // Number of 64-bit mask words needed to hold one bit per box.
  const int col_blocks =
      (boxes_num + THREADS_PER_BLOCK_NMS - 1) / THREADS_PER_BLOCK_NMS;

  // One row of col_blocks mask words per box, allocated on the device of
  // `boxes` and reinterpreted as unsigned 64-bit words.
  Tensor mask =
      at::empty({boxes_num, col_blocks}, boxes.options().dtype(at::kLong));
  unsigned long long *mask_data =
      (unsigned long long *)mask.data_ptr<int64_t>();
  iou3d_nms_normal_forward_impl(boxes, mask_data, boxes_num,
                                nms_overlap_thresh);

  // Bring the mask to the host for the sequential reduction.
  at::Tensor mask_cpu = mask.to(at::kCPU);
  unsigned long long *mask_host =
      (unsigned long long *)mask_cpu.data_ptr<int64_t>();

  // Running set of suppressed boxes, one bit per box. The vector constructor
  // value-initialises every word to zero, so no memset is needed (and an
  // empty vector is never indexed when boxes_num == 0).
  std::vector<unsigned long long> remv_cpu(col_blocks);
  int num_to_keep = 0;

  for (int i = 0; i < boxes_num; i++) {
    int nblock = i / THREADS_PER_BLOCK_NMS;
    int inblock = i % THREADS_PER_BLOCK_NMS;

    if (!(remv_cpu[nblock] & (1ULL << inblock))) {
      keep_data[num_to_keep++] = i;
      // Row i of the mask; the offset is computed in 64 bits so that
      // i * col_blocks cannot overflow int for very large N.
      unsigned long long *p = mask_host + static_cast<int64_t>(i) * col_blocks;
      for (int j = nblock; j < col_blocks; j++) {
        remv_cpu[j] |= p[j];
      }
    }
  }

  *keep_num_data = num_to_keep;
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/knn.cpp
================================================
// Modified from
// https://github.com/CVMI-Lab/PAConv/tree/main/scene_seg/lib/pointops/src/knnquery_heap

#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Dispatch stub for the k-nearest-neighbour query: forwards all arguments
// unchanged to DISPATCH_DEVICE_IMPL under the key knn_forward_impl.
// NOTE(review): b/n/m/nsample are presumably batch, reference points, query
// points and neighbours per query, with idx and dist2 receiving the results —
// confirm against the registered implementations.
void knn_forward_impl(int b, int n, int m, int nsample, const Tensor xyz,
                      const Tensor new_xyz, Tensor idx, Tensor dist2) {
  DISPATCH_DEVICE_IMPL(knn_forward_impl, b, n, m, nsample, xyz, new_xyz, idx,
                       dist2);
}

// Public entry point for the k-nearest-neighbour query. Takes tensors first
// and sizes last, and reorders them to the (sizes..., tensors...) order that
// knn_forward_impl expects.
void knn_forward(Tensor xyz_tensor, Tensor new_xyz_tensor, Tensor idx_tensor,
                 Tensor dist2_tensor, int b, int n, int m, int nsample) {
  knn_forward_impl(b, n, m, nsample, xyz_tensor, new_xyz_tensor, idx_tensor,
                   dist2_tensor);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/masked_conv2d.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Dispatch stub for masked im2col: forwards all arguments unchanged to
// DISPATCH_DEVICE_IMPL under the key masked_im2col_forward_impl.
// NOTE(review): mask_h_idx / mask_w_idx are presumably the row/column indices
// of the unmasked positions and `col` the output buffer — confirm against the
// registered implementations.
void masked_im2col_forward_impl(const Tensor im, const Tensor mask_h_idx,
                                const Tensor mask_w_idx, Tensor col,
                                const int kernel_h, const int kernel_w,
                                const int pad_h, const int pad_w) {
  DISPATCH_DEVICE_IMPL(masked_im2col_forward_impl, im, mask_h_idx, mask_w_idx,
                       col, kernel_h, kernel_w, pad_h, pad_w);
}

// Dispatch stub for masked col2im: forwards all arguments unchanged to
// DISPATCH_DEVICE_IMPL under the key masked_col2im_forward_impl.
// NOTE(review): `im` is presumably the output buffer that the columns are
// scattered back into — confirm against the registered implementations.
void masked_col2im_forward_impl(const Tensor col, const Tensor mask_h_idx,
                                const Tensor mask_w_idx, Tensor im, int height,
                                int width, int channels) {
  DISPATCH_DEVICE_IMPL(masked_col2im_forward_impl, col, mask_h_idx, mask_w_idx,
                       im, height, width, channels);
}

// Public entry point for masked im2col; calls masked_im2col_forward_impl with
// the same arguments in the same order.
void masked_im2col_forward(const Tensor im, const Tensor mask_h_idx,
                           const Tensor mask_w_idx, Tensor col,
                           const int kernel_h, const int kernel_w,
                           const int pad_h, const int pad_w) {
  masked_im2col_forward_impl(im, mask_h_idx, mask_w_idx, col, kernel_h,
                             kernel_w, pad_h, pad_w);
}

// Public entry point for masked col2im; calls masked_col2im_forward_impl with
// the same arguments in the same order.
void masked_col2im_forward(const Tensor col, const Tensor mask_h_idx,
                           const Tensor mask_w_idx, Tensor im, int height,
                           int width, int channels) {
  masked_col2im_forward_impl(col, mask_h_idx, mask_w_idx, im, height, width,
                             channels);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/min_area_polygons.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Dispatch stub for the minimum-area polygon computation: forwards both
// tensors unchanged to DISPATCH_DEVICE_IMPL under the key
// min_area_polygons_impl.
// NOTE(review): `polygons` is presumably the output buffer — confirm against
// the registered implementations.
void min_area_polygons_impl(const Tensor pointsets, Tensor polygons) {
  DISPATCH_DEVICE_IMPL(min_area_polygons_impl, pointsets, polygons);
}

// Public entry point for the minimum-area polygon computation; calls
// min_area_polygons_impl with the same arguments in the same order.
void min_area_polygons(const Tensor pointsets, Tensor polygons) {
  min_area_polygons_impl(pointsets, polygons);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/modulated_deform_conv.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Dispatch stub for modulated deformable im2col: forwards all arguments
// unchanged to DISPATCH_DEVICE_IMPL under the key
// modulated_deformable_im2col_impl.
// NOTE(review): data_col is the only non-const tensor and is presumably the
// output column buffer — confirm against the registered implementations.
void modulated_deformable_im2col_impl(
    const Tensor data_im, const Tensor data_offset, const Tensor data_mask,
    const int batch_size, const int channels, const int height_im,
    const int width_im, const int height_col, const int width_col,
    const int kernel_h, const int kernel_w, const int pad_h, const int pad_w,
    const int stride_h, const int stride_w, const int dilation_h,
    const int dilation_w, const int deformable_group, Tensor data_col) {
  DISPATCH_DEVICE_IMPL(modulated_deformable_im2col_impl, data_im, data_offset,
                       data_mask, batch_size, channels, height_im, width_im,
                       height_col, width_col, kernel_h, kernel_w, pad_h, pad_w,
                       stride_h, stride_w, dilation_h, dilation_w,
                       deformable_group, data_col);
}

// Backend dispatcher for the modulated deformable col2im step.
// All arguments are passed through, unchanged and in order, to
// DISPATCH_DEVICE_IMPL under the key modulated_deformable_col2im_impl.
// `grad_im` is the only non-const tensor; the backward pass in this file
// passes a slice of grad_input for it.
// The macro itself is defined outside this file; presumably it picks the
// implementation registered for the tensors' device -- confirm in
// pytorch_device_registry.hpp.
void modulated_deformable_col2im_impl(
    const Tensor data_col, const Tensor data_offset, const Tensor data_mask,
    const int batch_size, const int channels, const int height_im,
    const int width_im, const int height_col, const int width_col,
    const int kernel_h, const int kernel_w, const int pad_h, const int pad_w,
    const int stride_h, const int stride_w, const int dilation_h,
    const int dilation_w, const int deformable_group, Tensor grad_im) {
  DISPATCH_DEVICE_IMPL(modulated_deformable_col2im_impl, data_col, data_offset,
                       data_mask, batch_size, channels, height_im, width_im,
                       height_col, width_col, kernel_h, kernel_w, pad_h, pad_w,
                       stride_h, stride_w, dilation_h, dilation_w,
                       deformable_group, grad_im);
}

// Backend dispatcher for the coordinate-gradient step of modulated deformable
// convolution.
// All arguments are passed through, unchanged and in order, to
// DISPATCH_DEVICE_IMPL under the key modulated_deformable_col2im_coord_impl.
// `grad_offset` and `grad_mask` are the non-const tensors; the backward pass
// in this file passes per-sample slices of its offset/mask gradients for them.
// The macro itself is defined outside this file; presumably it picks the
// implementation registered for the tensors' device -- confirm in
// pytorch_device_registry.hpp.
void modulated_deformable_col2im_coord_impl(
    const Tensor data_col, const Tensor data_im, const Tensor data_offset,
    const Tensor data_mask, const int batch_size, const int channels,
    const int height_im, const int width_im, const int height_col,
    const int width_col, const int kernel_h, const int kernel_w,
    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
    const int dilation_h, const int dilation_w, const int deformable_group,
    Tensor grad_offset, Tensor grad_mask) {
  DISPATCH_DEVICE_IMPL(modulated_deformable_col2im_coord_impl, data_col,
                       data_im, data_offset, data_mask, batch_size, channels,
                       height_im, width_im, height_col, width_col, kernel_h,
                       kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h,
                       dilation_w, deformable_group, grad_offset, grad_mask);
}

// Forward pass of modulated deformable convolution (host-side driver).
//
// For each sample b, modulated_deformable_im2col_impl unfolds input[b] (using
// offset[b] and mask[b]) into `columns`; then, per group g, output[b][g] is
// accumulated in place with addmm_ against that group's flattened weights.
// When `with_bias` is true, `bias` is broadcast-added over the whole output.
//
// Arguments (as indexed by this function):
//   input   : (batch, channels, height, width)
//   weight  : (channels_out, channels / group, kernel_h, kernel_w)
//   bias    : 1-D with channels_out entries; only read when with_bias is true
//   offset, mask : indexed per sample; forwarded to the im2col implementation
//   output  : re-viewed as (batch, channels_out, height_out, width_out) and
//             zeroed before accumulation; the views share the caller's
//             storage, so the result is visible to the caller
//   ones, columns : handles taken by value and re-assigned locally; the
//             caller's tensors are not modified through them
//   kernel_*, stride_*, pad_*, dilation_*, group, deformable_group :
//             convolution hyper-parameters
//
// Raises (via AT_ERROR) when kernel_h/kernel_w disagree with weight's last two
// dimensions, or when channels != weight.size(1) * group.
void modulated_deform_conv_forward(
    Tensor input, Tensor weight, Tensor bias, Tensor ones, Tensor offset,
    Tensor mask, Tensor output, Tensor columns, int kernel_h, int kernel_w,
    const int stride_h, const int stride_w, const int pad_h, const int pad_w,
    const int dilation_h, const int dilation_w, const int group,
    const int deformable_group, const bool with_bias) {
  at::DeviceGuard guard(input.device());

  const int batch = input.size(0);
  const int channels = input.size(1);
  const int height = input.size(2);
  const int width = input.size(3);

  const int channels_out = weight.size(0);
  const int channels_kernel = weight.size(1);
  const int kernel_h_ = weight.size(2);
  const int kernel_w_ = weight.size(3);

  // AT_ERROR concatenates its arguments (it does not interpret printf-style
  // placeholders), so the message is assembled piecewise: requested kernel
  // size first, then the size found in `weight`.
  if (kernel_h_ != kernel_h || kernel_w_ != kernel_w) {
    AT_ERROR("Input shape and kernel shape won't match: (", kernel_h, " x ",
             kernel_w, " vs ", kernel_h_, " x ", kernel_w_, ").");
  }
  if (channels != channels_kernel * group) {
    AT_ERROR("Input shape and kernel channels won't match: (", channels,
             " vs ", channels_kernel * group, ").");
  }

  const int height_out =
      (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1;
  const int width_out =
      (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1;

  // NOTE: `ones` is refreshed here but is not read again in this function.
  if (ones.ndimension() != 2 ||
      ones.size(0) * ones.size(1) < height_out * width_out) {
    // Resize plane and fill with ones...
    ones = at::ones({height_out, width_out}, input.options());
  }

  // resize output
  output = output.view({batch, channels_out, height_out, width_out}).zero_();
  // resize temporary columns
  columns =
      at::zeros({channels * kernel_h * kernel_w, 1 * height_out * width_out},
                input.options());

  // Expose the group dimension: (batch, group, channels_out / group, H, W).
  output = output.view({output.size(0), group, output.size(1) / group,
                        output.size(2), output.size(3)});

  for (int b = 0; b < batch; b++) {
    modulated_deformable_im2col_impl(
        input[b], offset[b], mask[b], 1, channels, height, width, height_out,
        width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w,
        dilation_h, dilation_w, deformable_group, columns);

    // divide into groups
    weight = weight.view({group, weight.size(0) / group, weight.size(1),
                          weight.size(2), weight.size(3)});
    columns = columns.view({group, columns.size(0) / group, columns.size(1)});

    for (int g = 0; g < group; g++) {
      // output[b][g] += weight[g] (as a matrix) * columns[g]
      output[b][g] = output[b][g]
                         .flatten(1)
                         .addmm_(weight[g].flatten(1), columns[g])
                         .view_as(output[b][g]);
    }

    // Restore the ungrouped shapes expected by the next im2col call.
    weight = weight.view({weight.size(0) * weight.size(1), weight.size(2),
                          weight.size(3), weight.size(4)});
    columns =
        columns.view({columns.size(0) * columns.size(1), columns.size(2)});
  }

  // Merge the group dimension back: (batch, channels_out, H, W).
  output = output.view({output.size(0), output.size(1) * output.size(2),
                        output.size(3), output.size(4)});

  if (with_bias) {
    output += bias.view({1, bias.size(0), 1, 1});
  }
}

// Backward pass of modulated deformable convolution (host-side driver).
//
// For each sample b:
//   1. columns[g] = weight[g]^T * grad_output[b][g] for every group g
//      (addmm_ with beta = 0, so the previous contents are discarded);
//   2. modulated_deformable_col2im_coord_impl receives those columns together
//      with grad_offset[b] and grad_mask[b];
//   3. modulated_deformable_col2im_impl receives them with grad_input[b];
//   4. `columns` is recomputed from input[b] by
//      modulated_deformable_im2col_impl;
//   5. grad_weight[g] += grad_output[b][g] * columns[g]^T, and when with_bias
//      is true grad_bias[g] += grad_output[b][g] * ones, for every group g.
//
// Arguments (as indexed by this function):
//   input       : (batch, channels, height, width)
//   weight      : (channels_out, channels / group, kernel_h, kernel_w)
//   grad_output : (batch, channels_out, height_out, width_out)
//   grad_input  : re-viewed as (batch, channels, height, width); it is not
//                 cleared here
//   grad_weight, grad_bias : accumulated in place across batch and groups;
//                 they are not cleared here
//   grad_offset, grad_mask : indexed per sample and handed to the coord impl
//   ones, columns : handles taken by value and re-assigned locally
//   bias        : not read by this function
//
// Raises (via AT_ERROR) when kernel_h/kernel_w disagree with weight's last two
// dimensions, or when channels != weight.size(1) * group.
void modulated_deform_conv_backward(
    Tensor input, Tensor weight, Tensor bias, Tensor ones, Tensor offset,
    Tensor mask, Tensor columns, Tensor grad_input, Tensor grad_weight,
    Tensor grad_bias, Tensor grad_offset, Tensor grad_mask, Tensor grad_output,
    int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h,
    int pad_w, int dilation_h, int dilation_w, int group, int deformable_group,
    const bool with_bias) {
  at::DeviceGuard guard(input.device());

  const int batch = input.size(0);
  const int channels = input.size(1);
  const int height = input.size(2);
  const int width = input.size(3);

  const int channels_kernel = weight.size(1);
  const int kernel_h_ = weight.size(2);
  const int kernel_w_ = weight.size(3);
  // AT_ERROR concatenates its arguments (it does not interpret printf-style
  // placeholders), so the message is assembled piecewise: requested kernel
  // size first, then the size found in `weight`.
  if (kernel_h_ != kernel_h || kernel_w_ != kernel_w) {
    AT_ERROR("Input shape and kernel shape won't match: (", kernel_h, " x ",
             kernel_w, " vs ", kernel_h_, " x ", kernel_w_, ").");
  }
  if (channels != channels_kernel * group) {
    AT_ERROR("Input shape and kernel channels won't match: (", channels,
             " vs ", channels_kernel * group, ").");
  }

  const int height_out =
      (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1;
  const int width_out =
      (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1;

  // NOTE(review): a caller-supplied 2-D `ones` with MORE than
  // height_out * width_out elements is kept as-is; the bias addmm_ below
  // flattens it to a column, so its length would then not match
  // grad_output's flattened spatial size -- confirm callers never pass one.
  if (ones.ndimension() != 2 ||
      ones.size(0) * ones.size(1) < height_out * width_out) {
    // Resize plane and fill with ones...
    ones = at::ones({height_out, width_out}, input.options());
  }

  grad_input = grad_input.view({batch, channels, height, width});
  columns = at::zeros({channels * kernel_h * kernel_w, height_out * width_out},
                      input.options());

  // Expose the group dimension: (batch, group, channels_out / group, H, W).
  grad_output =
      grad_output.view({grad_output.size(0), group, grad_output.size(1) / group,
                        grad_output.size(2), grad_output.size(3)});

  for (int b = 0; b < batch; b++) {
    // divide into groups
    columns = columns.view({group, columns.size(0) / group, columns.size(1)});
    weight = weight.view({group, weight.size(0) / group, weight.size(1),
                          weight.size(2), weight.size(3)});

    for (int g = 0; g < group; g++) {
      // beta = 0, alpha = 1: overwrite columns[g] with
      // weight[g]^T * grad_output[b][g].
      columns[g].addmm_(weight[g].flatten(1).transpose(0, 1),
                        grad_output[b][g].flatten(1), 0.0f, 1.0f);
    }

    // Back to the ungrouped layouts the col2im implementations are given.
    columns =
        columns.view({columns.size(0) * columns.size(1), columns.size(2)});
    weight = weight.view({weight.size(0) * weight.size(1), weight.size(2),
                          weight.size(3), weight.size(4)});

    // gradient w.r.t. input coordinate data
    modulated_deformable_col2im_coord_impl(
        columns, input[b], offset[b], mask[b], 1, channels, height, width,
        height_out, width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h,
        stride_w, dilation_h, dilation_w, deformable_group, grad_offset[b],
        grad_mask[b]);
    // gradient w.r.t. input data
    modulated_deformable_col2im_impl(
        columns, offset[b], mask[b], 1, channels, height, width, height_out,
        width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w,
        dilation_h, dilation_w, deformable_group, grad_input[b]);

    // gradient w.r.t. weight, dWeight should accumulate across the batch and
    // group
    modulated_deformable_im2col_impl(
        input[b], offset[b], mask[b], 1, channels, height, width, height_out,
        width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w,
        dilation_h, dilation_w, deformable_group, columns);

    columns = columns.view({group, columns.size(0) / group, columns.size(1)});
    grad_weight = grad_weight.view({group, grad_weight.size(0) / group,
                                    grad_weight.size(1), grad_weight.size(2),
                                    grad_weight.size(3)});
    if (with_bias)
      grad_bias = grad_bias.view({group, grad_bias.size(0) / group});

    for (int g = 0; g < group; g++) {
      // grad_weight[g] += grad_output[b][g] * columns[g]^T
      grad_weight[g] =
          grad_weight[g]
              .flatten(1)
              .addmm_(grad_output[b][g].flatten(1), columns[g].transpose(0, 1))
              .view_as(grad_weight[g]);
      if (with_bias) {
        // grad_bias[g] += grad_output[b][g] * ones (as a column vector)
        grad_bias[g] =
            grad_bias[g]
                .view({-1, 1})
                .addmm_(grad_output[b][g].flatten(1), ones.view({-1, 1}))
                .view(-1);
      }
    }

    // Restore the ungrouped layouts for the next sample.
    columns =
        columns.view({columns.size(0) * columns.size(1), columns.size(2)});
    grad_weight = grad_weight.view({grad_weight.size(0) * grad_weight.size(1),
                                    grad_weight.size(2), grad_weight.size(3),
                                    grad_weight.size(4)});
    if (with_bias)
      grad_bias = grad_bias.view({grad_bias.size(0) * grad_bias.size(1)});
  }
  // Only the local handle is re-viewed here; the caller's tensor keeps its own
  // shape.
  grad_output = grad_output.view({grad_output.size(0) * grad_output.size(1),
                                  grad_output.size(2), grad_output.size(3),
                                  grad_output.size(4)});
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/ms_deform_attn.cpp
================================================
/*!
**************************************************************************************************
* Deformable DETR
* Copyright (c) 2020 SenseTime. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 [see LICENSE for details]
**************************************************************************************************
* Modified from
*https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
**************************************************************************************************
*/

#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Backend dispatcher for the multi-scale deformable attention forward pass.
// Passes all six arguments, unchanged and in order, to DISPATCH_DEVICE_IMPL
// under the key ms_deform_attn_impl_forward and returns what it yields.
// The macro is defined outside this file; presumably it selects the
// implementation registered for the tensors' device -- confirm in
// pytorch_device_registry.hpp.
Tensor ms_deform_attn_impl_forward(const Tensor &value,
                                   const Tensor &spatial_shapes,
                                   const Tensor &level_start_index,
                                   const Tensor &sampling_loc,
                                   const Tensor &attn_weight,
                                   const int im2col_step) {
  return DISPATCH_DEVICE_IMPL(ms_deform_attn_impl_forward, value,
                              spatial_shapes, level_start_index, sampling_loc,
                              attn_weight, im2col_step);
}

// Backend dispatcher for the multi-scale deformable attention backward pass.
// Passes all arguments, unchanged and in order, to DISPATCH_DEVICE_IMPL under
// the key ms_deform_attn_impl_backward. The three non-const references
// (grad_value, grad_sampling_loc, grad_attn_weight) are the only arguments
// the callee could modify through this signature.
// The macro is defined outside this file; presumably it selects the
// implementation registered for the tensors' device -- confirm in
// pytorch_device_registry.hpp.
void ms_deform_attn_impl_backward(
    const Tensor &value, const Tensor &spatial_shapes,
    const Tensor &level_start_index, const Tensor &sampling_loc,
    const Tensor &attn_weight, const Tensor &grad_output, Tensor &grad_value,
    Tensor &grad_sampling_loc, Tensor &grad_attn_weight,
    const int im2col_step) {
  DISPATCH_DEVICE_IMPL(ms_deform_attn_impl_backward, value, spatial_shapes,
                       level_start_index, sampling_loc, attn_weight,
                       grad_output, grad_value, grad_sampling_loc,
                       grad_attn_weight, im2col_step);
}

// Public forward entry point for multi-scale deformable attention.
// Installs a device guard for `value`'s device, then returns the tensor
// produced by ms_deform_attn_impl_forward for the same arguments.
Tensor ms_deform_attn_forward(const Tensor &value, const Tensor &spatial_shapes,
                              const Tensor &level_start_index,
                              const Tensor &sampling_loc,
                              const Tensor &attn_weight,
                              const int im2col_step) {
  // The guard stays alive until the function returns, i.e. for the whole
  // dispatched call.
  at::DeviceGuard device_scope(value.device());

  Tensor result = ms_deform_attn_impl_forward(
      value, spatial_shapes, level_start_index, sampling_loc, attn_weight,
      im2col_step);
  return result;
}

// Public backward entry point for multi-scale deformable attention.
// Installs a device guard for `value`'s device and forwards every argument,
// in order, to ms_deform_attn_impl_backward. Nothing is returned; the three
// non-const references are the only tensors the callee can modify through
// this signature.
void ms_deform_attn_backward(const Tensor &value, const Tensor &spatial_shapes,
                             const Tensor &level_start_index,
                             const Tensor &sampling_loc,
                             const Tensor &attn_weight,
                             const Tensor &grad_output, Tensor &grad_value,
                             Tensor &grad_sampling_loc,
                             Tensor &grad_attn_weight, const int im2col_step) {
  // The guard stays alive until the function returns, i.e. for the whole
  // dispatched call.
  at::DeviceGuard device_scope(value.device());

  ms_deform_attn_impl_backward(
      value, spatial_shapes, level_start_index, sampling_loc, attn_weight,
      grad_output, grad_value, grad_sampling_loc, grad_attn_weight,
      im2col_step);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/nms.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Backend dispatcher for NMS: passes its four arguments, unchanged and in
// order, to DISPATCH_DEVICE_IMPL under the key nms_impl and returns the
// resulting tensor. The macro is defined outside this file; presumably it
// selects the implementation registered for the tensors' device -- confirm in
// pytorch_device_registry.hpp.
Tensor nms_impl(Tensor boxes, Tensor scores, float iou_threshold, int offset) {
  return DISPATCH_DEVICE_IMPL(nms_impl, boxes, scores, iou_threshold, offset);
}

// Backend dispatcher for soft-NMS: passes all eight arguments, unchanged and
// in order, to DISPATCH_DEVICE_IMPL under the key softnms_impl and returns the
// resulting tensor. The macro is defined outside this file; presumably it
// selects the implementation registered for the tensors' device -- confirm in
// pytorch_device_registry.hpp.
Tensor softnms_impl(Tensor boxes, Tensor scores, Tensor dets,
                    float iou_threshold, float sigma, float min_score,
                    int method, int offset) {
  return DISPATCH_DEVICE_IMPL(softnms_impl, boxes, scores, dets, iou_threshold,
                              sigma, min_score, method, offset);
}

// Backend dispatcher for nms_match: passes `dets` and `iou_threshold` to
// DISPATCH_DEVICE_IMPL under the key nms_match_impl and returns the resulting
// vector of int vectors. The macro is defined outside this file; presumably it
// selects the implementation registered for the tensor's device -- confirm in
// pytorch_device_registry.hpp.
std::vector<std::vector<int> > nms_match_impl(Tensor dets,
                                              float iou_threshold) {
  return DISPATCH_DEVICE_IMPL(nms_match_impl, dets, iou_threshold);
}

// Public NMS entry point: delegates to nms_impl with the same arguments and
// returns its tensor unchanged.
Tensor nms(Tensor boxes, Tensor scores, float iou_threshold, int offset) {
  Tensor result = nms_impl(boxes, scores, iou_threshold, offset);
  return result;
}

// Public soft-NMS entry point: delegates to softnms_impl with the same
// arguments, in the same order, and returns its tensor unchanged.
Tensor softnms(Tensor boxes, Tensor scores, Tensor dets, float iou_threshold,
               float sigma, float min_score, int method, int offset) {
  Tensor result = softnms_impl(boxes, scores, dets, iou_threshold, sigma,
                               min_score, method, offset);
  return result;
}

// Public nms_match entry point: delegates to nms_match_impl and returns its
// vector of int vectors unchanged.
std::vector<std::vector<int> > nms_match(Tensor dets, float iou_threshold) {
  auto matches = nms_match_impl(dets, iou_threshold);
  return matches;
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/nms_rotated.cpp
================================================
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
// modified from
// https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/csrc/nms_rotated/nms_rotated.h
#include "pytorch_cpp_helper.hpp"

// CPU implementation of rotated NMS. Only declared here; the definition lives
// in another translation unit.
Tensor nms_rotated_cpu(const Tensor dets, const Tensor scores,
                       const float iou_threshold);

#ifdef MMCV_WITH_CUDA
// CUDA implementation of rotated NMS. The declaration is only visible when the
// build defines MMCV_WITH_CUDA. Unlike the CPU variant it also takes `order`,
// `dets_sorted` and `multi_label`.
Tensor nms_rotated_cuda(const Tensor dets, const Tensor scores,
                        const Tensor order, const Tensor dets_sorted,
                        const float iou_threshold, const int multi_label);
#endif

// Interface for Python: rotated NMS.
//
// Routes to nms_rotated_cuda when `dets` is on a CUDA device and to
// nms_rotated_cpu otherwise. `order`, `dets_sorted` and `multi_label` are only
// passed to the CUDA implementation; the CPU path receives `dets`, `scores`
// and `iou_threshold`.
//
// Raises (via AT_ERROR):
//   - when exactly one of `dets` / `scores` is a CUDA tensor;
//   - when `dets` is a CUDA tensor but the build lacks MMCV_WITH_CUDA.
Tensor nms_rotated(const Tensor dets, const Tensor scores, const Tensor order,
                   const Tensor dets_sorted, const float iou_threshold,
                   const int multi_label) {
  // An explicit check rather than assert(): assert is compiled out when
  // NDEBUG is defined, which would let mismatched devices reach the kernels.
  if (dets.device().is_cuda() != scores.device().is_cuda()) {
    AT_ERROR(
        "nms_rotated: dets and scores must both be CUDA tensors or both be "
        "non-CUDA tensors");
  }

  if (dets.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
    return nms_rotated_cuda(dets, scores, order, dets_sorted, iou_threshold,
                            multi_label);
#else
    AT_ERROR("Not compiled with GPU support");
#endif
  }

  return nms_rotated_cpu(dets, scores, iou_threshold);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/pixel_group.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
// It is modified from https://github.com/WenmuZhou/PAN.pytorch

#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Backend dispatcher for pixel_group: passes all seven arguments, unchanged
// and in order, to DISPATCH_DEVICE_IMPL under the key pixel_group_impl and
// returns the resulting vector of float vectors. The macro is defined outside
// this file; presumably it selects the implementation registered for the
// tensors' device -- confirm in pytorch_device_registry.hpp.
std::vector<std::vector<float>> pixel_group_impl(
    Tensor score, Tensor mask, Tensor embedding, Tensor kernel_label,
    Tensor kernel_contour, int kernel_region_num, float dis_threshold) {
  return DISPATCH_DEVICE_IMPL(pixel_group_impl, score, mask, embedding,
                              kernel_label, kernel_contour, kernel_region_num,
                              dis_threshold);
}

// Public pixel_group entry point.
// Takes a contiguous version of each of the five tensor arguments and hands
// them, together with the two scalar parameters, to pixel_group_impl. The
// value returned by the implementation is passed back unchanged.
std::vector<std::vector<float>> pixel_group(
    Tensor score, Tensor mask, Tensor embedding, Tensor kernel_label,
    Tensor kernel_contour, int kernel_region_num, float distance_threshold) {
  const Tensor score_c = score.contiguous();
  const Tensor mask_c = mask.contiguous();
  const Tensor embedding_c = embedding.contiguous();
  const Tensor label_c = kernel_label.contiguous();
  const Tensor contour_c = kernel_contour.contiguous();

  return pixel_group_impl(score_c, mask_c, embedding_c, label_c, contour_c,
                          kernel_region_num, distance_threshold);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/points_in_boxes.cpp
================================================
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Backend dispatcher for the "part" variant of points_in_boxes: passes all six
// arguments, unchanged and in order, to DISPATCH_DEVICE_IMPL under the key
// points_in_boxes_part_forward_impl. `box_idx_of_points` is the only non-const
// tensor. The macro is defined outside this file; presumably it selects the
// implementation registered for the tensors' device -- confirm in
// pytorch_device_registry.hpp.
void points_in_boxes_part_forward_impl(int batch_size, int boxes_num,
                                       int pts_num, const Tensor boxes,
                                       const Tensor pts,
                                       Tensor box_idx_of_points) {
  DISPATCH_DEVICE_IMPL(points_in_boxes_part_forward_impl, batch_size, boxes_num,
                       pts_num, boxes, pts, box_idx_of_points);
}

// Backend dispatcher for the "all" variant of points_in_boxes: passes all six
// arguments, unchanged and in order, to DISPATCH_DEVICE_IMPL under the key
// points_in_boxes_all_forward_impl. `box_idx_of_points` is the only non-const
// tensor. The macro is defined outside this file; presumably it selects the
// implementation registered for the tensors' device -- confirm in
// pytorch_device_registry.hpp.
void points_in_boxes_all_forward_impl(int batch_size, int boxes_num,
                                      int pts_num, const Tensor boxes,
                                      const Tensor pts,
                                      Tensor box_idx_of_points) {
  DISPATCH_DEVICE_IMPL(points_in_boxes_all_forward_impl, batch_size, boxes_num,
                       pts_num, boxes, pts, box_idx_of_points);
}

// Public entry point for the "part" variant of points_in_boxes.
//
//   boxes_tensor             : (B, N, 7) boxes given as
//                              [x, y, z, x_size, y_size, z_size, rz] in LiDAR
//                              coordinates, z being the bottom center
//   pts_tensor               : (B, npoints, 3) points [x, y, z] in LiDAR
//                              coordinates
//   box_idx_of_points_tensor : (B, npoints), default -1
//
// Reads the batch size, box count and point count from the tensor shapes and
// forwards everything to points_in_boxes_part_forward_impl.
void points_in_boxes_part_forward(Tensor boxes_tensor, Tensor pts_tensor,
                                  Tensor box_idx_of_points_tensor) {
  const int num_batches = static_cast<int>(boxes_tensor.size(0));
  const int num_boxes = static_cast<int>(boxes_tensor.size(1));
  const int num_points = static_cast<int>(pts_tensor.size(1));

  points_in_boxes_part_forward_impl(num_batches, num_boxes, num_points,
                                    boxes_tensor, pts_tensor,
                                    box_idx_of_points_tensor);
}

// Public entry point for the "all" variant of points_in_boxes.
//
//   boxes_tensor             : (B, N, 7) boxes given as
//                              [x, y, z, x_size, y_size, z_size, rz] in LiDAR
//                              coordinates, z being the bottom center
//   pts_tensor               : (B, npoints, 3) points [x, y, z] in LiDAR
//                              coordinates
//   box_idx_of_points_tensor : (B, npoints), default -1
//
// Reads the batch size, box count and point count from the tensor shapes and
// forwards everything to points_in_boxes_all_forward_impl.
void points_in_boxes_all_forward(Tensor boxes_tensor, Tensor pts_tensor,
                                 Tensor box_idx_of_points_tensor) {
  const int num_batches = static_cast<int>(boxes_tensor.size(0));
  const int num_boxes = static_cast<int>(boxes_tensor.size(1));
  const int num_points = static_cast<int>(pts_tensor.size(1));

  points_in_boxes_all_forward_impl(num_batches, num_boxes, num_points,
                                   boxes_tensor, pts_tensor,
                                   box_idx_of_points_tensor);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/points_in_polygons.cpp
================================================
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Backend dispatcher for points_in_polygons: passes all five arguments,
// unchanged and in order, to DISPATCH_DEVICE_IMPL under the key
// points_in_polygons_forward_impl. `output` is the only non-const tensor.
// The macro is defined outside this file; presumably it selects the
// implementation registered for the tensors' device -- confirm in
// pytorch_device_registry.hpp.
void points_in_polygons_forward_impl(const Tensor points, const Tensor polygons,
                                     Tensor output, const int rows,
                                     const int cols) {
  DISPATCH_DEVICE_IMPL(points_in_polygons_forward_impl, points, polygons,
                       output, rows, cols);
}

// Public entry point for points_in_polygons.
// Uses the leading dimension of `points` as the row count and the leading
// dimension of `polygons` as the column count, then forwards the tensors and
// both counts to points_in_polygons_forward_impl.
void points_in_polygons_forward(Tensor points, Tensor polygons, Tensor output) {
  const int num_points = static_cast<int>(points.size(0));
  const int num_polygons = static_cast<int>(polygons.size(0));

  points_in_polygons_forward_impl(points, polygons, output, num_points,
                                  num_polygons);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/psamask.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
// Modified from
// https://github.com/hszhao/semseg/blob/master/lib/psa/src
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Backend dispatcher for the PSA mask forward pass: passes all ten arguments,
// unchanged and in order, to DISPATCH_DEVICE_IMPL under the key
// psamask_forward_impl. `output` is the only non-const tensor.
// The macro is defined outside this file; presumably it selects the
// implementation registered for the tensors' device -- confirm in
// pytorch_device_registry.hpp.
void psamask_forward_impl(const int psa_type, const Tensor input, Tensor output,
                          const int num_, const int h_feature,
                          const int w_feature, const int h_mask,
                          const int w_mask, const int half_h_mask,
                          const int half_w_mask) {
  DISPATCH_DEVICE_IMPL(psamask_forward_impl, psa_type, input, output, num_,
                       h_feature, w_feature, h_mask, w_mask, half_h_mask,
                       half_w_mask);
}

// Backend dispatcher for the PSA mask backward pass: passes all ten arguments,
// unchanged and in order, to DISPATCH_DEVICE_IMPL under the key
// psamask_backward_impl. `grad_input` is the only non-const tensor.
// The macro is defined outside this file; presumably it selects the
// implementation registered for the tensors' device -- confirm in
// pytorch_device_registry.hpp.
void psamask_backward_impl(const int psa_type, const Tensor grad_output,
                           Tensor grad_input, const int num_,
                           const int h_feature, const int w_feature,
                           const int h_mask, const int w_mask,
                           const int half_h_mask, const int half_w_mask) {
  DISPATCH_DEVICE_IMPL(psamask_backward_impl, psa_type, grad_output, grad_input,
                       num_, h_feature, w_feature, h_mask, w_mask, half_h_mask,
                       half_w_mask);
}

// Public PSA mask forward entry point.
// Forwards to psamask_forward_impl. Note the argument order differs between
// the two: this function takes (input, output, psa_type, ...), while the impl
// takes psa_type first.
void psamask_forward(const Tensor input, Tensor output, const int psa_type,
                     const int num_, const int h_feature, const int w_feature,
                     const int h_mask, const int w_mask, const int half_h_mask,
                     const int half_w_mask) {
  psamask_forward_impl(psa_type, input, output, num_, h_feature, w_feature,
                       h_mask, w_mask, half_h_mask, half_w_mask);
}

// Public PSA mask backward entry point.
// Forwards to psamask_backward_impl. Note the argument order differs between
// the two: this function takes (grad_output, grad_input, psa_type, ...), while
// the impl takes psa_type first.
// NOTE(review): the const qualifiers look swapped relative to the impl's
// signature (there grad_output is const and grad_input is not). This only
// affects the local by-value handles, but confirm the intent.
void psamask_backward(Tensor grad_output, const Tensor grad_input,
                      const int psa_type, const int num_, const int h_feature,
                      const int w_feature, const int h_mask, const int w_mask,
                      const int half_h_mask, const int half_w_mask) {
  psamask_backward_impl(psa_type, grad_output, grad_input, num_, h_feature,
                        w_feature, h_mask, w_mask, half_h_mask, half_w_mask);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/pybind.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"

std::string get_compiler_version();
std::string get_compiling_cuda_version();

void assign_score_withk_forward(const Tensor &points, const Tensor &centers,
                                const Tensor &scores, const Tensor &knn_idx,
                                Tensor &output, int B, int N0, int N1, int M,
                                int K, int O, int aggregate);

void assign_score_withk_backward(const Tensor &grad_out, const Tensor &points,
                                 const Tensor &centers, const Tensor &scores,
                                 const Tensor &knn_idx, Tensor &grad_points,
                                 Tensor &grad_centers, Tensor &grad_scores,
                                 int B, int N0, int N1, int M, int K, int O,
                                 int aggregate);

void carafe_naive_forward(Tensor features, Tensor masks, Tensor output,
                          int kernel_size, int group_size, int scale_factor);

void carafe_naive_backward(Tensor top_grad, Tensor features, Tensor masks,
                           Tensor bottom_grad, Tensor mask_grad,
                           int kernel_size, int group_size, int scale_factor);

void carafe_forward(Tensor features, Tensor masks, Tensor rfeatures,
                    Tensor routput, Tensor rmasks, Tensor output,
                    int kernel_size, int group_size, int scale_factor);

void carafe_backward(Tensor top_grad, Tensor rfeatures, Tensor masks,
                     Tensor rtop_grad, Tensor rbottom_grad_hs,
                     Tensor rbottom_grad, Tensor rmask_grad, Tensor bottom_grad,
                     Tensor mask_grad, int kernel_size, int group_size,
                     int scale_factor);

void deform_conv_forward(Tensor input, Tensor weight, Tensor offset,
                         Tensor output, Tensor columns, Tensor ones, int kW,
                         int kH, int dW, int dH, int padW, int padH,
                         int dilationW, int dilationH, int group,
                         int deformable_group, int im2col_step);

void deform_conv_backward_input(Tensor input, Tensor offset, Tensor gradOutput,
                                Tensor gradInput, Tensor gradOffset,
                                Tensor weight, Tensor columns, int kW, int kH,
                                int dW, int dH, int padW, int padH,
                                int dilationW, int dilationH, int group,
                                int deformable_group, int im2col_step);

void deform_conv_backward_parameters(Tensor input, Tensor offset,
                                     Tensor gradOutput, Tensor gradWeight,
                                     Tensor columns, Tensor ones, int kW,
                                     int kH, int dW, int dH, int padW, int padH,
                                     int dilationW, int dilationH, int group,
                                     int deformable_group, float scale,
                                     int im2col_step);

void deform_roi_pool_forward(Tensor input, Tensor rois, Tensor offset,
                             Tensor output, int pooled_height, int pooled_width,
                             float spatial_scale, int sampling_ratio,
                             float gamma);

void deform_roi_pool_backward(Tensor grad_output, Tensor input, Tensor rois,
                              Tensor offset, Tensor grad_input,
                              Tensor grad_offset, int pooled_height,
                              int pooled_width, float spatial_scale,
                              int sampling_ratio, float gamma);

void group_points_forward(Tensor points_tensor, Tensor idx_tensor,
                          Tensor out_tensor, int b, int c, int n, int npoints,
                          int nsample);

void group_points_backward(Tensor grad_out_tensor, Tensor idx_tensor,
                           Tensor grad_points_tensor, int b, int c, int n,
                           int npoints, int nsample);

void roipoint_pool3d_forward(Tensor xyz, Tensor boxes3d, Tensor pts_feature,
                             Tensor pooled_features, Tensor pooled_empty_flag);

void gather_points_forward(Tensor points_tensor, Tensor idx_tensor,
                           Tensor out_tensor, int b, int c, int n, int npoints);

void gather_points_backward(Tensor grad_out_tensor, Tensor idx_tensor,
                            Tensor grad_points_tensor, int b, int c, int n,
                            int npoints);

void sigmoid_focal_loss_forward(Tensor input, Tensor target, Tensor weight,
                                Tensor output, float gamma, float alpha);

void sigmoid_focal_loss_backward(Tensor input, Tensor target, Tensor weight,
                                 Tensor grad_input, float gamma, float alpha);

void softmax_focal_loss_forward(Tensor input, Tensor target, Tensor weight,
                                Tensor output, float gamma, float alpha);

void softmax_focal_loss_backward(Tensor input, Tensor target, Tensor weight,
                                 Tensor buff, Tensor grad_input, float gamma,
                                 float alpha);

void three_interpolate_forward(Tensor points_tensor, Tensor idx_tensor,
                               Tensor weight_tensor, Tensor out_tensor, int b,
                               int c, int m, int n);

void three_interpolate_backward(Tensor grad_out_tensor, Tensor idx_tensor,
                                Tensor weight_tensor, Tensor grad_points_tensor,
                                int b, int c, int n, int m);

void three_nn_forward(Tensor unknown_tensor, Tensor known_tensor,
                      Tensor dist2_tensor, Tensor idx_tensor, int b, int n,
                      int m);

void bbox_overlaps(const Tensor bboxes1, const Tensor bboxes2, Tensor ious,
                   const int mode, const bool aligned, const int offset);

void knn_forward(Tensor xyz_tensor, Tensor new_xyz_tensor, Tensor idx_tensor,
                 Tensor dist2_tensor, int b, int n, int m, int nsample);
void iou3d_boxes_overlap_bev_forward(Tensor boxes_a, Tensor boxes_b,
                                     Tensor ans_overlap);

void iou3d_boxes_iou_bev_forward(Tensor boxes_a, Tensor boxes_b,
                                 Tensor ans_iou);

void iou3d_nms_forward(Tensor boxes, Tensor keep, Tensor keep_num,
                       float nms_overlap_thresh);

void iou3d_nms_normal_forward(Tensor boxes, Tensor keep, Tensor keep_num,
                              float nms_overlap_thresh);

// Prototypes only (no bodies here); each function below is registered under
// the same name inside PYBIND11_MODULE further down in this file.

// Furthest point sampling. Both variants share one argument list: three
// tensors followed by the int arguments b, n and m.
void furthest_point_sampling_forward(Tensor points_tensor, Tensor temp_tensor,
                                     Tensor idx_tensor, int b, int n, int m);

void furthest_point_sampling_with_dist_forward(Tensor points_tensor,
                                               Tensor temp_tensor,
                                               Tensor idx_tensor, int b, int n,
                                               int m);

// Masked im2col / col2im. Inputs are passed as `const Tensor`; the tensor
// that is not const-qualified (`col` / `im`) is presumably the output buffer
// -- confirm against the implementation.
void masked_im2col_forward(const Tensor im, const Tensor mask_h_idx,
                           const Tensor mask_w_idx, Tensor col,
                           const int kernel_h, const int kernel_w,
                           const int pad_h, const int pad_w);

void masked_col2im_forward(const Tensor col, const Tensor mask_h_idx,
                           const Tensor mask_w_idx, Tensor im, int height,
                           int width, int channels);

// Modulated deformable convolution. Both directions return void and end with
// the `with_bias` flag; the backward variant additionally takes the grad_*
// tensors between `columns` and the integer hyper-parameters.
void modulated_deform_conv_forward(
    Tensor input, Tensor weight, Tensor bias, Tensor ones, Tensor offset,
    Tensor mask, Tensor output, Tensor columns, int kernel_h, int kernel_w,
    const int stride_h, const int stride_w, const int pad_h, const int pad_w,
    const int dilation_h, const int dilation_w, const int group,
    const int deformable_group, const bool with_bias);

void modulated_deform_conv_backward(
    Tensor input, Tensor weight, Tensor bias, Tensor ones, Tensor offset,
    Tensor mask, Tensor columns, Tensor grad_input, Tensor grad_weight,
    Tensor grad_bias, Tensor grad_offset, Tensor grad_mask, Tensor grad_output,
    int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h,
    int pad_w, int dilation_h, int dilation_w, int group, int deformable_group,
    const bool with_bias);

// Multi-scale deformable attention. The forward op returns a Tensor; the
// backward op returns void and receives its three gradient tensors by
// non-const reference. Note that the Python keyword names registered in
// PYBIND11_MODULE differ from the parameter names used here (for example
// "value_spatial_shapes" for `spatial_shapes` and "sampling_locations" for
// `sampling_loc`); positions are the same.
Tensor ms_deform_attn_forward(const Tensor &value, const Tensor &spatial_shapes,
                              const Tensor &level_start_index,
                              const Tensor &sampling_loc,
                              const Tensor &attn_weight, const int im2col_step);

void ms_deform_attn_backward(const Tensor &value, const Tensor &spatial_shapes,
                             const Tensor &level_start_index,
                             const Tensor &sampling_loc,
                             const Tensor &attn_weight,
                             const Tensor &grad_output, Tensor &grad_value,
                             Tensor &grad_sampling_loc,
                             Tensor &grad_attn_weight, const int im2col_step);

// NMS family. `nms` and `softnms` return a Tensor; `nms_match` returns a
// nested std::vector<int>. All three are registered in PYBIND11_MODULE below.
Tensor nms(Tensor boxes, Tensor scores, float iou_threshold, int offset);

Tensor softnms(Tensor boxes, Tensor scores, Tensor dets, float iou_threshold,
               float sigma, float min_score, int method, int offset);

std::vector<std::vector<int>> nms_match(Tensor dets, float iou_threshold);

// Pixel grouping. The sixth parameter is named `kernel_region_num` here but
// is exposed to Python under the keyword "kernel_region_label" in
// PYBIND11_MODULE; keep that in mind when calling by keyword.
std::vector<std::vector<float>> pixel_group(
    Tensor score, Tensor mask, Tensor embedding, Tensor kernel_label,
    Tensor kernel_contour, int kernel_region_num, float distance_threshold);

// Contour expansion; returns a nested std::vector<int>.
std::vector<std::vector<int>> contour_expand(Tensor kernel_mask,
                                             Tensor internal_kernel_label,
                                             int min_kernel_area,
                                             int kernel_num);

// RoI Align. Forward and backward take the same trailing scalar arguments
// (aligned_height ... aligned); only the leading tensor lists differ.
void roi_align_forward(Tensor input, Tensor rois, Tensor output,
                       Tensor argmax_y, Tensor argmax_x, int aligned_height,
                       int aligned_width, float spatial_scale,
                       int sampling_ratio, int pool_mode, bool aligned);

void roi_align_backward(Tensor grad_output, Tensor rois, Tensor argmax_y,
                        Tensor argmax_x, Tensor grad_input, int aligned_height,
                        int aligned_width, float spatial_scale,
                        int sampling_ratio, int pool_mode, bool aligned);

// RoI Pool. Same layout idea as RoI Align: tensors first, then
// pooled_height, pooled_width and spatial_scale.
void roi_pool_forward(Tensor input, Tensor rois, Tensor output, Tensor argmax,
                      int pooled_height, int pooled_width, float spatial_scale);

void roi_pool_backward(Tensor grad_output, Tensor rois, Tensor argmax,
                       Tensor grad_input, int pooled_height, int pooled_width,
                       float spatial_scale);

// Synchronized batch norm, split into separate entry points for the mean,
// the variance, the normalized output, and the two backward steps. Tensors
// qualified `const` are read-only by signature; the remaining tensors are
// presumably written by the op -- confirm against the implementation.
void sync_bn_forward_mean(const Tensor input, Tensor mean);

void sync_bn_forward_var(const Tensor input, const Tensor mean, Tensor var);

void sync_bn_forward_output(const Tensor input, const Tensor mean,
                            const Tensor var, const Tensor weight,
                            const Tensor bias, Tensor running_mean,
                            Tensor running_var, Tensor norm, Tensor std,
                            Tensor output, float eps, float momentum,
                            int group_size);

void sync_bn_backward_param(const Tensor grad_output, const Tensor norm,
                            Tensor grad_weight, Tensor grad_bias);

void sync_bn_backward_data(const Tensor grad_output, const Tensor weight,
                           const Tensor grad_weight, const Tensor grad_bias,
                           const Tensor norm, const Tensor std,
                           Tensor grad_input);

// PSA mask. Forward and backward share the eight trailing int arguments.
// NOTE(review): in the backward prototype it is `grad_input` that carries the
// const qualifier and `grad_output` that does not, the reverse of what the
// names suggest -- verify against the definition before relying on it.
void psamask_forward(const Tensor input, Tensor output, const int psa_type,
                     const int num_, const int h_feature, const int w_feature,
                     const int h_mask, const int w_mask, const int half_h_mask,
                     const int half_w_mask);

void psamask_backward(Tensor grad_output, const Tensor grad_input,
                      const int psa_type, const int num_, const int h_feature,
                      const int w_feature, const int h_mask, const int w_mask,
                      const int half_h_mask, const int half_w_mask);

// TIN shift: three tensors in each direction, no scalar arguments.
void tin_shift_forward(Tensor input, Tensor shift, Tensor output);

void tin_shift_backward(Tensor grad_output, Tensor shift, Tensor grad_input);

// Ball query: three tensors, then int b/n/m, the float radius bounds, and
// the int nsample.
void ball_query_forward(Tensor new_xyz_tensor, Tensor xyz_tensor,
                        Tensor idx_tensor, int b, int n, int m,
                        float min_radius, float max_radius, int nsample);

// Corner pooling (bottom / left / right / top). Every forward takes one
// tensor and every backward takes (input, grad_output); all return a Tensor.
// In PYBIND11_MODULE these eight functions are the ones registered with
// py::call_guard<py::gil_scoped_release>.
Tensor bottom_pool_forward(Tensor input);

Tensor bottom_pool_backward(Tensor input, Tensor grad_output);

Tensor left_pool_forward(Tensor input);

Tensor left_pool_backward(Tensor input, Tensor grad_output);

Tensor right_pool_forward(Tensor input);

Tensor right_pool_backward(Tensor input, Tensor grad_output);

Tensor top_pool_forward(Tensor input);

Tensor top_pool_backward(Tensor input, Tensor grad_output);

// Rotated-box IoU; returns void and takes an `ious` tensor that is not
// const-qualified (presumably the output buffer -- confirm).
void box_iou_rotated(const Tensor boxes1, const Tensor boxes2, Tensor ious,
                     const int mode_flag, const bool aligned);

// Rotated-box NMS; returns a Tensor.
Tensor nms_rotated(const Tensor dets, const Tensor scores, const Tensor order,
                   const Tensor dets_sorted, const float iou_threshold,
                   const int multi_label);

// upfirdn2d: two tensors followed by eight ints (up, down and pad values for
// x and y); returns a Tensor.
Tensor upfirdn2d(const Tensor &input, const Tensor &kernel, int up_x, int up_y,
                 int down_x, int down_y, int pad_x0, int pad_x1, int pad_y0,
                 int pad_y1);

// Fused bias + leaky ReLU. The third parameter is named `refer` here but is
// exposed to Python under the keyword "empty" in PYBIND11_MODULE.
Tensor fused_bias_leakyrelu(const Tensor &input, const Tensor &bias,
                            const Tensor &refer, int act, int grad, float alpha,
                            float scale);

// Rotated RoI Align. Positional order matters for Python callers: forward is
// (input, rois, output, ...) and backward is (grad_output, rois, grad_input,
// ...); the six trailing scalar arguments are identical in both.
void roi_align_rotated_forward(Tensor input, Tensor rois, Tensor output,
                               int pooled_height, int pooled_width,
                               float spatial_scale, int sample_num,
                               bool aligned, bool clockwise);

void roi_align_rotated_backward(Tensor grad_output, Tensor rois,
                                Tensor grad_input, int pooled_height,
                                int pooled_width, float spatial_scale,
                                int sample_num, bool aligned, bool clockwise);

// Voxelization ops. Unlike the surrounding prototypes these spell the tensor
// type explicitly as torch::Tensor / at::Tensor instead of the bare `Tensor`
// name, and pass tensors by reference.

// Returns a std::vector of tensors; `reduce_type` is passed as a string.
std::vector<torch::Tensor> dynamic_point_to_voxel_forward(
    const torch::Tensor &feats, const torch::Tensor &coors,
    const std::string &reduce_type);

// `grad_feats` is the only tensor taken by non-const reference.
void dynamic_point_to_voxel_backward(torch::Tensor &grad_feats,
                                     const torch::Tensor &grad_reduced_feats,
                                     const torch::Tensor &feats,
                                     const torch::Tensor &reduced_feats,
                                     const torch::Tensor &coors_idx,
                                     const torch::Tensor &reduce_count,
                                     const std::string &reduce_type);

// Three const-reference tensors, four non-const-reference tensors, then
// max_points, max_voxels and NDim.
void hard_voxelize_forward(const at::Tensor &points,
                           const at::Tensor &voxel_size,
                           const at::Tensor &coors_range, at::Tensor &voxels,
                           at::Tensor &coors, at::Tensor &num_points_per_voxel,
                           at::Tensor &voxel_num, const int max_points,
                           const int max_voxels, const int NDim);

void dynamic_voxelize_forward(const at::Tensor &points,
                              const at::Tensor &voxel_size,
                              const at::Tensor &coors_range, at::Tensor &coors,
                              const int NDim);

// Border align; both directions end with `pool_size`.
void border_align_forward(const Tensor &input, const Tensor &boxes,
                          Tensor output, Tensor argmax_idx,
                          const int pool_size);

void border_align_backward(const Tensor &grad_output, const Tensor &boxes,
                           const Tensor &argmax_idx, Tensor grad_input,
                           const int pool_size);

// points_in_boxes variants (cpu / part / all); each takes three tensors.
void points_in_boxes_cpu_forward(Tensor boxes_tensor, Tensor pts_tensor,
                                 Tensor pts_indices_tensor);

void points_in_boxes_part_forward(Tensor boxes_tensor, Tensor pts_tensor,
                                  Tensor box_idx_of_points_tensor);

void points_in_boxes_all_forward(Tensor boxes_tensor, Tensor pts_tensor,
                                 Tensor box_idx_of_points_tensor);

// RoI-aware 3D pooling; `pool_method` is an int selector in both directions.
void roiaware_pool3d_forward(Tensor rois, Tensor pts, Tensor pts_feature,
                             Tensor argmax, Tensor pts_idx_of_voxels,
                             Tensor pooled_features, int pool_method);

void roiaware_pool3d_backward(Tensor pts_idx_of_voxels, Tensor argmax,
                              Tensor grad_out, Tensor grad_in, int pool_method);

// Correlation. Forward and backward share the same twelve trailing int
// arguments (kH ... dW); backward adds grad_output and the two grad_input
// tensors.
void correlation_forward(Tensor input1, Tensor input2, Tensor output, int kH,
                         int kW, int patchH, int patchW, int padH, int padW,
                         int dilationH, int dilationW, int dilation_patchH,
                         int dilation_patchW, int dH, int dW);

void correlation_backward(Tensor grad_output, Tensor input1, Tensor input2,
                          Tensor grad_input1, Tensor grad_input2, int kH,
                          int kW, int patchH, int patchW, int padH, int padW,
                          int dilationH, int dilationW, int dilation_patchH,
                          int dilation_patchW, int dH, int dW);

// Rotated feature align; both directions end with spatial_scale and points.
void rotated_feature_align_forward(const Tensor features,
                                   const Tensor best_bboxes, Tensor output,
                                   const float spatial_scale, const int points);

void rotated_feature_align_backward(const Tensor top_grad,
                                    const Tensor best_bboxes,
                                    Tensor bottom_grad,
                                    const float spatial_scale,
                                    const int points);

// RiRoI align rotated; three tensors followed by the same six scalar
// arguments in both directions.
void riroi_align_rotated_forward(Tensor features, Tensor rois, Tensor output,
                                 int pooled_height, int pooled_width,
                                 float spatial_scale, int num_samples,
                                 int num_orientations, bool clockwise);

void riroi_align_rotated_backward(Tensor top_grad, Tensor rois,
                                  Tensor bottom_grad, int pooled_height,
                                  int pooled_width, float spatial_scale,
                                  int num_samples, int num_orientations,
                                  bool clockwise);

// Polygon / convex-hull helpers. All return void; the last tensor argument
// of each is the only one without a const qualifier where qualifiers are used
// (presumably the output buffer -- confirm against the implementation).
void points_in_polygons_forward(Tensor points, Tensor polygons, Tensor output);

void min_area_polygons(const Tensor pointsets, Tensor polygons);

// Active rotated filter; (input, indices, output) forward and
// (grad_out, indices, grad_in) backward.
void active_rotated_filter_forward(const Tensor input, const Tensor indices,
                                   Tensor output);

void active_rotated_filter_backward(const Tensor grad_out, const Tensor indices,
                                    Tensor grad_in);

void convex_iou(const Tensor pointsets, const Tensor polygons, Tensor ious);

void convex_giou(const Tensor pointsets, const Tensor polygons, Tensor output);

// Python bindings for the compiled ops.
//
// Every m.def(...) call follows the same layout: Python-visible name, pointer
// to the C++ function declared above, a docstring, and then one py::arg per
// C++ parameter. py::arg entries are matched to C++ parameters purely by
// position, so the keyword list of each registration must follow the order of
// the corresponding prototype; a keyword in the wrong slot is accepted by the
// compiler and silently routes the value to a different parameter.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("upfirdn2d", &upfirdn2d, "upfirdn2d (CUDA)", py::arg("input"),
        py::arg("kernel"), py::arg("up_x"), py::arg("up_y"), py::arg("down_x"),
        py::arg("down_y"), py::arg("pad_x0"), py::arg("pad_x1"),
        py::arg("pad_y0"), py::arg("pad_y1"));
  // NOTE(review): the third C++ parameter is named `refer`, but it has always
  // been exposed as the keyword "empty"; kept as-is so keyword callers do not
  // break.
  m.def("fused_bias_leakyrelu", &fused_bias_leakyrelu,
        "fused_bias_leakyrelu (CUDA)", py::arg("input"), py::arg("bias"),
        py::arg("empty"), py::arg("act"), py::arg("grad"), py::arg("alpha"),
        py::arg("scale"));
  m.def("gather_points_forward", &gather_points_forward,
        "gather_points_forward", py::arg("points_tensor"),
        py::arg("idx_tensor"), py::arg("out_tensor"), py::arg("b"),
        py::arg("c"), py::arg("n"), py::arg("npoints"));
  m.def("gather_points_backward", &gather_points_backward,
        "gather_points_backward", py::arg("grad_out_tensor"),
        py::arg("idx_tensor"), py::arg("grad_points_tensor"), py::arg("b"),
        py::arg("c"), py::arg("n"), py::arg("npoints"));
  m.def("get_compiler_version", &get_compiler_version, "get_compiler_version");
  m.def("get_compiling_cuda_version", &get_compiling_cuda_version,
        "get_compiling_cuda_version");
  m.def("assign_score_withk_forward", &assign_score_withk_forward,
        "assign_score_withk_forward", py::arg("points"), py::arg("centers"),
        py::arg("scores"), py::arg("knn_idx"), py::arg("output"), py::arg("B"),
        py::arg("N0"), py::arg("N1"), py::arg("M"), py::arg("K"), py::arg("O"),
        py::arg("aggregate"));
  m.def("assign_score_withk_backward", &assign_score_withk_backward,
        "assign_score_withk_backward", py::arg("grad_out"), py::arg("points"),
        py::arg("centers"), py::arg("scores"), py::arg("knn_idx"),
        py::arg("grad_points"), py::arg("grad_centers"), py::arg("grad_scores"),
        py::arg("B"), py::arg("N0"), py::arg("N1"), py::arg("M"), py::arg("K"),
        py::arg("O"), py::arg("aggregate"));
  m.def("knn_forward", &knn_forward, "knn_forward", py::arg("xyz_tensor"),
        py::arg("new_xyz_tensor"), py::arg("idx_tensor"),
        py::arg("dist2_tensor"), py::arg("b"), py::arg("n"), py::arg("m"),
        py::arg("nsample"));
  m.def("carafe_naive_forward", &carafe_naive_forward, "carafe_naive_forward",
        py::arg("features"), py::arg("masks"), py::arg("output"),
        py::arg("kernel_size"), py::arg("group_size"), py::arg("scale_factor"));
  m.def("carafe_naive_backward", &carafe_naive_backward,
        "carafe_naive_backward", py::arg("top_grad"), py::arg("features"),
        py::arg("masks"), py::arg("bottom_grad"), py::arg("mask_grad"),
        py::arg("kernel_size"), py::arg("group_size"), py::arg("scale_factor"));
  m.def("carafe_forward", &carafe_forward, "carafe_forward",
        py::arg("features"), py::arg("masks"), py::arg("rfeatures"),
        py::arg("routput"), py::arg("rmasks"), py::arg("output"),
        py::arg("kernel_size"), py::arg("group_size"), py::arg("scale_factor"));
  m.def("carafe_backward", &carafe_backward, "carafe_backward",
        py::arg("top_grad"), py::arg("rfeatures"), py::arg("masks"),
        py::arg("rtop_grad"), py::arg("rbottom_grad_hs"),
        py::arg("rbottom_grad"), py::arg("rmask_grad"), py::arg("bottom_grad"),
        py::arg("mask_grad"), py::arg("kernel_size"), py::arg("group_size"),
        py::arg("scale_factor"));
  m.def("deform_conv_forward", &deform_conv_forward, "deform_conv_forward",
        py::arg("input"), py::arg("weight"), py::arg("offset"),
        py::arg("output"), py::arg("columns"), py::arg("ones"), py::arg("kW"),
        py::arg("kH"), py::arg("dW"), py::arg("dH"), py::arg("padH"),
        py::arg("padW"), py::arg("dilationW"), py::arg("dilationH"),
        py::arg("group"), py::arg("deformable_group"), py::arg("im2col_step"));
  m.def("deform_conv_backward_input", &deform_conv_backward_input,
        "deform_conv_backward_input", py::arg("input"), py::arg("offset"),
        py::arg("gradOutput"), py::arg("gradInput"), py::arg("gradOffset"),
        py::arg("weight"), py::arg("columns"), py::arg("kW"), py::arg("kH"),
        py::arg("dW"), py::arg("dH"), py::arg("padH"), py::arg("padW"),
        py::arg("dilationW"), py::arg("dilationH"), py::arg("group"),
        py::arg("deformable_group"), py::arg("im2col_step"));
  m.def("deform_conv_backward_parameters", &deform_conv_backward_parameters,
        "deform_conv_backward_parameters", py::arg("input"), py::arg("offset"),
        py::arg("gradOutput"), py::arg("gradWeight"), py::arg("columns"),
        py::arg("ones"), py::arg("kW"), py::arg("kH"), py::arg("dW"),
        py::arg("dH"), py::arg("padH"), py::arg("padW"), py::arg("dilationW"),
        py::arg("dilationH"), py::arg("group"), py::arg("deformable_group"),
        py::arg("scale"), py::arg("im2col_step"));
  m.def("deform_roi_pool_forward", &deform_roi_pool_forward,
        "deform roi pool forward", py::arg("input"), py::arg("rois"),
        py::arg("offset"), py::arg("output"), py::arg("pooled_height"),
        py::arg("pooled_width"), py::arg("spatial_scale"),
        py::arg("sampling_ratio"), py::arg("gamma"));
  m.def("deform_roi_pool_backward", &deform_roi_pool_backward,
        "deform roi pool backward", py::arg("grad_output"), py::arg("input"),
        py::arg("rois"), py::arg("offset"), py::arg("grad_input"),
        py::arg("grad_offset"), py::arg("pooled_height"),
        py::arg("pooled_width"), py::arg("spatial_scale"),
        py::arg("sampling_ratio"), py::arg("gamma"));
  m.def("roipoint_pool3d_forward", &roipoint_pool3d_forward,
        "roipoint_pool3d_forward", py::arg("xyz"), py::arg("boxes3d"),
        py::arg("pts_feature"), py::arg("pooled_features"),
        py::arg("pooled_empty_flag"));
  m.def("sigmoid_focal_loss_forward", &sigmoid_focal_loss_forward,
        "sigmoid_focal_loss_forward ", py::arg("input"), py::arg("target"),
        py::arg("weight"), py::arg("output"), py::arg("gamma"),
        py::arg("alpha"));
  m.def("sigmoid_focal_loss_backward", &sigmoid_focal_loss_backward,
        "sigmoid_focal_loss_backward", py::arg("input"), py::arg("target"),
        py::arg("weight"), py::arg("grad_input"), py::arg("gamma"),
        py::arg("alpha"));
  m.def("softmax_focal_loss_forward", &softmax_focal_loss_forward,
        "softmax_focal_loss_forward", py::arg("input"), py::arg("target"),
        py::arg("weight"), py::arg("output"), py::arg("gamma"),
        py::arg("alpha"));
  m.def("softmax_focal_loss_backward", &softmax_focal_loss_backward,
        "softmax_focal_loss_backward", py::arg("input"), py::arg("target"),
        py::arg("weight"), py::arg("buff"), py::arg("grad_input"),
        py::arg("gamma"), py::arg("alpha"));
  m.def("three_interpolate_forward", &three_interpolate_forward,
        "three_interpolate_forward", py::arg("points_tensor"),
        py::arg("idx_tensor"), py::arg("weight_tensor"), py::arg("out_tensor"),
        py::arg("b"), py::arg("c"), py::arg("m"), py::arg("n"));
  m.def("three_interpolate_backward", &three_interpolate_backward,
        "three_interpolate_backward", py::arg("grad_out_tensor"),
        py::arg("idx_tensor"), py::arg("weight_tensor"),
        py::arg("grad_points_tensor"), py::arg("b"), py::arg("c"), py::arg("n"),
        py::arg("m"));
  m.def("three_nn_forward", &three_nn_forward, "three_nn_forward",
        py::arg("unknown_tensor"), py::arg("known_tensor"),
        py::arg("dist2_tensor"), py::arg("idx_tensor"), py::arg("b"),
        py::arg("n"), py::arg("m"));
  m.def("bbox_overlaps", &bbox_overlaps, "bbox_overlaps", py::arg("bboxes1"),
        py::arg("bboxes2"), py::arg("ious"), py::arg("mode"),
        py::arg("aligned"), py::arg("offset"));
  m.def("group_points_forward", &group_points_forward, "group_points_forward",
        py::arg("points_tensor"), py::arg("idx_tensor"), py::arg("out_tensor"),
        py::arg("b"), py::arg("c"), py::arg("n"), py::arg("npoints"),
        py::arg("nsample"));
  m.def("group_points_backward", &group_points_backward,
        "group_points_backward", py::arg("grad_out_tensor"),
        py::arg("idx_tensor"), py::arg("grad_points_tensor"), py::arg("b"),
        py::arg("c"), py::arg("n"), py::arg("npoints"), py::arg("nsample"));
  // NOTE(review): "knn_forward" is registered a second time here (the first
  // registration sits right after assign_score_withk_backward above). Both
  // bind the same function pointer but list the keyword names in a different
  // order (ints first here, tensors first above), so only one of the two
  // orderings can match the C++ signature. That signature is declared outside
  // this block; confirm it and drop the stale registration. Left in place so
  // this change does not alter which calls are accepted.
  m.def("knn_forward", &knn_forward, "knn_forward", py::arg("b"), py::arg("n"),
        py::arg("m"), py::arg("nsample"), py::arg("xyz_tensor"),
        py::arg("new_xyz_tensor"), py::arg("idx_tensor"),
        py::arg("dist2_tensor"));
  m.def("iou3d_boxes_overlap_bev_forward", &iou3d_boxes_overlap_bev_forward,
        "iou3d_boxes_overlap_bev_forward", py::arg("boxes_a"),
        py::arg("boxes_b"), py::arg("ans_overlap"));
  m.def("iou3d_boxes_iou_bev_forward", &iou3d_boxes_iou_bev_forward,
        "iou3d_boxes_iou_bev_forward", py::arg("boxes_a"), py::arg("boxes_b"),
        py::arg("ans_iou"));
  m.def("iou3d_nms_forward", &iou3d_nms_forward, "iou3d_nms_forward",
        py::arg("boxes"), py::arg("keep"), py::arg("num_out"),
        py::arg("nms_overlap_thresh"));
  m.def("iou3d_nms_normal_forward", &iou3d_nms_normal_forward,
        "iou3d_nms_normal_forward", py::arg("boxes"), py::arg("keep"),
        py::arg("num_out"), py::arg("nms_overlap_thresh"));
  m.def("furthest_point_sampling_forward", &furthest_point_sampling_forward,
        "furthest_point_sampling_forward", py::arg("points_tensor"),
        py::arg("temp_tensor"), py::arg("idx_tensor"), py::arg("b"),
        py::arg("n"), py::arg("m"));
  m.def("furthest_point_sampling_with_dist_forward",
        &furthest_point_sampling_with_dist_forward,
        "furthest_point_sampling_with_dist_forward", py::arg("points_tensor"),
        py::arg("temp_tensor"), py::arg("idx_tensor"), py::arg("b"),
        py::arg("n"), py::arg("m"));
  m.def("masked_im2col_forward", &masked_im2col_forward,
        "masked_im2col_forward", py::arg("im"), py::arg("mask_h_idx"),
        py::arg("mask_w_idx"), py::arg("col"), py::arg("kernel_h"),
        py::arg("kernel_w"), py::arg("pad_h"), py::arg("pad_w"));
  m.def("masked_col2im_forward", &masked_col2im_forward,
        "masked_col2im_forward", py::arg("col"), py::arg("mask_h_idx"),
        py::arg("mask_w_idx"), py::arg("im"), py::arg("height"),
        py::arg("width"), py::arg("channels"));
  m.def("modulated_deform_conv_forward", &modulated_deform_conv_forward,
        "modulated deform conv forward", py::arg("input"), py::arg("weight"),
        py::arg("bias"), py::arg("ones"), py::arg("offset"), py::arg("mask"),
        py::arg("output"), py::arg("columns"), py::arg("kernel_h"),
        py::arg("kernel_w"), py::arg("stride_h"), py::arg("stride_w"),
        py::arg("pad_h"), py::arg("pad_w"), py::arg("dilation_h"),
        py::arg("dilation_w"), py::arg("group"), py::arg("deformable_group"),
        py::arg("with_bias"));
  m.def("modulated_deform_conv_backward", &modulated_deform_conv_backward,
        "modulated deform conv backward", py::arg("input"), py::arg("weight"),
        py::arg("bias"), py::arg("ones"), py::arg("offset"), py::arg("mask"),
        py::arg("columns"), py::arg("grad_input"), py::arg("grad_weight"),
        py::arg("grad_bias"), py::arg("grad_offset"), py::arg("grad_mask"),
        py::arg("grad_output"), py::arg("kernel_h"), py::arg("kernel_w"),
        py::arg("stride_h"), py::arg("stride_w"), py::arg("pad_h"),
        py::arg("pad_w"), py::arg("dilation_h"), py::arg("dilation_w"),
        py::arg("group"), py::arg("deformable_group"), py::arg("with_bias"));
  m.def("nms", &nms, "nms (CPU/CUDA) ", py::arg("boxes"), py::arg("scores"),
        py::arg("iou_threshold"), py::arg("offset"));
  m.def("softnms", &softnms, "softnms (CPU) ", py::arg("boxes"),
        py::arg("scores"), py::arg("dets"), py::arg("iou_threshold"),
        py::arg("sigma"), py::arg("min_score"), py::arg("method"),
        py::arg("offset"));
  m.def("nms_match", &nms_match, "nms_match (CPU) ", py::arg("dets"),
        py::arg("iou_threshold"));
  // NOTE(review): the sixth C++ parameter is named `kernel_region_num`, but
  // the published keyword is "kernel_region_label"; kept for compatibility.
  m.def("pixel_group", &pixel_group, "pixel group (CPU) ", py::arg("score"),
        py::arg("mask"), py::arg("embedding"), py::arg("kernel_label"),
        py::arg("kernel_contour"), py::arg("kernel_region_label"),
        py::arg("distance_threshold"));
  m.def("contour_expand", &contour_expand, "contour expand (CPU) ",
        py::arg("kernel_mask"), py::arg("internal_kernel_label"),
        py::arg("min_kernel_area"), py::arg("kernel_num"));
  m.def("roi_align_forward", &roi_align_forward, "roi_align forward",
        py::arg("input"), py::arg("rois"), py::arg("output"),
        py::arg("argmax_y"), py::arg("argmax_x"), py::arg("aligned_height"),
        py::arg("aligned_width"), py::arg("spatial_scale"),
        py::arg("sampling_ratio"), py::arg("pool_mode"), py::arg("aligned"));
  m.def("roi_align_backward", &roi_align_backward, "roi_align backward",
        py::arg("grad_output"), py::arg("rois"), py::arg("argmax_y"),
        py::arg("argmax_x"), py::arg("grad_input"), py::arg("aligned_height"),
        py::arg("aligned_width"), py::arg("spatial_scale"),
        py::arg("sampling_ratio"), py::arg("pool_mode"), py::arg("aligned"));
  m.def("roi_pool_forward", &roi_pool_forward, "roi_pool forward",
        py::arg("input"), py::arg("rois"), py::arg("output"), py::arg("argmax"),
        py::arg("pooled_height"), py::arg("pooled_width"),
        py::arg("spatial_scale"));
  m.def("roi_pool_backward", &roi_pool_backward, "roi_pool backward",
        py::arg("grad_output"), py::arg("rois"), py::arg("argmax"),
        py::arg("grad_input"), py::arg("pooled_height"),
        py::arg("pooled_width"), py::arg("spatial_scale"));
  m.def("sync_bn_forward_mean", &sync_bn_forward_mean, "sync_bn forward_mean",
        py::arg("input"), py::arg("mean"));
  m.def("sync_bn_forward_var", &sync_bn_forward_var, "sync_bn forward_var",
        py::arg("input"), py::arg("mean"), py::arg("var"));
  m.def("sync_bn_forward_output", &sync_bn_forward_output,
        "sync_bn forward_output", py::arg("input"), py::arg("mean"),
        py::arg("var"), py::arg("weight"), py::arg("bias"),
        py::arg("running_mean"), py::arg("running_var"), py::arg("norm"),
        py::arg("std"), py::arg("output"), py::arg("eps"), py::arg("momentum"),
        py::arg("group_size"));
  m.def("sync_bn_backward_param", &sync_bn_backward_param,
        "sync_bn backward_param", py::arg("grad_output"), py::arg("norm"),
        py::arg("grad_weight"), py::arg("grad_bias"));
  m.def("sync_bn_backward_data", &sync_bn_backward_data,
        "sync_bn backward_data", py::arg("grad_output"), py::arg("weight"),
        py::arg("grad_weight"), py::arg("grad_bias"), py::arg("norm"),
        py::arg("std"), py::arg("grad_input"));
  m.def("psamask_forward", &psamask_forward, "PSAMASK forward (CPU/CUDA)",
        py::arg("input"), py::arg("output"), py::arg("psa_type"),
        py::arg("num_"), py::arg("h_feature"), py::arg("w_feature"),
        py::arg("h_mask"), py::arg("w_mask"), py::arg("half_h_mask"),
        py::arg("half_w_mask"));
  m.def("psamask_backward", &psamask_backward, "PSAMASK backward (CPU/CUDA)",
        py::arg("grad_output"), py::arg("grad_input"), py::arg("psa_type"),
        py::arg("num_"), py::arg("h_feature"), py::arg("w_feature"),
        py::arg("h_mask"), py::arg("w_mask"), py::arg("half_h_mask"),
        py::arg("half_w_mask"));
  m.def("tin_shift_forward", &tin_shift_forward, "tin_shift forward",
        py::arg("input"), py::arg("shift"), py::arg("output"));
  m.def("tin_shift_backward", &tin_shift_backward, "tin_shift backward",
        py::arg("grad_output"), py::arg("shift"), py::arg("grad_input"));
  // The eight corner-pool bindings are the only ones registered with a
  // gil_scoped_release call guard.
  m.def("bottom_pool_forward", &bottom_pool_forward, "Bottom Pool Forward",
        py::arg("input"), py::call_guard<py::gil_scoped_release>());
  m.def("bottom_pool_backward", &bottom_pool_backward, "Bottom Pool Backward",
        py::arg("input"), py::arg("grad_output"),
        py::call_guard<py::gil_scoped_release>());
  m.def("left_pool_forward", &left_pool_forward, "Left Pool Forward",
        py::arg("input"), py::call_guard<py::gil_scoped_release>());
  m.def("left_pool_backward", &left_pool_backward, "Left Pool Backward",
        py::arg("input"), py::arg("grad_output"),
        py::call_guard<py::gil_scoped_release>());
  m.def("right_pool_forward", &right_pool_forward, "Right Pool Forward",
        py::arg("input"), py::call_guard<py::gil_scoped_release>());
  m.def("right_pool_backward", &right_pool_backward, "Right Pool Backward",
        py::arg("input"), py::arg("grad_output"),
        py::call_guard<py::gil_scoped_release>());
  m.def("top_pool_forward", &top_pool_forward, "Top Pool Forward",
        py::arg("input"), py::call_guard<py::gil_scoped_release>());
  m.def("top_pool_backward", &top_pool_backward, "Top Pool Backward",
        py::arg("input"), py::arg("grad_output"),
        py::call_guard<py::gil_scoped_release>());
  m.def("box_iou_rotated", &box_iou_rotated, "IoU for rotated boxes",
        py::arg("boxes1"), py::arg("boxes2"), py::arg("ious"),
        py::arg("mode_flag"), py::arg("aligned"));
  m.def("nms_rotated", &nms_rotated, "NMS for rotated boxes", py::arg("dets"),
        py::arg("scores"), py::arg("order"), py::arg("dets_sorted"),
        py::arg("iou_threshold"), py::arg("multi_label"));
  m.def("ball_query_forward", &ball_query_forward, "ball_query_forward",
        py::arg("new_xyz_tensor"), py::arg("xyz_tensor"), py::arg("idx_tensor"),
        py::arg("b"), py::arg("n"), py::arg("m"), py::arg("min_radius"),
        py::arg("max_radius"), py::arg("nsample"));
  m.def("roi_align_rotated_forward", &roi_align_rotated_forward,
        "roi_align_rotated forward", py::arg("input"), py::arg("rois"),
        py::arg("output"), py::arg("pooled_height"), py::arg("pooled_width"),
        py::arg("spatial_scale"), py::arg("sample_num"), py::arg("aligned"),
        py::arg("clockwise"));
  // Keyword order follows the C++ prototype
  // roi_align_rotated_backward(grad_output, rois, grad_input, ...). The first
  // three names were previously listed as (rois, grad_input, grad_output),
  // which made a keyword call hand each tensor to the wrong parameter;
  // positional calls behave exactly as before.
  m.def("roi_align_rotated_backward", &roi_align_rotated_backward,
        "roi_align_rotated backward", py::arg("grad_output"), py::arg("rois"),
        py::arg("grad_input"), py::arg("pooled_height"),
        py::arg("pooled_width"), py::arg("spatial_scale"),
        py::arg("sample_num"), py::arg("aligned"), py::arg("clockwise"));
  m.def("dynamic_point_to_voxel_forward", &dynamic_point_to_voxel_forward,
        "dynamic_point_to_voxel_forward", py::arg("feats"), py::arg("coors"),
        py::arg("reduce_type"));
  m.def("dynamic_point_to_voxel_backward", &dynamic_point_to_voxel_backward,
        "dynamic_point_to_voxel_backward", py::arg("grad_feats"),
        py::arg("grad_reduced_feats"), py::arg("feats"),
        py::arg("reduced_feats"), py::arg("coors_idx"), py::arg("reduce_count"),
        py::arg("reduce_type"));
  m.def("hard_voxelize_forward", &hard_voxelize_forward,
        "hard_voxelize_forward", py::arg("points"), py::arg("voxel_size"),
        py::arg("coors_range"), py::arg("voxels"), py::arg("coors"),
        py::arg("num_points_per_voxel"), py::arg("voxel_num"),
        py::arg("max_points"), py::arg("max_voxels"), py::arg("NDim"));
  m.def("dynamic_voxelize_forward", &dynamic_voxelize_forward,
        "dynamic_voxelize_forward", py::arg("points"), py::arg("voxel_size"),
        py::arg("coors_range"), py::arg("coors"), py::arg("NDim"));
  m.def("ms_deform_attn_forward", &ms_deform_attn_forward,
        "forward function of multi-scale deformable attention",
        py::arg("value"), py::arg("value_spatial_shapes"),
        py::arg("value_level_start_index"), py::arg("sampling_locations"),
        py::arg("attention_weights"), py::arg("im2col_step"));
  m.def("ms_deform_attn_backward", &ms_deform_attn_backward,
        "backward function of multi-scale deformable attention",
        py::arg("value"), py::arg("value_spatial_shapes"),
        py::arg("value_level_start_index"), py::arg("sampling_locations"),
        py::arg("attention_weights"), py::arg("grad_output"),
        py::arg("grad_value"), py::arg("grad_sampling_loc"),
        py::arg("grad_attn_weight"), py::arg("im2col_step"));
  m.def("border_align_forward", &border_align_forward,
        "forward function of border_align", py::arg("input"), py::arg("boxes"),
        py::arg("output"), py::arg("argmax_idx"), py::arg("pool_size"));
  m.def("border_align_backward", &border_align_backward,
        "backward function of border_align", py::arg("grad_output"),
        py::arg("boxes"), py::arg("argmax_idx"), py::arg("grad_input"),
        py::arg("pool_size"));
  m.def("correlation_forward", &correlation_forward, "Correlation forward",
        py::arg("input1"), py::arg("input2"), py::arg("output"), py::arg("kH"),
        py::arg("kW"), py::arg("patchH"), py::arg("patchW"), py::arg("padH"),
        py::arg("padW"), py::arg("dilationH"), py::arg("dilationW"),
        py::arg("dilation_patchH"), py::arg("dilation_patchW"), py::arg("dH"),
        py::arg("dW"));
  m.def("correlation_backward", &correlation_backward, "Correlation backward",
        py::arg("grad_output"), py::arg("input1"), py::arg("input2"),
        py::arg("grad_input1"), py::arg("grad_input2"), py::arg("kH"),
        py::arg("kW"), py::arg("patchH"), py::arg("patchW"), py::arg("padH"),
        py::arg("padW"), py::arg("dilationH"), py::arg("dilationW"),
        py::arg("dilation_patchH"), py::arg("dilation_patchW"), py::arg("dH"),
        py::arg("dW"));
  m.def("points_in_boxes_cpu_forward", &points_in_boxes_cpu_forward,
        "points_in_boxes_cpu_forward", py::arg("boxes_tensor"),
        py::arg("pts_tensor"), py::arg("pts_indices_tensor"));
  m.def("points_in_boxes_part_forward", &points_in_boxes_part_forward,
        "points_in_boxes_part_forward", py::arg("boxes_tensor"),
        py::arg("pts_tensor"), py::arg("box_idx_of_points_tensor"));
  m.def("points_in_boxes_all_forward", &points_in_boxes_all_forward,
        "points_in_boxes_all_forward", py::arg("boxes_tensor"),
        py::arg("pts_tensor"), py::arg("box_idx_of_points_tensor"));
  m.def("roiaware_pool3d_forward", &roiaware_pool3d_forward,
        "roiaware_pool3d_forward", py::arg("rois"), py::arg("pts"),
        py::arg("pts_feature"), py::arg("argmax"), py::arg("pts_idx_of_voxels"),
        py::arg("pooled_features"), py::arg("pool_method"));
  m.def("roiaware_pool3d_backward", &roiaware_pool3d_backward,
        "roiaware_pool3d_backward", py::arg("pts_idx_of_voxels"),
        py::arg("argmax"), py::arg("grad_out"), py::arg("grad_in"),
        py::arg("pool_method"));
  m.def("rotated_feature_align_forward", &rotated_feature_align_forward,
        "Feature Refine forward (CUDA)", py::arg("features"),
        py::arg("best_bboxes"), py::arg("output"), py::arg("spatial_scale"),
        py::arg("points"));
  m.def("rotated_feature_align_backward", &rotated_feature_align_backward,
        "Feature Refine backward (CUDA)", py::arg("top_grad"),
        py::arg("best_bboxes"), py::arg("bottom_grad"),
        py::arg("spatial_scale"), py::arg("points"));
  m.def("riroi_align_rotated_forward", &riroi_align_rotated_forward,
        "riroi_align_rotated forward", py::arg("features"), py::arg("rois"),
        py::arg("output"), py::arg("pooled_height"), py::arg("pooled_width"),
        py::arg("spatial_scale"), py::arg("num_samples"),
        py::arg("num_orientations"), py::arg("clockwise"));
  m.def("riroi_align_rotated_backward", &riroi_align_rotated_backward,
        "riroi_align_rotated backward", py::arg("top_grad"), py::arg("rois"),
        py::arg("bottom_grad"), py::arg("pooled_height"),
        py::arg("pooled_width"), py::arg("spatial_scale"),
        py::arg("num_samples"), py::arg("num_orientations"),
        py::arg("clockwise"));
  m.def("points_in_polygons_forward", &points_in_polygons_forward,
        "points_in_polygons_forward", py::arg("points"), py::arg("polygons"),
        py::arg("output"));
  m.def("min_area_polygons", &min_area_polygons, "min_area_polygons",
        py::arg("pointsets"), py::arg("polygons"));
  m.def("active_rotated_filter_forward", &active_rotated_filter_forward,
        "active_rotated_filter_forward", py::arg("input"), py::arg("indices"),
        py::arg("output"));
  m.def("active_rotated_filter_backward", &active_rotated_filter_backward,
        "active_rotated_filter_backward", py::arg("grad_out"),
        py::arg("indices"), py::arg("grad_in"));
  m.def("convex_iou", &convex_iou, "convex_iou", py::arg("pointsets"),
        py::arg("polygons"), py::arg("ious"));
  m.def("convex_giou", &convex_giou, "convex_giou", py::arg("pointsets"),
        py::arg("polygons"), py::arg("output"));
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/riroi_align_rotated.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Dispatch stub for the riroi_align_rotated forward pass: forwards all
// arguments, in declaration order, to DISPATCH_DEVICE_IMPL keyed by
// `riroi_align_rotated_forward_impl`.
// NOTE(review): the macro's backend lookup is defined in
// pytorch_device_registry.hpp, outside this file - confirm details there.
void riroi_align_rotated_forward_impl(Tensor features, Tensor rois,
                                      Tensor output, int pooled_height,
                                      int pooled_width, float spatial_scale,
                                      int num_samples, int num_orientations,
                                      bool clockwise) {
  DISPATCH_DEVICE_IMPL(riroi_align_rotated_forward_impl, features, rois, output,
                       pooled_height, pooled_width, spatial_scale, num_samples,
                       num_orientations, clockwise);
}

// Dispatch stub for the riroi_align_rotated backward pass: forwards all
// arguments, in declaration order, to DISPATCH_DEVICE_IMPL keyed by
// `riroi_align_rotated_backward_impl`.
// NOTE(review): the macro's backend lookup is defined in
// pytorch_device_registry.hpp, outside this file - confirm details there.
void riroi_align_rotated_backward_impl(Tensor top_grad, Tensor rois,
                                       Tensor bottom_grad, int pooled_height,
                                       int pooled_width, float spatial_scale,
                                       int num_samples, int num_orientations,
                                       bool clockwise) {
  DISPATCH_DEVICE_IMPL(riroi_align_rotated_backward_impl, top_grad, rois,
                       bottom_grad, pooled_height, pooled_width, spatial_scale,
                       num_samples, num_orientations, clockwise);
}

// Public entry point for riroi_align_rotated forward (the function registered
// with pybind under the same name). It contains no logic of its own: the nine
// arguments are handed to riroi_align_rotated_forward_impl in the order
// received.
void riroi_align_rotated_forward(Tensor features, Tensor rois, Tensor output,
                                 int pooled_height, int pooled_width,
                                 float spatial_scale, int num_samples,
                                 int num_orientations, bool clockwise) {
  riroi_align_rotated_forward_impl(
      // tensor arguments
      features, rois, output,
      // scalar options
      pooled_height, pooled_width, spatial_scale, num_samples,
      num_orientations, clockwise);
}

// Public entry point for riroi_align_rotated backward (the function registered
// with pybind under the same name). Pure pass-through: every argument goes to
// riroi_align_rotated_backward_impl unchanged and in the same order.
void riroi_align_rotated_backward(Tensor top_grad, Tensor rois,
                                  Tensor bottom_grad, int pooled_height,
                                  int pooled_width, float spatial_scale,
                                  int num_samples, int num_orientations,
                                  bool clockwise) {
  riroi_align_rotated_backward_impl(
      // tensor arguments
      top_grad, rois, bottom_grad,
      // scalar options
      pooled_height, pooled_width, spatial_scale, num_samples,
      num_orientations, clockwise);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/roi_align.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Dispatch stub for the roi_align forward pass: forwards all arguments, in
// declaration order, to DISPATCH_DEVICE_IMPL keyed by `roi_align_forward_impl`.
// NOTE(review): the macro's backend lookup is defined in
// pytorch_device_registry.hpp, outside this file - confirm details there.
void roi_align_forward_impl(Tensor input, Tensor rois, Tensor output,
                            Tensor argmax_y, Tensor argmax_x,
                            int aligned_height, int aligned_width,
                            float spatial_scale, int sampling_ratio,
                            int pool_mode, bool aligned) {
  DISPATCH_DEVICE_IMPL(roi_align_forward_impl, input, rois, output, argmax_y,
                       argmax_x, aligned_height, aligned_width, spatial_scale,
                       sampling_ratio, pool_mode, aligned);
}

// Dispatch stub for the roi_align backward pass: forwards all arguments, in
// declaration order, to DISPATCH_DEVICE_IMPL keyed by
// `roi_align_backward_impl`.
// NOTE(review): the macro's backend lookup is defined in
// pytorch_device_registry.hpp, outside this file - confirm details there.
void roi_align_backward_impl(Tensor grad_output, Tensor rois, Tensor argmax_y,
                             Tensor argmax_x, Tensor grad_input,
                             int aligned_height, int aligned_width,
                             float spatial_scale, int sampling_ratio,
                             int pool_mode, bool aligned) {
  DISPATCH_DEVICE_IMPL(roi_align_backward_impl, grad_output, rois, argmax_y,
                       argmax_x, grad_input, aligned_height, aligned_width,
                       spatial_scale, sampling_ratio, pool_mode, aligned);
}

// Public wrapper for roi_align forward. Adds no logic: the five tensors and
// six scalar options are passed straight through to roi_align_forward_impl in
// the order received.
void roi_align_forward(Tensor input, Tensor rois, Tensor output,
                       Tensor argmax_y, Tensor argmax_x, int aligned_height,
                       int aligned_width, float spatial_scale,
                       int sampling_ratio, int pool_mode, bool aligned) {
  roi_align_forward_impl(
      // tensor arguments
      input, rois, output, argmax_y, argmax_x,
      // scalar options
      aligned_height, aligned_width, spatial_scale, sampling_ratio, pool_mode,
      aligned);
}

// Public wrapper for roi_align backward. Adds no logic: the five tensors and
// six scalar options are passed straight through to roi_align_backward_impl
// in the order received.
void roi_align_backward(Tensor grad_output, Tensor rois, Tensor argmax_y,
                        Tensor argmax_x, Tensor grad_input, int aligned_height,
                        int aligned_width, float spatial_scale,
                        int sampling_ratio, int pool_mode, bool aligned) {
  roi_align_backward_impl(
      // tensor arguments
      grad_output, rois, argmax_y, argmax_x, grad_input,
      // scalar options
      aligned_height, aligned_width, spatial_scale, sampling_ratio, pool_mode,
      aligned);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/roi_align_rotated.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Dispatch stub for the roi_align_rotated forward pass: forwards all
// arguments, in declaration order, to DISPATCH_DEVICE_IMPL keyed by
// `roi_align_rotated_forward_impl`.
// NOTE(review): the macro's backend lookup is defined in
// pytorch_device_registry.hpp, outside this file - confirm details there.
void roi_align_rotated_forward_impl(Tensor features, Tensor rois, Tensor output,
                                    int aligned_height, int aligned_width,
                                    float spatial_scale, int sample_ratio,
                                    bool aligned, bool clockwise) {
  DISPATCH_DEVICE_IMPL(roi_align_rotated_forward_impl, features, rois, output,
                       aligned_height, aligned_width, spatial_scale,
                       sample_ratio, aligned, clockwise);
}

// Dispatch stub for the roi_align_rotated backward pass: forwards all
// arguments, in declaration order, to DISPATCH_DEVICE_IMPL keyed by
// `roi_align_rotated_backward_impl`.
// NOTE(review): the macro's backend lookup is defined in
// pytorch_device_registry.hpp, outside this file - confirm details there.
void roi_align_rotated_backward_impl(Tensor top_grad, Tensor rois,
                                     Tensor bottom_grad, int aligned_height,
                                     int aligned_width, float spatial_scale,
                                     int sample_ratio, bool aligned,
                                     bool clockwise) {
  DISPATCH_DEVICE_IMPL(roi_align_rotated_backward_impl, top_grad, rois,
                       bottom_grad, aligned_height, aligned_width,
                       spatial_scale, sample_ratio, aligned, clockwise);
}

// Public wrapper for roi_align_rotated forward. Pure pass-through to
// roi_align_rotated_forward_impl; argument order is preserved (`input` and
// `sampling_ratio` here correspond positionally to the impl's `features` and
// `sample_ratio`).
void roi_align_rotated_forward(Tensor input, Tensor rois, Tensor output,
                               int aligned_height, int aligned_width,
                               float spatial_scale, int sampling_ratio,
                               bool aligned, bool clockwise) {
  roi_align_rotated_forward_impl(
      // tensor arguments
      input, rois, output,
      // scalar options
      aligned_height, aligned_width, spatial_scale, sampling_ratio, aligned,
      clockwise);
}

// Public wrapper for roi_align_rotated backward. Pure pass-through to
// roi_align_rotated_backward_impl; argument order is preserved
// (`sampling_ratio` here corresponds positionally to the impl's
// `sample_ratio`).
void roi_align_rotated_backward(Tensor top_grad, Tensor rois,
                                Tensor bottom_grad, int aligned_height,
                                int aligned_width, float spatial_scale,
                                int sampling_ratio, bool aligned,
                                bool clockwise) {
  roi_align_rotated_backward_impl(
      // tensor arguments
      top_grad, rois, bottom_grad,
      // scalar options
      aligned_height, aligned_width, spatial_scale, sampling_ratio, aligned,
      clockwise);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/roi_pool.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Dispatch stub for the roi_pool forward pass: forwards all arguments, in
// declaration order, to DISPATCH_DEVICE_IMPL keyed by `roi_pool_forward_impl`.
// NOTE(review): the macro's backend lookup is defined in
// pytorch_device_registry.hpp, outside this file - confirm details there.
void roi_pool_forward_impl(Tensor input, Tensor rois, Tensor output,
                           Tensor argmax, int pooled_height, int pooled_width,
                           float spatial_scale) {
  DISPATCH_DEVICE_IMPL(roi_pool_forward_impl, input, rois, output, argmax,
                       pooled_height, pooled_width, spatial_scale);
}

// Dispatch stub for the roi_pool backward pass: forwards all arguments, in
// declaration order, to DISPATCH_DEVICE_IMPL keyed by `roi_pool_backward_impl`.
// NOTE(review): the macro's backend lookup is defined in
// pytorch_device_registry.hpp, outside this file - confirm details there.
void roi_pool_backward_impl(Tensor grad_output, Tensor rois, Tensor argmax,
                            Tensor grad_input, int pooled_height,
                            int pooled_width, float spatial_scale) {
  DISPATCH_DEVICE_IMPL(roi_pool_backward_impl, grad_output, rois, argmax,
                       grad_input, pooled_height, pooled_width, spatial_scale);
}

// Public wrapper for roi_pool forward. Pure pass-through: the four tensors and
// three scalar options go to roi_pool_forward_impl in the order received.
void roi_pool_forward(Tensor input, Tensor rois, Tensor output, Tensor argmax,
                      int pooled_height, int pooled_width,
                      float spatial_scale) {
  roi_pool_forward_impl(
      // tensor arguments
      input, rois, output, argmax,
      // scalar options
      pooled_height, pooled_width, spatial_scale);
}

// Public wrapper for roi_pool backward. Pure pass-through: the four tensors
// and three scalar options go to roi_pool_backward_impl in the order received.
void roi_pool_backward(Tensor grad_output, Tensor rois, Tensor argmax,
                       Tensor grad_input, int pooled_height, int pooled_width,
                       float spatial_scale) {
  roi_pool_backward_impl(
      // tensor arguments
      grad_output, rois, argmax, grad_input,
      // scalar options
      pooled_height, pooled_width, spatial_scale);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/roiaware_pool3d.cpp
================================================
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Dispatch stub for the roiaware_pool3d forward pass: forwards the seven
// integer sizes/options and six tensors, in declaration order, to
// DISPATCH_DEVICE_IMPL keyed by `roiaware_pool3d_forward_impl`.
// NOTE(review): the macro's backend lookup is defined in
// pytorch_device_registry.hpp, outside this file - confirm details there.
void roiaware_pool3d_forward_impl(int boxes_num, int pts_num, int channels,
                                  int max_pts_each_voxel, int out_x, int out_y,
                                  int out_z, const Tensor rois,
                                  const Tensor pts, const Tensor pts_feature,
                                  Tensor argmax, Tensor pts_idx_of_voxels,
                                  Tensor pooled_features, int pool_method) {
  DISPATCH_DEVICE_IMPL(roiaware_pool3d_forward_impl, boxes_num, pts_num,
                       channels, max_pts_each_voxel, out_x, out_y, out_z, rois,
                       pts, pts_feature, argmax, pts_idx_of_voxels,
                       pooled_features, pool_method);
}

// Dispatch stub for the roiaware_pool3d backward pass: forwards all
// arguments, in declaration order, to DISPATCH_DEVICE_IMPL keyed by
// `roiaware_pool3d_backward_impl`.
// NOTE(review): the macro's backend lookup is defined in
// pytorch_device_registry.hpp, outside this file - confirm details there.
void roiaware_pool3d_backward_impl(int boxes_num, int out_x, int out_y,
                                   int out_z, int channels,
                                   int max_pts_each_voxel,
                                   const Tensor pts_idx_of_voxels,
                                   const Tensor argmax, const Tensor grad_out,
                                   Tensor grad_in, int pool_method) {
  DISPATCH_DEVICE_IMPL(roiaware_pool3d_backward_impl, boxes_num, out_x, out_y,
                       out_z, channels, max_pts_each_voxel, pts_idx_of_voxels,
                       argmax, grad_out, grad_in, pool_method);
}

// Public entry point for roiaware_pool3d forward. Reads the problem sizes off
// the tensor shapes, validates the voxel grid extents, and forwards everything
// to roiaware_pool3d_forward_impl.
//
// Raises (via TORCH_CHECK) when any of out_x / out_y / out_z is >= 256.
void roiaware_pool3d_forward(Tensor rois, Tensor pts, Tensor pts_feature,
                             Tensor argmax, Tensor pts_idx_of_voxels,
                             Tensor pooled_features, int pool_method) {
  // params rois: (N, 7) [x, y, z, x_size, y_size, z_size, ry] in LiDAR
  // coordinate
  // params pts: (npoints, 3) [x, y, z] in LiDAR coordinate
  // params pts_feature: (npoints, C)
  // params argmax: (N, out_x, out_y, out_z, C)
  // params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
  // params pooled_features: (N, out_x, out_y, out_z, C)
  // params pool_method: 0: max_pool 1: avg_pool
  int boxes_num = rois.size(0);
  int pts_num = pts.size(0);
  int channels = pts_feature.size(1);
  int max_pts_each_voxel = pts_idx_of_voxels.size(4);  // index 0 is the counter
  int out_x = pts_idx_of_voxels.size(1);
  int out_y = pts_idx_of_voxels.size(2);
  int out_z = pts_idx_of_voxels.size(3);

  // We encode the voxel index with 8 bits per axis, so each extent must stay
  // below 256. This used to be an assert(), which is compiled out whenever
  // NDEBUG is defined; TORCH_CHECK keeps the guard active in every build and
  // reports the failure as an exception instead of aborting the process.
  TORCH_CHECK((out_x < 256) && (out_y < 256) && (out_z < 256),
              "roiaware_pool3d_forward: out_x, out_y and out_z must each be "
              "smaller than 256, but got (",
              out_x, ", ", out_y, ", ", out_z, ")");

  roiaware_pool3d_forward_impl(boxes_num, pts_num, channels, max_pts_each_voxel,
                               out_x, out_y, out_z, rois, pts, pts_feature,
                               argmax, pts_idx_of_voxels, pooled_features,
                               pool_method);
}

// Public entry point for roiaware_pool3d backward. Derives the integer sizes
// from the shapes of `pts_idx_of_voxels` and `grad_out`, then calls
// roiaware_pool3d_backward_impl.
//
// Expected layouts (as documented by the original author):
//   pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
//   argmax:            (N, out_x, out_y, out_z, C)
//   grad_out:          (N, out_x, out_y, out_z, C)
//   grad_in:           (npoints, C), return value
//   pool_method:       0: max_pool 1: avg_pool
void roiaware_pool3d_backward(Tensor pts_idx_of_voxels, Tensor argmax,
                              Tensor grad_out, Tensor grad_in,
                              int pool_method) {
  // Sizes are queried in dimension order 0..4 of pts_idx_of_voxels, then the
  // channel count from grad_out.
  const int num_boxes = pts_idx_of_voxels.size(0);
  const int grid_x = pts_idx_of_voxels.size(1);
  const int grid_y = pts_idx_of_voxels.size(2);
  const int grid_z = pts_idx_of_voxels.size(3);
  // index 0 is the counter
  const int pts_per_voxel = pts_idx_of_voxels.size(4);
  const int num_channels = grad_out.size(4);

  roiaware_pool3d_backward_impl(num_boxes, grid_x, grid_y, grid_z,
                                num_channels, pts_per_voxel,
                                pts_idx_of_voxels, argmax, grad_out, grad_in,
                                pool_method);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/roipoint_pool3d.cpp
================================================
/*
Modified from
https://github.com/open-mmlab/OpenPCDet/blob/master/pcdet/ops/roipoint_pool3d/src/roipoint_pool3d.cpp
Point cloud feature pooling
Written by Shaoshuai Shi
All Rights Reserved 2018.
*/

#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Dispatch stub for the roipoint_pool3d forward pass: forwards the five
// integer sizes and five tensors, in declaration order, to
// DISPATCH_DEVICE_IMPL keyed by `roipoint_pool3d_forward_impl`.
// NOTE(review): the macro's backend lookup is defined in
// pytorch_device_registry.hpp, outside this file - confirm details there.
void roipoint_pool3d_forward_impl(int batch_size, int pts_num, int boxes_num,
                                  int feature_in_len, int sampled_pts_num,
                                  const Tensor xyz, const Tensor boxes3d,
                                  const Tensor pts_feature,
                                  Tensor pooled_features,
                                  Tensor pooled_empty_flag) {
  DISPATCH_DEVICE_IMPL(roipoint_pool3d_forward_impl, batch_size, pts_num,
                       boxes_num, feature_in_len, sampled_pts_num, xyz, boxes3d,
                       pts_feature, pooled_features, pooled_empty_flag);
}

// Public entry point for roipoint_pool3d forward. Reads the integer sizes off
// the tensor shapes and calls roipoint_pool3d_forward_impl.
//
// Expected layouts (as documented by the original author):
//   xyz:               (B, N, 3)
//   boxes3d:           (B, M, 7)
//   pts_feature:       (B, N, C)
//   pooled_features:   (B, M, 512, 3+C)
//   pooled_empty_flag: (B, M)
void roipoint_pool3d_forward(Tensor xyz, Tensor boxes3d, Tensor pts_feature,
                             Tensor pooled_features, Tensor pooled_empty_flag) {
  const int num_batches = xyz.size(0);
  const int num_points = xyz.size(1);
  const int num_boxes = boxes3d.size(1);
  const int feature_len = pts_feature.size(2);
  const int num_sampled = pooled_features.size(2);

  roipoint_pool3d_forward_impl(num_batches, num_points, num_boxes, feature_len,
                               num_sampled, xyz, boxes3d, pts_feature,
                               pooled_features, pooled_empty_flag);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/rotated_feature_align.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved.
// Modified from
// https://github.com/SJTU-Thinklab-Det/r3det-on-mmdetection/blob/master/mmdet/ops/fr/src/feature_refine_cuda.cpp

#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Dispatch stub for the rotated_feature_align forward pass: forwards all
// arguments, in declaration order, to DISPATCH_DEVICE_IMPL keyed by
// `rotated_feature_align_forward_impl`.
// NOTE(review): the macro's backend lookup is defined in
// pytorch_device_registry.hpp, outside this file - confirm details there.
void rotated_feature_align_forward_impl(const Tensor features,
                                        const Tensor best_bboxes,
                                        const float spatial_scale,
                                        const int points, Tensor output) {
  DISPATCH_DEVICE_IMPL(rotated_feature_align_forward_impl, features,
                       best_bboxes, spatial_scale, points, output);
}

// Dispatch stub for the rotated_feature_align backward pass: forwards all
// arguments, in declaration order, to DISPATCH_DEVICE_IMPL keyed by
// `rotated_feature_align_backward_impl`.
// NOTE(review): the macro's backend lookup is defined in
// pytorch_device_registry.hpp, outside this file - confirm details there.
void rotated_feature_align_backward_impl(const Tensor top_grad,
                                         const Tensor best_bboxes,
                                         const float spatial_scale,
                                         const int points, Tensor bottom_grad) {
  DISPATCH_DEVICE_IMPL(rotated_feature_align_backward_impl, top_grad,
                       best_bboxes, spatial_scale, points, bottom_grad);
}

// Public wrapper for rotated_feature_align forward. The only work done here is
// argument reordering: this function takes `output` third, whereas
// rotated_feature_align_forward_impl takes it last.
void rotated_feature_align_forward(const Tensor features,
                                   const Tensor best_bboxes, Tensor output,
                                   const float spatial_scale,
                                   const int points) {
  rotated_feature_align_forward_impl(
      // inputs
      features, best_bboxes, spatial_scale, points,
      // destination tensor goes last for the impl
      output);
}

// Public wrapper for rotated_feature_align backward (the function registered
// with pybind under the same name). The only work done here is argument
// reordering: this function takes `bottom_grad` third, whereas
// rotated_feature_align_backward_impl takes it last.
void rotated_feature_align_backward(const Tensor top_grad,
                                    const Tensor best_bboxes,
                                    Tensor bottom_grad,
                                    const float spatial_scale,
                                    const int points) {
  rotated_feature_align_backward_impl(
      // inputs
      top_grad, best_bboxes, spatial_scale, points,
      // destination tensor goes last for the impl
      bottom_grad);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/scatter_points.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved.
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Reduction mode passed to the dynamic_point_to_voxel_* implementations.
// convert_reduce_type() in this file maps the strings "sum", "mean" and "max"
// onto these enumerators.
typedef enum { SUM = 0, MEAN = 1, MAX = 2 } reduce_t;

// Dispatch stub for the dynamic_point_to_voxel forward pass: forwards all
// arguments, in declaration order, to DISPATCH_DEVICE_IMPL keyed by
// `dynamic_point_to_voxel_forward_impl` and returns whatever that yields.
// NOTE(review): the macro's backend lookup is defined in
// pytorch_device_registry.hpp, outside this file - confirm details there.
std::vector<torch::Tensor> dynamic_point_to_voxel_forward_impl(
    const torch::Tensor &feats, const torch::Tensor &coors,
    const reduce_t reduce_type) {
  return DISPATCH_DEVICE_IMPL(dynamic_point_to_voxel_forward_impl, feats, coors,
                              reduce_type);
}

// Dispatch stub for the dynamic_point_to_voxel backward pass: forwards all
// arguments, in declaration order, to DISPATCH_DEVICE_IMPL keyed by
// `dynamic_point_to_voxel_backward_impl`. `grad_feats` is the only non-const
// tensor reference in the signature.
// NOTE(review): the macro's backend lookup is defined in
// pytorch_device_registry.hpp, outside this file - confirm details there.
void dynamic_point_to_voxel_backward_impl(
    torch::Tensor &grad_feats, const torch::Tensor &grad_reduced_feats,
    const torch::Tensor &feats, const torch::Tensor &reduced_feats,
    const torch::Tensor &coors_idx, const torch::Tensor &reduce_count,
    const reduce_t reduce_type) {
  DISPATCH_DEVICE_IMPL(dynamic_point_to_voxel_backward_impl, grad_feats,
                       grad_reduced_feats, feats, reduced_feats, coors_idx,
                       reduce_count, reduce_type);
}

// Translates a reduction name into its reduce_t enumerator.
//   "max"  -> reduce_t::MAX
//   "sum"  -> reduce_t::SUM
//   "mean" -> reduce_t::MEAN
// Any other string fails a TORCH_CHECK whose message names the offending
// value.
inline reduce_t convert_reduce_type(const std::string &reduce_type) {
  if (reduce_type == "max") {
    return reduce_t::MAX;
  }
  if (reduce_type == "sum") {
    return reduce_t::SUM;
  }
  if (reduce_type == "mean") {
    return reduce_t::MEAN;
  }

  // Unrecognised name: the check below always fails. The trailing return only
  // exists so that every path through the function yields a value.
  TORCH_CHECK(false, "do not support reduce type " + reduce_type);
  return reduce_t::SUM;
}

// Public wrapper for dynamic_point_to_voxel forward. Converts the string
// `reduce_type` with convert_reduce_type() and returns the result of
// dynamic_point_to_voxel_forward_impl.
std::vector<torch::Tensor> dynamic_point_to_voxel_forward(
    const torch::Tensor &feats, const torch::Tensor &coors,
    const std::string &reduce_type) {
  const reduce_t mode = convert_reduce_type(reduce_type);
  return dynamic_point_to_voxel_forward_impl(feats, coors, mode);
}

// Public wrapper for dynamic_point_to_voxel backward. Converts the string
// `reduce_type` with convert_reduce_type() and forwards the tensors, in the
// order received, to dynamic_point_to_voxel_backward_impl.
void dynamic_point_to_voxel_backward(torch::Tensor &grad_feats,
                                     const torch::Tensor &grad_reduced_feats,
                                     const torch::Tensor &feats,
                                     const torch::Tensor &reduced_feats,
                                     const torch::Tensor &coors_idx,
                                     const torch::Tensor &reduce_count,
                                     const std::string &reduce_type) {
  const reduce_t mode = convert_reduce_type(reduce_type);
  dynamic_point_to_voxel_backward_impl(grad_feats, grad_reduced_feats, feats,
                                       reduced_feats, coors_idx, reduce_count,
                                       mode);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/sync_bn.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Dispatch stub: forwards (input, mean) to DISPATCH_DEVICE_IMPL keyed by
// `sync_bn_forward_mean_impl`.
// NOTE(review): the macro's backend lookup is defined in
// pytorch_device_registry.hpp, outside this file - confirm details there.
void sync_bn_forward_mean_impl(const Tensor input, Tensor mean) {
  DISPATCH_DEVICE_IMPL(sync_bn_forward_mean_impl, input, mean);
}

// Dispatch stub: forwards (input, mean, var) to DISPATCH_DEVICE_IMPL keyed by
// `sync_bn_forward_var_impl`.
// NOTE(review): the macro's backend lookup is defined in
// pytorch_device_registry.hpp, outside this file - confirm details there.
void sync_bn_forward_var_impl(const Tensor input, const Tensor mean,
                              Tensor var) {
  DISPATCH_DEVICE_IMPL(sync_bn_forward_var_impl, input, mean, var);
}

// Dispatch stub for the sync_bn forward output step: forwards all thirteen
// arguments, in declaration order, to DISPATCH_DEVICE_IMPL keyed by
// `sync_bn_forward_output_impl`. Note that running_mean/running_var precede
// weight/bias in this signature.
// NOTE(review): the macro's backend lookup is defined in
// pytorch_device_registry.hpp, outside this file - confirm details there.
void sync_bn_forward_output_impl(const Tensor input, const Tensor mean,
                                 const Tensor var, Tensor running_mean,
                                 Tensor running_var, const Tensor weight,
                                 const Tensor bias, Tensor norm, Tensor std,
                                 Tensor output, float eps, float momentum,
                                 int group_size) {
  DISPATCH_DEVICE_IMPL(sync_bn_forward_output_impl, input, mean, var,
                       running_mean, running_var, weight, bias, norm, std,
                       output, eps, momentum, group_size);
}

// Dispatch stub: forwards (grad_output, norm, grad_weight, grad_bias) to
// DISPATCH_DEVICE_IMPL keyed by `sync_bn_backward_param_impl`.
// NOTE(review): the macro's backend lookup is defined in
// pytorch_device_registry.hpp, outside this file - confirm details there.
void sync_bn_backward_param_impl(const Tensor grad_output, const Tensor norm,
                                 Tensor grad_weight, Tensor grad_bias) {
  DISPATCH_DEVICE_IMPL(sync_bn_backward_param_impl, grad_output, norm,
                       grad_weight, grad_bias);
}

// Dispatch stub for the sync_bn backward data step: forwards all seven
// tensors, in declaration order, to DISPATCH_DEVICE_IMPL keyed by
// `sync_bn_backward_data_impl`.
// NOTE(review): the macro's backend lookup is defined in
// pytorch_device_registry.hpp, outside this file - confirm details there.
void sync_bn_backward_data_impl(const Tensor grad_output, const Tensor weight,
                                const Tensor grad_weight,
                                const Tensor grad_bias, const Tensor norm,
                                const Tensor std, Tensor grad_input) {
  DISPATCH_DEVICE_IMPL(sync_bn_backward_data_impl, grad_output, weight,
                       grad_weight, grad_bias, norm, std, grad_input);
}

// Public wrapper: passes (input, mean) straight to sync_bn_forward_mean_impl.
void sync_bn_forward_mean(const Tensor input, Tensor mean) {
  sync_bn_forward_mean_impl(input, mean);
}

// Public wrapper: passes (input, mean, var) straight to
// sync_bn_forward_var_impl.
void sync_bn_forward_var(const Tensor input, const Tensor mean, Tensor var) {
  sync_bn_forward_var_impl(input, mean, var);
}

// Public wrapper for the sync_bn forward output step. Its only job is to
// reorder arguments: this function receives (weight, bias) before
// (running_mean, running_var), while sync_bn_forward_output_impl expects the
// running statistics first. All other arguments keep their relative order.
void sync_bn_forward_output(const Tensor input, const Tensor mean,
                            const Tensor var, const Tensor weight,
                            const Tensor bias, Tensor running_mean,
                            Tensor running_var, Tensor norm, Tensor std,
                            Tensor output, float eps, float momentum,
                            int group_size) {
  sync_bn_forward_output_impl(
      input, mean, var,
      // running statistics come before the affine parameters for the impl
      running_mean, running_var,
      weight, bias,
      norm, std, output,
      // scalar options
      eps, momentum, group_size);
}

// Public wrapper: passes (grad_output, norm, grad_weight, grad_bias) straight
// to sync_bn_backward_param_impl.
void sync_bn_backward_param(const Tensor grad_output, const Tensor norm,
                            Tensor grad_weight, Tensor grad_bias) {
  sync_bn_backward_param_impl(grad_output, norm, grad_weight, grad_bias);
}

// Public wrapper for the sync_bn backward data step. Pure pass-through: all
// seven tensors go to sync_bn_backward_data_impl in the order received.
void sync_bn_backward_data(const Tensor grad_output, const Tensor weight,
                           const Tensor grad_weight, const Tensor grad_bias,
                           const Tensor norm, const Tensor std,
                           Tensor grad_input) {
  sync_bn_backward_data_impl(
      // read-only tensors
      grad_output, weight, grad_weight, grad_bias, norm, std,
      // the only non-const tensor parameter
      grad_input);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/three_interpolate.cpp
================================================
// Modified from
// https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/interpolate.cpp

#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Dispatch stub for the three_interpolate forward pass: forwards the four
// integer sizes and four tensors, in declaration order, to
// DISPATCH_DEVICE_IMPL keyed by `three_interpolate_forward_impl`.
// NOTE(review): the macro's backend lookup is defined in
// pytorch_device_registry.hpp, outside this file - confirm details there.
void three_interpolate_forward_impl(int b, int c, int m, int n,
                                    const Tensor points, const Tensor idx,
                                    const Tensor weight, Tensor out) {
  DISPATCH_DEVICE_IMPL(three_interpolate_forward_impl, b, c, m, n, points, idx,
                       weight, out);
}

// Dispatch stub for the three_interpolate backward pass: forwards the four
// integer sizes and four tensors, in declaration order, to
// DISPATCH_DEVICE_IMPL keyed by `three_interpolate_backward_impl`. Note the
// integer order here is (b, c, n, m), unlike the forward stub's (b, c, m, n).
// NOTE(review): the macro's backend lookup is defined in
// pytorch_device_registry.hpp, outside this file - confirm details there.
void three_interpolate_backward_impl(int b, int c, int n, int m,
                                     const Tensor grad_out, const Tensor idx,
                                     const Tensor weight, Tensor grad_points) {
  DISPATCH_DEVICE_IMPL(three_interpolate_backward_impl, b, c, n, m, grad_out,
                       idx, weight, grad_points);
}

// Public wrapper for three_interpolate forward. Only reorders arguments: the
// caller supplies the tensors first and the sizes (b, c, m, n) last, while
// three_interpolate_forward_impl takes the sizes first.
void three_interpolate_forward(Tensor points_tensor, Tensor idx_tensor,
                               Tensor weight_tensor, Tensor out_tensor, int b,
                               int c, int m, int n) {
  three_interpolate_forward_impl(
      // sizes
      b, c, m, n,
      // tensors
      points_tensor, idx_tensor, weight_tensor, out_tensor);
}

// Public wrapper for three_interpolate backward. Only reorders arguments: the
// caller supplies the tensors first and the sizes (b, c, n, m) last, while
// three_interpolate_backward_impl takes the sizes first.
void three_interpolate_backward(Tensor grad_out_tensor, Tensor idx_tensor,
                                Tensor weight_tensor, Tensor grad_points_tensor,
                                int b, int c, int n, int m) {
  three_interpolate_backward_impl(
      // sizes
      b, c, n, m,
      // tensors
      grad_out_tensor, idx_tensor, weight_tensor, grad_points_tensor);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/three_nn.cpp
================================================
// Modified from
// https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/interpolate.cpp

#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Dispatch stub for the three_nn forward pass: forwards the three integer
// sizes and four tensors, in declaration order, to DISPATCH_DEVICE_IMPL keyed
// by `three_nn_forward_impl`.
// NOTE(review): the macro's backend lookup is defined in
// pytorch_device_registry.hpp, outside this file - confirm details there.
void three_nn_forward_impl(int b, int n, int m, const Tensor unknown,
                           const Tensor known, Tensor dist2, Tensor idx) {
  DISPATCH_DEVICE_IMPL(three_nn_forward_impl, b, n, m, unknown, known, dist2,
                       idx);
}

// Public wrapper for three_nn forward. Only reorders arguments: the caller
// supplies the tensors first and the sizes (b, n, m) last, while
// three_nn_forward_impl takes the sizes first.
void three_nn_forward(Tensor unknown_tensor, Tensor known_tensor,
                      Tensor dist2_tensor, Tensor idx_tensor, int b, int n,
                      int m) {
  three_nn_forward_impl(
      // sizes
      b, n, m,
      // tensors
      unknown_tensor, known_tensor, dist2_tensor, idx_tensor);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/tin_shift.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Dispatch stub: forwards (input, shift, output) to DISPATCH_DEVICE_IMPL keyed
// by `tin_shift_forward_impl`.
// NOTE(review): the macro's backend lookup is defined in
// pytorch_device_registry.hpp, outside this file - confirm details there.
void tin_shift_forward_impl(Tensor input, Tensor shift, Tensor output) {
  DISPATCH_DEVICE_IMPL(tin_shift_forward_impl, input, shift, output);
}

// Dispatch stub: forwards (grad_output, shift, grad_input) to
// DISPATCH_DEVICE_IMPL keyed by `tin_shift_backward_impl`.
// NOTE(review): the macro's backend lookup is defined in
// pytorch_device_registry.hpp, outside this file - confirm details there.
void tin_shift_backward_impl(Tensor grad_output, Tensor shift,
                             Tensor grad_input) {
  DISPATCH_DEVICE_IMPL(tin_shift_backward_impl, grad_output, shift, grad_input);
}

// Public wrapper: passes (input, shift, output) straight to
// tin_shift_forward_impl.
void tin_shift_forward(Tensor input, Tensor shift, Tensor output) {
  tin_shift_forward_impl(input, shift, output);
}

// Public wrapper: passes (grad_output, shift, grad_input) straight to
// tin_shift_backward_impl.
void tin_shift_backward(Tensor grad_output, Tensor shift, Tensor grad_input) {
  tin_shift_backward_impl(grad_output, shift, grad_input);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/upfirdn2d.cpp
================================================
// Modified from
// https://github.com/rosinality/stylegan2-pytorch/blob/master/op/upfirdn2d.cpp

/*
Copyright (c) 2021, NVIDIA Corporation. All rights reserved.

NVIDIA Source Code License for StyleGAN2 with Adaptive Discriminator
Augmentation (ADA)
=======================================================================

1. Definitions

"Licensor" means any person or entity that distributes its Work.

"Software" means the original work of authorship made available under
this License.

"Work" means the Software and any additions to or derivative works of
the Software that are made available under this License.

The terms "reproduce," "reproduction," "derivative works," and
"distribution" have the meaning as provided under U.S. copyright law;
provided, however, that for the purposes of this License, derivative
works shall not include works that remain separable from, or merely
link (or bind by name) to the interfaces of, the Work.

Works, including the Software, are "made available" under this License
by including in or with the Work either (a) a copyright notice
referencing the applicability of this License to the Work, or (b) a
copy of this License.

2. License Grants

    2.1 Copyright Grant. Subject to the terms and conditions of this
    License, each Licensor grants to you a perpetual, worldwide,
    non-exclusive, royalty-free, copyright license to reproduce,
    prepare derivative works of, publicly display, publicly perform,
    sublicense and distribute its Work and any resulting derivative
    works in any form.

3. Limitations

    3.1 Redistribution. You may reproduce or distribute the Work only
    if (a) you do so under this License, (b) you include a complete
    copy of this License with your distribution, and (c) you retain
    without modification any copyright, patent, trademark, or
    attribution notices that are present in the Work.

    3.2 Derivative Works. You may specify that additional or different
    terms apply to the use, reproduction, and distribution of your
    derivative works of the Work ("Your Terms") only if (a) Your Terms
    provide that the use limitation in Section 3.3 applies to your
    derivative works, and (b) you identify the specific derivative
    works that are subject to Your Terms. Notwithstanding Your Terms,
    this License (including the redistribution requirements in Section
    3.1) will continue to apply to the Work itself.

    3.3 Use Limitation. The Work and any derivative works thereof only
    may be used or intended for use non-commercially. Notwithstanding
    the foregoing, NVIDIA and its affiliates may use the Work and any
    derivative works commercially. As used herein, "non-commercially"
    means for research or evaluation purposes only.

    3.4 Patent Claims. If you bring or threaten to bring a patent claim
    against any Licensor (including any claim, cross-claim or
    counterclaim in a lawsuit) to enforce any patents that you allege
    are infringed by any Work, then your rights under this License from
    such Licensor (including the grant in Section 2.1) will terminate
    immediately.

    3.5 Trademarks. This License does not grant any rights to use any
    Licensor’s or its affiliates’ names, logos, or trademarks, except
    as necessary to reproduce the notices described in this License.

    3.6 Termination. If you violate any term of this License, then your
    rights under this License (including the grant in Section 2.1) will
    terminate immediately.

4. Disclaimer of Warranty.

THE WORK IS PROVIDED "AS IS" WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR
NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER
THIS LICENSE.

5. Limitation of Liability.

EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL
THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE
SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT,
INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF
OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK
(INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION,
LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER
COMMERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF
THE POSSIBILITY OF SUCH DAMAGES.

=======================================================================
*/

#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Dispatch stub for upfirdn2d: forwards the two tensors and eight integer
// options, in declaration order, to DISPATCH_DEVICE_IMPL keyed by
// `upfirdn2d_op_impl` and returns the resulting tensor.
// NOTE(review): the macro's backend lookup is defined in
// pytorch_device_registry.hpp, outside this file - confirm details there.
torch::Tensor upfirdn2d_op_impl(const torch::Tensor& input,
                                const torch::Tensor& kernel, int up_x, int up_y,
                                int down_x, int down_y, int pad_x0, int pad_x1,
                                int pad_y0, int pad_y1) {
  return DISPATCH_DEVICE_IMPL(upfirdn2d_op_impl, input, kernel, up_x, up_y,
                              down_x, down_y, pad_x0, pad_x1, pad_y0, pad_y1);
}

// Public wrapper for upfirdn2d. Pure pass-through: every argument is handed to
// upfirdn2d_op_impl in the order received, and its result is returned as-is.
torch::Tensor upfirdn2d(const torch::Tensor& input, const torch::Tensor& kernel,
                        int up_x, int up_y, int down_x, int down_y, int pad_x0,
                        int pad_x1, int pad_y0, int pad_y1) {
  return upfirdn2d_op_impl(
      // tensors
      input, kernel,
      // up / down factors
      up_x, up_y, down_x, down_y,
      // padding amounts
      pad_x0, pad_x1, pad_y0, pad_y1);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/pytorch/voxelization.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved.
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// Dispatching entry point for hard voxelization: forwards all arguments to
// DISPATCH_DEVICE_IMPL under the key `hard_voxelize_forward_impl` and returns
// the int produced by the selected implementation (the caller in this file
// stores it as the voxel count).
// NOTE(review): macro semantics live in pytorch_device_registry.hpp - confirm.
int hard_voxelize_forward_impl(const at::Tensor &points, at::Tensor &voxels,
                               at::Tensor &coors,
                               at::Tensor &num_points_per_voxel,
                               const std::vector<float> voxel_size,
                               const std::vector<float> coors_range,
                               const int max_points, const int max_voxels,
                               const int NDim = 3) {
  return DISPATCH_DEVICE_IMPL(hard_voxelize_forward_impl, points, voxels, coors,
                              num_points_per_voxel, voxel_size, coors_range,
                              max_points, max_voxels, NDim);
}

// Dispatching entry point for dynamic voxelization: forwards all arguments to
// DISPATCH_DEVICE_IMPL under the key `dynamic_voxelize_forward_impl`. Nothing
// is returned; `coors` is the only non-const tensor argument.
// NOTE(review): macro semantics live in pytorch_device_registry.hpp - confirm.
void dynamic_voxelize_forward_impl(const at::Tensor &points, at::Tensor &coors,
                                   const std::vector<float> voxel_size,
                                   const std::vector<float> coors_range,
                                   const int NDim = 3) {
  DISPATCH_DEVICE_IMPL(dynamic_voxelize_forward_impl, points, coors, voxel_size,
                       coors_range, NDim);
}

// Tensor-based front end for hard voxelization.
//
// `voxel_size` and `coors_range` are read as float tensors and copied into
// std::vector<float> before calling hard_voxelize_forward_impl. The int
// returned by the implementation is written to the first int64 element of
// `voxel_num`.
void hard_voxelize_forward(const at::Tensor &points,
                           const at::Tensor &voxel_size,
                           const at::Tensor &coors_range, at::Tensor &voxels,
                           at::Tensor &coors, at::Tensor &num_points_per_voxel,
                           at::Tensor &voxel_num, const int max_points,
                           const int max_voxels, const int NDim = 3) {
  // Fetched first so the output slot is resolved before any work is done.
  int64_t *voxel_count_out = voxel_num.data_ptr<int64_t>();

  const float *size_first = voxel_size.data_ptr<float>();
  const float *size_last = voxel_size.data_ptr<float>() + voxel_size.numel();
  std::vector<float> voxel_size_v(size_first, size_last);

  const float *range_first = coors_range.data_ptr<float>();
  const float *range_last =
      coors_range.data_ptr<float>() + coors_range.numel();
  std::vector<float> coors_range_v(range_first, range_last);

  const int voxel_count = hard_voxelize_forward_impl(
      points, voxels, coors, num_points_per_voxel, voxel_size_v, coors_range_v,
      max_points, max_voxels, NDim);
  voxel_count_out[0] = voxel_count;
}

// Tensor-based front end for dynamic voxelization.
//
// `voxel_size` and `coors_range` are read as float tensors, copied into
// std::vector<float>, and passed with the remaining arguments to
// dynamic_voxelize_forward_impl.
void dynamic_voxelize_forward(const at::Tensor &points,
                              const at::Tensor &voxel_size,
                              const at::Tensor &coors_range, at::Tensor &coors,
                              const int NDim = 3) {
  const float *size_first = voxel_size.data_ptr<float>();
  const float *size_last = voxel_size.data_ptr<float>() + voxel_size.numel();
  std::vector<float> voxel_size_v(size_first, size_last);

  const float *range_first = coors_range.data_ptr<float>();
  const float *range_last =
      coors_range.data_ptr<float>() + coors_range.numel();
  std::vector<float> coors_range_v(range_first, range_last);

  dynamic_voxelize_forward_impl(points, coors, voxel_size_v, coors_range_v,
                                NDim);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/tensorrt/plugins/trt_corner_pool.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "trt_corner_pool.hpp"

#include <assert.h>

#include "trt_serialize.hpp"

// Host-side launcher for the float corner-pool kernels. Only declared here;
// the definition lives in trt_corner_pool_kernel.cu.
void CornerPoolForwardLauncher_float(const float *input, float *output,
                                     const int batch_size, const int channels,
                                     const int height, const int width,
                                     const int pool_type, cudaStream_t stream);

namespace {
// Version string returned by the plugin's and the creator's getPluginVersion().
static const char *PLUGIN_VERSION{"1"};
// Name returned by the creator's getPluginName() and, for the four known pool
// types, by the plugin's getPluginType().
static const char *CORNER_POOL_PLUGIN_NAME{"MMCVCornerPool"};
}  // namespace

// Builds a plugin from explicit parameters: stores the layer name and the
// pooling direction.
CornerPoolPluginDynamic::CornerPoolPluginDynamic(const std::string &name,
                                                 TRT_CORNER_POOL_TYPE poolType)
    : mLayerName(name), mPoolType(poolType) {}

// Builds a plugin from a serialized buffer: stores the layer name and reads
// mPoolType from `data` via deserialize_value (the counterpart of serialize(),
// which writes only mPoolType).
CornerPoolPluginDynamic::CornerPoolPluginDynamic(const std::string name,
                                                 const void *data,
                                                 size_t length)
    : mLayerName(name) {
  deserialize_value(&data, &length, &mPoolType);
}

// Empty destructor: no explicit cleanup is performed here.
CornerPoolPluginDynamic::~CornerPoolPluginDynamic() {}

// Returns a heap-allocated copy of this plugin carrying the same layer name,
// pool type and plugin namespace.
nvinfer1::IPluginV2DynamicExt *CornerPoolPluginDynamic::clone() const {
  auto *copy = new CornerPoolPluginDynamic(mLayerName, mPoolType);
  copy->setPluginNamespace(getPluginNamespace());
  return copy;
}

// The output has exactly the dimensions of the first input; `outputIndex`,
// `nbInputs` and `exprBuilder` are not used.
nvinfer1::DimsExprs CornerPoolPluginDynamic::getOutputDimensions(
    int outputIndex, const nvinfer1::DimsExprs *inputs, int nbInputs,
    nvinfer1::IExprBuilder &exprBuilder) {
  return inputs[0];
}

// Reports whether the tensor at position `pos` of `inOut` is acceptable.
//   pos 0 (input[0]):  must be kFLOAT in kLINEAR format.
//   pos 1 (output[0]): must match the type and format of inOut[0].
//   any other pos:     rejected.
bool CornerPoolPluginDynamic::supportsFormatCombination(
    int pos, const nvinfer1::PluginTensorDesc *inOut, int nbInputs,
    int nbOutputs) {
  if (pos == 0) {
    const nvinfer1::PluginTensorDesc &in = inOut[0];
    return in.type == nvinfer1::DataType::kFLOAT &&
           in.format == nvinfer1::TensorFormat::kLINEAR;
  }
  if (pos == 1) {
    const nvinfer1::PluginTensorDesc &in = inOut[0];
    const nvinfer1::PluginTensorDesc &out = inOut[1];
    return out.type == in.type && out.format == in.format;
  }
  return false;
}

// No-op: nothing is read from or stored about the tensor descriptors.
void CornerPoolPluginDynamic::configurePlugin(
    const nvinfer1::DynamicPluginTensorDesc *inputs, int nbInputs,
    const nvinfer1::DynamicPluginTensorDesc *outputs, int nbOutputs) {}

// Returns the number of scratch bytes the plugin needs at enqueue time.
//
// enqueue() in this file never touches its `workSpace` argument, so no scratch
// memory is requested. The previous body computed an unused element size and
// then fell off the end of this non-void function, which is undefined
// behaviour and handed the caller an arbitrary size.
size_t CornerPoolPluginDynamic::getWorkspaceSize(
    const nvinfer1::PluginTensorDesc *inputs, int nbInputs,
    const nvinfer1::PluginTensorDesc *outputs, int nbOutputs) const {
  return 0;
}

// Runs corner pooling on the given stream.
//
// The first four dimensions of input 0 are read as batch, channels, height and
// width, and both buffers are treated as float. `outputDesc` and `workSpace`
// are not used. Always returns 0.
int CornerPoolPluginDynamic::enqueue(
    const nvinfer1::PluginTensorDesc *inputDesc,
    const nvinfer1::PluginTensorDesc *outputDesc, const void *const *inputs,
    void *const *outputs, void *workSpace, cudaStream_t stream) {
  const nvinfer1::Dims &in_dims = inputDesc[0].dims;
  const int batch_size = in_dims.d[0];
  const int channels = in_dims.d[1];
  const int height = in_dims.d[2];
  const int width = in_dims.d[3];

  const float *src = static_cast<const float *>(inputs[0]);
  float *dst = static_cast<float *>(outputs[0]);

  CornerPoolForwardLauncher_float(src, dst, batch_size, channels, height,
                                  width, static_cast<int>(mPoolType), stream);
  return 0;
}

// The output uses the data type of the first input; `index` and `nbInputs` are
// not consulted.
nvinfer1::DataType CornerPoolPluginDynamic::getOutputDataType(
    int index, const nvinfer1::DataType *inputTypes, int nbInputs) const {
  return inputTypes[0];
}

// IPluginV2 Methods
// Returns CORNER_POOL_PLUGIN_NAME for any of the four known pool directions
// and the literal "UnknownpoolType" for every other value of mPoolType.
const char *CornerPoolPluginDynamic::getPluginType() const {
  const bool is_known_pool =
      mPoolType == TRT_CORNER_POOL_TYPE::TRT_TOP_POOL ||
      mPoolType == TRT_CORNER_POOL_TYPE::TRT_BOTTOM_POOL ||
      mPoolType == TRT_CORNER_POOL_TYPE::TRT_LEFT_POOL ||
      mPoolType == TRT_CORNER_POOL_TYPE::TRT_RIGHT_POOL;
  if (is_known_pool) {
    return CORNER_POOL_PLUGIN_NAME;
  }
  return "UnknownpoolType";
}

// Returns the file-local PLUGIN_VERSION string.
const char *CornerPoolPluginDynamic::getPluginVersion() const {
  return PLUGIN_VERSION;
}

// The plugin produces a single output tensor.
int CornerPoolPluginDynamic::getNbOutputs() const { return 1; }

// Nothing to set up; always returns 0.
int CornerPoolPluginDynamic::initialize() { return 0; }

// No-op counterpart of initialize().
void CornerPoolPluginDynamic::terminate() {}

// Serialized size in bytes: just mPoolType, the only value serialize() writes.
size_t CornerPoolPluginDynamic::getSerializationSize() const {
  return sizeof(mPoolType);
}

// Writes mPoolType into `buffer` via serialize_value; the deserializing
// constructor reads it back.
void CornerPoolPluginDynamic::serialize(void *buffer) const {
  serialize_value(&buffer, mPoolType);
}

// Deletes this object; it must not be used after the call returns.
void CornerPoolPluginDynamic::destroy() {
  // This gets called when the network containing plugin is destroyed
  delete this;
}

// Stores the given namespace string in mNamespace.
void CornerPoolPluginDynamic::setPluginNamespace(const char *libNamespace) {
  mNamespace = libNamespace;
}

// Returns the stored namespace; the pointer refers to mNamespace's buffer.
const char *CornerPoolPluginDynamic::getPluginNamespace() const {
  return mNamespace.c_str();
}

// Registers the single plugin attribute "mode" and points the field collection
// mFC at the attribute vector's storage.
CornerPoolPluginDynamicCreator::CornerPoolPluginDynamicCreator() {
  mPluginAttributes.clear();
  mPluginAttributes.emplace_back(nvinfer1::PluginField("mode"));
  mFC.nbFields = mPluginAttributes.size();
  mFC.fields = mPluginAttributes.data();
}

// Returns the file-local CORNER_POOL_PLUGIN_NAME string.
const char *CornerPoolPluginDynamicCreator::getPluginName() const {
  return CORNER_POOL_PLUGIN_NAME;
}

// Returns the file-local PLUGIN_VERSION string.
const char *CornerPoolPluginDynamicCreator::getPluginVersion() const {
  return PLUGIN_VERSION;
}

// Returns a pointer to the field collection filled in by the constructor.
const nvinfer1::PluginFieldCollection *
CornerPoolPluginDynamicCreator::getFieldNames() {
  return &mFC;
}

// Creates a corner-pool plugin from a field collection.
//
// The integer field "mode" selects the direction: 0 = top, 1 = bottom,
// 2 = left, 3 = right. Fields without data (or without a name) are skipped.
//
// Returns a new plugin carrying this creator's namespace, or nullptr when
// "mode" is missing or outside [0, 3]. Debug builds still trip the assert
// first. Previously a release build (NDEBUG) would construct the plugin from
// an uninitialised poolType in that case.
nvinfer1::IPluginV2 *CornerPoolPluginDynamicCreator::createPlugin(
    const char *name, const nvinfer1::PluginFieldCollection *fc) {
  int poolMode = -1;

  for (int i = 0; i < fc->nbFields; i++) {
    // A null name cannot be turned into a std::string; treat it like a field
    // with no data.
    if (fc->fields[i].data == nullptr || fc->fields[i].name == nullptr) {
      continue;
    }
    std::string field_name(fc->fields[i].name);

    if (field_name.compare("mode") == 0) {
      poolMode = static_cast<const int *>(fc->fields[i].data)[0];
    }
  }

  assert(poolMode >= 0 && poolMode <= 3);

  TRT_CORNER_POOL_TYPE poolType;
  switch (poolMode) {
    case 0:
      poolType = TRT_CORNER_POOL_TYPE::TRT_TOP_POOL;
      break;
    case 1:
      poolType = TRT_CORNER_POOL_TYPE::TRT_BOTTOM_POOL;
      break;
    case 2:
      poolType = TRT_CORNER_POOL_TYPE::TRT_LEFT_POOL;
      break;
    case 3:
      poolType = TRT_CORNER_POOL_TYPE::TRT_RIGHT_POOL;
      break;

    default:
      // Only reachable when asserts are disabled: refuse to build a plugin
      // with an indeterminate pool type.
      return nullptr;
  }

  CornerPoolPluginDynamic *plugin = new CornerPoolPluginDynamic(name, poolType);
  plugin->setPluginNamespace(getPluginNamespace());
  return plugin;
}

// Rebuilds a plugin from serialized bytes and gives it this creator's
// namespace. The object is heap-allocated; CornerPoolPluginDynamic::destroy()
// is the method in this file that deletes it.
nvinfer1::IPluginV2 *CornerPoolPluginDynamicCreator::deserializePlugin(
    const char *name, const void *serialData, size_t serialLength) {
  CornerPoolPluginDynamic *restored =
      new CornerPoolPluginDynamic(name, serialData, serialLength);
  restored->setPluginNamespace(getPluginNamespace());
  return restored;
}

// Stores the namespace that this creator passes on to the plugins it builds.
void CornerPoolPluginDynamicCreator::setPluginNamespace(
    const char *libNamespace) {
  mNamespace = libNamespace;
}

// Returns the stored namespace; the pointer refers to mNamespace's buffer.
const char *CornerPoolPluginDynamicCreator::getPluginNamespace() const {
  return mNamespace.c_str();
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/tensorrt/plugins/trt_corner_pool_kernel.cu
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "common_cuda_helper.hpp"
#include "trt_cuda_helper.cuh"
#include "trt_plugin_helper.hpp"

// Vertical corner pooling: a running maximum along the height axis.
//
// Each loop index handles one (batch, channel, column) triple of a tensor
// indexed as [batch][channel][height][width]. pool_type == 0 (TopPool) seeds
// the output with the bottom row and sweeps upwards; any other value
// (BottomPool) seeds with the top row and sweeps downwards. Every output
// element is the max of the previously written neighbour and the input at the
// same position.
template <typename scalar_t>
__global__ void top_bottom_pool_kernel(const scalar_t *input, scalar_t *output,
                                       const int batch_size, const int channels,
                                       const int height, const int width,
                                       const int pool_type) {
  const int nthreads = batch_size * channels * width;
  CUDA_1D_KERNEL_LOOP(index, nthreads) {
    const int w_idx = index % width;               // width
    const int c_idx = (index / width) % channels;  // channels
    const int n_idx = index / (channels * width);  // batch

    // offset of element (n, c, 0, w)
    const int column_base =
        (n_idx * channels + c_idx) * width * height + w_idx;

    const bool is_top_pool = (pool_type == 0);
    const int step = is_top_pool ? -1 : 1;
    const int seed_row = is_top_pool ? (height - 1) : 0;

    // the row the sweep starts from is copied through unchanged
    output[column_base + seed_row * width] =
        input[column_base + seed_row * width];

    // sweep away from the seed row, carrying the running maximum
    for (int h = seed_row + step; h >= 0 && h < height; h += step) {
      const int current = column_base + h * width;
      const int previous = column_base + (h - step) * width;
      output[current] = max(output[previous], input[current]);
    }
  }
}

// Horizontal corner pooling: a running maximum along the width axis.
//
// Each loop index handles one (batch, channel, row) triple of a tensor indexed
// as [batch][channel][height][width]. pool_type == 2 (LeftPool) seeds the
// output with the right-most column and sweeps leftwards; any other value
// (RightPool) seeds with the left-most column and sweeps rightwards.
template <typename scalar_t>
__global__ void left_right_pool_kernel(const scalar_t *input, scalar_t *output,
                                       const int batch_size, const int channels,
                                       const int height, const int width,
                                       const int pool_type) {
  const int nthreads = batch_size * channels * height;
  CUDA_1D_KERNEL_LOOP(index, nthreads) {
    const int h_idx = index % height;               // height
    const int c_idx = (index / height) % channels;  // channels
    const int n_idx = index / (channels * height);  // batch

    // offset of element (n, c, h, 0)
    const int row_base =
        (n_idx * channels + c_idx) * width * height + h_idx * width;

    const bool is_left_pool = (pool_type == 2);
    const int step = is_left_pool ? -1 : 1;
    const int seed_col = is_left_pool ? (width - 1) : 0;

    // the column the sweep starts from is copied through unchanged
    output[row_base + seed_col] = input[row_base + seed_col];

    // sweep away from the seed column, carrying the running maximum
    for (int w = seed_col + step; w >= 0 && w < width; w += step) {
      output[row_base + w] =
          max(output[row_base + w - step], input[row_base + w]);
    }
  }
}

// Launches the corner-pool kernel that matches `pool_type` on `stream`.
//   0 or 1: top_bottom_pool_kernel, one loop index per (batch, channel, column)
//   2 or 3: left_right_pool_kernel, one loop index per (batch, channel, row)
// Any other pool_type launches nothing.
template <typename scalar_t>
void CornerPoolForwardLauncher(const scalar_t *input, scalar_t *output,
                               const int batch_size, const int channels,
                               const int height, const int width,
                               const int pool_type, cudaStream_t stream) {
  if (pool_type == 0 || pool_type == 1) {
    const int work_items = batch_size * channels * width;
    const int grid = GET_BLOCKS(work_items, THREADS_PER_BLOCK);
    top_bottom_pool_kernel<scalar_t><<<grid, THREADS_PER_BLOCK, 0, stream>>>(
        input, output, batch_size, channels, height, width, pool_type);
  } else if (pool_type == 2 || pool_type == 3) {
    const int work_items = batch_size * channels * height;
    const int grid = GET_BLOCKS(work_items, THREADS_PER_BLOCK);
    left_right_pool_kernel<scalar_t><<<grid, THREADS_PER_BLOCK, 0, stream>>>(
        input, output, batch_size, channels, height, width, pool_type);
  }
}

// Non-template float entry point: forwards every argument to
// CornerPoolForwardLauncher<float>. trt_corner_pool.cpp declares and calls it.
void CornerPoolForwardLauncher_float(const float *input, float *output,
                                     const int batch_size, const int channels,
                                     const int height, const int width,
                                     const int pool_type, cudaStream_t stream) {
  CornerPoolForwardLauncher<float>(input, output, batch_size, channels, height,
                                   width, pool_type, stream);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/tensorrt/plugins/trt_cuda_helper.cu
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include <cublas_v2.h>

#include "common_cuda_helper.hpp"
#include "trt_cuda_helper.cuh"
#include "trt_plugin_helper.hpp"

using mmcv::TensorDesc;

// Copies `n` elements from `src` to `dst` while permuting axes.
//
// For each destination linear index the kernel peels off one coordinate per
// axis using the destination strides, then accumulates the source offset from
// the stride of the source axis that `ts_permute.shape` maps that axis to.
// Strides are read from the `.stride` arrays of the two stride descriptors and
// the axis count from `ts_src_stride.dim`.
template <class scalar_t>
__global__ void copy_permute_kernel(scalar_t *dst, const scalar_t *src, int n,
                                    TensorDesc ts_src_stride,
                                    TensorDesc ts_dst_stride,
                                    TensorDesc ts_permute) {
  const int ndim = ts_src_stride.dim;
  int *src_strides = &(ts_src_stride.stride[0]);
  int *dst_strides = &(ts_dst_stride.stride[0]);
  int *axis_map = &(ts_permute.shape[0]);
  CUDA_1D_KERNEL_LOOP(index, n) {
    size_t remaining = index;
    size_t src_offset = 0;
    for (int axis = 0; axis < ndim; ++axis) {
      // coordinate along this destination axis
      int coord = remaining / dst_strides[axis];
      remaining = remaining % dst_strides[axis];
      src_offset += coord * src_strides[axis_map[axis]];
    }
    dst[index] = src[src_offset];
  }
}

// Permuted copy of a tensor described by `src_size` / `src_dim` into `dst`.
//
// Destination axis i takes its extent from source axis permute[i]. Strides for
// both layouts are built with the last axis having stride 1, the element count
// is the product of the destination extents, and copy_permute_kernel is then
// launched on `stream`.
template <class scalar_t>
void memcpyPermute(scalar_t *dst, const scalar_t *src, int *src_size,
                   int *permute, int src_dim, cudaStream_t stream) {
  TensorDesc ts_permute;
  memcpy(&(ts_permute.shape[0]), permute, src_dim * sizeof(int));

  TensorDesc ts_src_stride;
  TensorDesc ts_dst_stride;
  ts_src_stride.dim = src_dim;
  ts_dst_stride.dim = src_dim;

  int *src_strides = &(ts_src_stride.stride[0]);
  int *dst_strides = &(ts_dst_stride.stride[0]);
  int *dst_extent = &(ts_dst_stride.shape[0]);

  const int last_axis = src_dim - 1;
  src_strides[last_axis] = 1;
  dst_strides[last_axis] = 1;

  // destination extents and source strides, innermost axis first
  for (int axis = last_axis; axis >= 0; --axis) {
    dst_extent[axis] = src_size[permute[axis]];
    if (axis != last_axis) {
      src_strides[axis] = src_strides[axis + 1] * src_size[axis + 1];
    }
  }

  // element count and destination strides, innermost axis first
  size_t total_elems = 1;
  for (int axis = last_axis; axis >= 0; --axis) {
    total_elems *= dst_extent[axis];
    if (axis != last_axis) {
      dst_strides[axis] = dst_strides[axis + 1] * dst_extent[axis + 1];
    }
  }

  copy_permute_kernel<scalar_t>
      <<<GET_BLOCKS(total_elems), THREADS_PER_BLOCK, 0, stream>>>(
          dst, src, total_elems, ts_src_stride, ts_dst_stride, ts_permute);
}

// Explicit instantiation of memcpyPermute for float, the only element type
// instantiated in this file.
template void memcpyPermute<float>(float *dst, const float *src, int *src_size,
                                   int *permute, int src_dim,
                                   cudaStream_t stream);

// float specialisation of cublasGemmWrap: forwards every argument, unchanged
// and in order, to cublasSgemm and returns its status.
template <>
cublasStatus_t cublasGemmWrap<float>(cublasHandle_t handle,
                                     cublasOperation_t transa,
                                     cublasOperation_t transb, int m, int n,
                                     int k, const float *alpha, const float *A,
                                     int lda, const float *B, int ldb,
                                     const float *beta, float *C, int ldc) {
  return cublasSgemm(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb,
                     beta, C, ldc);
}

// half specialisation of cublasGemmWrap: forwards every argument, unchanged
// and in order, to cublasHgemm and returns its status.
template <>
cublasStatus_t cublasGemmWrap<half>(cublasHandle_t handle,
                                    cublasOperation_t transa,
                                    cublasOperation_t transb, int m, int n,
                                    int k, const half *alpha, const half *A,
                                    int lda, const half *B, int ldb,
                                    const half *beta, half *C, int ldc) {
  return cublasHgemm(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb,
                     beta, C, ldc);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/tensorrt/plugins/trt_cummaxmin.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "trt_cummaxmin.hpp"

#include <assert.h>

#include "trt_serialize.hpp"

// Host-side launchers for the cumulative max/min kernel, one per element type.
// Only declared here; the definitions live in trt_cummaxmin_kernel.cu.
void CumMaxMinForwardLauncher_float(const float *input, float *output_value,
                                    int *output_index, const int *dims,
                                    int nbDims, int cum_dim, int cum_type,
                                    cudaStream_t stream);

void CumMaxMinForwardLauncher_int32(const int *input, int *output_value,
                                    int *output_index, const int *dims,
                                    int nbDims, int cum_dim, int cum_type,
                                    cudaStream_t stream);

namespace {
// Version string returned by the plugin's and the creators' getPluginVersion().
static const char *PLUGIN_VERSION{"1"};
// Name returned by the base creator's getPluginName().
static const char *CUMMAXMIN_PLUGIN_NAME{"cummaxmin"};
// Names returned for the max and min variants (plugin type and creator name).
static const char *CUMMAX_PLUGIN_NAME{"cummax"};
static const char *CUMMIN_PLUGIN_NAME{"cummin"};
}  // namespace

// Builds a plugin from explicit parameters: layer name, the axis to scan and
// the comparison type (max or min).
CumMaxMinPluginDynamic::CumMaxMinPluginDynamic(const std::string &name, int dim,
                                               TRT_CUMCMPTYPE cumType)
    : mLayerName(name), mDim(dim), mCumType(cumType) {}

// Builds a plugin from a serialized buffer: reads mDim and then mCumType via
// deserialize_value, the same order in which serialize() writes them.
CumMaxMinPluginDynamic::CumMaxMinPluginDynamic(const std::string name,
                                               const void *data, size_t length)
    : mLayerName(name) {
  deserialize_value(&data, &length, &mDim);
  deserialize_value(&data, &length, &mCumType);
}

// Empty destructor: no explicit cleanup is performed here.
CumMaxMinPluginDynamic::~CumMaxMinPluginDynamic() {}

// Returns a heap-allocated copy of this plugin carrying the same layer name,
// scan axis, comparison type and plugin namespace.
nvinfer1::IPluginV2DynamicExt *CumMaxMinPluginDynamic::clone() const {
  auto *copy = new CumMaxMinPluginDynamic(mLayerName, mDim, mCumType);
  copy->setPluginNamespace(getPluginNamespace());
  return copy;
}

// Every output has exactly the dimensions of the first input, whatever
// `outputIndex` is; `nbInputs` and `exprBuilder` are not used.
nvinfer1::DimsExprs CumMaxMinPluginDynamic::getOutputDimensions(
    int outputIndex, const nvinfer1::DimsExprs *inputs, int nbInputs,
    nvinfer1::IExprBuilder &exprBuilder) {
  return inputs[0];
}

// Reports whether the tensor at position `pos` of `inOut` is acceptable.
//   pos 0 (input[0]):  kFLOAT or kINT32, in kLINEAR format.
//   pos 1 (output[0]): same type and format as inOut[0].
//   pos 2 (output[1]): kINT32 in kLINEAR format.
//   any other pos:     rejected.
bool CumMaxMinPluginDynamic::supportsFormatCombination(
    int pos, const nvinfer1::PluginTensorDesc *inOut, int nbInputs,
    int nbOutputs) {
  if (pos == 0) {
    const nvinfer1::PluginTensorDesc &in = inOut[0];
    const bool type_ok = in.type == nvinfer1::DataType::kFLOAT ||
                         in.type == nvinfer1::DataType::kINT32;
    return type_ok && in.format == nvinfer1::TensorFormat::kLINEAR;
  }
  if (pos == 1) {
    const nvinfer1::PluginTensorDesc &value_out = inOut[1];
    return value_out.type == inOut[0].type &&
           value_out.format == inOut[0].format;
  }
  if (pos == 2) {
    const nvinfer1::PluginTensorDesc &index_out = inOut[2];
    return index_out.type == nvinfer1::DataType::kINT32 &&
           index_out.format == nvinfer1::TensorFormat::kLINEAR;
  }
  return false;
}

// No-op: nothing is read from or stored about the tensor descriptors.
void CumMaxMinPluginDynamic::configurePlugin(
    const nvinfer1::DynamicPluginTensorDesc *inputs, int nbInputs,
    const nvinfer1::DynamicPluginTensorDesc *outputs, int nbOutputs) {}

// Returns the number of scratch bytes the plugin needs at enqueue time.
//
// enqueue() in this file never touches its `workSpace` argument, so no scratch
// memory is requested. The previous body computed an unused element size and
// then fell off the end of this non-void function, which is undefined
// behaviour and handed the caller an arbitrary size.
size_t CumMaxMinPluginDynamic::getWorkspaceSize(
    const nvinfer1::PluginTensorDesc *inputs, int nbInputs,
    const nvinfer1::PluginTensorDesc *outputs, int nbOutputs) const {
  return 0;
}

// Runs the cumulative max/min on the given stream.
//
// outputs[0] receives the running values (same element type as the input) and
// outputs[1] is used as an int buffer for the running indices. The launcher is
// chosen from the input's data type: kFLOAT or kINT32. Any other type launches
// nothing. `outputDesc` and `workSpace` are not used. Always returns 0.
int CumMaxMinPluginDynamic::enqueue(
    const nvinfer1::PluginTensorDesc *inputDesc,
    const nvinfer1::PluginTensorDesc *outputDesc, const void *const *inputs,
    void *const *outputs, void *workSpace, cudaStream_t stream) {
  const nvinfer1::PluginTensorDesc &in_desc = inputDesc[0];
  const int *dims = &(in_desc.dims.d[0]);
  const int nbDims = in_desc.dims.nbDims;

  int *index_out = static_cast<int *>(outputs[1]);
  const int cum_type = static_cast<int>(mCumType);

  if (in_desc.type == nvinfer1::DataType::kFLOAT) {
    CumMaxMinForwardLauncher_float(static_cast<const float *>(inputs[0]),
                                   static_cast<float *>(outputs[0]), index_out,
                                   dims, nbDims, mDim, cum_type, stream);
  } else if (in_desc.type == nvinfer1::DataType::kINT32) {
    CumMaxMinForwardLauncher_int32(static_cast<const int *>(inputs[0]),
                                   static_cast<int *>(outputs[0]), index_out,
                                   dims, nbDims, mDim, cum_type, stream);
  }

  return 0;
}

// Returns the data type of output `index`.
//   index 1: kINT32 (the index tensor).
//   index 0: the type of the first input (the value tensor).
// The plugin has only these two outputs (getNbOutputs() returns 2). Any other
// index now also reports the input type rather than falling off the end of
// the function, which was undefined behaviour.
nvinfer1::DataType CumMaxMinPluginDynamic::getOutputDataType(
    int index, const nvinfer1::DataType *inputTypes, int nbInputs) const {
  switch (index) {
    case 1:
      return nvinfer1::DataType::kINT32;
    case 0:
    default:
      return inputTypes[0];
  }
}

// IPluginV2 Methods
// Maps the comparison type to its plugin name: CUMMAX_PLUGIN_NAME for
// TRT_CUMMAX, CUMMIN_PLUGIN_NAME for TRT_CUMMIN, and the literal
// "UnknownCumType" for anything else.
const char *CumMaxMinPluginDynamic::getPluginType() const {
  if (mCumType == TRT_CUMCMPTYPE::TRT_CUMMAX) {
    return CUMMAX_PLUGIN_NAME;
  }
  if (mCumType == TRT_CUMCMPTYPE::TRT_CUMMIN) {
    return CUMMIN_PLUGIN_NAME;
  }
  return "UnknownCumType";
}

// Returns the file-local PLUGIN_VERSION string.
const char *CumMaxMinPluginDynamic::getPluginVersion() const {
  return PLUGIN_VERSION;
}

// Two outputs: enqueue() writes values to outputs[0] and indices to outputs[1].
int CumMaxMinPluginDynamic::getNbOutputs() const { return 2; }

// Nothing to set up; always returns 0.
int CumMaxMinPluginDynamic::initialize() { return 0; }

// No-op counterpart of initialize().
void CumMaxMinPluginDynamic::terminate() {}

// Serialized size in bytes: mDim plus mCumType, the two values serialize()
// writes.
size_t CumMaxMinPluginDynamic::getSerializationSize() const {
  return sizeof(mDim) + sizeof(mCumType);
}

// Writes mDim and then mCumType into `buffer` via serialize_value; the
// deserializing constructor reads them back in the same order.
void CumMaxMinPluginDynamic::serialize(void *buffer) const {
  serialize_value(&buffer, mDim);
  serialize_value(&buffer, mCumType);
}

// Deletes this object; it must not be used after the call returns.
void CumMaxMinPluginDynamic::destroy() {
  // This gets called when the network containing plugin is destroyed
  delete this;
}

// Stores the given namespace string in mNamespace.
void CumMaxMinPluginDynamic::setPluginNamespace(const char *libNamespace) {
  mNamespace = libNamespace;
}

// Returns the stored namespace; the pointer refers to mNamespace's buffer.
const char *CumMaxMinPluginDynamic::getPluginNamespace() const {
  return mNamespace.c_str();
}

// Base creator: remembers which comparison type (max or min) the plugins it
// builds will use, registers the single attribute "dim", and points the field
// collection mFC at the attribute vector's storage.
CumMaxMinPluginDynamicCreator::CumMaxMinPluginDynamicCreator(
    TRT_CUMCMPTYPE cumType)
    : mCumType(cumType) {
  mPluginAttributes.clear();
  mPluginAttributes.emplace_back(nvinfer1::PluginField("dim"));
  mFC.nbFields = mPluginAttributes.size();
  mFC.fields = mPluginAttributes.data();
}

// Returns the generic CUMMAXMIN_PLUGIN_NAME string. CumMaxPluginDynamicCreator
// and CumMinPluginDynamicCreator define their own getPluginName() in this file.
const char *CumMaxMinPluginDynamicCreator::getPluginName() const {
  return CUMMAXMIN_PLUGIN_NAME;
}

// Returns the file-local PLUGIN_VERSION string.
const char *CumMaxMinPluginDynamicCreator::getPluginVersion() const {
  return PLUGIN_VERSION;
}

// Returns a pointer to the field collection filled in by the constructor.
const nvinfer1::PluginFieldCollection *
CumMaxMinPluginDynamicCreator::getFieldNames() {
  return &mFC;
}

// Creates a cumulative max/min plugin from a field collection.
//
// The integer field "dim" selects the axis to scan and defaults to 0 when it
// is absent. Fields without data are skipped. The comparison type comes from
// this creator (mCumType), and the new plugin gets the creator's namespace.
nvinfer1::IPluginV2 *CumMaxMinPluginDynamicCreator::createPlugin(
    const char *name, const nvinfer1::PluginFieldCollection *fc) {
  int dim = 0;

  for (int i = 0; i < fc->nbFields; i++) {
    const nvinfer1::PluginField &field = fc->fields[i];
    if (field.data == nullptr) {
      continue;
    }

    const std::string field_name(field.name);
    if (field_name == "dim") {
      dim = *static_cast<const int *>(field.data);
    }
  }

  auto *created = new CumMaxMinPluginDynamic(name, dim, mCumType);
  created->setPluginNamespace(getPluginNamespace());
  return created;
}

// Rebuilds a plugin from serialized bytes and gives it this creator's
// namespace. The object is heap-allocated; CumMaxMinPluginDynamic::destroy()
// is the method in this file that deletes it.
nvinfer1::IPluginV2 *CumMaxMinPluginDynamicCreator::deserializePlugin(
    const char *name, const void *serialData, size_t serialLength) {
  CumMaxMinPluginDynamic *restored =
      new CumMaxMinPluginDynamic(name, serialData, serialLength);
  restored->setPluginNamespace(getPluginNamespace());
  return restored;
}

// Stores the namespace that this creator passes on to the plugins it builds.
void CumMaxMinPluginDynamicCreator::setPluginNamespace(
    const char *libNamespace) {
  mNamespace = libNamespace;
}

// Returns the stored namespace; the pointer refers to mNamespace's buffer.
const char *CumMaxMinPluginDynamicCreator::getPluginNamespace() const {
  return mNamespace.c_str();
}

// Creator for the "max" variant: initialises the base creator with TRT_CUMMAX.
CumMaxPluginDynamicCreator::CumMaxPluginDynamicCreator()
    : CumMaxMinPluginDynamicCreator(TRT_CUMCMPTYPE::TRT_CUMMAX) {}

// Returns CUMMAX_PLUGIN_NAME, the same string getPluginType() returns for
// TRT_CUMMAX plugins.
const char *CumMaxPluginDynamicCreator::getPluginName() const {
  return CUMMAX_PLUGIN_NAME;
}

// Creator for the "min" variant: initialises the base creator with TRT_CUMMIN.
CumMinPluginDynamicCreator::CumMinPluginDynamicCreator()
    : CumMaxMinPluginDynamicCreator(TRT_CUMCMPTYPE::TRT_CUMMIN) {}

// Returns CUMMIN_PLUGIN_NAME, the same string getPluginType() returns for
// TRT_CUMMIN plugins.
const char *CumMinPluginDynamicCreator::getPluginName() const {
  return CUMMIN_PLUGIN_NAME;
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/tensorrt/plugins/trt_cummaxmin_kernel.cu
================================================
// Copyright (c) OpenMMLab. All rights reserved

#include "common_cuda_helper.hpp"
#include "trt_cuda_helper.cuh"
#include "trt_plugin_helper.hpp"

using mmcv::TensorDesc;

// Cumulative maximum / minimum along axis `cum_dim`.
//
// Each loop index handles one line of `cum_size` elements spaced `cum_stride`
// apart. Walking the line, the kernel keeps the best value seen so far and the
// position where it was found, and writes both at every step. cum_type 0
// selects max, 1 selects min; for any other value the first element is carried
// along the whole line. Comparisons are strict, so on ties the earlier
// position is kept.
template <typename scalar_t>
__global__ void cummaxmin_kernel(const scalar_t *input, scalar_t *output_value,
                                 int *output_index, TensorDesc tensor_desc,
                                 int cum_dim, int cum_type) {
  const size_t cum_size = tensor_desc.shape[cum_dim];
  const size_t cum_stride = tensor_desc.stride[cum_dim];
  // number of independent lines = total element count / line length
  const size_t data_size =
      tensor_desc.stride[0] * tensor_desc.shape[0] / cum_size;
  CUDA_1D_KERNEL_LOOP(index, data_size) {
    // offset of the first element of this line
    size_t offset =
        index / cum_stride * (cum_size * cum_stride) + index % cum_stride;

    auto best_value = input[offset];
    int best_pos = 0;
    output_value[offset] = best_value;
    output_index[offset] = best_pos;

    for (size_t pos = 1; pos < cum_size; ++pos) {
      offset += cum_stride;
      const auto candidate = input[offset];

      const bool wins_max = (cum_type == 0) && (candidate > best_value);
      const bool wins_min = (cum_type == 1) && (candidate < best_value);
      if (wins_max || wins_min) {
        best_value = candidate;
        best_pos = pos;
      }

      output_value[offset] = best_value;
      output_index[offset] = best_pos;
    }
  }
}

// Host-side launcher for cummaxmin_kernel.
//
// Builds a TensorDesc (shape plus strides, last axis has stride 1) from
// `dims` / `nbDims`, normalises a negative `cum_dim` by adding nbDims, and
// launches one loop index per line along the scanned axis on `stream`.
//
// Returns without launching anything when:
//   * nbDims <= 0 (the original indexed shape[nbDims - 1] out of range),
//   * cum_dim is outside [-nbDims, nbDims) (out-of-range shape/stride lookup),
//   * any extent is <= 0. An empty tensor has nothing to scan, and the
//     original divided by shape[cum_dim] == 0 on the host or asked for a
//     zero-block launch.
template <typename scalar_t>
void CumMaxMinForwardLauncher(const scalar_t *input, scalar_t *output_value,
                              int *output_index, const int *dims, int nbDims,
                              int cum_dim, int cum_type, cudaStream_t stream) {
  if (nbDims <= 0) {
    return;
  }

  // cum dim should be larger than 0
  cum_dim = cum_dim >= 0 ? cum_dim : (nbDims + cum_dim);
  if (cum_dim < 0 || cum_dim >= nbDims) {
    return;
  }

  for (int i = 0; i < nbDims; ++i) {
    if (dims[i] <= 0) {
      return;
    }
  }

  // fill tensordesc and initial
  TensorDesc tensor_desc;
  memset((void *)&tensor_desc, 0, sizeof(TensorDesc));
  tensor_desc.dim = nbDims;
  tensor_desc.shape[nbDims - 1] = dims[nbDims - 1];
  tensor_desc.stride[nbDims - 1] = 1;
  for (int i = nbDims - 2; i >= 0; --i) {
    tensor_desc.shape[i] = dims[i];
    tensor_desc.stride[i] = dims[i + 1] * tensor_desc.stride[i + 1];
  }

  // number of independent lines along the scanned axis
  const int data_size =
      tensor_desc.stride[0] * tensor_desc.shape[0] / tensor_desc.shape[cum_dim];

  const int col_block = GET_BLOCKS(data_size, THREADS_PER_BLOCK);

  cummaxmin_kernel<scalar_t><<<col_block, THREADS_PER_BLOCK, 0, stream>>>(
      input, output_value, output_index, tensor_desc, cum_dim, cum_type);
}

// Non-template float entry point: forwards every argument to
// CumMaxMinForwardLauncher<float>. trt_cummaxmin.cpp declares and calls it.
void CumMaxMinForwardLauncher_float(const float *input, float *output_value,
                                    int *output_index, const int *dims,
                                    int nbDims, int cum_dim, int cum_type,
                                    cudaStream_t stream) {
  CumMaxMinForwardLauncher<float>(input, output_value, output_index, dims,
                                  nbDims, cum_dim, cum_type, stream);
}

// Non-template int entry point: forwards every argument to
// CumMaxMinForwardLauncher<int>. trt_cummaxmin.cpp declares and calls it.
void CumMaxMinForwardLauncher_int32(const int *input, int *output_value,
                                    int *output_index, const int *dims,
                                    int nbDims, int cum_dim, int cum_type,
                                    cudaStream_t stream) {
  CumMaxMinForwardLauncher<int>(input, output_value, output_index, dims, nbDims,
                                cum_dim, cum_type, stream);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/tensorrt/plugins/trt_deform_conv.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "trt_deform_conv.hpp"

#include <assert.h>

#include <chrono>

#include "trt_serialize.hpp"

// Host-side launcher for the float deformable-convolution forward pass. Only
// declared here; the definition is not part of this file.
void DeformConvForwardCUDAKernelLauncher_float(
    const float *input, const float *weight, const float *offset, float *output,
    void *workspace, int batchSize, int nInputPlane, int inputHeight,
    int inputWidth, int nOutputPlane, int kW, int kH, int dW, int dH, int padW,
    int padH, int dilationW, int dilationH, int group, int deformable_group,
    int im2col_step, cublasHandle_t cublas_handle, cudaStream_t stream);

namespace {
// File-local plugin version and name strings.
static const char *PLUGIN_VERSION{"1"};
static const char *PLUGIN_NAME{"MMCVDeformConv2d"};
}  // namespace

// Out-of-class definitions of the creator's static members; mFC is
// value-initialised and mPluginAttributes starts empty.
nvinfer1::PluginFieldCollection DeformableConvPluginDynamicCreator::mFC{};
std::vector<nvinfer1::PluginField>
    DeformableConvPluginDynamicCreator::mPluginAttributes;

// Builds a plugin from explicit parameters, storing each argument in the
// member of the same name: layer name, stride, padding, dilation, deformable
// group count, group count and im2col step.
DeformableConvPluginDynamic::DeformableConvPluginDynamic(
    const std::string &name, const nvinfer1::Dims &stride,
    const nvinfer1::Dims &padding, const nvinfer1::Dims &dilation,
    const int deformableGroup, const int group, int im2colStep)
    : mLayerName(name),
      mStride(stride),
      mPadding(padding),
      mDilation(dilation),
      mDeformableGroup(deformableGroup),
      mGroup(group),
      mIm2colStep(im2colStep) {}

// Builds a plugin from a serialized buffer. Members are read back with
// deserialize_value in this fixed order: stride, padding, dilation, deformable
// group, group, im2col step.
// NOTE(review): the matching serialize() is outside this view - confirm that
// it writes the members in the same order.
DeformableConvPluginDynamic::DeformableConvPluginDynamic(const std::string name,
                                                         const void *data,
                                                         size_t length)
    : mLayerName(name) {
  deserialize_value(&data, &length, &mStride);
  deserialize_value(&data, &length, &mPadding);
  deserialize_value(&data, &length, &mDilation);
  deserialize_value(&data, &length, &mDeformableGroup);
  deserialize_value(&data, &length, &mGroup);
  deserialize_value(&data, &length, &mIm2colStep);
}
// Empty destructor: no explicit cleanup is performed here.
DeformableConvPluginDynamic::~DeformableConvPluginDynamic() {}

// Returns a heap-allocated copy of this plugin with identical convolution
// parameters and the same plugin namespace.
nvinfer1::IPluginV2DynamicExt *DeformableConvPluginDynamic::clone() const {
  auto *copy =
      new DeformableConvPluginDynamic(mLayerName, mStride, mPadding, mDilation,
                                      mDeformableGroup, mGroup, mIm2colStep);
  copy->setPluginNamespace(getPluginNamespace());
  return copy;
}

// Builds the 4-D output shape from the inputs' dimension expressions:
//   d[0] <- inputs[0].d[0]
//   d[1] <- inputs[2].d[0]
//   d[2], d[3] <- inputs[1].d[2], inputs[1].d[3]
// NOTE(review): presumably inputs are (feature map, offset, weight), making
// this (batch, out_channels, offset height, offset width) - confirm against
// the caller. `outputIndex`, `nbInputs` and `exprBuilder` are not used.
nvinfer1::DimsExprs DeformableConvPluginDynamic::getOutputDimensions(
    int outputIndex, const nvinfer1::DimsExprs *inputs, int nbInputs,
    nvinfer1::IExprBuilder &exprBuilder) {
  nvinfer1::DimsExprs ret;
  ret.nbDims = 4;
  ret.d[0] = inputs[0].d[0];
  ret.d[1] = inputs[2].d[0];

  ret.d[2] = inputs[1].d[2];
  ret.d[3] = inputs[1].d[3];

  return ret;
}

// Reports whether the tensor at `pos` has an acceptable type/format.
// Tensor 0 must be linear FP32; every other tensor must match tensor 0's
// type and format. nbInputs and nbOutputs are not used.
bool DeformableConvPluginDynamic::supportsFormatCombination(
    int pos, const nvinfer1::PluginTensorDesc *inOut, int nbInputs,
    int nbOutputs) {
  const nvinfer1::PluginTensorDesc &candidate = inOut[pos];
  if (pos != 0) {
    const nvinfer1::PluginTensorDesc &reference = inOut[0];
    return candidate.type == reference.type &&
           candidate.format == reference.format;
  }
  return candidate.type == nvinfer1::DataType::kFLOAT &&
         candidate.format == nvinfer1::TensorFormat::kLINEAR;
}

// Intentionally a no-op: this plugin keeps no state derived from the tensor
// descriptors passed here.
void DeformableConvPluginDynamic::configurePlugin(
    const nvinfer1::DynamicPluginTensorDesc *inputs, int nbInputs,
    const nvinfer1::DynamicPluginTensorDesc *outputs, int nbOutputs) {}

// Returns the scratch-buffer size (in bytes) needed by enqueue():
//   * an aligned column buffer of
//       C_in * kH * kW * im2col_step * H_out * W_out elements, plus
//   * when im2col_step != 1, an aligned staging buffer large enough for the
//     whole output (N * C_out * H_out * W_out elements).
// All products are evaluated in size_t. The previous implementation
// multiplied plain ints, which overflows for realistic shapes
// (e.g. 256 * 3 * 3 * 32 * 100 * 100 * 4 bytes exceeds INT_MAX).
// nbInputs and nbOutputs are not used.
size_t DeformableConvPluginDynamic::getWorkspaceSize(
    const nvinfer1::PluginTensorDesc *inputs, int nbInputs,
    const nvinfer1::PluginTensorDesc *outputs, int nbOutputs) const {
  const size_t elemSize =
      static_cast<size_t>(mmcv::getElementSize(outputs[0].type));

  const int batch_size = inputs[0].dims.d[0];
  const size_t nInputPlane = static_cast<size_t>(inputs[0].dims.d[1]);

  const size_t nOutputPlane = static_cast<size_t>(outputs[0].dims.d[1]);
  const size_t outputHeight = static_cast<size_t>(outputs[0].dims.d[2]);
  const size_t outputWidth = static_cast<size_t>(outputs[0].dims.d[3]);

  // Same indices enqueue() uses for the kernel extents of inputs[2].
  const size_t kernelHeight = static_cast<size_t>(inputs[2].dims.d[2]);
  const size_t kernelWidth = static_cast<size_t>(inputs[2].dims.d[3]);

  const int im2col_step = std::min(batch_size, mIm2colStep);

  const size_t col_size = mmcv::getAlignedSize(
      nInputPlane * kernelHeight * kernelWidth *
      static_cast<size_t>(im2col_step) * outputHeight * outputWidth *
      elemSize);

  // The staging buffer is only needed when several images are processed per
  // GEMM and the result has to be permuted afterwards.
  size_t out_size = 0;
  if (im2col_step != 1) {
    out_size = mmcv::getAlignedSize(static_cast<size_t>(batch_size) *
                                    nOutputPlane * outputHeight * outputWidth *
                                    elemSize);
  }

  return col_size + out_size;
}

// Runs the deformable convolution on `stream`.
// inputs[0] is the feature map, inputs[1] the offsets, inputs[2] the weights;
// outputs[0] receives the result. Only FP32 is handled.
// Returns 0 on success and 1 for any other data type.
int DeformableConvPluginDynamic::enqueue(
    const nvinfer1::PluginTensorDesc *inputDesc,
    const nvinfer1::PluginTensorDesc *outputDesc, const void *const *inputs,
    void *const *outputs, void *workSpace, cudaStream_t stream) {
  // TODO: add fp16 support
  if (inputDesc[0].type != nvinfer1::DataType::kFLOAT) {
    return 1;
  }

  const nvinfer1::Dims &xDims = inputDesc[0].dims;
  const nvinfer1::Dims &weightDims = inputDesc[2].dims;

  const int batch = xDims.d[0];
  const int channelsIn = xDims.d[1];
  const int heightIn = xDims.d[2];
  const int widthIn = xDims.d[3];
  const int channelsOut = outputDesc[0].dims.d[1];
  const int kernelH = weightDims.d[2];
  const int kernelW = weightDims.d[3];

  // Never process more images per step than the batch contains.
  const int step = std::min(batch, mIm2colStep);

  DeformConvForwardCUDAKernelLauncher_float(
      (float *)inputs[0], (float *)inputs[2], (float *)inputs[1],
      (float *)outputs[0], workSpace, batch, channelsIn, heightIn, widthIn,
      channelsOut, kernelW, kernelH, mStride.d[0], mStride.d[1], mPadding.d[0],
      mPadding.d[1], mDilation.d[0], mDilation.d[1], mGroup, mDeformableGroup,
      step, m_cublas_handle, stream);

  return 0;
}

// The output uses the same data type as the first input; `index` and
// `nbInputs` are not consulted.
nvinfer1::DataType DeformableConvPluginDynamic::getOutputDataType(
    int index, const nvinfer1::DataType *inputTypes, int nbInputs) const {
  return inputTypes[0];
}

// IPluginV2 Methods

// Returns the plugin type string (the file-local PLUGIN_NAME constant).
const char *DeformableConvPluginDynamic::getPluginType() const {
  return PLUGIN_NAME;
}

// Returns the plugin version string (the file-local PLUGIN_VERSION constant).
const char *DeformableConvPluginDynamic::getPluginVersion() const {
  return PLUGIN_VERSION;
}

// The plugin produces exactly one output tensor.
int DeformableConvPluginDynamic::getNbOutputs() const { return 1; }

// No initialization work is performed; always returns 0.
int DeformableConvPluginDynamic::initialize() { return 0; }

// Counterpart of initialize(); nothing to tear down.
void DeformableConvPluginDynamic::terminate() {}

// Number of bytes serialize() writes: the summed sizes of the six members it
// stores. Must be kept in sync with serialize() and the deserializing
// constructor.
size_t DeformableConvPluginDynamic::getSerializationSize() const {
  return sizeof(mStride) + sizeof(mPadding) + sizeof(mDilation) +
         sizeof(mDeformableGroup) + sizeof(mGroup) + sizeof(mIm2colStep);
}

// Writes the plugin attributes into `buffer` in a fixed order (stride,
// padding, dilation, deformable group, group, im2col step). The deserializing
// constructor reads them back in the same order.
void DeformableConvPluginDynamic::serialize(void *buffer) const {
  serialize_value(&buffer, mStride);
  serialize_value(&buffer, mPadding);
  serialize_value(&buffer, mDilation);
  serialize_value(&buffer, mDeformableGroup);
  serialize_value(&buffer, mGroup);
  serialize_value(&buffer, mIm2colStep);
}

// Self-deletes the plugin object; the instance must not be used afterwards.
void DeformableConvPluginDynamic::destroy() {
  // This gets called when the network containing plugin is destroyed
  delete this;
}

// Remembers the cuBLAS handle supplied by the execution context; enqueue()
// passes it to the kernel launcher. The cuDNN context and GPU allocator are
// ignored.
void DeformableConvPluginDynamic::attachToContext(
    cudnnContext *cudnnContext, cublasContext *cublasContext,
    nvinfer1::IGpuAllocator *gpuAllocator) {
  m_cublas_handle = cublasContext;
}

// No-op; note that the handle stored by attachToContext() is not reset here.
void DeformableConvPluginDynamic::detachFromContext() {}

// Stores a copy of the given namespace string in mNamespace.
void DeformableConvPluginDynamic::setPluginNamespace(const char *libNamespace) {
  mNamespace = libNamespace;
}

// Returns the stored namespace; the pointer refers to mNamespace's internal
// buffer and is invalidated when the namespace is changed.
const char *DeformableConvPluginDynamic::getPluginNamespace() const {
  return mNamespace.c_str();
}

////////////////////// creator /////////////////////////////

// Registers the attribute names this creator advertises through
// getFieldNames().
// mPluginAttributes and mFC are static, so the list is cleared first.
// Without that, constructing a second creator would append a duplicate set
// of fields to the shared vector.
DeformableConvPluginDynamicCreator::DeformableConvPluginDynamicCreator() {
  mPluginAttributes.clear();
  mPluginAttributes.emplace_back(nvinfer1::PluginField("stride"));
  mPluginAttributes.emplace_back(nvinfer1::PluginField("padding"));
  mPluginAttributes.emplace_back(nvinfer1::PluginField("dilation"));
  mPluginAttributes.emplace_back(nvinfer1::PluginField("groups"));
  mPluginAttributes.emplace_back(nvinfer1::PluginField("deform_groups"));
  mPluginAttributes.emplace_back(nvinfer1::PluginField("bias"));
  mPluginAttributes.emplace_back(nvinfer1::PluginField("im2col_step"));
  // Point the collection at the vector only after all insertions, since
  // emplace_back may reallocate the storage.
  mFC.nbFields = mPluginAttributes.size();
  mFC.fields = mPluginAttributes.data();
}

// Name under which this creator is looked up (file-local PLUGIN_NAME).
const char *DeformableConvPluginDynamicCreator::getPluginName() const {
  return PLUGIN_NAME;
}

// Version string of the plugins this creator builds (file-local
// PLUGIN_VERSION).
const char *DeformableConvPluginDynamicCreator::getPluginVersion() const {
  return PLUGIN_VERSION;
}

// Returns the static field collection populated by the constructor.
const nvinfer1::PluginFieldCollection *
DeformableConvPluginDynamicCreator::getFieldNames() {
  return &mFC;
}

// Builds a DeformableConvPluginDynamic from a plugin field collection.
//
// Recognized fields (all read as int data):
//   "stride", "padding", "dilation" - one or two values; a single value is
//                                     applied to both axes
//   "groups" / "group"              - convolution groups
//   "deform_groups" / "deformable_group" - deformable groups
//   "im2col_step"                   - images processed per im2col/GEMM step
// Fields with null data (or a null name) are skipped and keep their defaults:
// stride 1, padding 0, dilation 1, groups 1, deform groups 1, im2col_step 32.
//
// The constructor advertises "groups" and "deform_groups", but this parser
// previously matched only "group" / "deformable_group", so attributes passed
// under the advertised names were silently ignored. Both spellings are now
// accepted. The advertised "bias" field is not consumed by this plugin.
nvinfer1::IPluginV2 *DeformableConvPluginDynamicCreator::createPlugin(
    const char *name, const nvinfer1::PluginFieldCollection *fc) {
  nvinfer1::Dims stride{2, {1, 1}};
  nvinfer1::Dims padding{2, {0, 0}};
  nvinfer1::Dims dilation{2, {1, 1}};
  int deformableGroup = 1;
  int group = 1;
  int im2col_step = 32;

  // Reads a 1- or 2-element int attribute into a 2-D Dims.
  auto readPair = [](const nvinfer1::PluginField &field,
                     nvinfer1::Dims &dims) {
    const int *values = static_cast<const int *>(field.data);
    dims.nbDims = 2;
    dims.d[0] = values[0];
    dims.d[1] = (field.length == 1) ? values[0] : values[1];
  };

  for (int i = 0; i < fc->nbFields; i++) {
    const nvinfer1::PluginField &field = fc->fields[i];
    if (field.data == nullptr || field.name == nullptr) {
      continue;
    }
    const std::string field_name(field.name);
    const int *values = static_cast<const int *>(field.data);

    if (field_name == "stride") {
      readPair(field, stride);
    } else if (field_name == "padding") {
      readPair(field, padding);
    } else if (field_name == "dilation") {
      readPair(field, dilation);
    } else if (field_name == "deform_groups" ||
               field_name == "deformable_group") {
      deformableGroup = values[0];
    } else if (field_name == "groups" || field_name == "group") {
      group = values[0];
    } else if (field_name == "im2col_step") {
      im2col_step = values[0];
    }
  }

  DeformableConvPluginDynamic *plugin = new DeformableConvPluginDynamic(
      name, stride, padding, dilation, deformableGroup, group, im2col_step);
  plugin->setPluginNamespace(getPluginNamespace());
  return plugin;
}

// Reconstructs a plugin from bytes previously produced by
// DeformableConvPluginDynamic::serialize() and tags it with this creator's
// namespace. The returned object is heap-allocated.
nvinfer1::IPluginV2 *DeformableConvPluginDynamicCreator::deserializePlugin(
    const char *name, const void *serialData, size_t serialLength) {
  auto plugin = new DeformableConvPluginDynamic(name, serialData, serialLength);
  plugin->setPluginNamespace(getPluginNamespace());
  return plugin;
}

// Stores a copy of the namespace that newly created plugins will inherit.
void DeformableConvPluginDynamicCreator::setPluginNamespace(
    const char *libNamespace) {
  mNamespace = libNamespace;
}

// Returns the creator's namespace; the pointer refers to mNamespace's
// internal buffer.
const char *DeformableConvPluginDynamicCreator::getPluginNamespace() const {
  return mNamespace.c_str();
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/tensorrt/plugins/trt_deform_conv_kernel.cu
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include <cuda_fp16.h>

#include "common_cuda_helper.hpp"
#include "deform_conv_cuda_kernel.cuh"
#include "trt_cuda_helper.cuh"
#include "trt_plugin_helper.hpp"

// Launches deformable_im2col_gpu_kernel on `stream` to fill `data_col` from
// `data_input` using the sampling offsets in `data_offset`.
// The output spatial extent is derived from the input size, padding,
// dilation, kernel size and stride. The launch covers
// channels * height_col * width_col * parallel_imgs work items.
template <typename T>
void trt_deformable_im2col(const T* data_input, const T* data_offset,
                           const int channels, const int height,
                           const int width, const int ksize_h,
                           const int ksize_w, const int pad_h, const int pad_w,
                           const int stride_h, const int stride_w,
                           const int dilation_h, const int dilation_w,
                           const int parallel_imgs, const int deformable_group,
                           T* data_col, cudaStream_t stream) {
  // Extent covered by the dilated kernel along each axis.
  const int span_h = dilation_h * (ksize_h - 1) + 1;
  const int span_w = dilation_w * (ksize_w - 1) + 1;

  const int out_h = (height + 2 * pad_h - span_h) / stride_h + 1;
  const int out_w = (width + 2 * pad_w - span_w) / stride_w + 1;

  const int total_work = channels * out_h * out_w * parallel_imgs;
  const int channels_per_group = channels / deformable_group;

  deformable_im2col_gpu_kernel<T>
      <<<GET_BLOCKS(total_work), THREADS_PER_BLOCK, 0, stream>>>(
          total_work, data_input, data_offset, height, width, ksize_h, ksize_w,
          pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w,
          channels_per_group, parallel_imgs, channels, deformable_group, out_h,
          out_w, data_col);

  cudaCheckError();
}

// Forward pass of deformable convolution: im2col into a column buffer, then
// one GEMM per group, processing `im2col_step` images per iteration.
//
// `workspace` must hold the aligned column buffer followed (when more than
// one image is processed per step) by a staging buffer for the whole output;
// the staged result is permuted into `output` at the end.
//
// Changes relative to the previous implementation:
//   * An empty batch or a non-positive step returns immediately instead of
//     dividing by zero.
//   * If the step does not divide the batch size it is lowered to the largest
//     divisor, so trailing images are no longer silently skipped. A smaller
//     step only ever needs less workspace.
//   * Element-offset products are evaluated in 64-bit arithmetic rather than
//     in int.
//   * The workspace offset uses char* arithmetic instead of arithmetic on
//     void*, which is a compiler extension.
//   * The unused output_buffer_size local is removed and narrowing
//     conversions are made explicit.
template <typename scalar_t>
void DeformConvForwardCUDAKernelLauncher(
    const scalar_t* input, const scalar_t* weight, const scalar_t* offset,
    scalar_t* output, void* workspace, int batchSize, int nInputPlane,
    int inputHeight, int inputWidth, int nOutputPlane, int kW, int kH, int dW,
    int dH, int padW, int padH, int dilationW, int dilationH, int group,
    int deformable_group, int im2col_step, cublasHandle_t cublas_handle,
    cudaStream_t stream) {
  const size_t word_size = sizeof(scalar_t);

  im2col_step = std::min(batchSize, im2col_step);
  if (batchSize <= 0 || im2col_step <= 0) {
    // Nothing to compute (or no valid step); avoid the division below.
    return;
  }
  // The batch is consumed in chunks of im2col_step images, so the step must
  // divide the batch size. Terminates at 1 in the worst case.
  while (batchSize % im2col_step != 0) {
    --im2col_step;
  }

  const long long outputWidth =
      (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1;
  const long long outputHeight =
      (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1;

  const size_t columns_size = mmcv::getAlignedSize(
      static_cast<size_t>(nInputPlane) * kW * kH * im2col_step *
      static_cast<size_t>(outputHeight) * static_cast<size_t>(outputWidth) *
      word_size);

  // column buffer for img2col
  char* workspace_bytes = static_cast<char*>(workspace);
  scalar_t* columns = reinterpret_cast<scalar_t*>(workspace_bytes);

  // output need permute when im2col_step!=1, so stage it after the columns
  scalar_t* output_buffer =
      (im2col_step == 1)
          ? output
          : reinterpret_cast<scalar_t*>(workspace_bytes + columns_size);

  // Leading cast forces every product below to be evaluated in long long.
  const long long input_elt_step =
      static_cast<long long>(im2col_step) * nInputPlane * inputHeight *
      inputWidth;
  const long long offset_elt_step = static_cast<long long>(im2col_step) *
                                    deformable_group * 2 * kH * kW *
                                    outputHeight * outputWidth;
  const long long out_buffer_step = static_cast<long long>(nOutputPlane) *
                                    im2col_step * outputHeight * outputWidth;
  const long long col_g_step = static_cast<long long>(nInputPlane) * kW * kH /
                               group * im2col_step * outputHeight *
                               outputWidth;
  const long long weight_g_step =
      static_cast<long long>(nOutputPlane) / group * nInputPlane / group * kH *
      kW;
  const long long out_buffer_g_step = static_cast<long long>(nOutputPlane) /
                                      group * im2col_step * outputHeight *
                                      outputWidth;

  // GEMM extents for one group.
  const int m = nOutputPlane / group;
  const int n = static_cast<int>(im2col_step * outputHeight * outputWidth);
  const int k = nInputPlane / group * kH * kW;
  scalar_t alpha = 1.;
  scalar_t beta = 0.;

  for (int elt = 0; elt < batchSize / im2col_step; elt++) {
    const scalar_t* input_start = input + elt * input_elt_step;
    const scalar_t* offset_start = offset + elt * offset_elt_step;

    trt_deformable_im2col<scalar_t>(input_start, offset_start, nInputPlane,
                                    inputHeight, inputWidth, kH, kW, padH, padW,
                                    dH, dW, dilationH, dilationW, im2col_step,
                                    deformable_group, columns, stream);

    for (int g = 0; g < group; ++g) {
      const scalar_t* weight_start = weight + g * weight_g_step;
      scalar_t* col_start = columns + g * col_g_step;
      scalar_t* out_buffer_start =
          output_buffer + elt * out_buffer_step + g * out_buffer_g_step;

      cublasGemmWrap<scalar_t>(cublas_handle, CUBLAS_OP_N, CUBLAS_OP_N, n, m, k,
                               &alpha, col_start, n, weight_start, k, &beta,
                               out_buffer_start, n);
      cudaCheckError();
    }
  }

  if (im2col_step != 1) {
    // Staged layout is (steps, C_out, im2col_step, H_out, W_out); swapping
    // axes 1 and 2 yields the (N, C_out, H_out, W_out) output.
    int output_buffer_shape[5] = {batchSize / im2col_step, nOutputPlane,
                                  im2col_step, static_cast<int>(outputHeight),
                                  static_cast<int>(outputWidth)};
    int output_buffer_permute[5] = {0, 2, 1, 3, 4};
    memcpyPermute<scalar_t>(output, output_buffer, &output_buffer_shape[0],
                            &output_buffer_permute[0], 5, stream);
  }
}

// Non-template FP32 entry point: forwards every argument unchanged to
// DeformConvForwardCUDAKernelLauncher<float>. This gives callers a plain
// function to call, e.g. the plugin's enqueue().
void DeformConvForwardCUDAKernelLauncher_float(
    const float* input, const float* weight, const float* offset, float* output,
    void* workspace, int batchSize, int nInputPlane, int inputHeight,
    int inputWidth, int nOutputPlane, int kW, int kH, int dW, int dH, int padW,
    int padH, int dilationW, int dilationH, int group, int deformable_group,
    int im2col_step, cublasHandle_t cublas_handle, cudaStream_t stream) {
  DeformConvForwardCUDAKernelLauncher<float>(
      input, weight, offset, output, workspace, batchSize, nInputPlane,
      inputHeight, inputWidth, nOutputPlane, kW, kH, dW, dH, padW, padH,
      dilationW, dilationH, group, deformable_group, im2col_step, cublas_handle,
      stream);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/tensorrt/plugins/trt_grid_sampler.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "trt_grid_sampler.hpp"

#include <assert.h>
#include <stdio.h>

#include <chrono>

#include "trt_serialize.hpp"

using mmcv::GridSamplerInterpolation;
using mmcv::GridSamplerPadding;

// Forward declaration of the FP32 grid-sample launcher called from
// GridSamplerDynamic::enqueue().
// NOTE(review): presumably defined in trt_grid_sampler_kernel.cu - confirm.
void grid_sample_float(float *output, const float *input, const float *grid,
                       int *output_dims, int *input_dims, int *grid_dims,
                       int nb_dims, GridSamplerInterpolation interp,
                       GridSamplerPadding padding, bool align_corners,
                       cudaStream_t stream);

// File-local identifiers reported by the plugin and its creator through
// getPluginType()/getPluginName() and getPluginVersion().
namespace {
static const char *PLUGIN_VERSION{"1"};
static const char *PLUGIN_NAME{"grid_sampler"};
}  // namespace

// Storage for the creator's static members: the field collection returned by
// getFieldNames() and the vector backing its `fields` pointer.
nvinfer1::PluginFieldCollection GridSamplerDynamicCreator::mFC{};
std::vector<nvinfer1::PluginField> GridSamplerDynamicCreator::mPluginAttributes;

// Builds a plugin from explicit attributes. enqueue() interprets `mode` as
// 0 = bilinear, 1 = nearest, and `paddingMode` as 0 = zeros, 1 = border,
// 2 = reflection.
GridSamplerDynamic::GridSamplerDynamic(const std::string &name, int mode,
                                       int paddingMode, bool alignCorners)
    : mLayerName(name),
      mMode(mode),
      mPaddingMode(paddingMode),
      mAlignCorners(alignCorners) {}

// Rebuilds a plugin from a serialized byte buffer. The members are read in
// the order serialize() writes them: mode, padding mode, align-corners flag.
GridSamplerDynamic::GridSamplerDynamic(const std::string name, const void *data,
                                       size_t length)
    : mLayerName(name) {
  deserialize_value(&data, &length, &mMode);
  deserialize_value(&data, &length, &mPaddingMode);
  deserialize_value(&data, &length, &mAlignCorners);
}

// Returns a heap-allocated copy with the same layer name, sampling
// attributes and plugin namespace. Ownership passes to the caller.
nvinfer1::IPluginV2DynamicExt *GridSamplerDynamic::clone() const {
  auto *copy =
      new GridSamplerDynamic(mLayerName, mMode, mPaddingMode, mAlignCorners);
  copy->setPluginNamespace(getPluginNamespace());
  return copy;
}

// Derives the symbolic output shape. The rank and the first two dimensions
// come from inputs[0]. Every remaining dimension i is taken from
// inputs[1].d[i - 1]; enqueue() treats inputs[1] as the sampling grid.
// outputIndex, nbInputs and exprBuilder are not used.
nvinfer1::DimsExprs GridSamplerDynamic::getOutputDimensions(
    int outputIndex, const nvinfer1::DimsExprs *inputs, int nbInputs,
    nvinfer1::IExprBuilder &exprBuilder) {
  const nvinfer1::DimsExprs &inputDims = inputs[0];
  const nvinfer1::DimsExprs &gridDims = inputs[1];

  nvinfer1::DimsExprs outDims;
  outDims.nbDims = inputDims.nbDims;
  outDims.d[0] = inputDims.d[0];
  outDims.d[1] = inputDims.d[1];
  for (int axis = 2; axis < outDims.nbDims; ++axis) {
    outDims.d[axis] = gridDims.d[axis - 1];
  }
  return outDims;
}

// Reports whether the tensor at `pos` has an acceptable type/format.
// Tensor 0 must be linear FP32; every other tensor must match tensor 0's
// type and format. nbInputs and nbOutputs are not used.
bool GridSamplerDynamic::supportsFormatCombination(
    int pos, const nvinfer1::PluginTensorDesc *inOut, int nbInputs,
    int nbOutputs) {
  const nvinfer1::PluginTensorDesc &candidate = inOut[pos];
  if (pos != 0) {
    const nvinfer1::PluginTensorDesc &reference = inOut[0];
    return candidate.type == reference.type &&
           candidate.format == reference.format;
  }
  return candidate.type == nvinfer1::DataType::kFLOAT &&
         candidate.format == nvinfer1::TensorFormat::kLINEAR;
}

// Currently a no-op: no validation or caching of the descriptors is done.
void GridSamplerDynamic::configurePlugin(
    const nvinfer1::DynamicPluginTensorDesc *inputs, int nbInputs,
    const nvinfer1::DynamicPluginTensorDesc *outputs, int nbOutputs) {
  // Validate input arguments
}

// Always requests zero bytes of scratch workspace.
size_t GridSamplerDynamic::getWorkspaceSize(
    const nvinfer1::PluginTensorDesc *inputs, int nbInputs,
    const nvinfer1::PluginTensorDesc *outputs, int nbOutputs) const {
  return 0;
}

// Runs grid sampling on `stream`: inputs[0] is the feature tensor, inputs[1]
// the sampling grid, outputs[0] receives the result.
// mMode selects the interpolation (1 = nearest, anything else = bilinear).
// mPaddingMode selects the padding (1 = border, 2 = reflection, anything
// else = zeros).
// Only FP32 is handled. Returns 0 on success and 1 for any other data type.
int GridSamplerDynamic::enqueue(const nvinfer1::PluginTensorDesc *inputDesc,
                                const nvinfer1::PluginTensorDesc *outputDesc,
                                const void *const *inputs, void *const *outputs,
                                void *workSpace, cudaStream_t stream) {
  if (inputDesc[0].type != nvinfer1::DataType::kFLOAT) {
    return 1;
  }

  const GridSamplerInterpolation interpolation =
      (mMode == 1) ? GridSamplerInterpolation::Nearest
                   : GridSamplerInterpolation::Bilinear;

  GridSamplerPadding padding = GridSamplerPadding::Zeros;
  if (mPaddingMode == 1) {
    padding = GridSamplerPadding::Border;
  } else if (mPaddingMode == 2) {
    padding = GridSamplerPadding::Reflection;
  }

  // Local copies: the launcher takes non-const int pointers to the extents.
  nvinfer1::Dims inShape = inputDesc[0].dims;
  nvinfer1::Dims gridShape = inputDesc[1].dims;
  nvinfer1::Dims outShape = outputDesc[0].dims;

  grid_sample_float((float *)outputs[0], (float *)inputs[0],
                    (float *)inputs[1], &(outShape.d[0]), &(inShape.d[0]),
                    &(gridShape.d[0]), inShape.nbDims, interpolation, padding,
                    mAlignCorners, stream);

  return 0;
}

// The output uses the same data type as the first input; `index` and
// `nbInputs` are not consulted.
nvinfer1::DataType GridSamplerDynamic::getOutputDataType(
    int index, const nvinfer1::DataType *inputTypes, int nbInputs) const {
  return inputTypes[0];
}

// IPluginV2 Methods

// Returns the plugin type string (the file-local PLUGIN_NAME constant).
const char *GridSamplerDynamic::getPluginType() const { return PLUGIN_NAME; }

// Returns the plugin version string (the file-local PLUGIN_VERSION constant).
const char *GridSamplerDynamic::getPluginVersion() const {
  return PLUGIN_VERSION;
}

// The plugin produces exactly one output tensor.
int GridSamplerDynamic::getNbOutputs() const { return 1; }

// No initialization work is performed; always returns 0.
int GridSamplerDynamic::initialize() { return 0; }

// Counterpart of initialize(); nothing to tear down.
void GridSamplerDynamic::terminate() {}

// Number of bytes serialize() writes: the summed sizes of the three members
// it stores. Must be kept in sync with serialize() and the deserializing
// constructor.
size_t GridSamplerDynamic::getSerializationSize() const {
  return sizeof(mMode) + sizeof(mPaddingMode) + sizeof(mAlignCorners);
}

// Writes mode, padding mode and the align-corners flag into `buffer`, in that
// order; the deserializing constructor reads them back in the same order.
void GridSamplerDynamic::serialize(void *buffer) const {
  serialize_value(&buffer, mMode);
  serialize_value(&buffer, mPaddingMode);
  serialize_value(&buffer, mAlignCorners);
}

// Self-deletes the plugin object; the instance must not be used afterwards.
void GridSamplerDynamic::destroy() {
  // This gets called when the network containing plugin is destroyed
  delete this;
}

// Stores a copy of the given namespace string in mNamespace.
void GridSamplerDynamic::setPluginNamespace(const char *libNamespace) {
  mNamespace = libNamespace;
}

// Returns the stored namespace; the pointer refers to mNamespace's internal
// buffer and is invalidated when the namespace is changed.
const char *GridSamplerDynamic::getPluginNamespace() const {
  return mNamespace.c_str();
}

////////////////////// creator /////////////////////////////

// Registers the attribute names advertised through getFieldNames().
// The static attribute list is cleared first so that constructing several
// creators does not accumulate duplicate entries.
GridSamplerDynamicCreator::GridSamplerDynamicCreator() {
  mPluginAttributes.clear();
  mPluginAttributes.emplace_back(nvinfer1::PluginField("interpolation_mode"));
  mPluginAttributes.emplace_back(nvinfer1::PluginField("padding_mode"));
  mPluginAttributes.emplace_back(nvinfer1::PluginField("align_corners"));
  // Point the collection at the vector only after all insertions are done.
  mFC.nbFields = mPluginAttributes.size();
  mFC.fields = mPluginAttributes.data();
}

// Name under which this creator is looked up (file-local PLUGIN_NAME).
const char *GridSamplerDynamicCreator::getPluginName() const {
  return PLUGIN_NAME;
}

// Version string of the plugins this creator builds (file-local
// PLUGIN_VERSION).
const char *GridSamplerDynamicCreator::getPluginVersion() const {
  return PLUGIN_VERSION;
}

// Returns the static field collection populated by the constructor.
const nvinfer1::PluginFieldCollection *
GridSamplerDynamicCreator::getFieldNames() {
  return &mFC;
}

// Builds a GridSamplerDynamic from a plugin field collection.
// Recognized fields (each read as the first int of its data):
//   "interpolation_mode", "padding_mode", "align_corners" (non-zero = true).
// Fields with null data are skipped; unset attributes default to 0 / 0 /
// false.
nvinfer1::IPluginV2 *GridSamplerDynamicCreator::createPlugin(
    const char *name, const nvinfer1::PluginFieldCollection *fc) {
  int interpolation = 0;
  int padding = 0;
  bool cornersAligned = false;

  for (int idx = 0; idx < fc->nbFields; ++idx) {
    const nvinfer1::PluginField &field = fc->fields[idx];
    if (field.data == nullptr) {
      continue;
    }

    const std::string key(field.name);
    const int firstValue = static_cast<const int *>(field.data)[0];

    if (key == "interpolation_mode") {
      interpolation = firstValue;
    } else if (key == "padding_mode") {
      padding = firstValue;
    } else if (key == "align_corners") {
      cornersAligned = (bool)firstValue;
    }
  }

  auto *created =
      new GridSamplerDynamic(name, interpolation, padding, cornersAligned);
  created->setPluginNamespace(getPluginNamespace());
  return created;
}

// Reconstructs a plugin from bytes previously produced by
// GridSamplerDynamic::serialize() and tags it with this creator's namespace.
nvinfer1::IPluginV2 *GridSamplerDynamicCreator::deserializePlugin(
    const char *name, const void *serialData, size_t serialLength) {
  // This object will be deleted when the network is destroyed, which will
  // call GridSamplerDynamic::destroy()
  auto plugin = new GridSamplerDynamic(name, serialData, serialLength);
  plugin->setPluginNamespace(getPluginNamespace());
  return plugin;
}

// Stores a copy of the namespace that newly created plugins will inherit.
void GridSamplerDynamicCreator::setPluginNamespace(const char *libNamespace) {
  mNamespace = libNamespace;
}

// Returns the creator's namespace; the pointer refers to mNamespace's
// internal buffer.
const char *GridSamplerDynamicCreator::getPluginNamespace() const {
  return mNamespace.c_str();
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/tensorrt/plugins/trt_grid_sampler_kernel.cu
================================================
// Copyright (c) OpenMMLab. All rights reserved
// modified from
// https://github.com/pytorch/pytorch/blob/ec683299ebabf297a3504c76248d37be830e4342/aten/src/ATen/native/cuda/GridSampler.cuh
// and
// https://github.com/pytorch/pytorch/blob/ec683299ebabf297a3504c76248d37be830e4342/aten/src/ATen/native/cuda/GridSampler.cu

#include <cuda_fp16.h>
#include <stdio.h>

#include <algorithm>
#include <cmath>
#include <vector>

#include "common_cuda_helper.hpp"
#include "trt_cuda_helper.cuh"
#include "trt_grid_sampler.hpp"
#include "trt_plugin_helper.hpp"

using mmcv::GridSamplerInterpolation;
using mmcv::GridSamplerPadding;
using mmcv::TensorDesc;

// Maps a normalized coordinate in [-1, +1] to a pixel index, treating each
// pixel as the interval (idx - 0.5, idx + 0.5).
//   align_corners == true : -1 -> 0 and +1 -> size - 1 (corner pixel centers)
//   align_corners == false: -1 -> -0.5 and +1 -> size - 0.5 (image edges)
template <typename scalar_t>
static __forceinline__ __device__ scalar_t
grid_sampler_unnormalize(scalar_t coord, int size, bool align_corners) {
  if (!align_corners) {
    // [-1, 1] -> [-0.5, size - 0.5]
    return ((coord + 1.f) * size - 1) / 2;
  }
  // [-1, 1] -> [0, size - 1]
  return ((coord + 1.f) / 2) * (size - 1);
}

// Clips coordinates to between 0 and clip_limit - 1
// (i.e. returns min(clip_limit - 1, max(in, 0)) evaluated in scalar_t).
template <typename scalar_t>
static __forceinline__ __device__ scalar_t clip_coordinates(scalar_t in,
                                                            int clip_limit) {
  return ::min(static_cast<scalar_t>(clip_limit - 1),
               ::max(in, static_cast<scalar_t>(0)));
}

// Reflects coordinates until they fall between low and high (inclusive).
// The bounds are passed as twice their value so that half-integer values
// can be represented as ints.
// When both bounds coincide the range is degenerate and 0 is returned.
template <typename scalar_t>
static __forceinline__ __device__ scalar_t reflect_coordinates(scalar_t in,
                                                               int twice_low,
                                                               int twice_high) {
  if (twice_low == twice_high) {
    return static_cast<scalar_t>(0);
  }
  // Lower bound and width of the valid interval, halved back to real units.
  scalar_t min = static_cast<scalar_t>(twice_low) / 2;
  scalar_t span = static_cast<scalar_t>(twice_high - twice_low) / 2;
  // Distance from the lower bound, made non-negative.
  in = ::fabs(in - min);
  // `fmod` returns same sign as `in`, which is positive after the `fabs` above.
  scalar_t extra = ::fmod(in, span);
  // Number of whole spans crossed; its parity decides the direction.
  int flips = static_cast<int>(::floor(in / span));
  if (flips % 2 == 0) {
    // Even number of flips: measure from the lower bound.
    return extra + min;
  } else {
    // Odd number of flips: measure back from the upper bound.
    return span - extra + min;
  }
}

// Replaces values that cannot be safely converted to int (greater than
// INT_MAX - 1, less than INT_MIN, or non-finite) with -100; other values are
// returned unchanged.
template <typename scalar_t>
static __forceinline__ __device__ scalar_t
safe_downgrade_to_int_range(scalar_t x) {
  // -100.0 does not have special meaning. This is just to make sure
  // it's not within_bounds_2d or within_bounds_3d, and does not cause
  // undefined behavior. See #35506.
  if (x > INT_MAX - 1 || x < INT_MIN || !::isfinite(static_cast<double>(x)))
    return static_cast<scalar_t>(-100.0);
  return x;
}

// Converts a normalized grid coordinate into a source pixel index:
// unnormalize, apply the padding rule, then sanitize the value so it can be
// cast to int.
//   Border     - clip to [0, size - 1]
//   Reflection - reflect about the image borders, then clip
//   otherwise  - no adjustment; callers bounds-check each sample
template <typename scalar_t>
static __forceinline__ __device__ scalar_t grid_sampler_compute_source_index(
    scalar_t coord, int size, GridSamplerPadding padding_mode,
    bool align_corners) {
  scalar_t pixel = grid_sampler_unnormalize(coord, size, align_corners);

  switch (padding_mode) {
    case GridSamplerPadding::Border:
      pixel = clip_coordinates(pixel, size);
      break;
    case GridSamplerPadding::Reflection:
      // The reflection bounds are passed doubled; they differ depending on
      // whether -1/+1 map to pixel centers or to image edges.
      pixel = align_corners
                  ? reflect_coordinates(pixel, 0, 2 * (size - 1))
                  : reflect_coordinates(pixel, -1, 2 * size - 1);
      pixel = clip_coordinates(pixel, size);
      break;
    default:
      break;
  }

  return safe_downgrade_to_int_range(pixel);
}

// True when (h, w) is a valid index into an H x W plane.
static __forceinline__ __device__ bool within_bounds_2d(int h, int w, int H,
                                                        int W) {
  const bool row_ok = (0 <= h) && (h < H);
  const bool col_ok = (0 <= w) && (w < W);
  return row_ok && col_ok;
}

// True when (d, h, w) is a valid index into a D x H x W volume.
static __forceinline__ __device__ bool within_bounds_3d(int d, int h, int w,
                                                        int D, int H, int W) {
  const bool depth_ok = (0 <= d) && (d < D);
  const bool row_ok = (0 <= h) && (h < H);
  const bool col_ok = (0 <= w) && (w < W);
  return depth_ok && row_ok && col_ok;
}

// 2-D grid-sample kernel. Each loop iteration handles one output location
// (n, h, w): it reads an (x, y) pair from `grid`, converts it to source pixel
// coordinates, and writes the sampled value for every channel to `output`.
// Shapes and strides come from the TensorDesc arguments; `nthreads` is the
// number of (n, h, w) locations to process.
template <typename scalar_t>
__global__ void grid_sampler_2d_kernel(
    const int nthreads, const scalar_t *input, const scalar_t *grid,
    scalar_t *output, TensorDesc input_desc, TensorDesc grid_desc,
    TensorDesc output_desc, const GridSamplerInterpolation interpolation_mode,
    const GridSamplerPadding padding_mode, bool align_corners) {
  // Input extents are read from dims 1..3 of `input_desc`; the output spatial
  // extents are read from dims 1..2 of `grid_desc`.
  int C = input_desc.shape[1];
  int inp_H = input_desc.shape[2];
  int inp_W = input_desc.shape[3];
  int out_H = grid_desc.shape[1];
  int out_W = grid_desc.shape[2];
  int inp_sN = input_desc.stride[0];
  int inp_sC = input_desc.stride[1];
  int inp_sH = input_desc.stride[2];
  int inp_sW = input_desc.stride[3];
  int grid_sN = grid_desc.stride[0];
  int grid_sH = grid_desc.stride[1];
  int grid_sW = grid_desc.stride[2];
  // Stride between the x and y entries of one grid location.
  int grid_sCoor = grid_desc.stride[3];
  int out_sN = output_desc.stride[0];
  int out_sC = output_desc.stride[1];
  int out_sH = output_desc.stride[2];
  int out_sW = output_desc.stride[3];

  CUDA_1D_KERNEL_LOOP(index, nthreads) {
    // Decompose the flat index into (n, h, w) with w varying fastest.
    const int w = index % out_W;
    const int h = (index / out_W) % out_H;
    const int n = index / (out_H * out_W);
    const int grid_offset = n * grid_sN + h * grid_sH + w * grid_sW;

    // get the corresponding input x, y coordinates from grid
    scalar_t ix = grid[grid_offset];
    scalar_t iy = grid[grid_offset + grid_sCoor];

    // x is scaled by the input width, y by the input height.
    ix = grid_sampler_compute_source_index(ix, inp_W, padding_mode,
                                           align_corners);
    iy = grid_sampler_compute_source_index(iy, inp_H, padding_mode,
                                           align_corners);

    if (interpolation_mode == GridSamplerInterpolation::Bilinear) {
      // get NE, NW, SE, SW pixel values from (x, y)
      int ix_nw = static_cast<int>(::floor(ix));
      int iy_nw = static_cast<int>(::floor(iy));
      int ix_ne = ix_nw + 1;
      int iy_ne = iy_nw;
      int ix_sw = ix_nw;
      int iy_sw = iy_nw + 1;
      int ix_se = ix_nw + 1;
      int iy_se = iy_nw + 1;

      // get surfaces to each neighbor:
      // (each weight is the area of the rectangle spanned by the sample point
      // and the diagonally opposite corner)
      scalar_t nw = (ix_se - ix) * (iy_se - iy);
      scalar_t ne = (ix - ix_sw) * (iy_sw - iy);
      scalar_t sw = (ix_ne - ix) * (iy - iy_ne);
      scalar_t se = (ix - ix_nw) * (iy - iy_nw);

      // calculate bilinear weighted pixel value and set output pixel
      auto inp_ptr_NC = input + n * inp_sN;
      auto out_ptr_NCHW = output + n * out_sN + h * out_sH + w * out_sW;
      for (int c = 0; c < C;
           ++c, inp_ptr_NC += inp_sC, out_ptr_NCHW += out_sC) {
        // Corners outside the input contribute nothing.
        *out_ptr_NCHW = static_cast<scalar_t>(0);
        if (within_bounds_2d(iy_nw, ix_nw, inp_H, inp_W)) {
          *out_ptr_NCHW += inp_ptr_NC[iy_nw * inp_sH + ix_nw * inp_sW] * nw;
        }
        if (within_bounds_2d(iy_ne, ix_ne, inp_H, inp_W)) {
          *out_ptr_NCHW += inp_ptr_NC[iy_ne * inp_sH + ix_ne * inp_sW] * ne;
        }
        if (within_bounds_2d(iy_sw, ix_sw, inp_H, inp_W)) {
          *out_ptr_NCHW += inp_ptr_NC[iy_sw * inp_sH + ix_sw * inp_sW] * sw;
        }
        if (within_bounds_2d(iy_se, ix_se, inp_H, inp_W)) {
          *out_ptr_NCHW += inp_ptr_NC[iy_se * inp_sH + ix_se * inp_sW] * se;
        }
      }
    } else if (interpolation_mode == GridSamplerInterpolation::Nearest) {
      int ix_nearest = static_cast<int>(::round(ix));
      int iy_nearest = static_cast<int>(::round(iy));

      // assign nearest neighbor pixel value to output pixel
      auto inp_ptr_NC = input + n * inp_sN;
      auto out_ptr_NCHW = output + n * out_sN + h * out_sH + w * out_sW;
      for (int c = 0; c < C;
           ++c, inp_ptr_NC += inp_sC, out_ptr_NCHW += out_sC) {
        if (within_bounds_2d(iy_nearest, ix_nearest, inp_H, inp_W)) {
          *out_ptr_NCHW = inp_ptr_NC[iy_nearest * inp_sH + ix_nearest * inp_sW];
        } else {
          // Out-of-range samples produce zero.
          *out_ptr_NCHW = static_cast<scalar_t>(0);
        }
      }
    }
  }
}

// 3-D grid sampler kernel for 5-D tensors.
//
// Each loop iteration handles one output location (n, d, h, w): it reads an
// (x, y, z) coordinate triple from `grid`, maps it to input index space with
// grid_sampler_compute_source_index, and writes one value per channel to
// `output`.
//
// nthreads:           number of output locations to process; the index is
//                     decomposed as n, d, h, w below.
// input/grid/output:  device pointers addressed through the strides stored in
//                     the matching TensorDesc.
// interpolation_mode: Bilinear (weighted sum of the 8 surrounding voxels) or
//                     Nearest; any other value leaves the output untouched.
// padding_mode, align_corners: forwarded unchanged to
//                     grid_sampler_compute_source_index.
template <typename scalar_t>
__global__ void grid_sampler_3d_kernel(
    const int nthreads, const scalar_t *input, const scalar_t *grid,
    scalar_t *output, TensorDesc input_desc, TensorDesc grid_desc,
    TensorDesc output_desc, const GridSamplerInterpolation interpolation_mode,
    const GridSamplerPadding padding_mode, bool align_corners) {
  // input is indexed as (N, C, D, H, W)
  int C = input_desc.shape[1];
  int inp_D = input_desc.shape[2];
  int inp_H = input_desc.shape[3];
  int inp_W = input_desc.shape[4];
  // grid is indexed as (N, out_D, out_H, out_W, coordinate)
  int out_D = grid_desc.shape[1];
  int out_H = grid_desc.shape[2];
  int out_W = grid_desc.shape[3];
  int inp_sN = input_desc.stride[0];
  int inp_sC = input_desc.stride[1];
  int inp_sD = input_desc.stride[2];
  int inp_sH = input_desc.stride[3];
  int inp_sW = input_desc.stride[4];
  int grid_sN = grid_desc.stride[0];
  int grid_sD = grid_desc.stride[1];
  int grid_sH = grid_desc.stride[2];
  int grid_sW = grid_desc.stride[3];
  // stride between the x, y and z components of one grid entry
  int grid_sCoor = grid_desc.stride[4];
  int out_sN = output_desc.stride[0];
  int out_sC = output_desc.stride[1];
  int out_sD = output_desc.stride[2];
  int out_sH = output_desc.stride[3];
  int out_sW = output_desc.stride[4];

  CUDA_1D_KERNEL_LOOP(index, nthreads) {
    // unravel the flat index into (n, d, h, w); w varies fastest
    const int w = index % out_W;
    const int h = (index / out_W) % out_H;
    const int d = (index / (out_H * out_W)) % out_D;
    const int n = index / (out_D * out_H * out_W);
    const int grid_offset =
        n * grid_sN + d * grid_sD + h * grid_sH + w * grid_sW;

    // get the corresponding input x, y, z coordinates from grid
    scalar_t ix = grid[grid_offset];
    scalar_t iy = grid[grid_offset + grid_sCoor];
    scalar_t iz = grid[grid_offset + 2 * grid_sCoor];

    // convert grid coordinates into (possibly fractional) input indices
    ix = grid_sampler_compute_source_index(ix, inp_W, padding_mode,
                                           align_corners);
    iy = grid_sampler_compute_source_index(iy, inp_H, padding_mode,
                                           align_corners);
    iz = grid_sampler_compute_source_index(iz, inp_D, padding_mode,
                                           align_corners);

    if (interpolation_mode == GridSamplerInterpolation::Bilinear) {
      // get corner pixel values from (x, y, z)
      // for 4d, we used north-east-south-west
      // for 5d, we add top-bottom
      // naming: t/b = z (floor / floor+1), n/s = y, w/e = x
      int ix_tnw = static_cast<int>(::floor(ix));
      int iy_tnw = static_cast<int>(::floor(iy));
      int iz_tnw = static_cast<int>(::floor(iz));

      int ix_tne = ix_tnw + 1;
      int iy_tne = iy_tnw;
      int iz_tne = iz_tnw;

      int ix_tsw = ix_tnw;
      int iy_tsw = iy_tnw + 1;
      int iz_tsw = iz_tnw;

      int ix_tse = ix_tnw + 1;
      int iy_tse = iy_tnw + 1;
      int iz_tse = iz_tnw;

      int ix_bnw = ix_tnw;
      int iy_bnw = iy_tnw;
      int iz_bnw = iz_tnw + 1;

      int ix_bne = ix_tnw + 1;
      int iy_bne = iy_tnw;
      int iz_bne = iz_tnw + 1;

      int ix_bsw = ix_tnw;
      int iy_bsw = iy_tnw + 1;
      int iz_bsw = iz_tnw + 1;

      int ix_bse = ix_tnw + 1;
      int iy_bse = iy_tnw + 1;
      int iz_bse = iz_tnw + 1;

      // get surfaces to each neighbor:
      // the weight of a corner is the volume of the box spanned by the sample
      // point and the diagonally opposite corner
      scalar_t tnw = (ix_bse - ix) * (iy_bse - iy) * (iz_bse - iz);
      scalar_t tne = (ix - ix_bsw) * (iy_bsw - iy) * (iz_bsw - iz);
      scalar_t tsw = (ix_bne - ix) * (iy - iy_bne) * (iz_bne - iz);
      scalar_t tse = (ix - ix_bnw) * (iy - iy_bnw) * (iz_bnw - iz);
      scalar_t bnw = (ix_tse - ix) * (iy_tse - iy) * (iz - iz_tse);
      scalar_t bne = (ix - ix_tsw) * (iy_tsw - iy) * (iz - iz_tsw);
      scalar_t bsw = (ix_tne - ix) * (iy - iy_tne) * (iz - iz_tne);
      scalar_t bse = (ix - ix_tnw) * (iy - iy_tnw) * (iz - iz_tnw);

      // both pointers advance by one channel stride per iteration
      auto inp_ptr_NC = input + n * inp_sN;
      auto out_ptr_NCDHW =
          output + n * out_sN + d * out_sD + h * out_sH + w * out_sW;
      for (int c = 0; c < C;
           ++c, inp_ptr_NC += inp_sC, out_ptr_NCDHW += out_sC) {
        //   (c, iz_tnw, iy_tnw, ix_tnw) * tnw + (c, iz_tne, iy_tne, ix_tne) *
        //   tne
        // + (c, iz_tsw, iy_tsw, ix_tsw) * tsw + (c, iz_tse, iy_tse, ix_tse) *
        // tse
        // + (c, iz_bnw, iy_bnw, ix_bnw) * bnw + (c, iz_bne, iy_bne, ix_bne) *
        // bne
        // + (c, iz_bsw, iy_bsw, ix_bsw) * bsw + (c, iz_bse, iy_bse, ix_bse) *
        // bse
        // corners that fail the bounds check are skipped, i.e. contribute 0
        *out_ptr_NCDHW = static_cast<scalar_t>(0);
        if (within_bounds_3d(iz_tnw, iy_tnw, ix_tnw, inp_D, inp_H, inp_W)) {
          *out_ptr_NCDHW +=
              inp_ptr_NC[iz_tnw * inp_sD + iy_tnw * inp_sH + ix_tnw * inp_sW] *
              tnw;
        }
        if (within_bounds_3d(iz_tne, iy_tne, ix_tne, inp_D, inp_H, inp_W)) {
          *out_ptr_NCDHW +=
              inp_ptr_NC[iz_tne * inp_sD + iy_tne * inp_sH + ix_tne * inp_sW] *
              tne;
        }
        if (within_bounds_3d(iz_tsw, iy_tsw, ix_tsw, inp_D, inp_H, inp_W)) {
          *out_ptr_NCDHW +=
              inp_ptr_NC[iz_tsw * inp_sD + iy_tsw * inp_sH + ix_tsw * inp_sW] *
              tsw;
        }
        if (within_bounds_3d(iz_tse, iy_tse, ix_tse, inp_D, inp_H, inp_W)) {
          *out_ptr_NCDHW +=
              inp_ptr_NC[iz_tse * inp_sD + iy_tse * inp_sH + ix_tse * inp_sW] *
              tse;
        }
        if (within_bounds_3d(iz_bnw, iy_bnw, ix_bnw, inp_D, inp_H, inp_W)) {
          *out_ptr_NCDHW +=
              inp_ptr_NC[iz_bnw * inp_sD + iy_bnw * inp_sH + ix_bnw * inp_sW] *
              bnw;
        }
        if (within_bounds_3d(iz_bne, iy_bne, ix_bne, inp_D, inp_H, inp_W)) {
          *out_ptr_NCDHW +=
              inp_ptr_NC[iz_bne * inp_sD + iy_bne * inp_sH + ix_bne * inp_sW] *
              bne;
        }
        if (within_bounds_3d(iz_bsw, iy_bsw, ix_bsw, inp_D, inp_H, inp_W)) {
          *out_ptr_NCDHW +=
              inp_ptr_NC[iz_bsw * inp_sD + iy_bsw * inp_sH + ix_bsw * inp_sW] *
              bsw;
        }
        if (within_bounds_3d(iz_bse, iy_bse, ix_bse, inp_D, inp_H, inp_W)) {
          *out_ptr_NCDHW +=
              inp_ptr_NC[iz_bse * inp_sD + iy_bse * inp_sH + ix_bse * inp_sW] *
              bse;
        }
      }
    } else if (interpolation_mode == GridSamplerInterpolation::Nearest) {
      // snap each coordinate to the closest integer index
      int ix_nearest = static_cast<int>(::round(ix));
      int iy_nearest = static_cast<int>(::round(iy));
      int iz_nearest = static_cast<int>(::round(iz));

      // assign nearest neighbor pixel value to output pixel
      auto inp_ptr_NC = input + n * inp_sN;
      auto out_ptr_NCDHW =
          output + n * out_sN + d * out_sD + h * out_sH + w * out_sW;
      for (int c = 0; c < C;
           ++c, inp_ptr_NC += inp_sC, out_ptr_NCDHW += out_sC) {
        if (within_bounds_3d(iz_nearest, iy_nearest, ix_nearest, inp_D, inp_H,
                             inp_W)) {
          *out_ptr_NCDHW =
              inp_ptr_NC[iz_nearest * inp_sD + iy_nearest * inp_sH +
                         ix_nearest * inp_sW];
        } else {
          // out-of-range sample: write zero
          *out_ptr_NCDHW = static_cast<scalar_t>(0);
        }
      }
    }
  }
}

// Fills `desc` with the given shape and the strides of a densely packed
// layout in which the last dimension is contiguous.
//
// dims:    array of `nb_dims` extents, copied into desc.shape.
// nb_dims: number of dimensions; expected to be at least 1.
// desc:    descriptor whose shape/stride entries [0, nb_dims) are written.
void create_desc(const int *dims, int nb_dims, TensorDesc &desc) {
  memcpy(&desc.shape[0], dims, sizeof(int) * nb_dims);

  // innermost dimension has unit stride; walk outwards from there
  int axis = nb_dims - 1;
  desc.stride[axis] = 1;
  while (axis-- > 0) {
    desc.stride[axis] = desc.stride[axis + 1] * desc.shape[axis + 1];
  }
}

// Host-side entry point: builds tensor descriptors for input, output and grid
// and launches the 2-D (nb_dims == 4) or 3-D (nb_dims == 5) sampler kernel on
// `stream`. Any other rank only prints a message and launches nothing.
//
// output/input/grid: device buffers passed through to the kernel.
// *_dims:            arrays of `nb_dims` extents for the respective tensor.
// interp, padding, align_corners: sampling options forwarded to the kernel.
template <typename T>
void grid_sample(T *output, const T *input, const T *grid, int *output_dims,
                 int *input_dims, int *grid_dims, int nb_dims,
                 GridSamplerInterpolation interp, GridSamplerPadding padding,
                 bool align_corners, cudaStream_t stream) {
  TensorDesc input_desc;
  TensorDesc output_desc;
  TensorDesc grid_desc;
  create_desc(input_dims, nb_dims, input_desc);
  create_desc(output_dims, nb_dims, output_desc);
  create_desc(grid_dims, nb_dims, grid_desc);

  // number of output locations: every output extent except the channel
  // dimension (index 1), which each kernel iteration loops over itself
  int count = 1;
  for (int i = 0; i < nb_dims; ++i) {
    if (i != 1) {
      count *= output_desc.shape[i];
    }
  }

  switch (nb_dims) {
    case 4:
      grid_sampler_2d_kernel<T>
          <<<GET_BLOCKS(count), THREADS_PER_BLOCK, 0, stream>>>(
              count, input, grid, output, input_desc, grid_desc, output_desc,
              interp, padding, align_corners);
      break;
    case 5:
      grid_sampler_3d_kernel<T>
          <<<GET_BLOCKS(count), THREADS_PER_BLOCK, 0, stream>>>(
              count, input, grid, output, input_desc, grid_desc, output_desc,
              interp, padding, align_corners);
      break;
    default:
      printf("input and grid dims should be 4 or 5\n");
      break;
  }
}

// Non-template float entry point that forwards every argument to
// grid_sample; the element type is deduced from the float pointers.
void grid_sample_float(float *output, const float *input, const float *grid,
                       int *output_dims, int *input_dims, int *grid_dims,
                       int nb_dims, GridSamplerInterpolation interp,
                       GridSamplerPadding padding, bool align_corners,
                       cudaStream_t stream) {
  grid_sample(output, input, grid, output_dims, input_dims, grid_dims, nb_dims,
              interp, padding, align_corners, stream);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/tensorrt/plugins/trt_instance_norm.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
// Modified from:
// https://github.com/NVIDIA/TensorRT/blob/master/plugin/instanceNormalizationPlugin/instanceNormalizationPlugin.cpp

#include "trt_instance_norm.hpp"

#include <cuda_fp16.h>

#include <stdexcept>

#include "trt_serialize.hpp"

using namespace nvinfer1;

// Maps a TensorRT data type onto the matching cuDNN data type.
//
// On success writes the result to *cudnn_dtype and returns
// CUDNN_STATUS_SUCCESS. Types other than kFLOAT and kHALF leave *cudnn_dtype
// untouched and return CUDNN_STATUS_BAD_PARAM.
cudnnStatus_t convert_trt2cudnn_dtype(nvinfer1::DataType trt_dtype,
                                      cudnnDataType_t* cudnn_dtype) {
  if (trt_dtype == nvinfer1::DataType::kFLOAT) {
    *cudnn_dtype = CUDNN_DATA_FLOAT;
    return CUDNN_STATUS_SUCCESS;
  }
  if (trt_dtype == nvinfer1::DataType::kHALF) {
    *cudnn_dtype = CUDNN_DATA_HALF;
    return CUDNN_STATUS_SUCCESS;
  }
  return CUDNN_STATUS_BAD_PARAM;
}

namespace {
// Version and type strings returned by both the plugin and its creator.
constexpr const char* PLUGIN_VERSION{"1"};
constexpr const char* PLUGIN_NAME{"MMCVInstanceNormalization"};
}  // namespace

// Definitions of the creator's static members; they are filled in by the
// creator's constructor.
PluginFieldCollection InstanceNormalizationDynamicCreator::mFC{};
std::vector<PluginField> InstanceNormalizationDynamicCreator::mPluginAttributes;

InstanceNormalizationDynamic::InstanceNormalizationDynamic(
    const std::string& name, float epsilon)
    : mLayerName(name), mEpsilon(epsilon) {}

// Recreates a plugin from a serialized buffer. Epsilon is the only value read
// back, mirroring what serialize() writes.
InstanceNormalizationDynamic::InstanceNormalizationDynamic(
    const std::string& name, void const* serialData, size_t serialLength)
    : mLayerName(name) {
  deserialize_value(&serialData, &serialLength, &mEpsilon);
}

// Nothing is released here; the cuDNN descriptors are destroyed in
// detachFromContext().
InstanceNormalizationDynamic::~InstanceNormalizationDynamic() {}

// The plugin produces exactly one output tensor.
int InstanceNormalizationDynamic::getNbOutputs() const { return 1; }

// The output has exactly the same dimensions as the first input; the other
// arguments are not used.
DimsExprs InstanceNormalizationDynamic::getOutputDimensions(
    int outputIndex, const nvinfer1::DimsExprs* inputs, int nbInputs,
    nvinfer1::IExprBuilder& exprBuilder) {
  return nvinfer1::DimsExprs(inputs[0]);
}

// No initialization work is needed; always returns 0.
int InstanceNormalizationDynamic::initialize() { return 0; }

// Counterpart of initialize(); there is nothing to tear down.
void InstanceNormalizationDynamic::terminate() {}

// Scratch memory needed by enqueue(): two aligned buffers, each holding
// N * C elements of the type of inputs[1] (one for scales, one for bias).
size_t InstanceNormalizationDynamic::getWorkspaceSize(
    const nvinfer1::PluginTensorDesc* inputs, int nbInputs,
    const nvinfer1::PluginTensorDesc* outputs, int nbOutputs) const {
  const nvinfer1::Dims& in_dims = inputs[0].dims;
  const int batch = in_dims.d[0];
  const int channels = in_dims.d[1];
  const int bytes_per_elem = mmcv::getElementSize(inputs[1].type);

  const auto one_buffer =
      mmcv::getAlignedSize(batch * channels * bytes_per_elem);
  return one_buffer * 2;
}

// Runs instance normalization on `stream`.
//
// inputs[0] is the data tensor, inputs[1] the per-channel scale and inputs[2]
// the per-channel bias. The data is described to cuDNN as a
// 1 x (N*C) x H x W tensor and passed to spatial batch normalization, so each
// (sample, channel) plane gets its own statistics. Scale and bias are
// replicated N times into `workspace` to provide N*C entries.
//
// Returns 0 on success and 1 if the input data type is unsupported or any
// CUDA / cuDNN call reports an error.
int InstanceNormalizationDynamic::enqueue(
    const nvinfer1::PluginTensorDesc* inputDesc,
    const nvinfer1::PluginTensorDesc* outputDesc, const void* const* inputs,
    void* const* outputs, void* workspace, cudaStream_t stream) {
  const nvinfer1::Dims& input_dims = inputDesc[0].dims;
  const int n = input_dims.d[0];
  const int c = input_dims.d[1];
  const int h = input_dims.d[2];
  // inputs with only three dimensions are treated as having width 1
  const int w = input_dims.nbDims > 3 ? input_dims.d[3] : 1;
  const int elem_size = mmcv::getElementSize(inputDesc[1].type);

  // Workspace layout matches getWorkspaceSize(): [scales | bias], each region
  // aligned. Byte pointers are used because arithmetic on void* is a compiler
  // extension rather than standard C++.
  char* n_scales = static_cast<char*>(workspace);
  char* n_bias = n_scales + mmcv::getAlignedSize(n * c * elem_size);

  const void* scales = inputs[1];
  const void* bias = inputs[2];

  // replicate the C scale/bias values once per sample
  const size_t channel_bytes = static_cast<size_t>(c) * elem_size;
  for (int i = 0; i < n; ++i) {
    if (cudaMemcpyAsync(n_scales + i * channel_bytes, scales, channel_bytes,
                        cudaMemcpyDeviceToDevice, stream) != cudaSuccess) {
      return 1;
    }
    if (cudaMemcpyAsync(n_bias + i * channel_bytes, bias, channel_bytes,
                        cudaMemcpyDeviceToDevice, stream) != cudaSuccess) {
      return 1;
    }
  }

  cudnnDataType_t cudnn_dtype{};
  if (convert_trt2cudnn_dtype(inputDesc[0].type, &cudnn_dtype) !=
      CUDNN_STATUS_SUCCESS) {
    return 1;
  }

  // NOTE(review): the scale/bias descriptor is fixed to CUDNN_DATA_FLOAT while
  // the copies above use the element size of inputs[1]; confirm this is valid
  // when inputs[1] is kHALF.
  if (cudnnSetTensor4dDescriptor(_b_desc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT,
                                 1, n * c, 1, 1) != CUDNN_STATUS_SUCCESS ||
      cudnnSetTensor4dDescriptor(_x_desc, CUDNN_TENSOR_NCHW, cudnn_dtype, 1,
                                 n * c, h, w) != CUDNN_STATUS_SUCCESS ||
      cudnnSetTensor4dDescriptor(_y_desc, CUDNN_TENSOR_NCHW, cudnn_dtype, 1,
                                 n * c, h, w) != CUDNN_STATUS_SUCCESS) {
    return 1;
  }

  float alpha = 1;
  float beta = 0;
  void const* x_ptr = inputs[0];
  void* y_ptr = outputs[0];
  if (cudnnSetStream(_cudnn_handle, stream) != CUDNN_STATUS_SUCCESS) {
    return 1;
  }
  // Note: Use of CUDNN_BATCHNORM_SPATIAL_PERSISTENT can cause numerical
  //       overflows (NaNs) for fp32 data in some circumstances. The lower-
  //       performance CUDNN_BATCHNORM_SPATIAL should be used if this is not
  //       acceptable.
  const cudnnStatus_t status = cudnnBatchNormalizationForwardTraining(
      _cudnn_handle, CUDNN_BATCHNORM_SPATIAL_PERSISTENT, &alpha, &beta, _x_desc,
      x_ptr, _y_desc, y_ptr, _b_desc, n_scales, n_bias, 1., nullptr, nullptr,
      mEpsilon, nullptr, nullptr);
  return status == CUDNN_STATUS_SUCCESS ? 0 : 1;
}

// Size of the serialized plugin state; epsilon is the only serialized value.
size_t InstanceNormalizationDynamic::getSerializationSize() const {
  return serialized_size(mEpsilon);
}

// Writes epsilon into `buffer`; the deserializing constructor reads it back.
void InstanceNormalizationDynamic::serialize(void* buffer) const {
  serialize_value(&buffer, mEpsilon);
}

// A tensor at position `pos` is accepted when it is linear-format, has the
// same data type as tensor 0, and that type is float or half.
bool InstanceNormalizationDynamic::supportsFormatCombination(
    int pos, const nvinfer1::PluginTensorDesc* inOut, int nbInputs,
    int nbOutputs) {
  const nvinfer1::PluginTensorDesc& desc = inOut[pos];

  const bool type_ok = desc.type == nvinfer1::DataType::kFLOAT ||
                       desc.type == nvinfer1::DataType::kHALF;
  if (!type_ok) {
    return false;
  }
  if (desc.format != nvinfer1::PluginFormat::kLINEAR) {
    return false;
  }
  return desc.type == inOut[0].type;
}

// Plugin type string; identical to the name reported by the creator.
const char* InstanceNormalizationDynamic::getPluginType() const {
  return PLUGIN_NAME;
}

// Plugin version string; identical to the version reported by the creator.
const char* InstanceNormalizationDynamic::getPluginVersion() const {
  return PLUGIN_VERSION;
}

// Deletes this object; the instance must not be used afterwards.
void InstanceNormalizationDynamic::destroy() { delete this; }

// Returns a heap-allocated copy carrying the same layer name, epsilon and
// plugin namespace. The caller owns the returned object.
IPluginV2DynamicExt* InstanceNormalizationDynamic::clone() const {
  InstanceNormalizationDynamic* copy =
      new InstanceNormalizationDynamic(mLayerName, mEpsilon);
  copy->setPluginNamespace(mPluginNamespace.c_str());
  return copy;
}

// Stores a copy of the namespace string this plugin belongs to.
void InstanceNormalizationDynamic::setPluginNamespace(
    const char* pluginNamespace) {
  mPluginNamespace = pluginNamespace;
}

// Returns the stored namespace; the pointer refers to the member string's
// internal buffer.
const char* InstanceNormalizationDynamic::getPluginNamespace() const {
  return mPluginNamespace.c_str();
}

// The output uses the data type of the first input, whatever `index` is.
nvinfer1::DataType InstanceNormalizationDynamic::getOutputDataType(
    int index, const nvinfer1::DataType* inputTypes, int nbInputs) const {
  return inputTypes[0];
}

// Attach the plugin object to an execution context and grant the plugin the
// access to some context resource.
// Only the cuDNN handle is kept; the cuBLAS context and allocator are unused.
// Also creates the three tensor descriptors that enqueue() configures. The
// status returned by cudnnCreateTensorDescriptor is not checked.
void InstanceNormalizationDynamic::attachToContext(
    cudnnContext* cudnnContext, cublasContext* cublasContext,
    IGpuAllocator* gpuAllocator) {
  _cudnn_handle = cudnnContext;
  cudnnCreateTensorDescriptor(&_b_desc);
  cudnnCreateTensorDescriptor(&_x_desc);
  cudnnCreateTensorDescriptor(&_y_desc);
}

// Detach the plugin object from its execution context.
// Destroys the descriptors created in attachToContext(), in reverse order of
// creation.
void InstanceNormalizationDynamic::detachFromContext() {
  cudnnDestroyTensorDescriptor(_y_desc);
  cudnnDestroyTensorDescriptor(_x_desc);
  cudnnDestroyTensorDescriptor(_b_desc);
}

// Intentionally empty: no state is derived from the tensor descriptions here;
// enqueue() reads the shapes from its own arguments.
void InstanceNormalizationDynamic::configurePlugin(
    const nvinfer1::DynamicPluginTensorDesc* in, int nbInputs,
    const nvinfer1::DynamicPluginTensorDesc* out, int nbOutputs) {}

// InstanceNormalizationDynamicCreator methods
// Registers the single attribute understood by createPlugin(): a float32
// "epsilon". The static attribute list is cleared first so constructing the
// creator again does not add duplicate entries.
InstanceNormalizationDynamicCreator::InstanceNormalizationDynamicCreator() {
  mPluginAttributes.clear();
  mPluginAttributes.emplace_back(
      PluginField("epsilon", nullptr, PluginFieldType::kFLOAT32, 1));

  // mFC points into the vector's storage
  mFC.nbFields = mPluginAttributes.size();
  mFC.fields = mPluginAttributes.data();
}

// Name of the plugin type this creator builds.
const char* InstanceNormalizationDynamicCreator::getPluginName() const {
  return PLUGIN_NAME;
}

// Version of the plugin type this creator builds.
const char* InstanceNormalizationDynamicCreator::getPluginVersion() const {
  return PLUGIN_VERSION;
}

// Returns the static field collection filled in by the constructor.
const PluginFieldCollection*
InstanceNormalizationDynamicCreator::getFieldNames() {
  return &mFC;
}

// Builds a plugin from a field collection.
//
// name: layer name given to the new plugin.
// fc:   attribute list; only "epsilon" (a single float) is recognised.
//       Entries without a name or without data are skipped, so epsilon keeps
//       its default of 1e-5 in that case.
//
// Returns a heap-allocated plugin tagged with this creator's namespace.
IPluginV2DynamicExt* InstanceNormalizationDynamicCreator::createPlugin(
    const char* name, const nvinfer1::PluginFieldCollection* fc) {
  float epsilon = 1e-5;
  const PluginField* fields = fc->fields;
  for (int i = 0; i < fc->nbFields; ++i) {
    // The creator itself advertises "epsilon" with a null data pointer, so a
    // field may legitimately arrive without a payload; never dereference it.
    if (fields[i].name == nullptr || fields[i].data == nullptr) {
      continue;
    }
    const char* attrName = fields[i].name;
    if (!strcmp(attrName, "epsilon")) {
      epsilon = *(static_cast<const float*>(fields[i].data));
    }
  }

  InstanceNormalizationDynamic* obj =
      new InstanceNormalizationDynamic(name, epsilon);
  obj->setPluginNamespace(mNamespace.c_str());
  return obj;
}

// Recreates a plugin from serialized data via the deserializing constructor
// and tags it with this creator's namespace.
IPluginV2DynamicExt* InstanceNormalizationDynamicCreator::deserializePlugin(
    const char* name, const void* serialData, size_t serialLength) {
  InstanceNormalizationDynamic* obj =
      new InstanceNormalizationDynamic{name, serialData, serialLength};
  obj->setPluginNamespace(mNamespace.c_str());
  return obj;
}

// Stores the namespace that is applied to every plugin this creator makes.
void InstanceNormalizationDynamicCreator::setPluginNamespace(
    const char* libNamespace) {
  mNamespace = libNamespace;
}

// Returns the namespace previously stored with setPluginNamespace().
const char* InstanceNormalizationDynamicCreator::getPluginNamespace() const {
  return mNamespace.c_str();
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/tensorrt/plugins/trt_modulated_deform_conv.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "trt_modulated_deform_conv.hpp"

#include <assert.h>

#include <chrono>

#include "trt_serialize.hpp"

// Float forward launcher for modulated deformable convolution, defined in
// another translation unit. Note the argument order: width-related values
// come before height-related ones (kernel_w, kernel_h, stride_w, stride_h,
// ...). `bias` may be null; enqueue() passes nullptr when the layer has no
// bias input.
void ModulatedDeformConvForwardCUDAKernelLauncher_float(
    const float *input, const float *weight, const float *bias,
    const float *offset, const float *mask, float *output, void *workspace,
    int batch, int channels, int height, int width, int channels_out,
    int kernel_w, int kernel_h, int stride_w, int stride_h, int pad_w,
    int pad_h, int dilation_w, int dilation_h, int group, int deformable_group,
    int im2col_step, cublasHandle_t cublas_handle, cudaStream_t stream);

namespace {
// Version and type strings returned by both the plugin and its creator.
static const char *PLUGIN_VERSION{"1"};
static const char *PLUGIN_NAME{"MMCVModulatedDeformConv2d"};
}  // namespace

// Definitions of the creator's static members; they are filled in by the
// creator's constructor.
nvinfer1::PluginFieldCollection
    ModulatedDeformableConvPluginDynamicCreator::mFC{};
std::vector<nvinfer1::PluginField>
    ModulatedDeformableConvPluginDynamicCreator::mPluginAttributes;

// Creates a plugin from explicit convolution hyper-parameters.
// mWithBias starts out false; configurePlugin() turns it on when the layer
// has five inputs.
ModulatedDeformableConvPluginDynamic::ModulatedDeformableConvPluginDynamic(
    const std::string &name, const nvinfer1::Dims stride,
    const nvinfer1::Dims padding, const nvinfer1::Dims dilation,
    const int deformableGroup, const int group)
    : mLayerName(name),
      mStride(stride),
      mPadding(padding),
      mDilation(dilation),
      mDeformableGroup(deformableGroup),
      mGroup(group) {
  mWithBias = false;
}

// Recreates a plugin from a serialized buffer. The members are read in the
// same order serialize() writes them. mWithBias is not part of the serialized
// state and starts out false.
ModulatedDeformableConvPluginDynamic::ModulatedDeformableConvPluginDynamic(
    const std::string name, const void *data, size_t length)
    : mLayerName(name) {
  deserialize_value(&data, &length, &mStride);
  deserialize_value(&data, &length, &mPadding);
  deserialize_value(&data, &length, &mDilation);
  deserialize_value(&data, &length, &mDeformableGroup);
  deserialize_value(&data, &length, &mGroup);
  mWithBias = false;
}
// Nothing to release; the destructor body is empty.
ModulatedDeformableConvPluginDynamic::~ModulatedDeformableConvPluginDynamic() {}

// Returns a heap-allocated copy of this plugin, including its namespace and
// the bias flag set by configurePlugin(). The caller owns the result.
nvinfer1::IPluginV2DynamicExt *ModulatedDeformableConvPluginDynamic::clone()
    const {
  ModulatedDeformableConvPluginDynamic *plugin =
      new ModulatedDeformableConvPluginDynamic(
          mLayerName, mStride, mPadding, mDilation, mDeformableGroup, mGroup);
  plugin->setPluginNamespace(getPluginNamespace());
  // The constructor resets mWithBias to false; carry the configured value
  // over so that a clone of a configured plugin is itself configured.
  plugin->mWithBias = mWithBias;

  return plugin;
}

// Output shape is 4-D: batch from inputs[0], channels from the first
// dimension of inputs[3] (the weight in enqueue()), and the two spatial
// dimensions from inputs[1] (the offset in enqueue()).
nvinfer1::DimsExprs ModulatedDeformableConvPluginDynamic::getOutputDimensions(
    int outputIndex, const nvinfer1::DimsExprs *inputs, int nbInputs,
    nvinfer1::IExprBuilder &exprBuilder) {
  const nvinfer1::DimsExprs &data_dims = inputs[0];
  const nvinfer1::DimsExprs &offset_dims = inputs[1];
  const nvinfer1::DimsExprs &weight_dims = inputs[3];

  nvinfer1::DimsExprs out_dims;
  out_dims.nbDims = 4;
  out_dims.d[0] = data_dims.d[0];
  out_dims.d[1] = weight_dims.d[0];
  out_dims.d[2] = offset_dims.d[2];
  out_dims.d[3] = offset_dims.d[3];
  return out_dims;
}

// Tensor 0 must be linear-format float; every other tensor must have the
// same type and format as tensor 0.
bool ModulatedDeformableConvPluginDynamic::supportsFormatCombination(
    int pos, const nvinfer1::PluginTensorDesc *inOut, int nbInputs,
    int nbOutputs) {
  const nvinfer1::PluginTensorDesc &current = inOut[pos];

  if (pos != 0) {
    const nvinfer1::PluginTensorDesc &first = inOut[0];
    return current.type == first.type && current.format == first.format;
  }

  return current.type == nvinfer1::DataType::kFLOAT &&
         current.format == nvinfer1::TensorFormat::kLINEAR;
}

// Records that a bias tensor is present when the layer has five inputs. The
// flag is only ever switched on here, never cleared.
void ModulatedDeformableConvPluginDynamic::configurePlugin(
    const nvinfer1::DynamicPluginTensorDesc *inputs, int nbInputs,
    const nvinfer1::DynamicPluginTensorDesc *outputs, int nbOutputs) {
  const bool has_bias_input = (nbInputs == 5);
  mWithBias = mWithBias || has_bias_input;
}

// Scratch memory needed by enqueue(): one aligned column buffer of
// C_in * kH * kW * H_out * W_out elements of the output data type.
//
// inputs[0] is the data tensor and inputs[3] the weight tensor, whose
// dimensions 2 and 3 are read as kernel height and width (same convention as
// enqueue()). The product is computed in size_t so large feature maps do not
// overflow int.
size_t ModulatedDeformableConvPluginDynamic::getWorkspaceSize(
    const nvinfer1::PluginTensorDesc *inputs, int nbInputs,
    const nvinfer1::PluginTensorDesc *outputs, int nbOutputs) const {
  const size_t sizeof_dtype =
      static_cast<size_t>(mmcv::getElementSize(outputs[0].type));

  const size_t nInputPlane = static_cast<size_t>(inputs[0].dims.d[1]);

  const size_t outputHeight = static_cast<size_t>(outputs[0].dims.d[2]);
  const size_t outputWidth = static_cast<size_t>(outputs[0].dims.d[3]);

  const size_t kH = static_cast<size_t>(inputs[3].dims.d[2]);
  const size_t kW = static_cast<size_t>(inputs[3].dims.d[3]);

  size_t col_size = mmcv::getAlignedSize(nInputPlane * kW * kH * outputHeight *
                                         outputWidth * sizeof_dtype);

  return col_size;
}

// Runs the modulated deformable convolution on `stream`.
//
// Input order: data, offset, mask, weight and, when mWithBias is set, bias.
// Only float data is supported; any other input type returns 1 without
// launching anything. Returns 0 after the float launcher has been called.
int ModulatedDeformableConvPluginDynamic::enqueue(
    const nvinfer1::PluginTensorDesc *inputDesc,
    const nvinfer1::PluginTensorDesc *outputDesc, const void *const *inputs,
    void *const *outputs, void *workSpace, cudaStream_t stream) {
  // TODO: add fp16 support
  if (inputDesc[0].type != nvinfer1::DataType::kFLOAT) {
    return 1;
  }

  const nvinfer1::Dims &in_dims = inputDesc[0].dims;
  const nvinfer1::Dims &weight_dims = inputDesc[3].dims;

  const int batch = in_dims.d[0];
  const int channels = in_dims.d[1];
  const int height = in_dims.d[2];
  const int width = in_dims.d[3];
  const int channels_out = outputDesc[0].dims.d[1];
  const int kernel_h = weight_dims.d[2];
  const int kernel_w = weight_dims.d[3];
  const int im2col_step = std::min(batch, 32);

  const float *x = static_cast<const float *>(inputs[0]);
  const float *offset = static_cast<const float *>(inputs[1]);
  const float *mask = static_cast<const float *>(inputs[2]);
  const float *weight = static_cast<const float *>(inputs[3]);
  const float *bias =
      static_cast<const float *>(mWithBias ? inputs[4] : nullptr);
  float *output = static_cast<float *>(outputs[0]);

  // NOTE(review): d[0] of stride/padding/dilation is passed in the launcher's
  // *_w slot and d[1] in the *_h slot; confirm this matches the order in which
  // the attributes are stored when the values are not symmetric.
  ModulatedDeformConvForwardCUDAKernelLauncher_float(
      x, weight, bias, offset, mask, output, workSpace, batch, channels,
      height, width, channels_out, kernel_w, kernel_h, mStride.d[0],
      mStride.d[1], mPadding.d[0], mPadding.d[1], mDilation.d[0],
      mDilation.d[1], mGroup, mDeformableGroup, im2col_step, m_cublas_handle,
      stream);

  return 0;
}

// The output uses the data type of the first input, whatever `index` is.
nvinfer1::DataType ModulatedDeformableConvPluginDynamic::getOutputDataType(
    int index, const nvinfer1::DataType *inputTypes, int nbInputs) const {
  return inputTypes[0];
}

// IPluginV2 Methods

// Plugin type string; identical to the name reported by the creator.
const char *ModulatedDeformableConvPluginDynamic::getPluginType() const {
  return PLUGIN_NAME;
}

// Plugin version string; identical to the version reported by the creator.
const char *ModulatedDeformableConvPluginDynamic::getPluginVersion() const {
  return PLUGIN_VERSION;
}

// The plugin produces exactly one output tensor.
int ModulatedDeformableConvPluginDynamic::getNbOutputs() const { return 1; }

// No initialization work is needed; always returns 0.
int ModulatedDeformableConvPluginDynamic::initialize() { return 0; }

// Counterpart of initialize(); there is nothing to tear down.
void ModulatedDeformableConvPluginDynamic::terminate() {}

// Size of the serialized state: the summed sizes of the five members that
// serialize() writes. mWithBias and the layer name are not included.
// NOTE(review): assumes serialize_value emits exactly sizeof(T) bytes per
// member; confirm against trt_serialize.hpp.
size_t ModulatedDeformableConvPluginDynamic::getSerializationSize() const {
  return sizeof(mStride) + sizeof(mPadding) + sizeof(mDilation) +
         sizeof(mDeformableGroup) + sizeof(mGroup);
}

// Writes the hyper-parameters into `buffer`. The order here must stay in sync
// with the reads in the deserializing constructor.
void ModulatedDeformableConvPluginDynamic::serialize(void *buffer) const {
  serialize_value(&buffer, mStride);
  serialize_value(&buffer, mPadding);
  serialize_value(&buffer, mDilation);
  serialize_value(&buffer, mDeformableGroup);
  serialize_value(&buffer, mGroup);
}

// Deletes this object; the instance must not be used afterwards.
void ModulatedDeformableConvPluginDynamic::destroy() {
  // This gets called when the network containing plugin is destroyed
  delete this;
}

// Keeps the cuBLAS handle supplied with the execution context so enqueue()
// can pass it to the launcher. The cuDNN context and allocator are unused.
void ModulatedDeformableConvPluginDynamic::attachToContext(
    cudnnContext *cudnnContext, cublasContext *cublasContext,
    nvinfer1::IGpuAllocator *gpuAllocator) {
  m_cublas_handle = cublasContext;
}

// Nothing to release: attachToContext() only stores a handle it does not own.
void ModulatedDeformableConvPluginDynamic::detachFromContext() {}

// Stores a copy of the namespace string this plugin belongs to.
void ModulatedDeformableConvPluginDynamic::setPluginNamespace(
    const char *libNamespace) {
  mNamespace = libNamespace;
}

// Returns the stored namespace; the pointer refers to the member string's
// internal buffer.
const char *ModulatedDeformableConvPluginDynamic::getPluginNamespace() const {
  return mNamespace.c_str();
}

////////////////////// creator /////////////////////////////

// Registers the attribute names this creator advertises through
// getFieldNames(). The attribute list is a static member shared by all
// creator instances, so it is cleared first; otherwise every additional
// creator object would append a duplicate set of fields.
ModulatedDeformableConvPluginDynamicCreator::
    ModulatedDeformableConvPluginDynamicCreator() {
  mPluginAttributes.clear();
  mPluginAttributes.emplace_back(nvinfer1::PluginField("stride"));
  mPluginAttributes.emplace_back(nvinfer1::PluginField("padding"));
  mPluginAttributes.emplace_back(nvinfer1::PluginField("dilation"));
  mPluginAttributes.emplace_back(nvinfer1::PluginField("groups"));
  mPluginAttributes.emplace_back(nvinfer1::PluginField("deform_groups"));
  // mFC points into the vector's storage
  mFC.nbFields = mPluginAttributes.size();
  mFC.fields = mPluginAttributes.data();
}

// Name of the plugin type this creator builds.
const char *ModulatedDeformableConvPluginDynamicCreator::getPluginName() const {
  return PLUGIN_NAME;
}

// Version of the plugin type this creator builds.
const char *ModulatedDeformableConvPluginDynamicCreator::getPluginVersion()
    const {
  return PLUGIN_VERSION;
}

// Returns the static field collection filled in by the constructor.
const nvinfer1::PluginFieldCollection *
ModulatedDeformableConvPluginDynamicCreator::getFieldNames() {
  return &mFC;
}

// Builds a plugin from a field collection.
//
// name: layer name given to the new plugin.
// fc:   attribute list. Recognised integer attributes:
//         "stride", "padding", "dilation"    - two values each
//         "groups" (or "group")              - one value
//         "deform_groups" (or "deformable_group") - one value
//       "groups" and "deform_groups" are the names this creator advertises in
//       its constructor; the older spellings are still accepted so existing
//       callers keep working. Entries without a name or data are skipped and
//       leave the default in place (stride 1, padding 0, dilation 1, groups 1).
//
// Returns a heap-allocated plugin tagged with this creator's namespace.
nvinfer1::IPluginV2 *ModulatedDeformableConvPluginDynamicCreator::createPlugin(
    const char *name, const nvinfer1::PluginFieldCollection *fc) {
  nvinfer1::Dims stride{2, {1, 1}};
  nvinfer1::Dims padding{2, {0, 0}};
  nvinfer1::Dims dilation{2, {1, 1}};
  int deformableGroup = 1;
  int group = 1;

  for (int i = 0; i < fc->nbFields; i++) {
    const nvinfer1::PluginField &field = fc->fields[i];
    if (field.name == nullptr || field.data == nullptr) {
      continue;
    }
    const std::string field_name(field.name);
    const int *values = static_cast<const int *>(field.data);

    if (field_name == "deform_groups" || field_name == "deformable_group") {
      deformableGroup = values[0];
    } else if (field_name == "groups" || field_name == "group") {
      group = values[0];
    } else if (field_name == "stride") {
      stride.nbDims = 2;
      stride.d[0] = values[0];
      stride.d[1] = values[1];
    } else if (field_name == "padding") {
      padding.nbDims = 2;
      padding.d[0] = values[0];
      padding.d[1] = values[1];
    } else if (field_name == "dilation") {
      dilation.nbDims = 2;
      dilation.d[0] = values[0];
      dilation.d[1] = values[1];
    }
  }

  ModulatedDeformableConvPluginDynamic *plugin =
      new ModulatedDeformableConvPluginDynamic(name, stride, padding, dilation,
                                               deformableGroup, group);
  plugin->setPluginNamespace(getPluginNamespace());
  return plugin;
}

// Recreates a plugin from serialized data via the deserializing constructor
// and tags it with this creator's namespace.
nvinfer1::IPluginV2 *
ModulatedDeformableConvPluginDynamicCreator::deserializePlugin(
    const char *name, const void *serialData, size_t serialLength) {
  auto plugin =
      new ModulatedDeformableConvPluginDynamic(name, serialData, serialLength);
  plugin->setPluginNamespace(getPluginNamespace());
  return plugin;
}

// Stores the namespace that is applied to every plugin this creator makes.
void ModulatedDeformableConvPluginDynamicCreator::setPluginNamespace(
    const char *libNamespace) {
  mNamespace = libNamespace;
}

// Returns the namespace previously stored with setPluginNamespace().
const char *ModulatedDeformableConvPluginDynamicCreator::getPluginNamespace()
    const {
  return mNamespace.c_str();
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/tensorrt/plugins/trt_modulated_deform_conv_kernel.cu
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include <assert.h>
#include <cuda_fp16.h>

#include "common_cuda_helper.hpp"
#include "modulated_deform_conv_cuda_kernel.cuh"
#include "trt_cuda_helper.cuh"
#include "trt_plugin_helper.hpp"

// Launches modulated_deformable_im2col_gpu_kernel on `stream`, using one
// work item per (channel, batch, output row, output column), and then runs
// cudaCheckError().
//
// data_im_/data_offset_/data_mask_: device inputs passed to the kernel.
// data_col_:                        device output (column buffer).
// The remaining integer arguments describe the image, kernel and output
// geometry and are forwarded to the kernel.
template <typename T>
void trt_modulated_deformable_im2col(
    const T* data_im_, const T* data_offset_, const T* data_mask_,
    const int batch_size, const int channels, const int height_im,
    const int width_im, const int height_col, const int width_col,
    const int kernel_h, const int kenerl_w, const int pad_h, const int pad_w,
    const int stride_h, const int stride_w, const int dilation_h,
    const int dilation_w, const int deformable_group, T* data_col_,
    cudaStream_t stream) {
  const int total_work_items = channels * batch_size * height_col * width_col;
  // channels handled by each deformable group
  const int channels_per_dgroup = channels / deformable_group;

  modulated_deformable_im2col_gpu_kernel<T>
      <<<GET_BLOCKS(total_work_items), THREADS_PER_BLOCK, 0, stream>>>(
          total_work_items, data_im_, data_offset_, data_mask_, height_im,
          width_im, kernel_h, kenerl_w, pad_h, pad_w, stride_h, stride_w,
          dilation_h, dilation_w, channels_per_dgroup, batch_size, channels,
          deformable_group, height_col, width_col, data_col_);

  cudaCheckError();
}

// Adds a per-channel bias to `output` in place.
//
// step_batch:   number of elements in one batch item.
// step_channel: number of elements in one channel plane.
// n:            total number of elements to process.
// The channel of element `index` is (index % step_batch) / step_channel.
template <typename scalar_t>
__global__ void output_add_bias_kernel(scalar_t* output, const scalar_t* bias,
                                       size_t step_batch, size_t step_channel,
                                       size_t n) {
  CUDA_1D_KERNEL_LOOP(index, n) {
    const size_t channel_idx = (index % step_batch) / step_channel;
    output[index] += bias[channel_idx];
  }
}

// Host wrapper that launches output_add_bias_kernel on `stream` over all
// batch * channel * height * width elements of `output`.
template <typename scalar_t>
static void output_add_bias(scalar_t* output, const scalar_t* bias,
                            size_t batch, size_t channel, size_t height,
                            size_t width, cudaStream_t stream) {
  const size_t plane_elems = height * width;
  const size_t sample_elems = plane_elems * channel;
  const size_t total_elems = sample_elems * batch;

  output_add_bias_kernel<<<GET_BLOCKS(total_elems), THREADS_PER_BLOCK, 0,
                           stream>>>(output, bias, sample_elems, plane_elems,
                                     total_elems);
}

// Forward pass of modulated deformable convolution for the TensorRT plugin.
//
// For every sample in the batch the input is expanded into `workspace` by
// trt_modulated_deformable_im2col, then multiplied with the weights through
// one cublasGemmWrap call per convolution group. If `bias` is non-null it is
// added to the whole output afterwards.
//
// Parameters:
//   input      - batch * channels * height * width elements.
//   weight     - channels_out * (channels / group) * kernel_h * kernel_w.
//   bias       - channels_out elements, or nullptr to skip the bias step.
//   offset     - per sample: deformable_group * 2 * kernel_h * kernel_w values
//                for each OUTPUT location (height_out * width_out).
//   mask       - per sample: deformable_group * kernel_h * kernel_w values for
//                each OUTPUT location.
//   output     - batch * channels_out * height_out * width_out elements.
//   workspace  - scratch for the im2col columns of one sample; the group loop
//                addresses channels * kernel_h * kernel_w * height_out *
//                width_out elements of it.
//   im2col_step - only checked (batch must be divisible by it); samples are
//                always processed one at a time.
//   cublas_handle, stream - handle for the GEMMs and stream for the kernels.
template <typename scalar_t>
void ModulatedDeformConvForwardCUDAKernelLauncher(
    const scalar_t* input, const scalar_t* weight, const scalar_t* bias,
    const scalar_t* offset, const scalar_t* mask, scalar_t* output,
    void* workspace, int batch, int channels, int height, int width,
    int channels_out, int kernel_w, int kernel_h, int stride_w, int stride_h,
    int pad_w, int pad_h, int dilation_w, int dilation_h, int group,
    int deformable_group, int im2col_step, cublasHandle_t cublas_handle,
    cudaStream_t stream) {
  bool with_bias = (bias != nullptr);

  im2col_step = std::min(int(batch), im2col_step);
  assert(batch % im2col_step == 0);

  const int height_out =
      (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1;
  const int width_out =
      (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1;

  scalar_t* columns = (scalar_t*)workspace;

  const size_t input_step = channels * height * width;
  // Offsets and masks are defined per output location, so the distance
  // between two samples depends on height_out/width_out, not on the input
  // size. Using the input size only happens to work when both sizes match
  // (e.g. stride 1 with "same" padding).
  const size_t offset_step =
      deformable_group * kernel_h * kernel_w * 2 * height_out * width_out;
  const size_t mask_step =
      deformable_group * kernel_h * kernel_w * height_out * width_out;
  const size_t out_step = channels_out * height_out * width_out;
  const size_t out_group_step = out_step / group;
  const size_t col_g_step =
      channels * kernel_w * kernel_h / group * height_out * width_out;
  const size_t weight_g_step =
      channels_out / group * channels / group * kernel_h * kernel_w;

  // GEMM sizes for one group: [m x k] weights times [k x n] columns.
  const int m = channels_out / group;
  const int n = height_out * width_out;
  const int k = channels / group * kernel_h * kernel_w;
  scalar_t alpha = 1.;
  scalar_t beta = 0.;

  for (int b = 0; b < batch; b++) {
    const scalar_t* input_start = input + b * input_step;
    const scalar_t* offset_start = offset + b * offset_step;
    const scalar_t* mask_start = mask + b * mask_step;
    // Expand a single sample (batch_size == 1) into the column buffer.
    trt_modulated_deformable_im2col<scalar_t>(
        input_start, offset_start, mask_start, 1, channels, height, width,
        height_out, width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h,
        stride_w, dilation_h, dilation_w, deformable_group, columns, stream);

    for (int g = 0; g < group; g++) {
      const scalar_t* weight_start = weight + g * weight_g_step;
      scalar_t* col_start = columns + g * col_g_step;
      scalar_t* out_buffer_start = output + b * out_step + g * out_group_step;

      // beta == 0, so the output slice is overwritten rather than accumulated.
      cublasGemmWrap<scalar_t>(cublas_handle, CUBLAS_OP_N, CUBLAS_OP_N, n, m, k,
                               &alpha, col_start, n, weight_start, k, &beta,
                               out_buffer_start, n);
      cudaCheckError();
    }
  }

  if (with_bias) {
    output_add_bias<scalar_t>(output, bias, batch, channels_out, height_out,
                              width_out, stream);
  }
}

// Non-template entry point that forwards all arguments unchanged to the
// float instantiation of ModulatedDeformConvForwardCUDAKernelLauncher.
void ModulatedDeformConvForwardCUDAKernelLauncher_float(
    const float* input, const float* weight, const float* bias,
    const float* offset, const float* mask, float* output, void* workspace,
    int batch, int channels, int height, int width, int channels_out,
    int kernel_w, int kernel_h, int stride_w, int stride_h, int pad_w,
    int pad_h, int dilation_w, int dilation_h, int group, int deformable_group,
    int im2col_step, cublasHandle_t cublas_handle, cudaStream_t stream) {
  ModulatedDeformConvForwardCUDAKernelLauncher<float>(
      input, weight, bias, offset, mask, output, workspace, batch, channels,
      height, width, channels_out, kernel_w, kernel_h, stride_w, stride_h,
      pad_w, pad_h, dilation_w, dilation_h, group, deformable_group,
      im2col_step, cublas_handle, stream);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/tensorrt/plugins/trt_nms.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "trt_nms.hpp"

#include <assert.h>
#include <stdio.h>

#include <chrono>

#include "trt_serialize.hpp"

extern size_t get_onnxnms_workspace_size(
    size_t num_batches, size_t spatial_dimension, size_t num_classes,
    size_t boxes_word_size, int center_point_box, size_t output_length);

extern void TRTNMSCUDAKernelLauncher_float(
    const float *boxes, const float *scores,
    const int max_output_boxes_per_class, const float iou_threshold,
    const float score_threshold, const int offset, int *output,
    int center_point_box, int num_batches, int spatial_dimension,
    int num_classes, size_t output_length, void *workspace,
    cudaStream_t stream);

// File-local identification strings returned by the plugin's
// getPluginType()/getPluginVersion() and the creator's
// getPluginName()/getPluginVersion().
namespace {
static const char *PLUGIN_VERSION{"1"};
static const char *PLUGIN_NAME{"NonMaxSuppression"};
}  // namespace

// Definitions of the creator's static members; both are filled in by the
// NonMaxSuppressionDynamicCreator constructor.
nvinfer1::PluginFieldCollection NonMaxSuppressionDynamicCreator::mFC{};
std::vector<nvinfer1::PluginField>
    NonMaxSuppressionDynamicCreator::mPluginAttributes;

// Builds a plugin instance directly from its attribute values; every argument
// is stored as-is in the corresponding member.
NonMaxSuppressionDynamic::NonMaxSuppressionDynamic(
    const std::string &name, int centerPointBox, int maxOutputBoxesPerClass,
    float iouThreshold, float scoreThreshold, int offset)
    : mLayerName(name),
      mCenterPointBox(centerPointBox),
      mMaxOutputBoxesPerClass(maxOutputBoxesPerClass),
      mIouThreshold(iouThreshold),
      mScoreThreshold(scoreThreshold),
      mOffset(offset) {}

// Rebuilds a plugin instance from a serialized buffer.
//
// The members are read in the same order in which serialize() writes them:
// center-point flag, max boxes per class, IoU threshold, score threshold,
// offset. `data`/`length` are passed by address to deserialize_value
// (presumably so it can advance through the buffer -- confirm in
// trt_serialize.hpp).
NonMaxSuppressionDynamic::NonMaxSuppressionDynamic(const std::string name,
                                                   const void *data,
                                                   size_t length)
    : mLayerName(name) {
  deserialize_value(&data, &length, &mCenterPointBox);
  deserialize_value(&data, &length, &mMaxOutputBoxesPerClass);
  deserialize_value(&data, &length, &mIouThreshold);
  deserialize_value(&data, &length, &mScoreThreshold);
  deserialize_value(&data, &length, &mOffset);
}

// Returns a heap-allocated copy of this plugin carrying the same attributes
// and plugin namespace. Ownership passes to the caller.
nvinfer1::IPluginV2DynamicExt *NonMaxSuppressionDynamic::clone() const {
  auto *copy = new NonMaxSuppressionDynamic(
      mLayerName, mCenterPointBox, mMaxOutputBoxesPerClass, mIouThreshold,
      mScoreThreshold, mOffset);
  copy->setPluginNamespace(getPluginNamespace());
  return copy;
}

// Describes the output shape as a symbolic expression of the input shapes.
//
// The output is 2-D: [num_batches * boxes_per_class * num_classes, 3], where
// boxes_per_class is inputs[0].d[1], capped by mMaxOutputBoxesPerClass when
// that attribute is positive, num_batches is inputs[0].d[0] and num_classes
// is inputs[1].d[1].
nvinfer1::DimsExprs NonMaxSuppressionDynamic::getOutputDimensions(
    int outputIndex, const nvinfer1::DimsExprs *inputs, int nbInputs,
    nvinfer1::IExprBuilder &exprBuilder) {
  using nvinfer1::DimensionOperation;

  auto boxes_per_class = inputs[0].d[1];
  if (mMaxOutputBoxesPerClass > 0) {
    auto limit = exprBuilder.constant(mMaxOutputBoxesPerClass);
    boxes_per_class = exprBuilder.operation(DimensionOperation::kMIN,
                                            *boxes_per_class, *limit);
  }

  // boxes_per_class * num_classes, then multiplied by the batch size.
  auto rows_per_batch = exprBuilder.operation(
      DimensionOperation::kPROD, *boxes_per_class, *inputs[1].d[1]);

  nvinfer1::DimsExprs shape;
  shape.nbDims = 2;
  shape.d[0] = exprBuilder.operation(DimensionOperation::kPROD,
                                     *inputs[0].d[0], *rows_per_batch);
  shape.d[1] = exprBuilder.constant(3);
  return shape;
}

// Reports whether the tensor at `pos` has an acceptable type/format.
//
// Inputs 0 (boxes) and 1 (scores) must be linear float tensors, the first
// output (selected_indices) must be a linear int32 tensor; every other
// position is accepted unconditionally.
bool NonMaxSuppressionDynamic::supportsFormatCombination(
    int pos, const nvinfer1::PluginTensorDesc *inOut, int nbInputs,
    int nbOutputs) {
  // The descriptor is only read for positions that are actually constrained.
  auto is_linear_of = [&](nvinfer1::DataType wanted) {
    return inOut[pos].type == wanted &&
           inOut[pos].format == nvinfer1::TensorFormat::kLINEAR;
  };

  if (pos < nbInputs) {
    const bool is_boxes_or_scores = (pos == 0 || pos == 1);
    return is_boxes_or_scores ? is_linear_of(nvinfer1::DataType::kFLOAT) : true;
  }

  if (pos - nbInputs == 0) {
    return is_linear_of(nvinfer1::DataType::kINT32);
  }
  return true;
}

// Intentionally empty: this plugin keeps no state derived from the tensor
// descriptions.
void NonMaxSuppressionDynamic::configurePlugin(
    const nvinfer1::DynamicPluginTensorDesc *inputs, int nbInputs,
    const nvinfer1::DynamicPluginTensorDesc *outputs, int nbOutputs) {}

// Returns the scratch size in bytes needed by enqueue() for the given shapes,
// as computed by get_onnxnms_workspace_size.
size_t NonMaxSuppressionDynamic::getWorkspaceSize(
    const nvinfer1::PluginTensorDesc *inputs, int nbInputs,
    const nvinfer1::PluginTensorDesc *outputs, int nbOutputs) const {
  const nvinfer1::PluginTensorDesc &boxes = inputs[0];
  const nvinfer1::PluginTensorDesc &scores = inputs[1];

  const size_t word_size = mmcv::getElementSize(boxes.type);
  const size_t batches = boxes.dims.d[0];
  const size_t boxes_per_batch = boxes.dims.d[1];
  const size_t classes = scores.dims.d[1];
  const size_t out_rows = outputs[0].dims.d[0];

  return get_onnxnms_workspace_size(batches, boxes_per_batch, classes,
                                    word_size, mCenterPointBox, out_rows);
}

// Runs NMS on `stream`.
//
// inputs[0] holds the boxes and inputs[1] the scores (both read as float);
// outputs[0] receives the int index triples. Shapes are taken from the
// descriptors and forwarded to TRTNMSCUDAKernelLauncher_float together with
// the plugin attributes. Always returns 0.
int NonMaxSuppressionDynamic::enqueue(
    const nvinfer1::PluginTensorDesc *inputDesc,
    const nvinfer1::PluginTensorDesc *outputDesc, const void *const *inputs,
    void *const *outputs, void *workSpace, cudaStream_t stream) {
  const float *boxes = static_cast<const float *>(inputs[0]);
  const float *scores = static_cast<const float *>(inputs[1]);
  int *selected = static_cast<int *>(outputs[0]);

  const int batches = inputDesc[0].dims.d[0];
  const int boxes_per_batch = inputDesc[0].dims.d[1];
  const int classes = inputDesc[1].dims.d[1];
  const int out_rows = outputDesc[0].dims.d[0];

  TRTNMSCUDAKernelLauncher_float(
      boxes, scores, mMaxOutputBoxesPerClass, mIouThreshold, mScoreThreshold,
      mOffset, selected, mCenterPointBox, batches, boxes_per_batch, classes,
      out_rows, workSpace, stream);

  return 0;
}

// The output is always int32, regardless of the input types.
nvinfer1::DataType NonMaxSuppressionDynamic::getOutputDataType(
    int index, const nvinfer1::DataType *inputTypes, int nbInputs) const {
  return nvinfer1::DataType::kINT32;
}

// IPluginV2 Methods
// Returns the plugin type name ("NonMaxSuppression").
const char *NonMaxSuppressionDynamic::getPluginType() const {
  return PLUGIN_NAME;
}

// Returns the plugin version string ("1").
const char *NonMaxSuppressionDynamic::getPluginVersion() const {
  return PLUGIN_VERSION;
}

// The plugin produces a single output tensor (the selected indices).
int NonMaxSuppressionDynamic::getNbOutputs() const { return 1; }

// Nothing to set up; always reports success (0).
int NonMaxSuppressionDynamic::initialize() { return 0; }

// Nothing to release; counterpart of initialize().
void NonMaxSuppressionDynamic::terminate() {}

// Number of bytes the serialized attributes occupy: the sum of the sizes of
// the five members handled by serialize().
size_t NonMaxSuppressionDynamic::getSerializationSize() const {
  size_t total_bytes = 0;
  total_bytes += sizeof(mCenterPointBox);
  total_bytes += sizeof(mMaxOutputBoxesPerClass);
  total_bytes += sizeof(mIouThreshold);
  total_bytes += sizeof(mScoreThreshold);
  total_bytes += sizeof(mOffset);
  return total_bytes;
}

// Writes the plugin attributes to `buffer`.
//
// The order here must stay in sync with the deserializing constructor and
// the set of fields with getSerializationSize().
void NonMaxSuppressionDynamic::serialize(void *buffer) const {
  serialize_value(&buffer, mCenterPointBox);
  serialize_value(&buffer, mMaxOutputBoxesPerClass);
  serialize_value(&buffer, mIouThreshold);
  serialize_value(&buffer, mScoreThreshold);
  serialize_value(&buffer, mOffset);
}

// Frees this plugin object; the instance must not be used afterwards.
void NonMaxSuppressionDynamic::destroy() {
  // This gets called when the network containing plugin is destroyed
  delete this;
}

// Stores a copy of the given namespace string in mNamespace.
void NonMaxSuppressionDynamic::setPluginNamespace(const char *libNamespace) {
  mNamespace = libNamespace;
}

// Returns the stored namespace; the pointer refers to mNamespace's buffer.
const char *NonMaxSuppressionDynamic::getPluginNamespace() const {
  return mNamespace.c_str();
}

////////////////////// creator /////////////////////////////

// Populates the static attribute list advertised through getFieldNames().
//
// The list is cleared first so that constructing several creators does not
// append the same fields repeatedly.
NonMaxSuppressionDynamicCreator::NonMaxSuppressionDynamicCreator() {
  static const char *const kFieldNames[] = {
      "center_point_box", "max_output_boxes_per_class", "iou_threshold",
      "score_threshold", "offset"};

  mPluginAttributes.clear();
  for (const char *field_name : kFieldNames) {
    mPluginAttributes.emplace_back(nvinfer1::PluginField(field_name));
  }

  mFC.nbFields = mPluginAttributes.size();
  mFC.fields = mPluginAttributes.data();
}

// Returns the name of the plugin this creator builds ("NonMaxSuppression").
const char *NonMaxSuppressionDynamicCreator::getPluginName() const {
  return PLUGIN_NAME;
}

// Returns the version of the plugin this creator builds ("1").
const char *NonMaxSuppressionDynamicCreator::getPluginVersion() const {
  return PLUGIN_VERSION;
}

// Returns the static field collection filled in by the constructor.
const nvinfer1::PluginFieldCollection *
NonMaxSuppressionDynamicCreator::getFieldNames() {
  return &mFC;
}

// Creates a plugin from a field collection.
//
// Recognised fields are "center_point_box", "max_output_boxes_per_class",
// "offset" (read as int) and "iou_threshold", "score_threshold" (read as
// float). Fields with a null data pointer are skipped and unknown names are
// ignored; attributes that are not supplied keep their zero defaults. The
// returned plugin inherits the creator's namespace.
nvinfer1::IPluginV2 *NonMaxSuppressionDynamicCreator::createPlugin(
    const char *name, const nvinfer1::PluginFieldCollection *fc) {
  int centerPointBox = 0;
  int maxOutputBoxesPerClass = 0;
  int offset = 0;
  float iouThreshold = 0.0f;
  float scoreThreshold = 0.0f;

  for (int i = 0; i < fc->nbFields; ++i) {
    const nvinfer1::PluginField &field = fc->fields[i];
    if (field.data == nullptr) {
      continue;
    }

    const std::string key(field.name);
    if (key == "center_point_box") {
      centerPointBox = *static_cast<const int *>(field.data);
    } else if (key == "max_output_boxes_per_class") {
      maxOutputBoxesPerClass = *static_cast<const int *>(field.data);
    } else if (key == "iou_threshold") {
      iouThreshold = *static_cast<const float *>(field.data);
    } else if (key == "score_threshold") {
      scoreThreshold = *static_cast<const float *>(field.data);
    } else if (key == "offset") {
      offset = *static_cast<const int *>(field.data);
    }
  }

  auto *created =
      new NonMaxSuppressionDynamic(name, centerPointBox, maxOutputBoxesPerClass,
                                   iouThreshold, scoreThreshold, offset);
  created->setPluginNamespace(getPluginNamespace());
  return created;
}

// Recreates a plugin from a buffer produced by
// NonMaxSuppressionDynamic::serialize() and gives it the creator's namespace.
nvinfer1::IPluginV2 *NonMaxSuppressionDynamicCreator::deserializePlugin(
    const char *name, const void *serialData, size_t serialLength) {
  auto plugin = new NonMaxSuppressionDynamic(name, serialData, serialLength);
  plugin->setPluginNamespace(getPluginNamespace());
  return plugin;
}

// Stores a copy of the namespace that is handed on to created plugins.
void NonMaxSuppressionDynamicCreator::setPluginNamespace(
    const char *libNamespace) {
  mNamespace = libNamespace;
}

// Returns the stored namespace; the pointer refers to mNamespace's buffer.
const char *NonMaxSuppressionDynamicCreator::getPluginNamespace() const {
  return mNamespace.c_str();
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/tensorrt/plugins/trt_nms_kernel.cu
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include <stdio.h>
#include <thrust/execution_policy.h>
#include <thrust/gather.h>
#include <thrust/sort.h>
#include <thrust/transform.h>

#include <chrono>
#include <thread>
#include <vector>

#include "common_cuda_helper.hpp"
#include "nms_cuda_kernel.cuh"
#include "trt_cuda_helper.cuh"
#include "trt_plugin_helper.hpp"

// Four consecutive floats describing one box. Lets thrust algorithms move
// and transform whole boxes as a single element.
struct NMSBox {
  float box[4];
};

// Functor converting a box from [center_x, center_y, width, height] to
// corner form [x1, y1, x2, y2].
struct nms_centerwh2xyxy {
  __host__ __device__ NMSBox operator()(const NMSBox box) {
    const float center_x = box.box[0];
    const float center_y = box.box[1];
    const float half_w = box.box[2] / 2.0f;
    const float half_h = box.box[3] / 2.0f;

    NMSBox corners;
    corners.box[0] = center_x - half_w;
    corners.box[1] = center_y - half_h;
    corners.box[2] = center_x + half_w;
    corners.box[3] = center_y + half_h;
    return corners;
  }
};

// Functor that ignores its argument and returns the four floats found at
// `idle_box_` as a box. The pointer is dereferenced on every call.
struct nms_sbox_idle {
  const float* idle_box_;
  __host__ __device__ nms_sbox_idle(const float* idle_box)
      : idle_box_(idle_box) {}

  __host__ __device__ NMSBox operator()(const NMSBox box) {
    NMSBox replacement;
    for (int k = 0; k < 4; ++k) {
      replacement.box[k] = idle_box_[k];
    }
    return replacement;
  }
};

// Predicate that is true for scores strictly below the stored threshold.
struct nms_score_threshold {
  float score_threshold_;
  __host__ __device__ nms_score_threshold(const float score_threshold)
      : score_threshold_(score_threshold) {}

  __host__ __device__ bool operator()(const float score) {
    const bool below = score < score_threshold_;
    return below;
  }
};

// For each of the first `n` rows of the [*, 3] `output` table, replaces the
// third column by index_cache[<old value of the third column>].
__global__ void nms_reindex_kernel(int n, int* output, int* index_cache) {
  CUDA_1D_KERNEL_LOOP(row, n) {
    int* box_slot = output + row * 3 + 2;
    const int sorted_position = *box_slot;
    *box_slot = index_cache[sorted_position];
  }
}

// Turns the suppression bit mask of one (batch, class) pair into output rows.
//
// Boxes are visited in order 0..spatial_dimension-1. A box whose bit is not
// yet set in the running "removed" bitmap is kept: thread 0 appends the row
// (batch_id, cls_id, index[i]) to `output`, and then all threads OR row i of
// `dev_mask` into the bitmap. Processing stops once
// max_output_boxes_per_class boxes were kept.
//
// dev_mask     - spatial_dimension rows of col_blocks 64-bit words
//                (presumably written by nms_cuda -- confirm against
//                nms_cuda_kernel.cuh).
// index        - maps position i to the value stored in the third column.
// output       - [*, 3] int table; rows are appended starting at
//                *output_count.
// output_count - running number of rows written; updated at the end.
//
// The bitmap lives in dynamic shared memory (col_blocks words), and the only
// launch visible in this file uses a single block.
__global__ void mask_to_output_kernel(const unsigned long long* dev_mask,
                                      const int* index, int* output,
                                      int* output_count, int batch_id,
                                      int cls_id, int spatial_dimension,
                                      int col_blocks,
                                      int max_output_boxes_per_class) {
  extern __shared__ unsigned long long remv[];

  // fill remv with 0
  CUDA_1D_KERNEL_LOOP(i, col_blocks) { remv[i] = 0; }
  __syncthreads();

  // Every thread reads the current row count, but only thread 0 advances and
  // stores it.
  int start = *output_count;
  // Counted identically by all threads so that they leave the loop together.
  int out_per_class_count = 0;
  for (int i = 0; i < spatial_dimension; i++) {
    // Word and bit position of box i inside the bitmap.
    const int nblock = i / threadsPerBlock;
    const int inblock = i % threadsPerBlock;
    if (!(remv[nblock] & (1ULL << inblock))) {
      if (threadIdx.x == 0) {
        output[start * 3 + 0] = batch_id;
        output[start * 3 + 1] = cls_id;
        output[start * 3 + 2] = index[i];
        start += 1;
      }
      out_per_class_count += 1;
      if (out_per_class_count >= max_output_boxes_per_class) {
        break;
      }
      // All threads must have tested remv[nblock] before it is modified.
      __syncthreads();
      // set every overlap box with bit 1 in remv
      const unsigned long long* p = dev_mask + i * col_blocks;
      CUDA_1D_KERNEL_LOOP(j, col_blocks) {
        if (j >= nblock) {
          remv[j] |= p[j];
        }
      }  // j
      // Make the updated bitmap visible before the next box is tested.
      __syncthreads();
    }
  }  // i
  if (threadIdx.x == 0) {
    *output_count = start;
  }
}

// Returns the number of scratch bytes TRTNMSCUDAKernelLauncher_float needs.
//
// The total is the sum of the aligned sizes of: one row of scores, one set of
// sorted boxes, the converted boxes of all batches (only when
// center_point_box == 1), the suppression mask, two int index arrays and one
// int counter. `num_classes` and `output_length` do not influence the result.
size_t get_onnxnms_workspace_size(size_t num_batches, size_t spatial_dimension,
                                  size_t num_classes, size_t boxes_word_size,
                                  int center_point_box, size_t output_length) {
  // Number of 64-bit mask words per box row.
  const int col_blocks =
      (spatial_dimension + threadsPerBlock - 1) / threadsPerBlock;
  const size_t index_bytes =
      mmcv::getAlignedSize(spatial_dimension * sizeof(int));

  size_t total = 0;
  // scores of one (batch, class) pair
  total += mmcv::getAlignedSize(spatial_dimension * boxes_word_size);
  // boxes converted to corner form, for every batch
  if (center_point_box == 1) {
    total += mmcv::getAlignedSize(num_batches * spatial_dimension * 4 *
                                  boxes_word_size);
  }
  // boxes of one batch, reordered by score
  total += mmcv::getAlignedSize(spatial_dimension * 4 * boxes_word_size);
  // suppression bit mask
  total += mmcv::getAlignedSize(spatial_dimension * col_blocks *
                                sizeof(unsigned long long));
  // index template and index cache
  total += index_bytes;
  total += index_bytes;
  // output row counter
  total += mmcv::getAlignedSize(sizeof(int));
  return total;
}

/**
 * Launch the NonMaxSuppression kernel
 *
 * The NMS is performed separately for each batch/class pair and shares the
 * kernel implementation `nms_cuda`. For each pair, the scores are sorted in
 * descending order together with `index_cache`, the boxes are gathered into
 * `boxes_sorted` in that order and passed to the `nms_cuda` kernel. After
 * that, the output rows are generated by `mask_to_output_kernel` from
 * `dev_mask` and `index_cache`.
 *
 * All work is enqueued on `stream`; the function does not synchronize.
 *
 * @param[in] boxes with shape [num_batch, spatial_dimension, 4], input boxes
 * @param[in] scores with shape [num_batch, num_classes, spatial_dimension],
 *     input scores
 * @param[in] max_output_boxes_per_class max output boxes per class; values
 *     <= 0 are replaced by spatial_dimension
 * @param[in] iou_threshold threshold of iou
 * @param[in] score_threshold threshold of scores; only applied when > 0
 * @param[in] offset box offset, only 0 or 1 is valid
 * @param[out] output with shape [output_length, 3], each row contain index
 *     (batch_id, class_id, boxes_id), filling -1 if result is not valid.
 * @param[in] center_point_box 0 if boxes is [left, top, right, bottom] 1 if
 *     boxes is [center_x, center_y, width, height]
 * @param[in] num_batches batch size of boxes and scores
 * @param[in] spatial_dimension boxes numbers each batch
 * @param[in] num_classes class numbers
 * @param[in] output_length the max output rows
 * @param[in] workspace memory for all temporary variables; carved up below
 *     into aligned sub-buffers whose sizes add up to the value returned by
 *     `get_onnxnms_workspace_size`.
 * @param[in] stream cuda stream
 */
void TRTNMSCUDAKernelLauncher_float(const float* boxes, const float* scores,
                                    const int max_output_boxes_per_class,
                                    const float iou_threshold,
                                    const float score_threshold,
                                    const int offset, int* output,
                                    int center_point_box, int num_batches,
                                    int spatial_dimension, int num_classes,
                                    size_t output_length, void* workspace,
                                    cudaStream_t stream) {
  // Number of 64-bit mask words needed to cover one row of boxes.
  const int col_blocks =
      (spatial_dimension + threadsPerBlock - 1) / threadsPerBlock;
  // Boxes of the current batch, reordered by descending score.
  float* boxes_sorted = (float*)workspace;
  workspace = static_cast<char*>(workspace) +
              mmcv::getAlignedSize(spatial_dimension * 4 * sizeof(float));

  // Only needed for center-format input: all boxes converted to corner form
  // once, up front.
  float* boxes_xyxy = nullptr;
  if (center_point_box == 1) {
    boxes_xyxy = (float*)workspace;
    workspace = static_cast<char*>(workspace) +
                mmcv::getAlignedSize(num_batches * spatial_dimension * 4 *
                                     sizeof(float));
    thrust::transform(thrust::cuda::par.on(stream), (NMSBox*)boxes,
                      (NMSBox*)(boxes + num_batches * spatial_dimension * 4),
                      (NMSBox*)boxes_xyxy, nms_centerwh2xyxy());
    cudaCheckError();
  }

  // Scores of the current batch/class pair, sorted in place.
  float* scores_sorted = (float*)workspace;
  workspace = static_cast<char*>(workspace) +
              mmcv::getAlignedSize(spatial_dimension * sizeof(float));

  // Suppression bit mask: spatial_dimension rows of col_blocks words.
  unsigned long long* dev_mask = (unsigned long long*)workspace;
  workspace = static_cast<char*>(workspace) +
              mmcv::getAlignedSize(spatial_dimension * col_blocks *
                                   sizeof(unsigned long long));

  // Original box index for every sorted position.
  int* index_cache = (int*)workspace;
  workspace = static_cast<char*>(workspace) +
              mmcv::getAlignedSize(spatial_dimension * sizeof(int));

  // generate sequence [0,1,2,3,4 ....]
  int* index_template = (int*)workspace;
  workspace = static_cast<char*>(workspace) +
              mmcv::getAlignedSize(spatial_dimension * sizeof(int));
  thrust::sequence(thrust::cuda::par.on(stream), index_template,
                   index_template + spatial_dimension, 0);

  // A non-positive limit means "no limit": keep up to every box.
  int max_output_boxes_per_class_cpu = max_output_boxes_per_class;
  if (max_output_boxes_per_class_cpu <= 0) {
    max_output_boxes_per_class_cpu = spatial_dimension;
  }

  // Device-side counter of output rows written so far, shared by all
  // batch/class pairs.
  int* output_count = (int*)workspace;
  workspace = static_cast<char*>(workspace) + mmcv::getAlignedSize(sizeof(int));
  cudaMemsetAsync(output_count, 0, sizeof(int), stream);

  // fill output with -1
  thrust::fill(thrust::cuda::par.on(stream), output, output + output_length * 3,
               -1);
  cudaCheckError();

  dim3 blocks(col_blocks, col_blocks);
  dim3 threads(threadsPerBlock);

  for (int batch_id = 0; batch_id < num_batches; ++batch_id) {
    for (int cls_id = 0; cls_id < num_classes; ++cls_id) {
      const int batch_cls_id = batch_id * num_classes + cls_id;

      // sort boxes by score
      cudaMemcpyAsync(scores_sorted, scores + batch_cls_id * spatial_dimension,
                      spatial_dimension * sizeof(float),
                      cudaMemcpyDeviceToDevice, stream);
      cudaCheckError();

      // Reset the index array to [0, 1, 2, ...] before sorting it along with
      // the scores.
      cudaMemcpyAsync(index_cache, index_template,
                      spatial_dimension * sizeof(int), cudaMemcpyDeviceToDevice,
                      stream);
      cudaCheckError();

      // Descending sort: highest score first.
      thrust::sort_by_key(thrust::cuda::par.on(stream), scores_sorted,
                          scores_sorted + spatial_dimension, index_cache,
                          thrust::greater<float>());

      // Reorder this batch's boxes to match the sorted scores.
      if (center_point_box == 1) {
        thrust::gather(thrust::cuda::par.on(stream), index_cache,
                       index_cache + spatial_dimension,
                       (NMSBox*)(boxes_xyxy + batch_id * spatial_dimension * 4),
                       (NMSBox*)boxes_sorted);
      } else {
        thrust::gather(thrust::cuda::par.on(stream), index_cache,
                       index_cache + spatial_dimension,
                       (NMSBox*)(boxes + batch_id * spatial_dimension * 4),
                       (NMSBox*)boxes_sorted);
      }

      cudaCheckError();

      // Boxes whose score is below the threshold are overwritten with the
      // first (highest-scoring) sorted box.
      // NOTE(review): presumably so that they fully overlap that box and are
      // suppressed by it -- confirm against nms_cuda.
      if (score_threshold > 0.0f) {
        thrust::transform_if(
            thrust::cuda::par.on(stream), (NMSBox*)boxes_sorted,
            (NMSBox*)(boxes_sorted + spatial_dimension * 4), scores_sorted,
            (NMSBox*)boxes_sorted, nms_sbox_idle(boxes_sorted),
            nms_score_threshold(score_threshold));
      }

      nms_cuda<<<blocks, threads, 0, stream>>>(spatial_dimension, iou_threshold,
                                               offset, boxes_sorted, dev_mask);

      // Enqueued on the same stream, so this runs after nms_cuda has filled
      // dev_mask. Single block, with col_blocks words of dynamic shared
      // memory for the "removed" bitmap.
      mask_to_output_kernel<<<1, threadsPerBlock,
                              col_blocks * sizeof(unsigned long long),
                              stream>>>(
          dev_mask, index_cache, output, output_count, batch_id, cls_id,
          spatial_dimension, col_blocks, max_output_boxes_per_class_cpu);
    }  // cls_id
  }    // batch_id
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/tensorrt/plugins/trt_plugin.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "trt_plugin.hpp"

#include "trt_corner_pool.hpp"
#include "trt_cummaxmin.hpp"
#include "trt_deform_conv.hpp"
#include "trt_grid_sampler.hpp"
#include "trt_instance_norm.hpp"
#include "trt_modulated_deform_conv.hpp"
#include "trt_nms.hpp"
#include "trt_roi_align.hpp"
#include "trt_scatternd.hpp"

// Register one creator per custom op shipped with this library.
REGISTER_TENSORRT_PLUGIN(CumMaxPluginDynamicCreator);
REGISTER_TENSORRT_PLUGIN(CumMinPluginDynamicCreator);
REGISTER_TENSORRT_PLUGIN(GridSamplerDynamicCreator);
REGISTER_TENSORRT_PLUGIN(DeformableConvPluginDynamicCreator);
REGISTER_TENSORRT_PLUGIN(ModulatedDeformableConvPluginDynamicCreator);
REGISTER_TENSORRT_PLUGIN(NonMaxSuppressionDynamicCreator);
REGISTER_TENSORRT_PLUGIN(RoIAlignPluginDynamicCreator);
REGISTER_TENSORRT_PLUGIN(ONNXScatterNDDynamicCreator);
REGISTER_TENSORRT_PLUGIN(InstanceNormalizationDynamicCreator);
REGISTER_TENSORRT_PLUGIN(CornerPoolPluginDynamicCreator);

// C-linkage entry point that does nothing but return true.
// NOTE(review): presumably exists so that clients can reference a symbol and
// force this library (and its static registrations) to be loaded -- confirm.
extern "C" {
bool initLibMMCVInferPlugins() { return true; }
}  // extern "C"


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/tensorrt/plugins/trt_roi_align.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "trt_roi_align.hpp"

#include <assert.h>

#include <chrono>

#include "trt_serialize.hpp"

extern void TRTRoIAlignForwardCUDAKernelLauncher_float(
    const float *input, const float *rois, float *output, float *argmax_y,
    float *argmax_x, int output_size, int channels, int height, int width,
    int aligned_height, int aligned_width, float spatial_scale,
    int sampling_ratio, int pool_mode, bool aligned, cudaStream_t stream);

// File-local identification strings returned by the plugin's
// getPluginType()/getPluginVersion() and the creator's
// getPluginName()/getPluginVersion().
namespace {
static const char *PLUGIN_VERSION{"1"};
static const char *PLUGIN_NAME{"MMCVRoiAlign"};
}  // namespace

// Definitions of the creator's static members; both are filled in by the
// RoIAlignPluginDynamicCreator constructor.
nvinfer1::PluginFieldCollection RoIAlignPluginDynamicCreator::mFC{};
std::vector<nvinfer1::PluginField>
    RoIAlignPluginDynamicCreator::mPluginAttributes;

// Builds a plugin instance directly from its attribute values; every argument
// is stored as-is in the corresponding member. Note the argument order:
// width comes before height.
RoIAlignPluginDynamic::RoIAlignPluginDynamic(const std::string &name,
                                             int outWidth, int outHeight,
                                             float spatialScale,
                                             int sampleRatio, int poolMode,
                                             bool aligned)
    : mLayerName(name),
      mOutWidth(outWidth),
      mOutHeight(outHeight),
      mSpatialScale(spatialScale),
      mSampleRatio(sampleRatio),
      mPoolMode(poolMode),
      mAligned(aligned) {}

// Rebuilds a plugin instance from a serialized buffer.
//
// The members are read in the same order in which serialize() writes them:
// output width, output height, spatial scale, sample ratio, pool mode,
// aligned flag.
RoIAlignPluginDynamic::RoIAlignPluginDynamic(const std::string name,
                                             const void *data, size_t length)
    : mLayerName(name) {
  deserialize_value(&data, &length, &mOutWidth);
  deserialize_value(&data, &length, &mOutHeight);
  deserialize_value(&data, &length, &mSpatialScale);
  deserialize_value(&data, &length, &mSampleRatio);
  deserialize_value(&data, &length, &mPoolMode);
  deserialize_value(&data, &length, &mAligned);
}

// Returns a heap-allocated copy of this plugin carrying the same attributes
// and plugin namespace. Ownership passes to the caller.
nvinfer1::IPluginV2DynamicExt *RoIAlignPluginDynamic::clone() const {
  auto *copy = new RoIAlignPluginDynamic(mLayerName, mOutWidth, mOutHeight,
                                         mSpatialScale, mSampleRatio,
                                         mPoolMode, mAligned);
  copy->setPluginNamespace(getPluginNamespace());
  return copy;
}

// Describes the output shape as [inputs[1].d[0], inputs[0].d[1], mOutHeight,
// mOutWidth]: the first dimension of the second input, the second dimension
// of the first input, and the configured pooled height and width.
nvinfer1::DimsExprs RoIAlignPluginDynamic::getOutputDimensions(
    int outputIndex, const nvinfer1::DimsExprs *inputs, int nbInputs,
    nvinfer1::IExprBuilder &exprBuilder) {
  const nvinfer1::DimsExprs &feat_dims = inputs[0];
  const nvinfer1::DimsExprs &rois_dims = inputs[1];

  nvinfer1::DimsExprs shape;
  shape.nbDims = 4;
  shape.d[0] = rois_dims.d[0];
  shape.d[1] = feat_dims.d[1];
  shape.d[2] = exprBuilder.constant(mOutHeight);
  shape.d[3] = exprBuilder.constant(mOutWidth);
  return shape;
}

// Every input and output tensor must be a linear float tensor.
bool RoIAlignPluginDynamic::supportsFormatCombination(
    int pos, const nvinfer1::PluginTensorDesc *inOut, int nbInputs,
    int nbOutputs) {
  const nvinfer1::PluginTensorDesc &desc = inOut[pos];
  if (desc.type != nvinfer1::DataType::kFLOAT) {
    return false;
  }
  return desc.format == nvinfer1::TensorFormat::kLINEAR;
}

// Intentionally empty: this plugin keeps no state derived from the tensor
// descriptions.
void RoIAlignPluginDynamic::configurePlugin(
    const nvinfer1::DynamicPluginTensorDesc *inputs, int nbInputs,
    const nvinfer1::DynamicPluginTensorDesc *outputs, int nbOutputs) {}

// Returns the scratch size in bytes needed by enqueue().
//
// Only pool mode 0 (max) needs scratch memory: two buffers the size of the
// output, used for argmax_y and argmax_x. Every other mode needs none.
size_t RoIAlignPluginDynamic::getWorkspaceSize(
    const nvinfer1::PluginTensorDesc *inputs, int nbInputs,
    const nvinfer1::PluginTensorDesc *outputs, int nbOutputs) const {
  if (mPoolMode != 0) {
    return 0;
  }

  const nvinfer1::PluginTensorDesc &out = outputs[0];
  const size_t output_size =
      out.dims.d[0] * out.dims.d[1] * out.dims.d[2] * out.dims.d[3];
  const size_t word_size = mmcv::getElementSize(out.type);
  return output_size * word_size * 2;
}

// Runs RoIAlign on `stream`.
//
// inputs[0] is the feature map (channels/height/width are read from its
// dimensions 1..3), inputs[1] the RoIs, outputs[0] the pooled result. For
// pool mode 0 (max) the workspace is split into two output-sized buffers,
// argmax_y followed by argmax_x; for other modes both stay null. Only float
// outputs are dispatched; any other output type is silently ignored. Always
// returns 0.
int RoIAlignPluginDynamic::enqueue(const nvinfer1::PluginTensorDesc *inputDesc,
                                   const nvinfer1::PluginTensorDesc *outputDesc,
                                   const void *const *inputs,
                                   void *const *outputs, void *workSpace,
                                   cudaStream_t stream) {
  int channels = inputDesc[0].dims.d[1];
  int height = inputDesc[0].dims.d[2];
  int width = inputDesc[0].dims.d[3];

  // Total number of output elements and the size of one element in bytes.
  int output_size = outputDesc[0].dims.d[0] * outputDesc[0].dims.d[1] *
                    outputDesc[0].dims.d[2] * outputDesc[0].dims.d[3];
  int word_size = mmcv::getElementSize(outputDesc[0].type);

  const void *feat = inputs[0];
  const void *rois = inputs[1];
  void *output = outputs[0];
  void *argmax_y = nullptr;
  void *argmax_x = nullptr;

  switch (mPoolMode) {
    case 0:  // max
      argmax_y = workSpace;
      // Advance by whole bytes through a char pointer: arithmetic on void* is
      // a compiler extension, not standard C++. The offset is computed in
      // size_t so it cannot overflow int for large outputs.
      argmax_x = static_cast<char *>(workSpace) +
                 static_cast<size_t>(output_size) * word_size;
      break;
    case 1:  // avg
      break;
  }

  switch (outputDesc[0].type) {
    case nvinfer1::DataType::kFLOAT:
      TRTRoIAlignForwardCUDAKernelLauncher_float(
          (const float *)feat, (const float *)rois, (float *)output,
          (float *)argmax_y, (float *)argmax_x, output_size, channels, height,
          width, mOutHeight, mOutWidth, mSpatialScale, mSampleRatio, mPoolMode,
          mAligned, stream);
      break;

    default:
      break;
  }

  return 0;
}

// The output has the same data type as the first input.
nvinfer1::DataType RoIAlignPluginDynamic::getOutputDataType(
    int index, const nvinfer1::DataType *inputTypes, int nbInputs) const {
  return inputTypes[0];
}

// IPluginV2 Methods
// Returns the plugin type name ("MMCVRoiAlign").
const char *RoIAlignPluginDynamic::getPluginType() const { return PLUGIN_NAME; }

// Returns the plugin version string ("1").
const char *RoIAlignPluginDynamic::getPluginVersion() const {
  return PLUGIN_VERSION;
}

// The plugin produces a single output tensor (the pooled features).
int RoIAlignPluginDynamic::getNbOutputs() const { return 1; }

// Nothing to set up; always reports success (0).
int RoIAlignPluginDynamic::initialize() { return 0; }

// Nothing to release; counterpart of initialize().
void RoIAlignPluginDynamic::terminate() {}

// Number of bytes the serialized attributes occupy: the sum of the sizes of
// the six members handled by serialize().
size_t RoIAlignPluginDynamic::getSerializationSize() const {
  size_t total_bytes = 0;
  total_bytes += sizeof(mOutWidth);
  total_bytes += sizeof(mOutHeight);
  total_bytes += sizeof(mSpatialScale);
  total_bytes += sizeof(mSampleRatio);
  total_bytes += sizeof(mPoolMode);
  total_bytes += sizeof(mAligned);
  return total_bytes;
}

// Writes the plugin attributes to `buffer`.
//
// The order here must stay in sync with the deserializing constructor and
// the set of fields with getSerializationSize().
void RoIAlignPluginDynamic::serialize(void *buffer) const {
  serialize_value(&buffer, mOutWidth);
  serialize_value(&buffer, mOutHeight);
  serialize_value(&buffer, mSpatialScale);
  serialize_value(&buffer, mSampleRatio);
  serialize_value(&buffer, mPoolMode);
  serialize_value(&buffer, mAligned);
}

// Self-destructs the plugin object; invoked when the network that contains
// the plugin is destroyed. The object must not be used afterwards.
void RoIAlignPluginDynamic::destroy() {
  delete this;
}

void RoIAlignPluginDynamic::setPluginNamespace(const char *libNamespace) {
  mNamespace = libNamespace;
}

// Returns the stored namespace as a C string owned by this object.
const char *RoIAlignPluginDynamic::getPluginNamespace() const {
  const char *ns = mNamespace.c_str();
  return ns;
}

////////////////////// creator /////////////////////////////

// Registers the attribute names that createPlugin() understands and exposes
// them through the field collection returned by getFieldNames().
RoIAlignPluginDynamicCreator::RoIAlignPluginDynamicCreator() {
  // Start from an empty list so that constructing more than one creator does
  // not append duplicate entries. NOTE(review): this matters if
  // mPluginAttributes is shared between instances (e.g. a static member) --
  // its declaration is not visible here; for a per-instance member the call
  // is a harmless no-op.
  mPluginAttributes.clear();

  static const char *const kFieldNames[] = {
      "output_height",  "output_width", "spatial_scale",
      "sampling_ratio", "mode",         "aligned"};
  for (const char *fieldName : kFieldNames) {
    mPluginAttributes.emplace_back(nvinfer1::PluginField(fieldName));
  }

  mFC.nbFields = mPluginAttributes.size();
  mFC.fields = mPluginAttributes.data();
}

// Name under which this creator is looked up (file-local PLUGIN_NAME).
const char *RoIAlignPluginDynamicCreator::getPluginName() const {
  const char *pluginName = PLUGIN_NAME;
  return pluginName;
}

// Version string of the plugins this creator builds (file-local
// PLUGIN_VERSION).
const char *RoIAlignPluginDynamicCreator::getPluginVersion() const {
  const char *pluginVersion = PLUGIN_VERSION;
  return pluginVersion;
}

// Exposes the field collection populated by the constructor.
const nvinfer1::PluginFieldCollection *
RoIAlignPluginDynamicCreator::getFieldNames() {
  const nvinfer1::PluginFieldCollection *fields = &mFC;
  return fields;
}

// Builds a RoIAlignPluginDynamic from a plugin field collection.
//
// Recognised fields (all optional except "mode"):
//   output_height / output_width  int,   default 7
//   spatial_scale                 float, default 1.0
//   sampling_ratio                int,   default 0
//   mode                          "avg" or "max" (raw characters, `length`
//                                 bytes long)
//   aligned                       int treated as a boolean, default true
// Fields whose data pointer is null are skipped; unknown names are ignored.
//
// Returns the new plugin, or nullptr when the resulting configuration is
// invalid (non-positive output size or scale, missing or unknown pool mode).
// Debug builds still trip the assertions first; the explicit check keeps
// builds compiled with NDEBUG from creating a plugin with poolMode == -1.
nvinfer1::IPluginV2 *RoIAlignPluginDynamicCreator::createPlugin(
    const char *name, const nvinfer1::PluginFieldCollection *fc) {
  int outWidth = 7;
  int outHeight = 7;
  float spatialScale = 1.0;
  int sampleRatio = 0;
  int poolMode = -1;
  bool aligned = true;

  // A null collection is treated like an empty one; validation below then
  // rejects it because the mandatory "mode" field is absent.
  const int nbFields = (fc != nullptr) ? fc->nbFields : 0;
  for (int i = 0; i < nbFields; i++) {
    const nvinfer1::PluginField &field = fc->fields[i];
    if (field.data == nullptr) {
      continue;
    }
    const std::string field_name(field.name);

    if (field_name == "output_height") {
      outHeight = static_cast<const int *>(field.data)[0];
    } else if (field_name == "output_width") {
      outWidth = static_cast<const int *>(field.data)[0];
    } else if (field_name == "spatial_scale") {
      spatialScale = static_cast<const float *>(field.data)[0];
    } else if (field_name == "sampling_ratio") {
      sampleRatio = static_cast<const int *>(field.data)[0];
    } else if (field_name == "mode") {
      const int data_size = field.length;
      const char *data_start = static_cast<const char *>(field.data);
      const std::string poolModeStr(data_start, data_size);
      if (poolModeStr == "avg") {
        poolMode = 1;
      } else if (poolModeStr == "max") {
        poolMode = 0;
      } else {
        std::cout << "Unknown pool mode \"" << poolModeStr << "\"."
                  << std::endl;
      }
      assert(poolMode >= 0);
    } else if (field_name == "aligned") {
      const int aligned_int = static_cast<const int *>(field.data)[0];
      aligned = aligned_int != 0;
    }
  }

  assert(outHeight > 0);
  assert(outWidth > 0);
  assert(spatialScale > 0.);
  assert(poolMode >= 0);

  // Runtime validation that survives NDEBUG.
  const bool configValid =
      outHeight > 0 && outWidth > 0 && spatialScale > 0. && poolMode >= 0;
  if (!configValid) {
    std::cout << "Invalid RoIAlign plugin configuration." << std::endl;
    return nullptr;
  }

  RoIAlignPluginDynamic *plugin = new RoIAlignPluginDynamic(
      name, outWidth, outHeight, spatialScale, sampleRatio, poolMode, aligned);
  plugin->setPluginNamespace(getPluginNamespace());
  return plugin;
}

// Reconstructs a plugin from a serialized blob and tags it with this
// creator's namespace.
nvinfer1::IPluginV2 *RoIAlignPluginDynamicCreator::deserializePlugin(
    const char *name, const void *serialData, size_t serialLength) {
  RoIAlignPluginDynamic *restored =
      new RoIAlignPluginDynamic(name, serialData, serialLength);
  restored->setPluginNamespace(getPluginNamespace());
  return restored;
}

void RoIAlignPluginDynamicCreator::setPluginNamespace(
    const char *libNamespace) {
  mNamespace = libNamespace;
}

// Returns the stored namespace as a C string owned by this creator.
const char *RoIAlignPluginDynamicCreator::getPluginNamespace() const {
  const char *ns = mNamespace.c_str();
  return ns;
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/tensorrt/plugins/trt_roi_align_kernel.cu
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "common_cuda_helper.hpp"
#include "roi_align_cuda_kernel.cuh"

// Launches roi_align_forward_cuda_kernel on `stream`, using
// GET_BLOCKS(output_size) blocks of THREADS_PER_BLOCK threads and no dynamic
// shared memory. Arguments are forwarded in the order the kernel expects.
template <typename scalar_t>
void TRTRoIAlignForwardCUDAKernelLauncher(
    const scalar_t* input, const scalar_t* rois, scalar_t* output,
    scalar_t* argmax_y, scalar_t* argmax_x, int output_size, int channels,
    int height, int width, int aligned_height, int aligned_width,
    scalar_t spatial_scale, int sampling_ratio, int pool_mode, bool aligned,
    cudaStream_t stream) {
  const auto num_blocks = GET_BLOCKS(output_size);
  const auto num_threads = THREADS_PER_BLOCK;
  // spatial_scale already has type scalar_t, so it is passed through as is.
  roi_align_forward_cuda_kernel<scalar_t><<<num_blocks, num_threads, 0, stream>>>(
      output_size, input, rois, output, argmax_y, argmax_x, aligned_height,
      aligned_width, spatial_scale, sampling_ratio, pool_mode, aligned,
      channels, height, width);
}

// Non-template entry point for float data: forwards every argument unchanged
// to the float instantiation of the templated launcher.
void TRTRoIAlignForwardCUDAKernelLauncher_float(
    const float* input, const float* rois, float* output, float* argmax_y,
    float* argmax_x, int output_size, int channels, int height, int width,
    int aligned_height, int aligned_width, float spatial_scale,
    int sampling_ratio, int pool_mode, bool aligned, cudaStream_t stream) {
  TRTRoIAlignForwardCUDAKernelLauncher<float>(input, rois, output, argmax_y,
                                              argmax_x, output_size, channels,
                                              height, width, aligned_height,
                                              aligned_width, spatial_scale,
                                              sampling_ratio, pool_mode,
                                              aligned, stream);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/tensorrt/plugins/trt_scatternd.cpp
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include "trt_scatternd.hpp"

#include <assert.h>
#include <stdio.h>

#include <chrono>

#include "trt_serialize.hpp"

// Launcher for float data; declared here and defined in another translation
// unit. Called from ONNXScatterNDDynamic::enqueue with the input dims, the
// indices dims and the CUDA stream.
extern void TRTONNXScatterNDKernelLauncher_float(
    const float *data, const int *indices, const float *update, const int *dims,
    int nbDims, const int *indices_dims, int indice_nbDims, float *output,
    cudaStream_t stream);

// Same launcher for 32-bit integer data.
extern void TRTONNXScatterNDKernelLauncher_int32(
    const int *data, const int *indices, const int *update, const int *dims,
    int nbDims, const int *indices_dims, int indice_nbDims, int *output,
    cudaStream_t stream);

// File-local identity of the plugin: the strings reported by
// getPluginType()/getPluginName() and getPluginVersion().
namespace {
static const char *PLUGIN_VERSION{"1"};
static const char *PLUGIN_NAME{"ScatterND"};
}  // namespace

// Out-of-class definitions of the creator's static members: the field
// collection handed out by getFieldNames() and its backing storage.
nvinfer1::PluginFieldCollection ONNXScatterNDDynamicCreator::mFC{};
std::vector<nvinfer1::PluginField>
    ONNXScatterNDDynamicCreator::mPluginAttributes;

// Creates a plugin that only remembers its layer name; there is no other
// configuration.
ONNXScatterNDDynamic::ONNXScatterNDDynamic(const std::string &name)
    : mLayerName(name) {
}

// Deserialization constructor. The plugin serializes zero bytes, so `data`
// and `length` are accepted but not read.
ONNXScatterNDDynamic::ONNXScatterNDDynamic(const std::string name,
                                           const void *data, size_t length)
    : mLayerName(name) {
}

// Returns a new heap-allocated plugin with the same layer name and
// namespace; the caller owns the result.
nvinfer1::IPluginV2DynamicExt *ONNXScatterNDDynamic::clone() const {
  ONNXScatterNDDynamic *copy = new ONNXScatterNDDynamic(mLayerName);
  copy->setPluginNamespace(getPluginNamespace());
  return copy;
}

// The output has exactly the dimensions of input 0 (the data tensor); the
// remaining arguments are not consulted.
nvinfer1::DimsExprs ONNXScatterNDDynamic::getOutputDimensions(
    int outputIndex, const nvinfer1::DimsExprs *inputs, int nbInputs,
    nvinfer1::IExprBuilder &exprBuilder) {
  nvinfer1::DimsExprs outputDims = inputs[0];
  return outputDims;
}

// Reports whether the tensor at position `pos` (inputs first, then outputs)
// has an acceptable type/format:
//   input 0 (data)    : linear layout, float or int32
//   input 1 (indices) : linear layout, int32
//   input 2 (updates) : same type and format as input 0
//   output 0          : same type and format as input 0
// Any other position is accepted unconditionally.
bool ONNXScatterNDDynamic::supportsFormatCombination(
    int pos, const nvinfer1::PluginTensorDesc *inOut, int nbInputs,
    int nbOutputs) {
  const bool isInput = pos < nbInputs;

  if (isInput && pos == 0) {
    const bool linear = inOut[pos].format == nvinfer1::TensorFormat::kLINEAR;
    const bool floatOrInt = inOut[pos].type == nvinfer1::DataType::kFLOAT ||
                            inOut[pos].type == nvinfer1::DataType::kINT32;
    return floatOrInt && linear;
  }

  if (isInput && pos == 1) {
    return inOut[pos].type == nvinfer1::DataType::kINT32 &&
           inOut[pos].format == nvinfer1::TensorFormat::kLINEAR;
  }

  // Updates (input 2) and the first output must mirror the data tensor.
  const bool mustMatchData = isInput ? (pos == 2) : (pos - nbInputs == 0);
  if (mustMatchData) {
    return inOut[pos].type == inOut[0].type &&
           inOut[pos].format == inOut[0].format;
  }

  return true;
}

// No configuration step is needed; the descriptors are ignored.
void ONNXScatterNDDynamic::configurePlugin(
    const nvinfer1::DynamicPluginTensorDesc *inputs, int nbInputs,
    const nvinfer1::DynamicPluginTensorDesc *outputs, int nbOutputs) {
}

// The plugin requests no scratch memory.
size_t ONNXScatterNDDynamic::getWorkspaceSize(
    const nvinfer1::PluginTensorDesc *inputs, int nbInputs,
    const nvinfer1::PluginTensorDesc *outputs, int nbOutputs) const {
  const size_t workspaceBytes = 0;
  return workspaceBytes;
}

// Dispatches the scatter to the launcher matching the element type of input 0
// (float or int32). Inputs are, in order: data, indices, updates. Any other
// element type is silently ignored. Always returns 0.
int ONNXScatterNDDynamic::enqueue(const nvinfer1::PluginTensorDesc *inputDesc,
                                  const nvinfer1::PluginTensorDesc *outputDesc,
                                  const void *const *inputs,
                                  void *const *outputs, void *workSpace,
                                  cudaStream_t stream) {
  const nvinfer1::PluginTensorDesc &dataDesc = inputDesc[0];
  const nvinfer1::PluginTensorDesc &indexDesc = inputDesc[1];

  // Shape information handed to the launcher as raw int arrays.
  const int *dataDims = &(dataDesc.dims.d[0]);
  const int dataRank = dataDesc.dims.nbDims;
  const int *indexDims = &(indexDesc.dims.d[0]);
  const int indexRank = indexDesc.dims.nbDims;

  const int *indexPtr = static_cast<const int *>(inputs[1]);

  if (dataDesc.type == nvinfer1::DataType::kFLOAT) {
    TRTONNXScatterNDKernelLauncher_float(
        static_cast<const float *>(inputs[0]), indexPtr,
        static_cast<const float *>(inputs[2]), dataDims, dataRank, indexDims,
        indexRank, static_cast<float *>(outputs[0]), stream);
  } else if (dataDesc.type == nvinfer1::DataType::kINT32) {
    TRTONNXScatterNDKernelLauncher_int32(
        static_cast<const int *>(inputs[0]), indexPtr,
        static_cast<const int *>(inputs[2]), dataDims, dataRank, indexDims,
        indexRank, static_cast<int *>(outputs[0]), stream);
  }

  return 0;
}

// The output uses the element type of input 0 (the data tensor).
nvinfer1::DataType ONNXScatterNDDynamic::getOutputDataType(
    int index, const nvinfer1::DataType *inputTypes, int nbInputs) const {
  const nvinfer1::DataType dataType = inputTypes[0];
  return dataType;
}

// IPluginV2 Methods
// Reports the plugin type string (the file-local PLUGIN_NAME constant).
const char *ONNXScatterNDDynamic::getPluginType() const {
  return PLUGIN_NAME;
}

// Reports the plugin version string (the file-local PLUGIN_VERSION constant).
const char *ONNXScatterNDDynamic::getPluginVersion() const {
  const char *version = PLUGIN_VERSION;
  return version;
}

// The plugin always produces exactly one output tensor.
int ONNXScatterNDDynamic::getNbOutputs() const {
  return 1;
}

// Nothing to set up; always returns 0.
int ONNXScatterNDDynamic::initialize() {
  return 0;
}

// Counterpart of initialize(); there is nothing to release.
void ONNXScatterNDDynamic::terminate() {
}

// The plugin has no state to persist, so the serialized form is empty.
size_t ONNXScatterNDDynamic::getSerializationSize() const {
  return 0;
}

// Writes nothing: getSerializationSize() reports zero bytes.
void ONNXScatterNDDynamic::serialize(void *buffer) const {
}

// Self-destructs the plugin object; invoked when the network that contains
// the plugin is destroyed. The object must not be used afterwards.
void ONNXScatterNDDynamic::destroy() {
  delete this;
}

void ONNXScatterNDDynamic::setPluginNamespace(const char *libNamespace) {
  mNamespace = libNamespace;
}

// Returns the stored namespace as a C string owned by this object.
const char *ONNXScatterNDDynamic::getPluginNamespace() const {
  const char *ns = mNamespace.c_str();
  return ns;
}

////////////////////// creator /////////////////////////////

// ScatterND takes no attributes: the shared attribute list is emptied and the
// field collection is pointed at it.
ONNXScatterNDDynamicCreator::ONNXScatterNDDynamicCreator() {
  mPluginAttributes.clear();

  mFC.fields = mPluginAttributes.data();
  mFC.nbFields = mPluginAttributes.size();
}

// Name under which this creator is looked up (file-local PLUGIN_NAME).
const char *ONNXScatterNDDynamicCreator::getPluginName() const {
  const char *pluginName = PLUGIN_NAME;
  return pluginName;
}

// Version string of the plugins this creator builds (file-local
// PLUGIN_VERSION).
const char *ONNXScatterNDDynamicCreator::getPluginVersion() const {
  const char *pluginVersion = PLUGIN_VERSION;
  return pluginVersion;
}

// Exposes the (empty) field collection set up by the constructor.
const nvinfer1::PluginFieldCollection *
ONNXScatterNDDynamicCreator::getFieldNames() {
  const nvinfer1::PluginFieldCollection *fields = &mFC;
  return fields;
}

// Builds a ScatterND plugin. The field collection is not read because the
// plugin has no attributes; the new plugin inherits this creator's namespace.
nvinfer1::IPluginV2 *ONNXScatterNDDynamicCreator::createPlugin(
    const char *name, const nvinfer1::PluginFieldCollection *fc) {
  ONNXScatterNDDynamic *created = new ONNXScatterNDDynamic(name);
  created->setPluginNamespace(getPluginNamespace());
  return created;
}

// Reconstructs a plugin through the deserialization constructor and tags it
// with this creator's namespace.
nvinfer1::IPluginV2 *ONNXScatterNDDynamicCreator::deserializePlugin(
    const char *name, const void *serialData, size_t serialLength) {
  ONNXScatterNDDynamic *restored =
      new ONNXScatterNDDynamic(name, serialData, serialLength);
  restored->setPluginNamespace(getPluginNamespace());
  return restored;
}

void ONNXScatterNDDynamicCreator::setPluginNamespace(const char *libNamespace) {
  mNamespace = libNamespace;
}

// Returns the stored namespace as a C string owned by this creator.
const char *ONNXScatterNDDynamicCreator::getPluginNamespace() const {
  const char *ns = mNamespace.c_str();
  return ns;
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/tensorrt/plugins/trt_scatternd_kernel.cu
================================================
// Copyright (c) OpenMMLab. All rights reserved
#include <stdio.h>

#include <vector>

#include "common_cuda_helper.hpp"
#include "trt_cuda_helper.cuh"
#include "trt_plugin_helper.hpp"

// Thread-block size used for the scatter kernel launch: the number of bits in
// an unsigned long long (sizeof * 8).
static int const threadsPerBlock = sizeof(unsigned long long int) * 8;

// Shape/stride descriptor that is passed by value to the kernel.
using mmcv::TensorDesc;

// Scatter kernel: for each of the `n` index tuples, computes the element
// offset into `output` from the tuple and the data strides, then copies one
// contiguous slice from `update` to that offset.
//
// n            number of index tuples to process
// indices      index tuples, stored back to back
// update       source slices, one per tuple, stored back to back
// output       destination tensor (written in place)
// tensor_desc  shape/stride of the data tensor
// indice_desc  shape/stride of the indices tensor
template <typename T>
__global__ void onnx_scatternd_kernel(const int n, const int* indices,
                                      const T* update, T* output,
                                      TensorDesc tensor_desc,
                                      TensorDesc indice_desc) {
  // Length of one index tuple = extent of the last indices dimension.
  const int tuple_len = indice_desc.shape[indice_desc.dim - 1];
  // Elements per copied slice = data stride of the last indexed dimension.
  const int slice_elems = tensor_desc.stride[tuple_len - 1];
  CUDA_1D_KERNEL_LOOP(index, n) {
    const int* tuple = indices + index * tuple_len;
    int dst_offset = 0;
    for (int d = 0; d < tuple_len; ++d) {
      dst_offset += tensor_desc.stride[d] * tuple[d];
    }
    memcpy(output + dst_offset, update + index * slice_elems,
           slice_elems * sizeof(T));
  }
}

// Host-side launcher for ONNX ScatterND: copies `data` into `output`, then
// overwrites the slices addressed by `indices` with the contents of `update`.
//
// data, update, output  device buffers of element type T
// indices               device buffer of int index tuples
// dims / nbDims         shape and rank of the data tensor
// indices_dims /
//   indice_nbDims       shape and rank of the indices tensor
// stream                CUDA stream on which the copy and the scatter kernel
//                       are both enqueued
//
// NOTE(review): assumes nbDims >= 1 and indice_nbDims >= 1, and that both
// ranks fit the fixed-size arrays inside TensorDesc -- confirm against its
// definition.
template <typename T>
void TRTONNXScatterNDKernelLauncher(const T* data, const int* indices,
                                    const T* update, const int* dims,
                                    int nbDims, const int* indices_dims,
                                    int indice_nbDims, T* output,
                                    cudaStream_t stream) {
  // Describe the data tensor: row-major strides, innermost stride is 1.
  TensorDesc tensor_desc;
  memset((void*)&tensor_desc, 0, sizeof(TensorDesc));
  tensor_desc.dim = nbDims;
  tensor_desc.shape[nbDims - 1] = dims[nbDims - 1];
  tensor_desc.stride[nbDims - 1] = 1;
  for (int i = nbDims - 2; i >= 0; --i) {
    tensor_desc.shape[i] = dims[i];
    tensor_desc.stride[i] = dims[i + 1] * tensor_desc.stride[i + 1];
  }
  const int data_size = tensor_desc.stride[0] * tensor_desc.shape[0];

  // Describe the indices tensor the same way.
  TensorDesc indice_desc;
  memset((void*)&indice_desc, 0, sizeof(TensorDesc));
  indice_desc.dim = indice_nbDims;
  indice_desc.shape[indice_nbDims - 1] = indices_dims[indice_nbDims - 1];
  indice_desc.stride[indice_nbDims - 1] = 1;
  for (int i = indice_nbDims - 2; i >= 0; --i) {
    indice_desc.shape[i] = indices_dims[i];
    indice_desc.stride[i] = indices_dims[i + 1] * indice_desc.stride[i + 1];
  }

  // output = np.copy(data)
  // The copy is enqueued on the caller's stream so that it is ordered before
  // the scatter kernel below; without the stream argument it would be issued
  // on the default stream instead.
  cudaMemcpyAsync(output, data, data_size * sizeof(T),
                  cudaMemcpyDeviceToDevice, stream);

  // Number of index tuples = product of all indices dims except the last.
  int num_update_indice = 1;
  for (int i = 0; i < indice_nbDims - 1; ++i) {
    num_update_indice *= indice_desc.shape[i];
  }
  // Nothing to scatter: the output is just the copy. Skipping the launch also
  // avoids requesting a grid with zero blocks.
  if (num_update_indice <= 0) {
    return;
  }

  // scatter
  const int col_block = GET_BLOCKS(num_update_indice, threadsPerBlock);
  onnx_scatternd_kernel<<<col_block, threadsPerBlock, 0, stream>>>(
      num_update_indice, indices, update, output, tensor_desc, indice_desc);
}

// Non-template entry point for float tensors; forwards every argument
// unchanged to the float instantiation of the templated launcher.
void TRTONNXScatterNDKernelLauncher_float(const float* data, const int* indices,
                                          const float* update, const int* dims,
                                          int nbDims, const int* indices_dims,
                                          int indice_nbDims, float* output,
                                          cudaStream_t stream) {
  TRTONNXScatterNDKernelLauncher<float>(
      data, indices, update, dims, nbDims, indices_dims, indice_nbDims, output,
      stream);
}

// Non-template entry point for int32 tensors; forwards every argument
// unchanged to the int instantiation of the templated launcher.
void TRTONNXScatterNDKernelLauncher_int32(const int* data, const int* indices,
                                          const int* update, const int* dims,
                                          int nbDims, const int* indices_dims,
                                          int indice_nbDims, int* output,
                                          cudaStream_t stream) {
  TRTONNXScatterNDKernelLauncher<int>(
      data, indices, update, dims, nbDims, indices_dims, indice_nbDims, output,
      stream);
}


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/tensorrt/trt_corner_pool.hpp
================================================
#ifndef TRT_CORNER_POOL_HPP
#define TRT_CORNER_POOL_HPP
#include <string>
#include <vector>

#include "trt_plugin_helper.hpp"

// Variant of corner pooling held by CornerPoolPluginDynamic (mPoolType).
// The numeric values are spelled out explicitly.
enum TRT_CORNER_POOL_TYPE {
  TRT_TOP_POOL = 0,
  TRT_BOTTOM_POOL = 1,
  TRT_LEFT_POOL = 2,
  TRT_RIGHT_POOL = 3
};

// TensorRT dynamic-shape plugin for CornerPool. The only configuration it
// stores is the pool variant (mPoolType); definitions live in the
// corresponding source file.
class CornerPoolPluginDynamic : public nvinfer1::IPluginV2DynamicExt {
 public:
  // Creates a plugin for the given layer name and pool variant.
  CornerPoolPluginDynamic(const std::string &name,
                          TRT_CORNER_POOL_TYPE poolType);

  // Creates a plugin from a raw byte buffer of `length` bytes.
  // NOTE(review): presumably the deserialization path -- confirm in the .cpp.
  CornerPoolPluginDynamic(const std::string name, const void *data,
                          size_t length);

  // A plugin must always be given a name.
  CornerPoolPluginDynamic() = delete;

  ~CornerPoolPluginDynamic();

  // IPluginV2DynamicExt Methods
  nvinfer1::IPluginV2DynamicExt *clone() const override;
  nvinfer1::DimsExprs getOutputDimensions(
      int outputIndex, const nvinfer1::DimsExprs *inputs, int nbInputs,
      nvinfer1::IExprBuilder &exprBuilder) override;
  bool supportsFormatCombination(int pos,
                                 const nvinfer1::PluginTensorDesc *inOut,
                                 int nbInputs, int nbOutputs) override;
  void configurePlugin(const nvinfer1::DynamicPluginTensorDesc *in,
                       int nbInputs,
                       const nvinfer1::DynamicPluginTensorDesc *out,
                       int nbOutputs) override;
  size_t getWorkspaceSize(const nvinfer1::PluginTensorDesc *inputs,
                          int nbInputs,
                          const nvinfer1::PluginTensorDesc *outputs,
                          int nbOutputs) const override;
  int enqueue(const nvinfer1::PluginTensorDesc *inputDesc,
              const nvinfer1::PluginTensorDesc *outputDesc,
              const void *const *inputs, void *const *outputs, void *workspace,
              cudaStream_t stream) override;

  // IPluginV2Ext Methods
  nvinfer1::DataType getOutputDataType(int index,
                                       const nvinfer1::DataType *inputTypes,
                                       int nbInputs) const override;

  // IPluginV2 Methods
  const char *getPluginType() const override;
  const char *getPluginVersion() const override;
  int getNbOutputs() const override;
  int initialize() override;
  void terminate() override;
  size_t getSerializationSize() const override;
  void serialize(void *buffer) const override;
  void destroy() override;
  void setPluginNamespace(const char *pluginNamespace) override;
  const char *getPluginNamespace() const override;

 protected:
  const std::string mLayerName;  // layer name given at construction
  std::string mNamespace;        // set through setPluginNamespace()

  TRT_CORNER_POOL_TYPE mPoolType;  // which corner-pool variant to run

 protected:
  // To prevent compiler warnings.
  // (Re-exposes the base-class overloads hidden by the overrides above.)
  using nvinfer1::IPluginV2DynamicExt::canBroadcastInputAcrossBatch;
  using nvinfer1::IPluginV2DynamicExt::configurePlugin;
  using nvinfer1::IPluginV2DynamicExt::enqueue;
  using nvinfer1::IPluginV2DynamicExt::getOutputDimensions;
  using nvinfer1::IPluginV2DynamicExt::getWorkspaceSize;
  using nvinfer1::IPluginV2DynamicExt::isOutputBroadcastAcrossBatch;
  using nvinfer1::IPluginV2DynamicExt::supportsFormat;
};

// Creator (factory) for CornerPool plugins. It holds its own field
// collection, attribute list and namespace as per-instance members.
class CornerPoolPluginDynamicCreator : public nvinfer1::IPluginCreator {
 public:
  CornerPoolPluginDynamicCreator();

  const char *getPluginName() const override;

  const char *getPluginVersion() const override;

  const nvinfer1::PluginFieldCollection *getFieldNames() override;

  // Builds a plugin from a field collection.
  nvinfer1::IPluginV2 *createPlugin(
      const char *name, const nvinfer1::PluginFieldCollection *fc) override;

  // Rebuilds a plugin from a serialized blob of `serialLength` bytes.
  nvinfer1::IPluginV2 *deserializePlugin(const char *name,
                                         const void *serialData,
                                         size_t serialLength) override;

  void setPluginNamespace(const char *pluginNamespace) override;

  const char *getPluginNamespace() const override;

 protected:
  nvinfer1::PluginFieldCollection mFC;
  std::vector<nvinfer1::PluginField> mPluginAttributes;
  std::string mNamespace;
};

// The guard name belongs in a comment only: any token after #endif is an
// extra token that compilers diagnose.
#endif  // TRT_CORNER_POOL_HPP


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/tensorrt/trt_cuda_helper.cuh
================================================
// Copyright (c) OpenMMLab. All rights reserved
#ifndef TRT_CUDA_HELPER_HPP
#define TRT_CUDA_HELPER_HPP
#include <cublas_v2.h>

// Checks the last CUDA runtime error (cudaGetLastError). On failure it prints
// the file, line and error string, then terminates the process with a
// non-zero exit status so that callers and scripts can detect the failure.
// The expansion is a brace-enclosed block, as before.
#define cudaCheckError()                                       \
  {                                                            \
    cudaError_t e = cudaGetLastError();                        \
    if (e != cudaSuccess) {                                    \
      printf("Cuda failure %s:%d: '%s'\n", __FILE__, __LINE__, \
             cudaGetErrorString(e));                           \
      exit(1);                                                 \
    }                                                          \
  }

/**
 * Copies a tensor from `src` to `dst` with its dimensions permuted.
 *
 * NOTE(review): the function returns void and writes through `dst`, so it
 * presumably materialises the permuted tensor rather than returning a view;
 * only the declaration is visible here -- confirm against the definition.
 *
 * @param[out] dst pointer to the destination tensor
 * @param[in] src pointer to the source tensor
 * @param[in] src_size shape of the src tensor
 * @param[in] permute The desired ordering of dimensions
 * @param[in] src_dim dim of src tensor
 * @param[in] stream cuda stream handle (defaults to 0)
 */
template <class scalar_t>
void memcpyPermute(scalar_t* dst, const scalar_t* src, int* src_size,
                   int* permute, int src_dim, cudaStream_t stream = 0);

/**
 * Generic form of the cuBLAS GEMM wrapper. It performs no computation and
 * always reports CUBLAS_STATUS_INTERNAL_ERROR.
 *
 * NOTE(review): presumably specialised elsewhere for the supported scalar
 * types, leaving this as the "unsupported type" fallback -- confirm.
 */
template <typename scalar_t>
cublasStatus_t cublasGemmWrap(cublasHandle_t handle, cublasOperation_t transa,
                              cublasOperation_t transb, int m, int n, int k,
                              const scalar_t* alpha, const scalar_t* A, int lda,
                              const scalar_t* B, int ldb, const scalar_t* beta,
                              scalar_t* C, int ldc) {
  const cublasStatus_t unsupported = CUBLAS_STATUS_INTERNAL_ERROR;
  return unsupported;
}

#endif  // TRT_CUDA_HELPER_HPP


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/tensorrt/trt_cummaxmin.hpp
================================================
#ifndef TRT_CUMMAXMIN_HPP
#define TRT_CUMMAXMIN_HPP
#include <string>
#include <vector>

#include "trt_plugin_helper.hpp"

// Cumulative comparison variant (max or min) stored by the cummax/cummin
// plugin and its creators.
enum TRT_CUMCMPTYPE { TRT_CUMMAX = 0, TRT_CUMMIN = 1 };

// TensorRT dynamic-shape plugin shared by cummax and cummin. It stores the
// dimension argument (mDim) and which of the two variants to run (mCumType);
// definitions live in the corresponding source file.
class CumMaxMinPluginDynamic : public nvinfer1::IPluginV2DynamicExt {
 public:
  // Creates a plugin for the given layer name, dimension and variant.
  CumMaxMinPluginDynamic(const std::string &name, int dim,
                         TRT_CUMCMPTYPE cumType);

  // Creates a plugin from a raw byte buffer of `length` bytes.
  // NOTE(review): presumably the deserialization path -- confirm in the .cpp.
  CumMaxMinPluginDynamic(const std::string name, const void *data,
                         size_t length);

  // A plugin must always be given a name.
  CumMaxMinPluginDynamic() = delete;

  ~CumMaxMinPluginDynamic();

  // IPluginV2DynamicExt Methods
  nvinfer1::IPluginV2DynamicExt *clone() const override;
  nvinfer1::DimsExprs getOutputDimensions(
      int outputIndex, const nvinfer1::DimsExprs *inputs, int nbInputs,
      nvinfer1::IExprBuilder &exprBuilder) override;
  bool supportsFormatCombination(int pos,
                                 const nvinfer1::PluginTensorDesc *inOut,
                                 int nbInputs, int nbOutputs) override;
  void configurePlugin(const nvinfer1::DynamicPluginTensorDesc *in,
                       int nbInputs,
                       const nvinfer1::DynamicPluginTensorDesc *out,
                       int nbOutputs) override;
  size_t getWorkspaceSize(const nvinfer1::PluginTensorDesc *inputs,
                          int nbInputs,
                          const nvinfer1::PluginTensorDesc *outputs,
                          int nbOutputs) const override;
  int enqueue(const nvinfer1::PluginTensorDesc *inputDesc,
              const nvinfer1::PluginTensorDesc *outputDesc,
              const void *const *inputs, void *const *outputs, void *workspace,
              cudaStream_t stream) override;

  // IPluginV2Ext Methods
  nvinfer1::DataType getOutputDataType(int index,
                                       const nvinfer1::DataType *inputTypes,
                                       int nbInputs) const override;

  // IPluginV2 Methods
  const char *getPluginType() const override;
  const char *getPluginVersion() const override;
  int getNbOutputs() const override;
  int initialize() override;
  void terminate() override;
  size_t getSerializationSize() const override;
  void serialize(void *buffer) const override;
  void destroy() override;
  void setPluginNamespace(const char *pluginNamespace) override;
  const char *getPluginNamespace() const override;

 protected:
  const std::string mLayerName;  // layer name given at construction
  std::string mNamespace;        // set through setPluginNamespace()

  int mDim;                 // dimension argument given at construction
  TRT_CUMCMPTYPE mCumType;  // cummax or cummin

 protected:
  // To prevent compiler warnings.
  // (Re-exposes the base-class overloads hidden by the overrides above.)
  using nvinfer1::IPluginV2DynamicExt::canBroadcastInputAcrossBatch;
  using nvinfer1::IPluginV2DynamicExt::configurePlugin;
  using nvinfer1::IPluginV2DynamicExt::enqueue;
  using nvinfer1::IPluginV2DynamicExt::getOutputDimensions;
  using nvinfer1::IPluginV2DynamicExt::getWorkspaceSize;
  using nvinfer1::IPluginV2DynamicExt::isOutputBroadcastAcrossBatch;
  using nvinfer1::IPluginV2DynamicExt::supportsFormat;
};

// Common base of the cummax and cummin creators. The variant is fixed at
// construction and kept in mCumType; the two derived creators below override
// getPluginName().
class CumMaxMinPluginDynamicCreator : public nvinfer1::IPluginCreator {
 public:
  CumMaxMinPluginDynamicCreator(TRT_CUMCMPTYPE cumType);

  const char *getPluginName() const override;

  const char *getPluginVersion() const override;

  const nvinfer1::PluginFieldCollection *getFieldNames() override;

  // Builds a plugin from a field collection.
  nvinfer1::IPluginV2 *createPlugin(
      const char *name, const nvinfer1::PluginFieldCollection *fc) override;

  // Rebuilds a plugin from a serialized blob of `serialLength` bytes.
  nvinfer1::IPluginV2 *deserializePlugin(const char *name,
                                         const void *serialData,
                                         size_t serialLength) override;

  void setPluginNamespace(const char *pluginNamespace) override;

  const char *getPluginNamespace() const override;

 protected:
  TRT_CUMCMPTYPE mCumType;  // variant this creator produces
  nvinfer1::PluginFieldCollection mFC;
  std::vector<nvinfer1::PluginField> mPluginAttributes;
  std::string mNamespace;
};

// Creator for the cummax plugin; reuses the shared base and overrides only
// the plugin name.
class CumMaxPluginDynamicCreator : public CumMaxMinPluginDynamicCreator {
 public:
  CumMaxPluginDynamicCreator();
  const char *getPluginName() const override;
};

// Creator for the cummin plugin; reuses the shared base and overrides only
// the plugin name.
class CumMinPluginDynamicCreator : public CumMaxMinPluginDynamicCreator {
 public:
  CumMinPluginDynamicCreator();
  const char *getPluginName() const override;
};

// The guard name belongs in a comment only: any token after #endif is an
// extra token that compilers diagnose.
#endif  // TRT_CUMMAXMIN_HPP


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/tensorrt/trt_deform_conv.hpp
================================================
#ifndef TRT_DEFORM_CONV_HPP
#define TRT_DEFORM_CONV_HPP
#include <cublas_v2.h>

#include <memory>
#include <string>
#include <vector>

#include "trt_plugin_helper.hpp"

// TensorRT dynamic-shape plugin for deformable convolution. It stores the
// convolution hyper-parameters and a cuBLAS handle; besides the usual plugin
// interface it overrides attachToContext()/detachFromContext().
class DeformableConvPluginDynamic : public nvinfer1::IPluginV2DynamicExt {
 public:
  // Creates a plugin from explicit hyper-parameters.
  DeformableConvPluginDynamic(const std::string &name,
                              const nvinfer1::Dims &stride,
                              const nvinfer1::Dims &padding,
                              const nvinfer1::Dims &dilation,
                              const int deformableGroup, const int group,
                              int im2colStep);

  // Creates a plugin from a raw byte buffer of `length` bytes.
  // NOTE(review): presumably the deserialization path -- confirm in the .cpp.
  DeformableConvPluginDynamic(const std::string name, const void *data,
                              size_t length);

  // A plugin must always be given a name.
  DeformableConvPluginDynamic() = delete;

  ~DeformableConvPluginDynamic();

  // IPluginV2DynamicExt Methods
  nvinfer1::IPluginV2DynamicExt *clone() const override;
  nvinfer1::DimsExprs getOutputDimensions(
      int outputIndex, const nvinfer1::DimsExprs *inputs, int nbInputs,
      nvinfer1::IExprBuilder &exprBuilder) override;
  bool supportsFormatCombination(int pos,
                                 const nvinfer1::PluginTensorDesc *inOut,
                                 int nbInputs, int nbOutputs) override;
  void configurePlugin(const nvinfer1::DynamicPluginTensorDesc *in,
                       int nbInputs,
                       const nvinfer1::DynamicPluginTensorDesc *out,
                       int nbOutputs) override;
  size_t getWorkspaceSize(const nvinfer1::PluginTensorDesc *inputs,
                          int nbInputs,
                          const nvinfer1::PluginTensorDesc *outputs,
                          int nbOutputs) const override;
  int enqueue(const nvinfer1::PluginTensorDesc *inputDesc,
              const nvinfer1::PluginTensorDesc *outputDesc,
              const void *const *inputs, void *const *outputs, void *workspace,
              cudaStream_t stream) override;
  // Receives the cuDNN/cuBLAS contexts and GPU allocator.
  // NOTE(review): presumably where m_cublas_handle is obtained -- confirm.
  void attachToContext(cudnnContext *cudnnContext, cublasContext *cublasContext,
                       nvinfer1::IGpuAllocator *gpuAllocator) override;
  void detachFromContext() override;

  // IPluginV2Ext Methods
  nvinfer1::DataType getOutputDataType(int index,
                                       const nvinfer1::DataType *inputTypes,
                                       int nbInputs) const override;

  // IPluginV2 Methods
  const char *getPluginType() const override;
  const char *getPluginVersion() const override;
  int getNbOutputs() const override;
  int initialize() override;
  void terminate() override;
  size_t getSerializationSize() const override;
  void serialize(void *buffer) const override;
  void destroy() override;
  void setPluginNamespace(const char *pluginNamespace) override;
  const char *getPluginNamespace() const override;

 private:
  const std::string mLayerName;  // layer name given at construction
  std::string mNamespace;        // set through setPluginNamespace()

  // Convolution hyper-parameters given at construction.
  nvinfer1::Dims mStride;
  nvinfer1::Dims mPadding;
  nvinfer1::Dims mDilation;
  int mDeformableGroup;
  int mGroup;
  int mIm2colStep;

  cublasHandle_t m_cublas_handle;

 protected:
  // To prevent compiler warnings.
  // (Re-exposes the base-class overloads hidden by the overrides above.)
  using nvinfer1::IPluginV2DynamicExt::canBroadcastInputAcrossBatch;
  using nvinfer1::IPluginV2DynamicExt::configurePlugin;
  using nvinfer1::IPluginV2DynamicExt::enqueue;
  using nvinfer1::IPluginV2DynamicExt::getOutputDimensions;
  using nvinfer1::IPluginV2DynamicExt::getWorkspaceSize;
  using nvinfer1::IPluginV2DynamicExt::isOutputBroadcastAcrossBatch;
  using nvinfer1::IPluginV2DynamicExt::supportsFormat;
};

// Creator (factory) for deformable-convolution plugins. The field collection
// and attribute list are static, i.e. shared by all creator instances; the
// namespace is per instance.
class DeformableConvPluginDynamicCreator : public nvinfer1::IPluginCreator {
 public:
  DeformableConvPluginDynamicCreator();

  const char *getPluginName() const override;

  const char *getPluginVersion() const override;

  const nvinfer1::PluginFieldCollection *getFieldNames() override;

  // Builds a plugin from a field collection.
  nvinfer1::IPluginV2 *createPlugin(
      const char *name, const nvinfer1::PluginFieldCollection *fc) override;

  // Rebuilds a plugin from a serialized blob of `serialLength` bytes.
  nvinfer1::IPluginV2 *deserializePlugin(const char *name,
                                         const void *serialData,
                                         size_t serialLength) override;

  void setPluginNamespace(const char *pluginNamespace) override;

  const char *getPluginNamespace() const override;

 private:
  static nvinfer1::PluginFieldCollection mFC;
  static std::vector<nvinfer1::PluginField> mPluginAttributes;
  std::string mNamespace;
};
#endif  // TRT_DEFORM_CONV_HPP


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/tensorrt/trt_grid_sampler.hpp
================================================
#ifndef TRT_GRID_SAMPLER_HPP
#define TRT_GRID_SAMPLER_HPP
#include <cublas_v2.h>

#include <memory>
#include <string>
#include <vector>

#include "trt_plugin_helper.hpp"

// Option enums for the grid sampler, kept in the mmcv namespace.
namespace mmcv {
// Interpolation method.
enum class GridSamplerInterpolation { Bilinear, Nearest };
// Padding mode.
// NOTE(review): presumably governs sampling outside the input -- confirm.
enum class GridSamplerPadding { Zeros, Border, Reflection };
}  // namespace mmcv

// TensorRT dynamic-shape plugin declaration for grid sampling. Only the
// interface is declared here; all member functions are defined elsewhere.
class GridSamplerDynamic : public nvinfer1::IPluginV2DynamicExt {
 public:
  // Builds a plugin from explicit parameters.
  // NOTE(review): `mode` / `paddingMode` are plain ints; presumably they hold
  // mmcv::GridSamplerInterpolation / mmcv::GridSamplerPadding values --
  // confirm in the implementation file.
  GridSamplerDynamic(const std::string &name, int mode, int paddingMode,
                     bool alignCorners);

  // Builds a plugin from a raw byte buffer of `length` bytes. Note that
  // `name` is taken by value here, unlike the constructor above.
  // NOTE(review): presumably the inverse of serialize() -- confirm.
  GridSamplerDynamic(const std::string name, const void *data, size_t length);

  // A plugin cannot be created without a name and parameters.
  GridSamplerDynamic() = delete;

  // IPluginV2DynamicExt Methods
  nvinfer1::IPluginV2DynamicExt *clone() const override;
  nvinfer1::DimsExprs getOutputDimensions(
      int outputIndex, const nvinfer1::DimsExprs *inputs, int nbInputs,
      nvinfer1::IExprBuilder &exprBuilder) override;
  bool supportsFormatCombination(int pos,
                                 const nvinfer1::PluginTensorDesc *inOut,
                                 int nbInputs, int nbOutputs) override;
  void configurePlugin(const nvinfer1::DynamicPluginTensorDesc *in,
                       int nbInputs,
                       const nvinfer1::DynamicPluginTensorDesc *out,
                       int nbOutputs) override;
  size_t getWorkspaceSize(const nvinfer1::PluginTensorDesc *inputs,
                          int nbInputs,
                          const nvinfer1::PluginTensorDesc *outputs,
                          int nbOutputs) const override;
  int enqueue(const nvinfer1::PluginTensorDesc *inputDesc,
              const nvinfer1::PluginTensorDesc *outputDesc,
              const void *const *inputs, void *const *outputs, void *workspace,
              cudaStream_t stream) override;

  // IPluginV2Ext Methods
  nvinfer1::DataType getOutputDataType(int index,
                                       const nvinfer1::DataType *inputTypes,
                                       int nbInputs) const override;

  // IPluginV2 Methods
  const char *getPluginType() const override;
  const char *getPluginVersion() const override;
  int getNbOutputs() const override;
  int initialize() override;
  void terminate() override;
  size_t getSerializationSize() const override;
  void serialize(void *buffer) const override;
  void destroy() override;
  void setPluginNamespace(const char *pluginNamespace) override;
  const char *getPluginNamespace() const override;

 private:
  // Layer name; const, so it can only be set in a constructor.
  const std::string mLayerName;
  std::string mNamespace;

  // Sampling parameters; they mirror the first constructor's arguments.
  int mMode;
  int mPaddingMode;
  bool mAlignCorners;

 protected:
  // To prevent compiler warnings.
  // The using-declarations re-introduce the base-class overload sets that the
  // overrides declared above would otherwise hide.
  using nvinfer1::IPluginV2DynamicExt::canBroadcastInputAcrossBatch;
  using nvinfer1::IPluginV2DynamicExt::configurePlugin;
  using nvinfer1::IPluginV2DynamicExt::enqueue;
  using nvinfer1::IPluginV2DynamicExt::getOutputDimensions;
  using nvinfer1::IPluginV2DynamicExt::getWorkspaceSize;
  using nvinfer1::IPluginV2DynamicExt::isOutputBroadcastAcrossBatch;
  using nvinfer1::IPluginV2DynamicExt::supportsFormat;
};

// Plugin creator paired with GridSamplerDynamic. Every member function below
// overrides a virtual of nvinfer1::IPluginCreator; the definitions are not
// part of this header.
class GridSamplerDynamicCreator : public nvinfer1::IPluginCreator {
 public:
  GridSamplerDynamicCreator();

  // Identification strings reported through the IPluginCreator interface.
  const char *getPluginName() const override;

  const char *getPluginVersion() const override;

  // Returns a pointer to a field collection.
  // NOTE(review): presumably &mFC -- confirm in the implementation file.
  const nvinfer1::PluginFieldCollection *getFieldNames() override;

  // Creates a plugin from a layer name and a collection of plugin fields.
  nvinfer1::IPluginV2 *createPlugin(
      const char *name, const nvinfer1::PluginFieldCollection *fc) override;

  // Creates a plugin from `serialLength` bytes starting at `serialData`.
  nvinfer1::IPluginV2 *deserializePlugin(const char *name,
                                         const void *serialData,
                                         size_t serialLength) override;

  void setPluginNamespace(const char *pluginNamespace) override;

  const char *getPluginNamespace() const override;

 private:
  // Static members: one instance shared by every creator object of this type.
  static nvinfer1::PluginFieldCollection mFC;
  static std::vector<nvinfer1::PluginField> mPluginAttributes;
  // Per-creator namespace string.
  std::string mNamespace;
};
#endif  // TRT_GRID_SAMPLER_HPP


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/tensorrt/trt_instance_norm.hpp
================================================
// Modified from:
// https://github.com/NVIDIA/TensorRT/blob/master/plugin/instanceNormalizationPlugin/instanceNormalizationPlugin.h

#ifndef TRT_INSTANCE_NORMALIZATION_PLUGIN_H
#define TRT_INSTANCE_NORMALIZATION_PLUGIN_H
#include <cudnn.h>

#include <iostream>
#include <string>
#include <vector>

#include "trt_plugin_helper.hpp"

// Alias for `unsigned short`.
// NOTE(review): presumably used as raw storage for half-precision values --
// confirm against the implementation file.
using half_type = unsigned short;

// TensorRT dynamic-shape plugin declaration for instance normalization.
// The class is `final`; all member functions are defined outside this header.
class InstanceNormalizationDynamic final
    : public nvinfer1::IPluginV2DynamicExt {
 public:
  // Builds a plugin from a layer name and an epsilon value.
  InstanceNormalizationDynamic(const std::string& name, float epsilon);

  // Builds a plugin from `serialLength` bytes starting at `serialData`.
  // NOTE(review): presumably the inverse of serialize() -- confirm.
  InstanceNormalizationDynamic(const std::string& name, void const* serialData,
                               size_t serialLength);

  // A plugin cannot be created without a name and parameters.
  InstanceNormalizationDynamic() = delete;

  ~InstanceNormalizationDynamic() override;

  int getNbOutputs() const override;

  // DynamicExt plugins returns DimsExprs class instead of Dims
  nvinfer1::DimsExprs getOutputDimensions(
      int outputIndex, const nvinfer1::DimsExprs* inputs, int nbInputs,
      nvinfer1::IExprBuilder& exprBuilder) override;

  int initialize() override;

  void terminate() override;

  size_t getWorkspaceSize(const nvinfer1::PluginTensorDesc* inputs,
                          int nbInputs,
                          const nvinfer1::PluginTensorDesc* outputs,
                          int nbOutputs) const override;

  int enqueue(const nvinfer1::PluginTensorDesc* inputDesc,
              const nvinfer1::PluginTensorDesc* outputDesc,
              const void* const* inputs, void* const* outputs, void* workspace,
              cudaStream_t stream) override;

  size_t getSerializationSize() const override;

  void serialize(void* buffer) const override;

  // DynamicExt plugin supportsFormat update.
  bool supportsFormatCombination(int pos,
                                 const nvinfer1::PluginTensorDesc* inOut,
                                 int nbInputs, int nbOutputs) override;

  const char* getPluginType() const override;

  const char* getPluginVersion() const override;

  void destroy() override;

  nvinfer1::IPluginV2DynamicExt* clone() const override;

  void setPluginNamespace(const char* pluginNamespace) override;

  const char* getPluginNamespace() const override;

  nvinfer1::DataType getOutputDataType(int index,
                                       const nvinfer1::DataType* inputTypes,
                                       int nbInputs) const override;

  // Receives cuDNN / cuBLAS contexts and a GPU allocator.
  // NOTE(review): presumably this is where _cudnn_handle is obtained --
  // confirm in the implementation file.
  void attachToContext(cudnnContext* cudnn, cublasContext* cublas,
                       nvinfer1::IGpuAllocator* allocator) override;

  void detachFromContext() override;

  void configurePlugin(const nvinfer1::DynamicPluginTensorDesc* in,
                       int nbInputs,
                       const nvinfer1::DynamicPluginTensorDesc* out,
                       int nbOutputs) override;

 private:
  // Layer name; const, so it can only be set in a constructor.
  const std::string mLayerName;
  // The `{}` initializers value-initialize the members below (0 / null /
  // empty) unless a constructor overrides them.
  float mEpsilon{};
  cudnnHandle_t _cudnn_handle{};
  cudnnTensorDescriptor_t _x_desc{}, _y_desc{}, _b_desc{};
  std::string mPluginNamespace{};
};

// Plugin creator paired with InstanceNormalizationDynamic. Every member
// function below overrides a virtual of nvinfer1::IPluginCreator; the
// definitions are not part of this header.
class InstanceNormalizationDynamicCreator : public nvinfer1::IPluginCreator {
 public:
  InstanceNormalizationDynamicCreator();

  ~InstanceNormalizationDynamicCreator() override = default;

  // Identification strings reported through the IPluginCreator interface.
  const char* getPluginName() const override;

  const char* getPluginVersion() const override;

  // Returns a pointer to a field collection.
  // NOTE(review): presumably &mFC -- confirm in the implementation file.
  const nvinfer1::PluginFieldCollection* getFieldNames() override;

  // Unlike the other creators in this directory, the two factory methods
  // below declare the more derived return type IPluginV2DynamicExt*.
  nvinfer1::IPluginV2DynamicExt* createPlugin(
      const char* name, const nvinfer1::PluginFieldCollection* fc) override;

  nvinfer1::IPluginV2DynamicExt* deserializePlugin(
      const char* name, const void* serialData, size_t serialLength) override;

  void setPluginNamespace(const char* pluginNamespace) override;

  const char* getPluginNamespace() const override;

 private:
  // Static members: one instance shared by every creator object of this type.
  static nvinfer1::PluginFieldCollection mFC;
  static std::vector<nvinfer1::PluginField> mPluginAttributes;
  // Per-creator namespace string.
  std::string mNamespace;
};

#endif  // TRT_INSTANCE_NORMALIZATION_PLUGIN_H


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/tensorrt/trt_modulated_deform_conv.hpp
================================================
#ifndef TRT_MODULATED_DEFORM_CONV_HPP
#define TRT_MODULATED_DEFORM_CONV_HPP
#include <cublas_v2.h>

#include <memory>
#include <string>
#include <vector>

#include "trt_plugin_helper.hpp"

// TensorRT dynamic-shape plugin declaration for modulated deformable
// convolution. Only the interface is declared here; all member functions are
// defined elsewhere.
class ModulatedDeformableConvPluginDynamic
    : public nvinfer1::IPluginV2DynamicExt {
 public:
  // Builds a plugin from explicit convolution parameters.
  ModulatedDeformableConvPluginDynamic(const std::string &name,
                                       const nvinfer1::Dims stride,
                                       const nvinfer1::Dims padding,
                                       const nvinfer1::Dims dilation,
                                       const int deformableGroup,
                                       const int group);

  // Builds a plugin from a raw byte buffer of `length` bytes. Note that
  // `name` is taken by value here, unlike the constructor above.
  // NOTE(review): presumably the inverse of serialize() -- confirm.
  ModulatedDeformableConvPluginDynamic(const std::string name, const void *data,
                                       size_t length);

  // A plugin cannot be created without a name and parameters.
  ModulatedDeformableConvPluginDynamic() = delete;

  ~ModulatedDeformableConvPluginDynamic();

  // IPluginV2DynamicExt Methods
  nvinfer1::IPluginV2DynamicExt *clone() const override;
  nvinfer1::DimsExprs getOutputDimensions(
      int outputIndex, const nvinfer1::DimsExprs *inputs, int nbInputs,
      nvinfer1::IExprBuilder &exprBuilder) override;
  bool supportsFormatCombination(int pos,
                                 const nvinfer1::PluginTensorDesc *inOut,
                                 int nbInputs, int nbOutputs) override;
  void configurePlugin(const nvinfer1::DynamicPluginTensorDesc *in,
                       int nbInputs,
                       const nvinfer1::DynamicPluginTensorDesc *out,
                       int nbOutputs) override;
  size_t getWorkspaceSize(const nvinfer1::PluginTensorDesc *inputs,
                          int nbInputs,
                          const nvinfer1::PluginTensorDesc *outputs,
                          int nbOutputs) const override;
  int enqueue(const nvinfer1::PluginTensorDesc *inputDesc,
              const nvinfer1::PluginTensorDesc *outputDesc,
              const void *const *inputs, void *const *outputs, void *workspace,
              cudaStream_t stream) override;
  // Receives cuDNN / cuBLAS contexts and a GPU allocator.
  // NOTE(review): presumably this is where m_cublas_handle is obtained --
  // confirm in the implementation file.
  void attachToContext(cudnnContext *cudnnContext, cublasContext *cublasContext,
                       nvinfer1::IGpuAllocator *gpuAllocator) override;
  void detachFromContext() override;

  // IPluginV2Ext Methods
  nvinfer1::DataType getOutputDataType(int index,
                                       const nvinfer1::DataType *inputTypes,
                                       int nbInputs) const override;

  // IPluginV2 Methods
  const char *getPluginType() const override;
  const char *getPluginVersion() const override;
  int getNbOutputs() const override;
  int initialize() override;
  void terminate() override;
  size_t getSerializationSize() const override;
  void serialize(void *buffer) const override;
  void destroy() override;
  void setPluginNamespace(const char *pluginNamespace) override;
  const char *getPluginNamespace() const override;

 private:
  // Layer name; const, so it can only be set in a constructor.
  const std::string mLayerName;
  std::string mNamespace;

  // Convolution parameters; they mirror the first constructor's arguments.
  nvinfer1::Dims mStride;
  nvinfer1::Dims mPadding;
  nvinfer1::Dims mDilation;
  int mDeformableGroup;
  int mGroup;
  // Not a constructor parameter.
  // NOTE(review): presumably derived from the number of inputs at configure
  // time -- confirm in the implementation file.
  bool mWithBias;

  cublasHandle_t m_cublas_handle;

 protected:
  // To prevent compiler warnings.
  // The using-declarations re-introduce the base-class overload sets that the
  // overrides declared above would otherwise hide.
  using nvinfer1::IPluginV2DynamicExt::canBroadcastInputAcrossBatch;
  using nvinfer1::IPluginV2DynamicExt::configurePlugin;
  using nvinfer1::IPluginV2DynamicExt::enqueue;
  using nvinfer1::IPluginV2DynamicExt::getOutputDimensions;
  using nvinfer1::IPluginV2DynamicExt::getWorkspaceSize;
  using nvinfer1::IPluginV2DynamicExt::isOutputBroadcastAcrossBatch;
  using nvinfer1::IPluginV2DynamicExt::supportsFormat;
};

// Plugin creator paired with ModulatedDeformableConvPluginDynamic. Every
// member function below overrides a virtual of nvinfer1::IPluginCreator; the
// definitions are not part of this header.
class ModulatedDeformableConvPluginDynamicCreator
    : public nvinfer1::IPluginCreator {
 public:
  ModulatedDeformableConvPluginDynamicCreator();

  // Identification strings reported through the IPluginCreator interface.
  const char *getPluginName() const override;

  const char *getPluginVersion() const override;

  // Returns a pointer to a field collection.
  // NOTE(review): presumably &mFC -- confirm in the implementation file.
  const nvinfer1::PluginFieldCollection *getFieldNames() override;

  // Creates a plugin from a layer name and a collection of plugin fields.
  nvinfer1::IPluginV2 *createPlugin(
      const char *name, const nvinfer1::PluginFieldCollection *fc) override;

  // Creates a plugin from `serialLength` bytes starting at `serialData`.
  nvinfer1::IPluginV2 *deserializePlugin(const char *name,
                                         const void *serialData,
                                         size_t serialLength) override;

  void setPluginNamespace(const char *pluginNamespace) override;

  const char *getPluginNamespace() const override;

 private:
  // Static members: one instance shared by every creator object of this type.
  static nvinfer1::PluginFieldCollection mFC;
  static std::vector<nvinfer1::PluginField> mPluginAttributes;
  // Per-creator namespace string.
  std::string mNamespace;
};
#endif  // TRT_MODULATED_DEFORM_CONV_HPP


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/tensorrt/trt_nms.hpp
================================================
#ifndef TRT_NMS_HPP
#define TRT_NMS_HPP
#include <cublas_v2.h>

#include <memory>
#include <string>
#include <vector>

#include "trt_plugin_helper.hpp"

// TensorRT dynamic-shape plugin declaration for non-maximum suppression.
// Only the interface is declared here; all member functions are defined
// elsewhere.
class NonMaxSuppressionDynamic : public nvinfer1::IPluginV2DynamicExt {
 public:
  // Builds a plugin from explicit NMS parameters; each argument has a
  // same-named private member below.
  NonMaxSuppressionDynamic(const std::string &name, int centerPointBox,
                           int maxOutputBoxesPerClass, float iouThreshold,
                           float scoreThreshold, int offset);

  // Builds a plugin from a raw byte buffer of `length` bytes. Note that
  // `name` is taken by value here, unlike the constructor above.
  // NOTE(review): presumably the inverse of serialize() -- confirm.
  NonMaxSuppressionDynamic(const std::string name, const void *data,
                           size_t length);

  // A plugin cannot be created without a name and parameters.
  NonMaxSuppressionDynamic() = delete;

  // IPluginV2DynamicExt Methods
  nvinfer1::IPluginV2DynamicExt *clone() const override;
  nvinfer1::DimsExprs getOutputDimensions(
      int outputIndex, const nvinfer1::DimsExprs *inputs, int nbInputs,
      nvinfer1::IExprBuilder &exprBuilder) override;
  bool supportsFormatCombination(int pos,
                                 const nvinfer1::PluginTensorDesc *inOut,
                                 int nbInputs, int nbOutputs) override;
  void configurePlugin(const nvinfer1::DynamicPluginTensorDesc *in,
                       int nbInputs,
                       const nvinfer1::DynamicPluginTensorDesc *out,
                       int nbOutputs) override;
  size_t getWorkspaceSize(const nvinfer1::PluginTensorDesc *inputs,
                          int nbInputs,
                          const nvinfer1::PluginTensorDesc *outputs,
                          int nbOutputs) const override;
  int enqueue(const nvinfer1::PluginTensorDesc *inputDesc,
              const nvinfer1::PluginTensorDesc *outputDesc,
              const void *const *inputs, void *const *outputs, void *workspace,
              cudaStream_t stream) override;

  // IPluginV2Ext Methods
  nvinfer1::DataType getOutputDataType(int index,
                                       const nvinfer1::DataType *inputTypes,
                                       int nbInputs) const override;

  // IPluginV2 Methods
  const char *getPluginType() const override;
  const char *getPluginVersion() const override;
  int getNbOutputs() const override;
  int initialize() override;
  void terminate() override;
  size_t getSerializationSize() const override;
  void serialize(void *buffer) const override;
  void destroy() override;
  void setPluginNamespace(const char *pluginNamespace) override;
  const char *getPluginNamespace() const override;

 private:
  // Layer name; const, so it can only be set in a constructor.
  const std::string mLayerName;
  std::string mNamespace;

  // NMS parameters; they mirror the first constructor's arguments.
  // NOTE(review): the exact meaning of mCenterPointBox and mOffset is not
  // visible from this header -- check the implementation before relying on it.
  int mCenterPointBox;
  int mMaxOutputBoxesPerClass;
  float mIouThreshold;
  float mScoreThreshold;
  int mOffset;

 protected:
  // To prevent compiler warnings.
  // The using-declarations re-introduce the base-class overload sets that the
  // overrides declared above would otherwise hide.
  using nvinfer1::IPluginV2DynamicExt::canBroadcastInputAcrossBatch;
  using nvinfer1::IPluginV2DynamicExt::configurePlugin;
  using nvinfer1::IPluginV2DynamicExt::enqueue;
  using nvinfer1::IPluginV2DynamicExt::getOutputDimensions;
  using nvinfer1::IPluginV2DynamicExt::getWorkspaceSize;
  using nvinfer1::IPluginV2DynamicExt::isOutputBroadcastAcrossBatch;
  using nvinfer1::IPluginV2DynamicExt::supportsFormat;
};

// Plugin creator paired with NonMaxSuppressionDynamic. Every member function
// below overrides a virtual of nvinfer1::IPluginCreator; the definitions are
// not part of this header.
class NonMaxSuppressionDynamicCreator : public nvinfer1::IPluginCreator {
 public:
  NonMaxSuppressionDynamicCreator();

  // Identification strings reported through the IPluginCreator interface.
  const char *getPluginName() const override;

  const char *getPluginVersion() const override;

  // Returns a pointer to a field collection.
  // NOTE(review): presumably &mFC -- confirm in the implementation file.
  const nvinfer1::PluginFieldCollection *getFieldNames() override;

  // Creates a plugin from a layer name and a collection of plugin fields.
  nvinfer1::IPluginV2 *createPlugin(
      const char *name, const nvinfer1::PluginFieldCollection *fc) override;

  // Creates a plugin from `serialLength` bytes starting at `serialData`.
  nvinfer1::IPluginV2 *deserializePlugin(const char *name,
                                         const void *serialData,
                                         size_t serialLength) override;

  void setPluginNamespace(const char *pluginNamespace) override;

  const char *getPluginNamespace() const override;

 private:
  // Static members: one instance shared by every creator object of this type.
  static nvinfer1::PluginFieldCollection mFC;
  static std::vector<nvinfer1::PluginField> mPluginAttributes;
  // Per-creator namespace string.
  std::string mNamespace;
};
#endif  // TRT_NMS_HPP


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/tensorrt/trt_plugin.hpp
================================================
#ifndef TRT_PLUGIN_HPP
#define TRT_PLUGIN_HPP

// Declared with C linkage so the exported symbol name is not C++-mangled.
// NOTE(review): presumably registers the MMCV TensorRT plugins and reports
// success through the bool result -- confirm in the implementation file.
extern "C" bool initLibMMCVInferPlugins();
#endif  // TRT_PLUGIN_HPP


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/tensorrt/trt_plugin_helper.hpp
================================================
#ifndef TRT_PLUGIN_HELPER_HPP
#define TRT_PLUGIN_HELPER_HPP
#include <stdexcept>

#include "NvInferPlugin.h"

namespace mmcv {

// Capacity of the fixed-size shape/stride arrays in TensorDesc.
constexpr int MAXTENSORDIMS = 10;

// Plain aggregate holding a tensor's shape and strides in fixed-size arrays
// of MAXTENSORDIMS entries each.
// NOTE(review): `dim` is presumably the number of valid entries in `shape`
// and `stride`, and stride units (elements vs. bytes) are not visible here --
// confirm against the code that fills this struct.
struct TensorDesc {
  int shape[MAXTENSORDIMS];
  int stride[MAXTENSORDIMS];
  int dim;
};

// Returns the size in bytes of one element of TensorRT data type `t`.
//
// Handled types: kINT32 and kFLOAT (4 bytes), kHALF (2 bytes), kINT8 (1 byte).
// Any other value raises std::runtime_error("Invalid DataType.").
//
// The function ends with a single throw instead of a throwing `default:`
// followed by unreachable `throw` / `return 0` statements; behaviour is
// unchanged, but every statement is now reachable and the function still
// visibly never falls off its end.
inline unsigned int getElementSize(nvinfer1::DataType t) {
  switch (t) {
    case nvinfer1::DataType::kINT32:
    case nvinfer1::DataType::kFLOAT:
      return 4;
    case nvinfer1::DataType::kHALF:
      return 2;
    // case nvinfer1::DataType::kBOOL:
    case nvinfer1::DataType::kINT8:
      return 1;
    default:
      break;
  }
  throw std::runtime_error("Invalid DataType.");
}

// Rounds `origin_size` up to the nearest multiple of `aligned_number`
// (16 by default) and returns the result.
//
// Throws std::invalid_argument when `aligned_number` is 0: the rounding
// divides by the alignment, so a zero alignment used to be a division by
// zero (undefined behaviour).
inline size_t getAlignedSize(size_t origin_size, size_t aligned_number = 16) {
  if (aligned_number == 0) {
    throw std::invalid_argument("aligned_number must be non-zero.");
  }
  // Number of whole alignment units needed to hold origin_size bytes.
  const size_t units = (origin_size + aligned_number - 1) / aligned_number;
  return units * aligned_number;
}

}  // namespace mmcv
#endif  // TRT_PLUGIN_HELPER_HPP


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/tensorrt/trt_roi_align.hpp
================================================
#ifndef TRT_ROI_ALIGN_HPP
#define TRT_ROI_ALIGN_HPP
#include <cublas_v2.h>

#include <memory>
#include <string>
#include <vector>

#include "trt_plugin_helper.hpp"

// TensorRT dynamic-shape plugin declaration for RoIAlign. Only the interface
// is declared here; all member functions are defined elsewhere.
class RoIAlignPluginDynamic : public nvinfer1::IPluginV2DynamicExt {
 public:
  // Builds a plugin from explicit RoIAlign parameters; each argument has a
  // same-named private member below. Note the argument order: width comes
  // before height.
  RoIAlignPluginDynamic(const std::string &name, int outWidth, int outHeight,
                        float spatialScale, int sampleRatio, int poolMode,
                        bool aligned);

  // Builds a plugin from a raw byte buffer of `length` bytes. Note that
  // `name` is taken by value here, unlike the constructor above.
  // NOTE(review): presumably the inverse of serialize() -- confirm.
  RoIAlignPluginDynamic(const std::string name, const void *data,
                        size_t length);

  // A plugin cannot be created without a name and parameters.
  RoIAlignPluginDynamic() = delete;

  // IPluginV2DynamicExt Methods
  nvinfer1::IPluginV2DynamicExt *clone() const override;
  nvinfer1::DimsExprs getOutputDimensions(
      int outputIndex, const nvinfer1::DimsExprs *inputs, int nbInputs,
      nvinfer1::IExprBuilder &exprBuilder) override;
  bool supportsFormatCombination(int pos,
                                 const nvinfer1::PluginTensorDesc *inOut,
                                 int nbInputs, int nbOutputs) override;
  void configurePlugin(const nvinfer1::DynamicPluginTensorDesc *in,
                       int nbInputs,
                       const nvinfer1::DynamicPluginTensorDesc *out,
                       int nbOutputs) override;
  size_t getWorkspaceSize(const nvinfer1::PluginTensorDesc *inputs,
                          int nbInputs,
                          const nvinfer1::PluginTensorDesc *outputs,
                          int nbOutputs) const override;
  int enqueue(const nvinfer1::PluginTensorDesc *inputDesc,
              const nvinfer1::PluginTensorDesc *outputDesc,
              const void *const *inputs, void *const *outputs, void *workspace,
              cudaStream_t stream) override;

  // IPluginV2Ext Methods
  nvinfer1::DataType getOutputDataType(int index,
                                       const nvinfer1::DataType *inputTypes,
                                       int nbInputs) const override;

  // IPluginV2 Methods
  const char *getPluginType() const override;
  const char *getPluginVersion() const override;
  int getNbOutputs() const override;
  int initialize() override;
  void terminate() override;
  size_t getSerializationSize() const override;
  void serialize(void *buffer) const override;
  void destroy() override;
  void setPluginNamespace(const char *pluginNamespace) override;
  const char *getPluginNamespace() const override;

 private:
  // Layer name; const, so it can only be set in a constructor.
  const std::string mLayerName;
  std::string mNamespace;

  // RoIAlign parameters; they mirror the first constructor's arguments.
  int mOutWidth;
  int mOutHeight;
  float mSpatialScale;
  int mSampleRatio;
  int mPoolMode;  // 1:avg 0:max
  bool mAligned;

 protected:
  // To prevent compiler warnings.
  // The using-declarations re-introduce the base-class overload sets that the
  // overrides declared above would otherwise hide.
  using nvinfer1::IPluginV2DynamicExt::canBroadcastInputAcrossBatch;
  using nvinfer1::IPluginV2DynamicExt::configurePlugin;
  using nvinfer1::IPluginV2DynamicExt::enqueue;
  using nvinfer1::IPluginV2DynamicExt::getOutputDimensions;
  using nvinfer1::IPluginV2DynamicExt::getWorkspaceSize;
  using nvinfer1::IPluginV2DynamicExt::isOutputBroadcastAcrossBatch;
  using nvinfer1::IPluginV2DynamicExt::supportsFormat;
};

// Plugin creator paired with RoIAlignPluginDynamic. Every member function
// below overrides a virtual of nvinfer1::IPluginCreator; the definitions are
// not part of this header.
class RoIAlignPluginDynamicCreator : public nvinfer1::IPluginCreator {
 public:
  RoIAlignPluginDynamicCreator();

  // Identification strings reported through the IPluginCreator interface.
  const char *getPluginName() const override;

  const char *getPluginVersion() const override;

  // Returns a pointer to a field collection.
  // NOTE(review): presumably &mFC -- confirm in the implementation file.
  const nvinfer1::PluginFieldCollection *getFieldNames() override;

  // Creates a plugin from a layer name and a collection of plugin fields.
  nvinfer1::IPluginV2 *createPlugin(
      const char *name, const nvinfer1::PluginFieldCollection *fc) override;

  // Creates a plugin from `serialLength` bytes starting at `serialData`.
  nvinfer1::IPluginV2 *deserializePlugin(const char *name,
                                         const void *serialData,
                                         size_t serialLength) override;

  void setPluginNamespace(const char *pluginNamespace) override;

  const char *getPluginNamespace() const override;

 private:
  // Static members: one instance shared by every creator object of this type.
  static nvinfer1::PluginFieldCollection mFC;
  static std::vector<nvinfer1::PluginField> mPluginAttributes;
  // Per-creator namespace string.
  std::string mNamespace;
};
#endif  // TRT_ROI_ALIGN_HPP


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/tensorrt/trt_scatternd.hpp
================================================
#ifndef TRT_SCATTERND_HPP
#define TRT_SCATTERND_HPP
#include <cublas_v2.h>

#include <memory>
#include <string>
#include <vector>

#include "trt_plugin_helper.hpp"

// TensorRT dynamic-shape plugin declaration for the ONNX ScatterND operator.
// The plugin carries no parameters besides its name and namespace. Only the
// interface is declared here; all member functions are defined elsewhere.
class ONNXScatterNDDynamic : public nvinfer1::IPluginV2DynamicExt {
 public:
  // Builds a plugin from a layer name.
  // NOTE(review): this single-argument constructor is not `explicit`, so a
  // std::string converts implicitly to a plugin object -- confirm that this
  // is intended.
  ONNXScatterNDDynamic(const std::string &name);

  // Builds a plugin from a raw byte buffer of `length` bytes. Note that
  // `name` is taken by value here, unlike the constructor above.
  ONNXScatterNDDynamic(const std::string name, const void *data, size_t length);

  // A plugin cannot be created without a name.
  ONNXScatterNDDynamic() = delete;

  // IPluginV2DynamicExt Methods
  nvinfer1::IPluginV2DynamicExt *clone() const override;
  nvinfer1::DimsExprs getOutputDimensions(
      int outputIndex, const nvinfer1::DimsExprs *inputs, int nbInputs,
      nvinfer1::IExprBuilder &exprBuilder) override;
  bool supportsFormatCombination(int pos,
                                 const nvinfer1::PluginTensorDesc *inOut,
                                 int nbInputs, int nbOutputs) override;
  void configurePlugin(const nvinfer1::DynamicPluginTensorDesc *in,
                       int nbInputs,
                       const nvinfer1::DynamicPluginTensorDesc *out,
                       int nbOutputs) override;
  size_t getWorkspaceSize(const nvinfer1::PluginTensorDesc *inputs,
                          int nbInputs,
                          const nvinfer1::PluginTensorDesc *outputs,
                          int nbOutputs) const override;
  int enqueue(const nvinfer1::PluginTensorDesc *inputDesc,
              const nvinfer1::PluginTensorDesc *outputDesc,
              const void *const *inputs, void *const *outputs, void *workspace,
              cudaStream_t stream) override;

  // IPluginV2Ext Methods
  nvinfer1::DataType getOutputDataType(int index,
                                       const nvinfer1::DataType *inputTypes,
                                       int nbInputs) const override;

  // IPluginV2 Methods
  const char *getPluginType() const override;
  const char *getPluginVersion() const override;
  int getNbOutputs() const override;
  int initialize() override;
  void terminate() override;
  size_t getSerializationSize() const override;
  void serialize(void *buffer) const override;
  void destroy() override;
  void setPluginNamespace(const char *pluginNamespace) override;
  const char *getPluginNamespace() const override;

 private:
  // Layer name; const, so it can only be set in a constructor.
  const std::string mLayerName;
  std::string mNamespace;

 protected:
  // To prevent compiler warnings.
  // The using-declarations re-introduce the base-class overload sets that the
  // overrides declared above would otherwise hide.
  using nvinfer1::IPluginV2DynamicExt::canBroadcastInputAcrossBatch;
  using nvinfer1::IPluginV2DynamicExt::configurePlugin;
  using nvinfer1::IPluginV2DynamicExt::enqueue;
  using nvinfer1::IPluginV2DynamicExt::getOutputDimensions;
  using nvinfer1::IPluginV2DynamicExt::getWorkspaceSize;
  using nvinfer1::IPluginV2DynamicExt::isOutputBroadcastAcrossBatch;
  using nvinfer1::IPluginV2DynamicExt::supportsFormat;
};

// Plugin creator paired with ONNXScatterNDDynamic. Every member function
// below overrides a virtual of nvinfer1::IPluginCreator; the definitions are
// not part of this header.
class ONNXScatterNDDynamicCreator : public nvinfer1::IPluginCreator {
 public:
  ONNXScatterNDDynamicCreator();

  // Identification strings reported through the IPluginCreator interface.
  const char *getPluginName() const override;

  const char *getPluginVersion() const override;

  // Returns a pointer to a field collection.
  // NOTE(review): presumably &mFC -- confirm in the implementation file.
  const nvinfer1::PluginFieldCollection *getFieldNames() override;

  // Creates a plugin from a layer name and a collection of plugin fields.
  nvinfer1::IPluginV2 *createPlugin(
      const char *name, const nvinfer1::PluginFieldCollection *fc) override;

  // Creates a plugin from `serialLength` bytes starting at `serialData`.
  nvinfer1::IPluginV2 *deserializePlugin(const char *name,
                                         const void *serialData,
                                         size_t serialLength) override;

  void setPluginNamespace(const char *pluginNamespace) override;

  const char *getPluginNamespace() const override;

 private:
  // Static members: one instance shared by every creator object of this type.
  static nvinfer1::PluginFieldCollection mFC;
  static std::vector<nvinfer1::PluginField> mPluginAttributes;
  // Per-creator namespace string.
  std::string mNamespace;
};
#endif  // TRT_SCATTERND_HPP


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/csrc/tensorrt/trt_serialize.hpp
================================================
// Modified from:
// https://github.com/NVIDIA/TensorRT/blob/master/plugin/common/serialize.hpp

#ifndef TRT_SERIALIZE_HPP
#define TRT_SERIALIZE_HPP
#include <cassert>
#include <cstring>
#include <iostream>
#include <type_traits>
#include <vector>
using std::cerr;
using std::cout;
using std::endl;

// Generic (de)serialization entry points. They are declared before the
// Serializer specializations because the std::vector specialization calls
// them to store and load its element count.
template <typename T>
inline void serialize_value(void** buffer, T const& value);

template <typename T>
inline void deserialize_value(void const** buffer, size_t* buffer_size,
                              T* value);

namespace {

// Primary template: intentionally empty, so instantiating an entry point for
// an unsupported type fails to compile.
template <typename T, class Enable = void>
struct Serializer {};

// Arithmetic, enum and POD types are stored as their raw object bytes.
template <typename T>
struct Serializer<T, typename std::enable_if<std::is_arithmetic<T>::value ||
                                             std::is_enum<T>::value ||
                                             std::is_pod<T>::value>::type> {
  // Number of bytes serialize() writes; independent of the value.
  static size_t serialized_size(T const&) { return sizeof(T); }

  // Copies `value` to *buffer and advances *buffer past it.
  static void serialize(void** buffer, T const& value) {
    ::memcpy(*buffer, &value, sizeof(T));
    // Advance through a typed copy of the cursor instead of type-punning the
    // void* object as a char*.
    *buffer = static_cast<char*>(*buffer) + sizeof(T);
  }

  // Reads one T from *buffer into *value, advances *buffer and decreases
  // *buffer_size by sizeof(T). Asserts that enough bytes remain.
  static void deserialize(void const** buffer, size_t* buffer_size, T* value) {
    assert(*buffer_size >= sizeof(T));
    ::memcpy(value, *buffer, sizeof(T));
    *buffer = static_cast<char const*>(*buffer) + sizeof(T);
    *buffer_size -= sizeof(T);
  }
};

// C strings are stored as their characters followed by the terminating NUL.
template <>
struct Serializer<const char*> {
  static size_t serialized_size(const char* value) { return strlen(value) + 1; }

  // Copies the string (including the NUL) to *buffer and advances *buffer.
  static void serialize(void** buffer, const char* value) {
    const size_t nbyte = strlen(value) + 1;
    ::memcpy(*buffer, value, nbyte);
    *buffer = static_cast<char*>(*buffer) + nbyte;
  }

  // Does not copy: *value is set to point INTO the input buffer, so it is
  // only valid for as long as that buffer is. Asserts that a terminating NUL
  // is found within the remaining *buffer_size bytes.
  static void deserialize(void const** buffer, size_t* buffer_size,
                          const char** value) {
    *value = static_cast<char const*>(*buffer);
    size_t data_size = strnlen(*value, *buffer_size) + 1;
    assert(*buffer_size >= data_size);
    *buffer = static_cast<char const*>(*buffer) + data_size;
    *buffer_size -= data_size;
  }
};

// Vectors of arithmetic / enum / POD elements are stored as the element
// count (a size_t) followed by the raw element bytes.
template <typename T>
struct Serializer<std::vector<T>,
                  typename std::enable_if<std::is_arithmetic<T>::value ||
                                          std::is_enum<T>::value ||
                                          std::is_pod<T>::value>::type> {
  static size_t serialized_size(std::vector<T> const& value) {
    return sizeof(value.size()) + value.size() * sizeof(T);
  }

  static void serialize(void** buffer, std::vector<T> const& value) {
    serialize_value(buffer, value.size());
    const size_t nbyte = value.size() * sizeof(T);
    // data() may be null for an empty vector, and passing a null pointer to
    // memcpy is undefined even for a zero-length copy.
    if (nbyte != 0) {
      ::memcpy(*buffer, value.data(), nbyte);
    }
    *buffer = static_cast<char*>(*buffer) + nbyte;
  }

  static void deserialize(void const** buffer, size_t* buffer_size,
                          std::vector<T>* value) {
    size_t size = 0;
    deserialize_value(buffer, buffer_size, &size);
    // Validate the element count against the bytes that are left BEFORE
    // resizing, so a corrupt count cannot trigger a huge allocation. The
    // division form also avoids overflow in size * sizeof(T).
    assert(size <= *buffer_size / sizeof(T));
    value->resize(size);
    const size_t nbyte = size * sizeof(T);
    if (nbyte != 0) {
      ::memcpy(value->data(), *buffer, nbyte);
    }
    *buffer = static_cast<char const*>(*buffer) + nbyte;
    *buffer_size -= nbyte;
  }
};

}  // namespace

// Returns the number of bytes serialize_value() writes for `value`.
template <typename T>
inline size_t serialized_size(T const& value) {
  return Serializer<T>::serialized_size(value);
}

// Writes `value` at *buffer and advances *buffer past the written bytes.
// The caller must provide at least serialized_size(value) writable bytes.
template <typename T>
inline void serialize_value(void** buffer, T const& value) {
  return Serializer<T>::serialize(buffer, value);
}

// Reads a T from *buffer into *value, advancing *buffer and decreasing
// *buffer_size by the number of bytes consumed. Bounds are checked with
// assert only, so they are not enforced when NDEBUG is defined.
template <typename T>
inline void deserialize_value(void const** buffer, size_t* buffer_size,
                              T* value) {
  return Serializer<T>::deserialize(buffer, buffer_size, value);
}
#endif  // TRT_SERIALIZE_HPP


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/deform_conv.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Tuple, Union

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import Tensor
from torch.autograd import Function
from torch.autograd.function import once_differentiable
from torch.nn.modules.utils import _pair, _single

from mmcv.utils import deprecated_api_warning
from ..cnn import CONV_LAYERS
from ..utils import ext_loader, print_log

# Load the `_ext` extension through ext_loader at import time, naming the
# three deformable-convolution entry points this module relies on.
# NOTE(review): presumably load_ext checks that each listed symbol exists in
# the extension -- confirm in ext_loader.
ext_module = ext_loader.load_ext('_ext', [
    'deform_conv_forward', 'deform_conv_backward_input',
    'deform_conv_backward_parameters'
])


class DeformConv2dFunction(Function):
    """Autograd function that dispatches deformable convolution to the
    ``deform_conv_*`` functions of the ``_ext`` extension.

    ``forward`` records the hyper-parameters on ``ctx`` and fills a freshly
    allocated output through ``ext_module.deform_conv_forward``; ``backward``
    produces gradients for ``input``, ``offset`` and ``weight`` only.
    """

    @staticmethod
    def symbolic(g,
                 input,
                 offset,
                 weight,
                 stride,
                 padding,
                 dilation,
                 groups,
                 deform_groups,
                 bias=False,
                 im2col_step=32):
        """Emit the ``mmcv::MMCVDeformConv2d`` graph node.

        The tensors are passed as node inputs, every other argument as an
        attribute (``_i`` suffix).
        """
        return g.op(
            'mmcv::MMCVDeformConv2d',
            input,
            offset,
            weight,
            stride_i=stride,
            padding_i=padding,
            dilation_i=dilation,
            groups_i=groups,
            deform_groups_i=deform_groups,
            bias_i=bias,
            im2col_step_i=im2col_step)

    @staticmethod
    def forward(ctx,
                input,
                offset,
                weight,
                stride=1,
                padding=0,
                dilation=1,
                groups=1,
                deform_groups=1,
                bias=False,
                im2col_step=32):
        """Run the deformable convolution.

        Args:
            ctx: Autograd context; receives the hyper-parameters, the saved
                tensors and the scratch buffers reused by ``backward``.
            input (Tensor): 4D input tensor.
            offset (Tensor): Sampling offsets; its dtype decides the dtype
                ``input`` and ``weight`` are cast to.
            weight (Tensor): Convolution weight; dims 2 and 3 are used as the
                kernel height and width.
            stride, padding, dilation (int | tuple): Expanded with ``_pair``.
            groups (int): Forwarded to the extension as ``group``.
            deform_groups (int): Forwarded as ``deformable_group``.
            bias (bool): Must be ``False``.
            im2col_step (int): Upper bound of the step passed to the
                extension; the batch size must be divisible by the step that
                is actually used.

        Returns:
            Tensor: Output tensor of the shape given by ``_output_size``.

        Raises:
            ValueError: If ``input`` is not 4D or the output would be empty.
        """
        if input is not None and input.dim() != 4:
            # Adjacent literals instead of a backslash continuation inside the
            # string, which would embed the source indentation in the message.
            raise ValueError(
                f'Expected 4D tensor as input, got {input.dim()}D tensor '
                'instead.')
        assert bias is False, 'Only support bias is False.'
        ctx.stride = _pair(stride)
        ctx.padding = _pair(padding)
        ctx.dilation = _pair(dilation)
        ctx.groups = groups
        ctx.deform_groups = deform_groups
        ctx.im2col_step = im2col_step

        # When pytorch version >= 1.6.0, amp is adopted for fp16 mode;
        # amp won't cast the type of model (float32), but "offset" is cast
        # to float16 by nn.Conv2d automatically, leading to the type
        # mismatch with input (when it is float32) or weight.
        # The flag for whether to use fp16 or amp is the type of "offset",
        # we cast weight and input to temporarily support fp16 and amp
        # whatever the pytorch version is.
        input = input.type_as(offset)
        weight = weight.type_as(input)
        ctx.save_for_backward(input, offset, weight)

        output = input.new_empty(
            DeformConv2dFunction._output_size(ctx, input, weight))

        ctx.bufs_ = [input.new_empty(0), input.new_empty(0)]  # columns, ones

        # Never use a step larger than the batch itself.
        cur_im2col_step = min(ctx.im2col_step, input.size(0))
        assert (input.size(0) % cur_im2col_step
                ) == 0, 'batch size must be divisible by im2col_step'
        ext_module.deform_conv_forward(
            input,
            weight,
            offset,
            output,
            ctx.bufs_[0],
            ctx.bufs_[1],
            kW=weight.size(3),
            kH=weight.size(2),
            dW=ctx.stride[1],
            dH=ctx.stride[0],
            padW=ctx.padding[1],
            padH=ctx.padding[0],
            dilationW=ctx.dilation[1],
            dilationH=ctx.dilation[0],
            group=ctx.groups,
            deformable_group=ctx.deform_groups,
            im2col_step=cur_im2col_step)
        return output

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        """Compute gradients for ``input``, ``offset`` and ``weight``.

        Returns a 10-tuple matching the arguments of ``forward``; the seven
        non-tensor arguments always receive ``None``.
        """
        input, offset, weight = ctx.saved_tensors

        grad_input = grad_offset = grad_weight = None

        cur_im2col_step = min(ctx.im2col_step, input.size(0))
        assert (input.size(0) % cur_im2col_step
                ) == 0, 'batch size must be divisible by im2col_step'

        grad_output = grad_output.contiguous()
        # Input and offset gradients come from a single extension call, so
        # both are computed whenever either one is requested.
        if ctx.needs_input_grad[0] or ctx.needs_input_grad[1]:
            grad_input = torch.zeros_like(input)
            grad_offset = torch.zeros_like(offset)
            ext_module.deform_conv_backward_input(
                input,
                offset,
                grad_output,
                grad_input,
                grad_offset,
                weight,
                ctx.bufs_[0],
                kW=weight.size(3),
                kH=weight.size(2),
                dW=ctx.stride[1],
                dH=ctx.stride[0],
                padW=ctx.padding[1],
                padH=ctx.padding[0],
                dilationW=ctx.dilation[1],
                dilationH=ctx.dilation[0],
                group=ctx.groups,
                deformable_group=ctx.deform_groups,
                im2col_step=cur_im2col_step)

        if ctx.needs_input_grad[2]:
            grad_weight = torch.zeros_like(weight)
            ext_module.deform_conv_backward_parameters(
                input,
                offset,
                grad_output,
                grad_weight,
                ctx.bufs_[0],
                ctx.bufs_[1],
                kW=weight.size(3),
                kH=weight.size(2),
                dW=ctx.stride[1],
                dH=ctx.stride[0],
                padW=ctx.padding[1],
                padH=ctx.padding[0],
                dilationW=ctx.dilation[1],
                dilationH=ctx.dilation[0],
                group=ctx.groups,
                deformable_group=ctx.deform_groups,
                scale=1,
                im2col_step=cur_im2col_step)

        return grad_input, grad_offset, grad_weight, \
            None, None, None, None, None, None, None

    @staticmethod
    def _output_size(ctx, input, weight):
        """Return the output shape ``(N, C_out, *spatial)``.

        Each spatial size is
        ``(in + 2 * pad - (dilation * (k - 1) + 1)) // stride + 1``.

        Raises:
            ValueError: If any entry of the resulting shape is not positive.
        """
        channels = weight.size(0)
        output_size = (input.size(0), channels)
        for d in range(input.dim() - 2):
            in_size = input.size(d + 2)
            pad = ctx.padding[d]
            # Effective kernel extent once dilation is taken into account.
            kernel = ctx.dilation[d] * (weight.size(d + 2) - 1) + 1
            stride_ = ctx.stride[d]
            output_size += ((in_size + (2 * pad) - kernel) // stride_ + 1, )
        if not all(map(lambda s: s > 0, output_size)):
            raise ValueError(
                'convolution input is too small (output would be ' +
                'x'.join(map(str, output_size)) + ')')
        return output_size


# Functional entry point: alias of ``DeformConv2dFunction.apply``, used by the
# module classes in this file.
deform_conv2d = DeformConv2dFunction.apply


class DeformConv2d(nn.Module):
    r"""Deformable 2D convolution.

    Applies a deformable 2D convolution over an input signal composed of
    several input planes. DeformConv2d was described in the paper
    `Deformable Convolutional Networks
    <https://arxiv.org/pdf/1703.06211.pdf>`_

    Note:
        The argument ``im2col_step`` was added in version 1.3.17, which means
        number of samples processed by the ``im2col_cuda_kernel`` per call.
        It enables users to define ``batch_size`` and ``im2col_step`` more
        flexibly and solved `issue mmcv#1440
        <https://github.com/open-mmlab/mmcv/issues/1440>`_.

    Args:
        in_channels (int): Number of channels in the input image.
        out_channels (int): Number of channels produced by the convolution.
        kernel_size (int, tuple): Size of the convolving kernel.
        stride (int, tuple): Stride of the convolution. Default: 1.
        padding (int or tuple): Zero-padding added to both sides of the input.
            Default: 0.
        dilation (int or tuple): Spacing between kernel elements. Default: 1.
        groups (int): Number of blocked connections from input
            channels to output channels. Default: 1.
        deform_groups (int): Number of deformable group partitions.
        bias (bool): Must be False; a learnable bias is not supported.
            Default: False.
        im2col_step (int): Number of samples processed by im2col_cuda_kernel
            per call. It will work when ``batch_size`` > ``im2col_step``, but
            ``batch_size`` must be divisible by ``im2col_step``. Default: 32.
            `New in version 1.3.17.`
    """

    @deprecated_api_warning({'deformable_groups': 'deform_groups'},
                            cls_name='DeformConv2d')
    def __init__(self,
                 in_channels: int,
                 out_channels: int,
                 kernel_size: Union[int, Tuple[int, ...]],
                 stride: Union[int, Tuple[int, ...]] = 1,
                 padding: Union[int, Tuple[int, ...]] = 0,
                 dilation: Union[int, Tuple[int, ...]] = 1,
                 groups: int = 1,
                 deform_groups: int = 1,
                 bias: bool = False,
                 im2col_step: int = 32) -> None:
        super(DeformConv2d, self).__init__()

        assert not bias, \
            f'bias={bias} is not supported in DeformConv2d.'
        assert in_channels % groups == 0, \
            f'in_channels {in_channels} cannot be divisible by groups {groups}'
        # Adjacent literals instead of a backslash continuation inside the
        # string, which would embed the source indentation in the message.
        assert out_channels % groups == 0, \
            f'out_channels {out_channels} cannot be divisible by groups ' \
            f'{groups}'

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = _pair(kernel_size)
        self.stride = _pair(stride)
        self.padding = _pair(padding)
        self.dilation = _pair(dilation)
        self.groups = groups
        self.deform_groups = deform_groups
        self.im2col_step = im2col_step
        # enable compatibility with nn.Conv2d
        self.transposed = False
        self.output_padding = _single(0)

        # only weight, no bias
        self.weight = nn.Parameter(
            torch.Tensor(out_channels, in_channels // self.groups,
                         *self.kernel_size))

        self.reset_parameters()

    def reset_parameters(self):
        """(Re-)initialize ``self.weight`` in place."""
        # switch the initialization of `self.weight` to the standard kaiming
        # method described in `Delving deep into rectifiers: Surpassing
        # human-level performance on ImageNet classification` - He, K. et al.
        # (2015), using a uniform distribution
        nn.init.kaiming_uniform_(self.weight, nonlinearity='relu')

    def forward(self, x: Tensor, offset: Tensor) -> Tensor:
        """Deformable Convolutional forward function.

        Args:
            x (Tensor): Input feature, shape (B, C_in, H_in, W_in)
            offset (Tensor): Offset for deformable convolution, shape
                (B, deform_groups*kernel_size[0]*kernel_size[1]*2,
                H_out, W_out), H_out, W_out are equal to the output's.

                An offset is like `[y0, x0, y1, x1, y2, x2, ..., y8, x8]`.
                The spatial arrangement is like:

                .. code:: text

                    (x0, y0) (x1, y1) (x2, y2)
                    (x3, y3) (x4, y4) (x5, y5)
                    (x6, y6) (x7, y7) (x8, y8)

        Returns:
            Tensor: Output of the layer.
        """
        # To fix an assert error in deform_conv_cuda.cpp:128
        # input image is smaller than kernel
        input_pad = (x.size(2) < self.kernel_size[0]) or (x.size(3) <
                                                          self.kernel_size[1])
        if input_pad:
            # Zero-pad bottom/right so the spatial size reaches the kernel
            # size; the offset tensor is padded by the same amounts.
            pad_h = max(self.kernel_size[0] - x.size(2), 0)
            pad_w = max(self.kernel_size[1] - x.size(3), 0)
            x = F.pad(x, (0, pad_w, 0, pad_h), 'constant', 0).contiguous()
            offset = F.pad(offset, (0, pad_w, 0, pad_h), 'constant', 0)
            offset = offset.contiguous()
        out = deform_conv2d(x, offset, self.weight, self.stride, self.padding,
                            self.dilation, self.groups, self.deform_groups,
                            False, self.im2col_step)
        if input_pad:
            # Drop the rows/columns that only exist because of the padding.
            out = out[:, :, :out.size(2) - pad_h, :out.size(3) -
                      pad_w].contiguous()
        return out

    def __repr__(self):
        """Multi-line summary of the layer configuration."""
        s = self.__class__.__name__
        s += f'(in_channels={self.in_channels},\n'
        s += f'out_channels={self.out_channels},\n'
        s += f'kernel_size={self.kernel_size},\n'
        s += f'stride={self.stride},\n'
        s += f'padding={self.padding},\n'
        s += f'dilation={self.dilation},\n'
        s += f'groups={self.groups},\n'
        s += f'deform_groups={self.deform_groups},\n'
        # bias is not supported in DeformConv2d.
        s += 'bias=False)'
        return s


@CONV_LAYERS.register_module('DCN')
class DeformConv2dPack(DeformConv2d):
    """Deformable convolution that predicts its own offsets.

    A regular ``nn.Conv2d`` (``conv_offset``) computes the offset tensor from
    the input, so the layer is called with a single tensor like a normal
    convolution.

    The offset tensor is like `[y0, x0, y1, x1, y2, x2, ..., y8, x8]`.
    The spatial arrangement is like:

    .. code:: text

        (x0, y0) (x1, y1) (x2, y2)
        (x3, y3) (x4, y4) (x5, y5)
        (x6, y6) (x7, y7) (x8, y8)

    Args:
        *args, **kwargs: Forwarded unchanged to ``DeformConv2d.__init__``
            (``in_channels``, ``out_channels``, ``kernel_size``, ``stride``,
            ``padding``, ``dilation``, ``groups``, ``deform_groups``,
            ``bias``, ``im2col_step``).
    """

    _version = 2

    def __init__(self, *args, **kwargs):
        super(DeformConv2dPack, self).__init__(*args, **kwargs)
        # Two values per kernel tap and per deformable group.
        offset_channels = (
            self.deform_groups * 2 * self.kernel_size[0] * self.kernel_size[1])
        self.conv_offset = nn.Conv2d(
            self.in_channels,
            offset_channels,
            kernel_size=self.kernel_size,
            stride=_pair(self.stride),
            padding=_pair(self.padding),
            dilation=_pair(self.dilation),
            bias=True)
        self.init_offset()

    def init_offset(self):
        """Zero the offset branch so the initial predicted offsets are 0."""
        for param in (self.conv_offset.weight, self.conv_offset.bias):
            param.data.zero_()

    def forward(self, x):
        """Predict offsets from ``x`` and apply the deformable convolution."""
        predicted_offset = self.conv_offset(x)
        return deform_conv2d(x, predicted_offset, self.weight, self.stride,
                             self.padding, self.dilation, self.groups,
                             self.deform_groups, False, self.im2col_step)

    def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict,
                              missing_keys, unexpected_keys, error_msgs):
        """Rename legacy ``*_offset.*`` keys before the regular loading."""
        version = local_metadata.get('version', None)

        if version is None or version < 2:
            # the key is different in early versions
            # In version < 2, DeformConvPack loads previous benchmark models.
            legacy_stem = prefix[:-1]
            for suffix in ('weight', 'bias'):
                current_key = prefix + 'conv_offset.' + suffix
                legacy_key = legacy_stem + '_offset.' + suffix
                if (current_key not in state_dict
                        and legacy_key in state_dict):
                    state_dict[current_key] = state_dict.pop(legacy_key)

        # NOTE(review): this message is emitted when the stored version is
        # already > 1, not when legacy keys were renamed above -- confirm the
        # intended condition.
        if version is not None and version > 1:
            print_log(
                f'DeformConv2dPack {prefix.rstrip(".")} is upgraded to '
                'version 2.',
                logger='root')

        super()._load_from_state_dict(state_dict, prefix, local_metadata,
                                      strict, missing_keys, unexpected_keys,
                                      error_msgs)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/deform_roi_pool.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
from torch import nn
from torch.autograd import Function
from torch.autograd.function import once_differentiable
from torch.nn.modules.utils import _pair

from ..utils import ext_loader

# Handle to the `_ext` extension obtained through `ext_loader`; the list names
# the two functions this file calls on it.
ext_module = ext_loader.load_ext(
    '_ext', ['deform_roi_pool_forward', 'deform_roi_pool_backward'])


class DeformRoIPoolFunction(Function):
    """Autograd function dispatching to the ``deform_roi_pool_*`` functions
    of the ``_ext`` extension."""

    @staticmethod
    def symbolic(g, input, rois, offset, output_size, spatial_scale,
                 sampling_ratio, gamma):
        """Emit the ``mmcv::MMCVDeformRoIPool`` graph node."""
        pooled_height, pooled_width = output_size[0], output_size[1]
        return g.op(
            'mmcv::MMCVDeformRoIPool',
            input,
            rois,
            offset,
            pooled_height_i=pooled_height,
            pooled_width_i=pooled_width,
            spatial_scale_f=spatial_scale,
            sampling_ratio_f=sampling_ratio,
            gamma_f=gamma)

    @staticmethod
    def forward(ctx,
                input,
                rois,
                offset,
                output_size,
                spatial_scale=1.0,
                sampling_ratio=0,
                gamma=0.1):
        """Pool ``input`` over ``rois``.

        Args:
            ctx: Autograd context; receives the normalized hyper-parameters.
            input (Tensor): Feature map; dim 1 gives the output channels.
            rois (Tensor): ``(num_rois, 5)`` rows ``(idx, x1, y1, x2, y2)``.
            offset (Tensor | None): Offsets; ``None`` is replaced by an empty
                tensor.
            output_size (int | tuple): Expanded with ``_pair``.
            spatial_scale, sampling_ratio, gamma: Converted to ``float``,
                ``int`` and ``float`` before being passed on.

        Returns:
            Tensor: ``(num_rois, C, output_size[0], output_size[1])``.
        """
        # An empty tensor stands in for "no offset".
        if offset is None:
            offset = input.new_zeros(0)
        ctx.output_size = _pair(output_size)
        ctx.spatial_scale = float(spatial_scale)
        ctx.sampling_ratio = int(sampling_ratio)
        ctx.gamma = float(gamma)

        assert rois.size(1) == 5, 'RoI must be (idx, x1, y1, x2, y2)!'

        pooled_h = ctx.output_size[0]
        pooled_w = ctx.output_size[1]
        pooled = input.new_zeros(
            (rois.size(0), input.size(1), pooled_h, pooled_w))

        ext_module.deform_roi_pool_forward(
            input,
            rois,
            offset,
            pooled,
            pooled_height=pooled_h,
            pooled_width=pooled_w,
            spatial_scale=ctx.spatial_scale,
            sampling_ratio=ctx.sampling_ratio,
            gamma=ctx.gamma)

        ctx.save_for_backward(input, rois, offset)
        return pooled

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        """Return gradients for ``input`` and ``offset``.

        The offset gradient is ``None`` when the saved offset is empty; all
        other arguments of ``forward`` receive ``None``.
        """
        input, rois, offset = ctx.saved_tensors
        grad_input = grad_output.new_zeros(input.shape)
        grad_offset = grad_output.new_zeros(offset.shape)

        ext_module.deform_roi_pool_backward(
            grad_output,
            input,
            rois,
            offset,
            grad_input,
            grad_offset,
            pooled_height=ctx.output_size[0],
            pooled_width=ctx.output_size[1],
            spatial_scale=ctx.spatial_scale,
            sampling_ratio=ctx.sampling_ratio,
            gamma=ctx.gamma)
        offset_grad = None if grad_offset.numel() == 0 else grad_offset
        return grad_input, None, offset_grad, None, None, None, None


# Functional entry point: alias of ``DeformRoIPoolFunction.apply``, used by the
# module classes in this file.
deform_roi_pool = DeformRoIPoolFunction.apply


class DeformRoIPool(nn.Module):
    """Module wrapper around ``deform_roi_pool``.

    Args:
        output_size (int | tuple): Pooled size; expanded with ``_pair``.
        spatial_scale (float): Stored as ``float``. Default: 1.0.
        sampling_ratio (int): Stored as ``int``. Default: 0.
        gamma (float): Stored as ``float``. Default: 0.1.
    """

    def __init__(self,
                 output_size,
                 spatial_scale=1.0,
                 sampling_ratio=0,
                 gamma=0.1):
        super().__init__()
        # Normalize the types once so forward can pass them on directly.
        self.output_size = _pair(output_size)
        self.spatial_scale = float(spatial_scale)
        self.sampling_ratio = int(sampling_ratio)
        self.gamma = float(gamma)

    def forward(self, input, rois, offset=None):
        """Pool ``input`` over ``rois``, optionally using ``offset``."""
        pooling_args = (self.output_size, self.spatial_scale,
                        self.sampling_ratio, self.gamma)
        return deform_roi_pool(input, rois, offset, *pooling_args)


class DeformRoIPoolPack(DeformRoIPool):
    """Deformable RoI pooling that predicts its own offsets.

    A first pooling pass without offsets feeds a small fully connected
    network (``offset_fc``); its output is reshaped to
    ``(num_rois, 2, output_size[0], output_size[1])`` and used as the offset
    of a second pooling pass.

    Args:
        output_size (int | tuple): Pooled size.
        output_channels (int): Expected channel count of the input.
        deform_fc_channels (int): Hidden width of ``offset_fc``.
            Default: 1024.
        spatial_scale, sampling_ratio, gamma: See ``DeformRoIPool``.
    """

    def __init__(self,
                 output_size,
                 output_channels,
                 deform_fc_channels=1024,
                 spatial_scale=1.0,
                 sampling_ratio=0,
                 gamma=0.1):
        super(DeformRoIPoolPack, self).__init__(output_size, spatial_scale,
                                                sampling_ratio, gamma)

        self.output_channels = output_channels
        self.deform_fc_channels = deform_fc_channels

        num_bins = self.output_size[0] * self.output_size[1]
        hidden = self.deform_fc_channels
        self.offset_fc = nn.Sequential(
            nn.Linear(num_bins * self.output_channels, hidden),
            nn.ReLU(inplace=True),
            nn.Linear(hidden, hidden),
            nn.ReLU(inplace=True),
            nn.Linear(hidden, num_bins * 2))
        # Zero the last layer so the initial predicted offsets are 0.
        last_fc = self.offset_fc[-1]
        last_fc.weight.data.zero_()
        last_fc.bias.data.zero_()

    def forward(self, input, rois):
        """Pool twice: once to predict offsets, once using them."""
        assert input.size(1) == self.output_channels
        plain_pooled = deform_roi_pool(input, rois, None, self.output_size,
                                       self.spatial_scale,
                                       self.sampling_ratio, self.gamma)
        rois_num = rois.size(0)
        offset = self.offset_fc(plain_pooled.view(rois_num, -1))
        offset = offset.view(rois_num, 2, self.output_size[0],
                             self.output_size[1])
        return deform_roi_pool(input, rois, offset, self.output_size,
                               self.spatial_scale, self.sampling_ratio,
                               self.gamma)


class ModulatedDeformRoIPoolPack(DeformRoIPool):
    """Deformable RoI pooling with predicted offsets and a predicted mask.

    A first pooling pass without offsets feeds two fully connected heads:
    ``offset_fc`` yields the offsets for a second pooling pass and
    ``mask_fc`` (ending in a sigmoid) yields a single-channel mask that is
    multiplied onto the second pass' output.

    Args:
        output_size (int | tuple): Pooled size.
        output_channels (int): Expected channel count of the input.
        deform_fc_channels (int): Hidden width of both heads. Default: 1024.
        spatial_scale, sampling_ratio, gamma: See ``DeformRoIPool``.
    """

    def __init__(self,
                 output_size,
                 output_channels,
                 deform_fc_channels=1024,
                 spatial_scale=1.0,
                 sampling_ratio=0,
                 gamma=0.1):
        super(ModulatedDeformRoIPoolPack,
              self).__init__(output_size, spatial_scale, sampling_ratio, gamma)

        self.output_channels = output_channels
        self.deform_fc_channels = deform_fc_channels

        num_bins = self.output_size[0] * self.output_size[1]
        in_features = num_bins * self.output_channels
        hidden = self.deform_fc_channels

        self.offset_fc = nn.Sequential(
            nn.Linear(in_features, hidden),
            nn.ReLU(inplace=True),
            nn.Linear(hidden, hidden),
            nn.ReLU(inplace=True),
            nn.Linear(hidden, num_bins * 2))
        # Zero the last offset layer so the initial offsets are 0.
        last_offset_fc = self.offset_fc[-1]
        last_offset_fc.weight.data.zero_()
        last_offset_fc.bias.data.zero_()

        self.mask_fc = nn.Sequential(
            nn.Linear(in_features, hidden),
            nn.ReLU(inplace=True),
            nn.Linear(hidden, num_bins * 1),
            nn.Sigmoid())
        # Index 2 is the Linear right before the sigmoid.
        last_mask_fc = self.mask_fc[2]
        last_mask_fc.weight.data.zero_()
        last_mask_fc.bias.data.zero_()

    def forward(self, input, rois):
        """Pool twice and modulate the second result with the mask."""
        assert input.size(1) == self.output_channels
        pooled_h, pooled_w = self.output_size[0], self.output_size[1]
        plain_pooled = deform_roi_pool(input, rois, None, self.output_size,
                                       self.spatial_scale,
                                       self.sampling_ratio, self.gamma)
        rois_num = rois.size(0)
        offset = self.offset_fc(plain_pooled.view(rois_num, -1))
        offset = offset.view(rois_num, 2, pooled_h, pooled_w)
        mask = self.mask_fc(plain_pooled.view(rois_num, -1))
        mask = mask.view(rois_num, 1, pooled_h, pooled_w)
        deformed = deform_roi_pool(input, rois, offset, self.output_size,
                                   self.spatial_scale, self.sampling_ratio,
                                   self.gamma)
        return deformed * mask


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/deprecated_wrappers.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
# This file is for backward compatibility.
# Module wrappers for empty tensor have been moved to mmcv.cnn.bricks.
import warnings

from ..cnn.bricks.wrappers import Conv2d, ConvTranspose2d, Linear, MaxPool2d


class Conv2d_deprecated(Conv2d):
    """Backward-compatibility subclass of the ``Conv2d`` wrapper that emits a
    ``DeprecationWarning`` every time it is instantiated."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        message = (
            'Importing Conv2d wrapper from "mmcv.ops" will be deprecated in'
            ' the future. Please import them from "mmcv.cnn" instead')
        warnings.warn(message, DeprecationWarning)


class ConvTranspose2d_deprecated(ConvTranspose2d):
    """Backward-compatibility subclass of the ``ConvTranspose2d`` wrapper that
    emits a ``DeprecationWarning`` every time it is instantiated."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        message = (
            'Importing ConvTranspose2d wrapper from "mmcv.ops" will be '
            'deprecated in the future. Please import them from "mmcv.cnn" '
            'instead')
        warnings.warn(message, DeprecationWarning)


class MaxPool2d_deprecated(MaxPool2d):
    """Backward-compatibility subclass of the ``MaxPool2d`` wrapper that emits
    a ``DeprecationWarning`` every time it is instantiated."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        message = (
            'Importing MaxPool2d wrapper from "mmcv.ops" will be deprecated in'
            ' the future. Please import them from "mmcv.cnn" instead')
        warnings.warn(message, DeprecationWarning)


class Linear_deprecated(Linear):
    """Backward-compatibility subclass of the ``Linear`` wrapper that emits a
    ``DeprecationWarning`` every time it is instantiated."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        message = (
            'Importing Linear wrapper from "mmcv.ops" will be deprecated in'
            ' the future. Please import them from "mmcv.cnn" instead')
        warnings.warn(message, DeprecationWarning)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/focal_loss.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import torch
import torch.nn as nn
from torch.autograd import Function
from torch.autograd.function import once_differentiable

from ..utils import ext_loader

# Handle to the `_ext` extension obtained through `ext_loader`; the list names
# the four functions this file calls on it (forward/backward for the sigmoid
# and the softmax focal loss).
ext_module = ext_loader.load_ext('_ext', [
    'sigmoid_focal_loss_forward', 'sigmoid_focal_loss_backward',
    'softmax_focal_loss_forward', 'softmax_focal_loss_backward'
])


class SigmoidFocalLossFunction(Function):
    """Autograd function dispatching to the ``sigmoid_focal_loss_*``
    functions of the ``_ext`` extension."""

    @staticmethod
    def symbolic(g, input, target, gamma, alpha, weight, reduction):
        """Emit the ``mmcv::MMCVSigmoidFocalLoss`` graph node."""
        attributes = dict(
            gamma_f=gamma,
            alpha_f=alpha,
            weight_f=weight,
            reduction_s=reduction)
        return g.op('mmcv::MMCVSigmoidFocalLoss', input, target, **attributes)

    @staticmethod
    def forward(ctx,
                input,
                target,
                gamma=2.0,
                alpha=0.25,
                weight=None,
                reduction='mean'):
        """Compute the loss.

        Args:
            ctx: Autograd context.
            input (Tensor): 2D tensor; dim 0 must match ``target``.
            target (Tensor): 1D ``LongTensor`` (CPU or CUDA).
            gamma (float): Passed to the extension. Default: 2.0.
            alpha (float): Passed to the extension. Default: 0.25.
            weight (Tensor | None): 1D tensor of length ``input.size(1)``;
                ``None`` is replaced by an empty tensor.
            reduction (str): ``'none'``, ``'mean'`` or ``'sum'``. ``'mean'``
                divides the summed loss by ``input.size(0)``.

        Returns:
            Tensor: Element-wise loss shaped like ``input`` for ``'none'``,
            otherwise a scalar.
        """

        assert isinstance(target, (torch.LongTensor, torch.cuda.LongTensor))
        assert input.dim() == 2
        assert target.dim() == 1
        assert input.size(0) == target.size(0)
        if weight is not None:
            assert weight.dim() == 1
            assert input.size(1) == weight.size(0)
        else:
            # An empty tensor stands in for "no weight".
            weight = input.new_empty(0)
        ctx.reduction_dict = {'none': 0, 'mean': 1, 'sum': 2}
        assert reduction in ctx.reduction_dict.keys()

        ctx.gamma = float(gamma)
        ctx.alpha = float(alpha)
        ctx.reduction = ctx.reduction_dict[reduction]

        loss = input.new_zeros(input.size())

        ext_module.sigmoid_focal_loss_forward(
            input, target, weight, loss, gamma=ctx.gamma, alpha=ctx.alpha)
        if ctx.reduction == ctx.reduction_dict['mean']:
            loss = loss.sum() / input.size(0)
        elif ctx.reduction == ctx.reduction_dict['sum']:
            loss = loss.sum()
        ctx.save_for_backward(input, target, weight)
        return loss

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        """Return the gradient w.r.t. ``input``; every other argument of
        ``forward`` receives ``None``."""
        input, target, weight = ctx.saved_tensors

        grad_input = input.new_zeros(input.size())

        ext_module.sigmoid_focal_loss_backward(
            input,
            target,
            weight,
            grad_input,
            gamma=ctx.gamma,
            alpha=ctx.alpha)

        grad_input *= grad_output
        # Mirror the division applied by the 'mean' reduction in forward.
        uses_mean = ctx.reduction == ctx.reduction_dict['mean']
        if uses_mean:
            grad_input /= input.size(0)
        return grad_input, None, None, None, None, None


# Functional entry point: alias of ``SigmoidFocalLossFunction.apply``.
sigmoid_focal_loss = SigmoidFocalLossFunction.apply


class SigmoidFocalLoss(nn.Module):
    """Module wrapper around ``sigmoid_focal_loss``.

    Args:
        gamma: Forwarded to ``sigmoid_focal_loss``.
        alpha: Forwarded to ``sigmoid_focal_loss``.
        weight (Tensor | None): Registered as the buffer ``weight``.
        reduction (str): Forwarded to ``sigmoid_focal_loss``.
            Default: ``'mean'``.
    """

    def __init__(self, gamma, alpha, weight=None, reduction='mean'):
        super().__init__()
        self.gamma = gamma
        self.alpha = alpha
        self.register_buffer('weight', weight)
        self.reduction = reduction

    def forward(self, input, target):
        """Compute the loss of ``input`` against ``target``."""
        loss_args = (self.gamma, self.alpha, self.weight, self.reduction)
        return sigmoid_focal_loss(input, target, *loss_args)

    def __repr__(self):
        """One-line summary of gamma, alpha and reduction."""
        return (f'{self.__class__.__name__}'
                f'(gamma={self.gamma}, '
                f'alpha={self.alpha}, '
                f'reduction={self.reduction})')


class SoftmaxFocalLossFunction(Function):
    """Autograd function dispatching to the ``softmax_focal_loss_*``
    functions of the ``_ext`` extension.

    The softmax itself is computed here in PyTorch; the extension receives
    the resulting probabilities.
    """

    @staticmethod
    def symbolic(g, input, target, gamma, alpha, weight, reduction):
        """Emit the ``mmcv::MMCVSoftmaxFocalLoss`` graph node."""
        return g.op(
            'mmcv::MMCVSoftmaxFocalLoss',
            input,
            target,
            gamma_f=gamma,
            alpha_f=alpha,
            weight_f=weight,
            reduction_s=reduction)

    @staticmethod
    def forward(ctx,
                input,
                target,
                gamma=2.0,
                alpha=0.25,
                weight=None,
                reduction='mean'):
        """Compute the loss.

        Args:
            ctx: Autograd context.
            input (Tensor): 2D tensor; dim 0 must match ``target``.
            target (Tensor): 1D ``LongTensor`` (CPU or CUDA).
            gamma (float): Passed to the extension. Default: 2.0.
            alpha (float): Passed to the extension. Default: 0.25.
            weight (Tensor | None): 1D tensor of length ``input.size(1)``;
                ``None`` is replaced by an empty tensor.
            reduction (str): ``'none'``, ``'mean'`` or ``'sum'``. ``'mean'``
                divides the summed loss by ``input.size(0)``.

        Returns:
            Tensor: Per-row loss of length ``input.size(0)`` for ``'none'``,
            otherwise a scalar.
        """

        assert isinstance(target, (torch.LongTensor, torch.cuda.LongTensor))
        assert input.dim() == 2
        assert target.dim() == 1
        assert input.size(0) == target.size(0)
        if weight is None:
            # An empty tensor stands in for "no weight".
            weight = input.new_empty(0)
        else:
            assert weight.dim() == 1
            assert input.size(1) == weight.size(0)
        ctx.reduction_dict = {'none': 0, 'mean': 1, 'sum': 2}
        assert reduction in ctx.reduction_dict.keys()

        ctx.gamma = float(gamma)
        ctx.alpha = float(alpha)
        ctx.reduction = ctx.reduction_dict[reduction]

        # Softmax over dim 1, subtracting the row maximum before exp().
        channel_stats, _ = torch.max(input, dim=1)
        input_softmax = input - channel_stats.unsqueeze(1).expand_as(input)
        input_softmax.exp_()

        channel_stats = input_softmax.sum(dim=1)
        input_softmax /= channel_stats.unsqueeze(1).expand_as(input)

        output = input.new_zeros(input.size(0))
        ext_module.softmax_focal_loss_forward(
            input_softmax,
            target,
            weight,
            output,
            gamma=ctx.gamma,
            alpha=ctx.alpha)

        if ctx.reduction == ctx.reduction_dict['mean']:
            output = output.sum() / input.size(0)
        elif ctx.reduction == ctx.reduction_dict['sum']:
            output = output.sum()
        ctx.save_for_backward(input_softmax, target, weight)
        return output

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        """Return the gradient w.r.t. ``input``; every other argument of
        ``forward`` receives ``None``.

        Marked ``once_differentiable`` like the sigmoid variant in this file:
        the gradient is produced by an extension call that autograd cannot
        trace, so differentiating through this backward is rejected instead
        of yielding an incomplete graph.
        """
        input_softmax, target, weight = ctx.saved_tensors
        buff = input_softmax.new_zeros(input_softmax.size(0))
        grad_input = input_softmax.new_zeros(input_softmax.size())

        ext_module.softmax_focal_loss_backward(
            input_softmax,
            target,
            weight,
            buff,
            grad_input,
            gamma=ctx.gamma,
            alpha=ctx.alpha)

        grad_input *= grad_output
        # Mirror the division applied by the 'mean' reduction in forward.
        if ctx.reduction == ctx.reduction_dict['mean']:
            grad_input /= input_softmax.size(0)
        return grad_input, None, None, None, None, None


# Functional entry point: alias of ``SoftmaxFocalLossFunction.apply``.
softmax_focal_loss = SoftmaxFocalLossFunction.apply


class SoftmaxFocalLoss(nn.Module):
    """Module wrapper around ``softmax_focal_loss``.

    Args:
        gamma: Forwarded to ``softmax_focal_loss``.
        alpha: Forwarded to ``softmax_focal_loss``.
        weight (Tensor | None): Registered as the buffer ``weight``.
        reduction (str): Forwarded to ``softmax_focal_loss``.
            Default: ``'mean'``.
    """

    def __init__(self, gamma, alpha, weight=None, reduction='mean'):
        super().__init__()
        self.gamma = gamma
        self.alpha = alpha
        self.register_buffer('weight', weight)
        self.reduction = reduction

    def forward(self, input, target):
        """Compute the loss of ``input`` against ``target``."""
        loss_args = (self.gamma, self.alpha, self.weight, self.reduction)
        return softmax_focal_loss(input, target, *loss_args)

    def __repr__(self):
        """One-line summary of gamma, alpha and reduction."""
        return (f'{self.__class__.__name__}'
                f'(gamma={self.gamma}, '
                f'alpha={self.alpha}, '
                f'reduction={self.reduction})')


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/furthest_point_sample.py
================================================
import torch
from torch.autograd import Function

from ..utils import ext_loader

# Handle to the `_ext` extension obtained through `ext_loader`; the list names
# the two sampling functions this file calls on it.
ext_module = ext_loader.load_ext('_ext', [
    'furthest_point_sampling_forward',
    'furthest_point_sampling_with_dist_forward'
])


class FurthestPointSampling(Function):
    """Uses iterative furthest point sampling to select a set of features whose
    corresponding points have the furthest distance."""

    @staticmethod
    def forward(ctx, points_xyz: torch.Tensor,
                num_points: int) -> torch.Tensor:
        """
        Args:
            points_xyz (torch.Tensor): (B, N, 3) where N > num_points.
            num_points (int): Number of points in the sampled set.

        Returns:
            torch.Tensor: (B, num_points) indices of the sampled points.
        """
        assert points_xyz.is_contiguous()

        B, N = points_xyz.size()[:2]
        # Allocate the work buffers on the same device as the input (the
        # legacy ``torch.cuda.*Tensor`` constructors always use the *current*
        # CUDA device, which breaks when the input lives on another GPU).
        # The dtypes are pinned to what the legacy constructors produced:
        # int32 indices and a float32 running-distance buffer.
        output = points_xyz.new_zeros([B, num_points], dtype=torch.int32)
        temp = points_xyz.new_zeros([B, N], dtype=torch.float32).fill_(1e10)

        # The extension writes the selected indices into ``output``.
        ext_module.furthest_point_sampling_forward(
            points_xyz,
            temp,
            output,
            b=B,
            n=N,
            m=num_points,
        )
        if torch.__version__ != 'parrots':
            # Integer indices carry no gradient.
            ctx.mark_non_differentiable(output)
        return output

    @staticmethod
    def backward(ctx, grad_output=None):
        """No gradient flows to either ``forward`` argument."""
        return None, None


class FurthestPointSamplingWithDist(Function):
    """Iterative furthest point sampling driven by a precomputed pairwise
    distance matrix instead of raw coordinates."""

    @staticmethod
    def forward(ctx, points_dist: torch.Tensor,
                num_points: int) -> torch.Tensor:
        """
        Args:
            points_dist (torch.Tensor): (B, N, N) Distance between each point
                pair.
            num_points (int): Number of points in the sampled set.

        Returns:
            torch.Tensor: (B, num_points) indices of the sampled points.
        """
        assert points_dist.is_contiguous()

        batch_size, num_total, _ = points_dist.size()
        # int32 index buffer the extension fills in place.
        sampled_idx = points_dist.new_zeros([batch_size, num_points],
                                            dtype=torch.int32)
        # Per-point buffer initialised to a very large value.
        running_dist = points_dist.new_zeros([batch_size, num_total])
        running_dist.fill_(1e10)

        ext_module.furthest_point_sampling_with_dist_forward(
            points_dist,
            running_dist,
            sampled_idx,
            b=batch_size,
            n=num_total,
            m=num_points)
        if torch.__version__ != 'parrots':
            ctx.mark_non_differentiable(sampled_idx)
        return sampled_idx

    @staticmethod
    def backward(xyz, a=None):
        # The first positional argument receives the autograd context; no
        # gradient is produced for either forward argument.
        return None, None


# Functional entry points: each invokes the corresponding autograd Function.
furthest_point_sample = FurthestPointSampling.apply
furthest_point_sample_with_dist = FurthestPointSamplingWithDist.apply


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/fused_bias_leakyrelu.py
================================================
# modified from https://github.com/rosinality/stylegan2-pytorch/blob/master/op/fused_act.py # noqa:E501

# Copyright (c) 2021, NVIDIA Corporation. All rights reserved.
# NVIDIA Source Code License for StyleGAN2 with Adaptive Discriminator
# Augmentation (ADA)
# =======================================================================

# 1. Definitions

# "Licensor" means any person or entity that distributes its Work.

# "Software" means the original work of authorship made available under
# this License.

# "Work" means the Software and any additions to or derivative works of
# the Software that are made available under this License.

# The terms "reproduce," "reproduction," "derivative works," and
# "distribution" have the meaning as provided under U.S. copyright law;
# provided, however, that for the purposes of this License, derivative
# works shall not include works that remain separable from, or merely
# link (or bind by name) to the interfaces of, the Work.

# Works, including the Software, are "made available" under this License
# by including in or with the Work either (a) a copyright notice
# referencing the applicability of this License to the Work, or (b) a
# copy of this License.

# 2. License Grants

#     2.1 Copyright Grant. Subject to the terms and conditions of this
#     License, each Licensor grants to you a perpetual, worldwide,
#     non-exclusive, royalty-free, copyright license to reproduce,
#     prepare derivative works of, publicly display, publicly perform,
#     sublicense and distribute its Work and any resulting derivative
#     works in any form.

# 3. Limitations

#     3.1 Redistribution. You may reproduce or distribute the Work only
#     if (a) you do so under this License, (b) you include a complete
#     copy of this License with your distribution, and (c) you retain
#     without modification any copyright, patent, trademark, or
#     attribution notices that are present in the Work.

#     3.2 Derivative Works. You may specify that additional or different
#     terms apply to the use, reproduction, and distribution of your
#     derivative works of the Work ("Your Terms") only if (a) Your Terms
#     provide that the use limitation in Section 3.3 applies to your
#     derivative works, and (b) you identify the specific derivative
#     works that are subject to Your Terms. Notwithstanding Your Terms,
#     this License (including the redistribution requirements in Section
#     3.1) will continue to apply to the Work itself.

#     3.3 Use Limitation. The Work and any derivative works thereof only
#     may be used or intended for use non-commercially. Notwithstanding
#     the foregoing, NVIDIA and its affiliates may use the Work and any
#     derivative works commercially. As used herein, "non-commercially"
#     means for research or evaluation purposes only.

#     3.4 Patent Claims. If you bring or threaten to bring a patent claim
#     against any Licensor (including any claim, cross-claim or
#     counterclaim in a lawsuit) to enforce any patents that you allege
#     are infringed by any Work, then your rights under this License from
#     such Licensor (including the grant in Section 2.1) will terminate
#     immediately.

#     3.5 Trademarks. This License does not grant any rights to use any
#     Licensor’s or its affiliates’ names, logos, or trademarks, except
#     as necessary to reproduce the notices described in this License.

#     3.6 Termination. If you violate any term of this License, then your
#     rights under this License (including the grant in Section 2.1) will
#     terminate immediately.

# 4. Disclaimer of Warranty.

# THE WORK IS PROVIDED "AS IS" WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR
# NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER
# THIS LICENSE.

# 5. Limitation of Liability.

# EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL
# THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE
# SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT,
# INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF
# OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK
# (INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION,
# LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER
# COMMERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF
# THE POSSIBILITY OF SUCH DAMAGES.

# =======================================================================

import torch
import torch.nn.functional as F
from torch import nn
from torch.autograd import Function

from ..utils import ext_loader

ext_module = ext_loader.load_ext('_ext', ['fused_bias_leakyrelu'])


class FusedBiasLeakyReLUFunctionBackward(Function):
    """Backward pass of the fused bias leaky ReLU, itself differentiable.

    Wrapping the first-order gradient computation in its own ``Function``
    lets autograd compute the second-order derivative of the fused
    operation.
    """

    @staticmethod
    def forward(ctx, grad_output, out, negative_slope, scale):
        ctx.save_for_backward(out)
        ctx.negative_slope, ctx.scale = negative_slope, scale

        # A zero-element tensor is passed in the bias slot of the extension.
        no_bias = grad_output.new_empty(0)

        grad_input = ext_module.fused_bias_leakyrelu(
            grad_output,
            no_bias,
            out,
            act=3,
            grad=1,
            alpha=negative_slope,
            scale=scale)

        # The bias gradient is the input gradient summed over every axis
        # except axis 1.
        reduce_dims = [0] + list(range(2, grad_input.ndim))
        grad_bias = grad_input.sum(reduce_dims).detach()

        return grad_input, grad_bias

    @staticmethod
    def backward(ctx, gradgrad_input, gradgrad_bias):
        out, = ctx.saved_tensors

        # The second-order derivative has two parts, the first of which is
        # zero. Only the second part is computed; it is implemented the same
        # way as the first-order derivative.
        gradgrad_out = ext_module.fused_bias_leakyrelu(
            gradgrad_input,
            gradgrad_bias.to(out.dtype),
            out,
            act=3,
            grad=1,
            alpha=ctx.negative_slope,
            scale=ctx.scale)

        # Only ``grad_output`` receives a gradient; ``out``,
        # ``negative_slope`` and ``scale`` do not.
        return gradgrad_out, None, None, None


class FusedBiasLeakyReLUFunction(Function):
    """Autograd wrapper around the ``fused_bias_leakyrelu`` extension op."""

    @staticmethod
    def forward(ctx, input, bias, negative_slope, scale):
        # A zero-element tensor is passed in the third tensor slot of the
        # extension call.
        placeholder = input.new_empty(0)

        result = ext_module.fused_bias_leakyrelu(
            input,
            bias,
            placeholder,
            act=3,
            grad=0,
            alpha=negative_slope,
            scale=scale)

        # The output (not the input) is what the backward pass needs.
        ctx.save_for_backward(result)
        ctx.negative_slope, ctx.scale = negative_slope, scale

        return result

    @staticmethod
    def backward(ctx, grad_output):
        saved_out, = ctx.saved_tensors

        # Delegating to a separate Function keeps this gradient
        # differentiable.
        grads = FusedBiasLeakyReLUFunctionBackward.apply(
            grad_output, saved_out, ctx.negative_slope, ctx.scale)
        grad_input, grad_bias = grads

        # ``negative_slope`` and ``scale`` receive no gradient.
        return grad_input, grad_bias, None, None


class FusedBiasLeakyReLU(nn.Module):
    r"""Fused bias leaky ReLU.

    This function is introduced in the StyleGAN2:
    `Analyzing and Improving the Image Quality of StyleGAN
    <http://arxiv.org/abs/1912.04958>`_

    The bias term comes from the convolution operation. To keep the variance
    of the feature map or gradients unchanged, a scale similar to Kaiming
    initialization is applied. Since :math:`1+{alpha}^2` is too small, it is
    ignored, so the final scale is just :math:`\sqrt{2}`. You may supply
    your own scale instead.

    The module owns a learnable, zero-initialised bias with one entry per
    channel and applies the functional ``fused_bias_leakyrelu`` to its input.

    Args:
        num_channels (int): The channel number of the feature map.
        negative_slope (float, optional): Same as nn.LeakyRelu.
            Defaults to 0.2.
        scale (float, optional): A scalar to adjust the variance of the feature
            map. Defaults to 2**0.5.
    """

    def __init__(self, num_channels, negative_slope=0.2, scale=2**0.5):
        super().__init__()

        self.negative_slope = negative_slope
        self.scale = scale
        self.bias = nn.Parameter(torch.zeros(num_channels))

    def forward(self, input):
        """Apply bias, leaky ReLU and scaling to ``input``."""
        return fused_bias_leakyrelu(input, self.bias, self.negative_slope,
                                    self.scale)


def fused_bias_leakyrelu(input, bias, negative_slope=0.2, scale=2**0.5):
    r"""Fused bias leaky ReLU function.

    This function is introduced in the StyleGAN2:
    `Analyzing and Improving the Image Quality of StyleGAN
    <http://arxiv.org/abs/1912.04958>`_

    The bias term comes from the convolution operation. To keep the variance
    of the feature map or gradients unchanged, a scale similar to Kaiming
    initialization is applied. Since :math:`1+{alpha}^2` is too small, it is
    ignored, so the final scale is just :math:`\sqrt{2}`. You may supply
    your own scale instead.

    CUDA inputs go through the fused extension op; any other input is
    handled by the pure-PyTorch reference ``bias_leakyrelu_ref``.

    Args:
        input (torch.Tensor): Input feature map.
        bias (nn.Parameter): The bias from convolution operation.
        negative_slope (float, optional): Same as nn.LeakyRelu.
            Defaults to 0.2.
        scale (float, optional): A scalar to adjust the variance of the feature
            map. Defaults to 2**0.5.

    Returns:
        torch.Tensor: Feature map after non-linear activation.
    """
    if input.is_cuda:
        # The bias is cast to the input dtype before entering the fused op.
        typed_bias = bias.to(input.dtype)
        return FusedBiasLeakyReLUFunction.apply(input, typed_bias,
                                                negative_slope, scale)

    return bias_leakyrelu_ref(input, bias, negative_slope, scale)


def bias_leakyrelu_ref(x, bias, negative_slope=0.2, scale=2**0.5):
    """Reference (pure PyTorch) bias + leaky ReLU + scale.

    Args:
        x (torch.Tensor): Input tensor; axis 1 is the channel axis when a
            bias is given.
        bias (torch.Tensor | None): 1-D per-channel bias whose length equals
            ``x.shape[1]``, or None to skip the bias addition.
        negative_slope (float, optional): Slope for negative inputs.
            Defaults to 0.2.
        scale (float, optional): Multiplier applied after the activation.
            Defaults to 2**0.5.

    Returns:
        torch.Tensor: The activated and scaled tensor.
    """
    if bias is not None:
        assert bias.ndim == 1
        assert bias.shape[0] == x.shape[1]
        # Broadcast the bias along axis 1: shape is (1, -1, 1, ..., 1).
        broadcast_shape = [1] * x.ndim
        broadcast_shape[1] = -1
        x = x + bias.reshape(broadcast_shape)

    activated = F.leaky_relu(x, negative_slope)
    if scale != 1:
        activated = activated * scale

    return activated


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/gather_points.py
================================================
import torch
from torch.autograd import Function

from ..utils import ext_loader

ext_module = ext_loader.load_ext(
    '_ext', ['gather_points_forward', 'gather_points_backward'])


class GatherPoints(Function):
    """Gather points with given index."""

    @staticmethod
    def forward(ctx, features: torch.Tensor,
                indices: torch.Tensor) -> torch.Tensor:
        """
        Args:
            features (torch.Tensor): (B, C, N) features to gather.
            indices (torch.Tensor): (B, M) where M is the number of points.

        Returns:
            torch.Tensor: (B, C, M) where M is the number of points.
        """
        assert features.is_contiguous()
        assert indices.is_contiguous()

        B, npoint = indices.size()
        _, C, N = features.size()
        # Allocate next to ``features`` (same device and dtype). The legacy
        # ``torch.cuda.FloatTensor`` constructor always used the *current*
        # CUDA device, which is wrong when the input lives on another GPU.
        output = features.new_zeros((B, C, npoint))

        # The extension fills ``output`` in place.
        ext_module.gather_points_forward(
            features, indices, output, b=B, c=C, n=N, npoints=npoint)

        # Indices are stashed as a plain attribute for the backward pass.
        ctx.for_backwards = (indices, C, N)
        if torch.__version__ != 'parrots':
            ctx.mark_non_differentiable(indices)
        return output

    @staticmethod
    def backward(ctx, grad_out):
        """Scatter ``grad_out`` back to the positions the points came from.

        Returns:
            tuple: Gradient for ``features`` of shape (B, C, N) and ``None``
            for ``indices``.
        """
        idx, C, N = ctx.for_backwards
        B, npoint = idx.size()

        # Zero-initialised and on the same device/dtype as the incoming
        # gradient; the extension fills it in place.
        grad_features = grad_out.new_zeros((B, C, N))
        grad_out_data = grad_out.data.contiguous()
        ext_module.gather_points_backward(
            grad_out_data,
            idx,
            grad_features.data,
            b=B,
            c=C,
            n=N,
            npoints=npoint)
        return grad_features, None


# Functional entry point: invokes GatherPoints through autograd.
gather_points = GatherPoints.apply


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/group_points.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Tuple

import torch
from torch import nn as nn
from torch.autograd import Function

from ..utils import ext_loader
from .ball_query import ball_query
from .knn import knn

ext_module = ext_loader.load_ext(
    '_ext', ['group_points_forward', 'group_points_backward'])


class QueryAndGroup(nn.Module):
    """Groups points around given centers using a ball query or kNN.

    Args:
        max_radius (float): The maximum radius of the balls.
            If None is given, we will use kNN sampling instead of ball query.
        sample_num (int): Maximum number of features to gather in the ball.
        min_radius (float, optional): The minimum radius of the balls.
            Default: 0.
        use_xyz (bool, optional): Whether to use xyz.
            Default: True.
        return_grouped_xyz (bool, optional): Whether to return grouped xyz.
            Default: False.
        normalize_xyz (bool, optional): Whether to normalize xyz.
            Default: False.
        uniform_sample (bool, optional): Whether to sample uniformly.
            Default: False
        return_unique_cnt (bool, optional): Whether to return the count of
            unique samples. Default: False.
        return_grouped_idx (bool, optional): Whether to return grouped idx.
            Default: False.
    """

    def __init__(self,
                 max_radius,
                 sample_num,
                 min_radius=0,
                 use_xyz=True,
                 return_grouped_xyz=False,
                 normalize_xyz=False,
                 uniform_sample=False,
                 return_unique_cnt=False,
                 return_grouped_idx=False):
        super().__init__()
        # Query configuration.
        self.max_radius = max_radius
        self.min_radius = min_radius
        self.sample_num = sample_num
        # Feature assembly options.
        self.use_xyz = use_xyz
        self.return_grouped_xyz = return_grouped_xyz
        self.normalize_xyz = normalize_xyz
        self.uniform_sample = uniform_sample
        # Optional extra outputs.
        self.return_unique_cnt = return_unique_cnt
        self.return_grouped_idx = return_grouped_idx

        # The unique count is only computed on the uniform-sampling path.
        if self.return_unique_cnt:
            assert self.uniform_sample, \
                'uniform_sample should be True when ' \
                'returning the count of unique samples'
        # Normalisation divides by max_radius, so it must be set.
        if self.max_radius is None:
            assert not self.normalize_xyz, \
                'can not normalize grouped xyz when max_radius is None'

    def forward(self, points_xyz, center_xyz, features=None):
        """
        Args:
            points_xyz (torch.Tensor): (B, N, 3) xyz coordinates of the
                points.
            center_xyz (torch.Tensor): (B, npoint, 3) coordinates of the
                centroids.
            features (torch.Tensor): (B, C, N) The features of grouped
                points.

        Returns:
            torch.Tensor: (B, 3 + C, npoint, sample_num) Grouped
            concatenated coordinates and features of points. When any of the
            ``return_*`` flags is set, a tuple is returned with the extra
            outputs appended in the order grouped xyz, unique count, idx.
        """
        # idx ends up with shape [B, npoint, sample_num] on both paths.
        if self.max_radius is not None:
            idx = ball_query(self.min_radius, self.max_radius, self.sample_num,
                             points_xyz, center_xyz)
        else:
            # No radius given: fall back to kNN and move the neighbour axis
            # last.
            idx = knn(self.sample_num, points_xyz, center_xyz, False)
            idx = idx.transpose(1, 2).contiguous()

        if self.uniform_sample:
            num_batch, num_region = idx.shape[0], idx.shape[1]
            unique_cnt = torch.zeros((num_batch, num_region))
            for b in range(num_batch):
                for r in range(num_region):
                    uniq = torch.unique(idx[b, r, :])
                    n_uniq = uniq.shape[0]
                    unique_cnt[b, r] = n_uniq
                    # Refill the row: the unique indices first, then random
                    # draws from them to pad back up to sample_num entries.
                    pad_ind = torch.randint(
                        0,
                        n_uniq, (self.sample_num - n_uniq, ),
                        dtype=torch.long)
                    idx[b, r, :] = torch.cat((uniq, uniq[pad_ind]))

        # (B, 3, npoint, sample_num)
        grouped_xyz = grouping_operation(
            points_xyz.transpose(1, 2).contiguous(), idx)
        # Offsets of every grouped point relative to its center.
        centers = center_xyz.transpose(1, 2).unsqueeze(-1)
        grouped_xyz_diff = grouped_xyz - centers
        if self.normalize_xyz:
            grouped_xyz_diff /= self.max_radius

        if features is None:
            assert (self.use_xyz
                    ), 'Cannot have not features and not use xyz as a feature!'
            new_features = grouped_xyz_diff
        else:
            grouped_features = grouping_operation(features, idx)
            if self.use_xyz:
                # (B, C + 3, npoint, sample_num)
                new_features = torch.cat([grouped_xyz_diff, grouped_features],
                                         dim=1)
            else:
                new_features = grouped_features

        outputs = [new_features]
        if self.return_grouped_xyz:
            outputs.append(grouped_xyz)
        if self.return_unique_cnt:
            outputs.append(unique_cnt)
        if self.return_grouped_idx:
            outputs.append(idx)

        # A single output is returned bare rather than as a 1-tuple.
        return outputs[0] if len(outputs) == 1 else tuple(outputs)


class GroupAll(nn.Module):
    """Put every point into one single group, optionally with features.

    Args:
        use_xyz (bool): Whether to use xyz.
    """

    def __init__(self, use_xyz: bool = True):
        super().__init__()
        self.use_xyz = use_xyz

    def forward(self,
                xyz: torch.Tensor,
                new_xyz: torch.Tensor,
                features: torch.Tensor = None):
        """
        Args:
            xyz (Tensor): (B, N, 3) xyz coordinates of the features.
            new_xyz (Tensor): new xyz coordinates of the features. Not read
                by this method.
            features (Tensor): (B, C, N) features to group.

        Returns:
            Tensor: (B, C + 3, 1, N) Grouped feature.
        """
        # (B, 3, 1, N): coordinates with a singleton group axis inserted.
        grouped_xyz = xyz.transpose(1, 2).unsqueeze(2)
        if features is None:
            return grouped_xyz

        grouped_features = features.unsqueeze(2)
        if not self.use_xyz:
            return grouped_features

        # (B, 3 + C, 1, N)
        return torch.cat([grouped_xyz, grouped_features], dim=1)


class GroupingOperation(Function):
    """Group feature with given index."""

    @staticmethod
    def forward(ctx, features: torch.Tensor,
                indices: torch.Tensor) -> torch.Tensor:
        """
        Args:
            features (Tensor): (B, C, N) tensor of features to group.
            indices (Tensor): (B, npoint, nsample) the indices of
                features to group with.

        Returns:
            Tensor: (B, C, npoint, nsample) Grouped features.
        """
        features = features.contiguous()
        indices = indices.contiguous()

        B, nfeatures, nsample = indices.size()
        _, C, N = features.size()
        # Allocate next to ``features`` (same device and dtype). The legacy
        # ``torch.cuda.FloatTensor`` constructor always used the *current*
        # CUDA device, which is wrong when the input lives on another GPU.
        output = features.new_zeros((B, C, nfeatures, nsample))

        # The extension fills ``output`` in place.
        ext_module.group_points_forward(
            features,
            indices,
            output,
            b=B,
            c=C,
            n=N,
            npoints=nfeatures,
            nsample=nsample)

        # Indices are stashed as a plain attribute for the backward pass.
        ctx.for_backwards = (indices, N)
        return output

    @staticmethod
    def backward(ctx,
                 grad_out: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Args:
            grad_out (Tensor): (B, C, npoint, nsample) tensor of the gradients
                of the output from forward.

        Returns:
            Tensor: (B, C, N) gradient of the features. The second element
            of the returned tuple is ``None`` (no gradient for the indices).
        """
        idx, N = ctx.for_backwards

        B, C, npoint, nsample = grad_out.size()
        # Zero-initialised and on the same device/dtype as the incoming
        # gradient; the extension fills it in place.
        grad_features = grad_out.new_zeros((B, C, N))

        grad_out_data = grad_out.data.contiguous()
        ext_module.group_points_backward(
            grad_out_data,
            idx,
            grad_features.data,
            b=B,
            c=C,
            n=N,
            npoints=npoint,
            nsample=nsample)
        return grad_features, None


# Functional entry point: invokes GroupingOperation through autograd.
grouping_operation = GroupingOperation.apply


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/info.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import glob
import os

import torch

# Two interchangeable implementations of the build-information helpers are
# defined, selected once at import time by the reported ``torch`` version.
if torch.__version__ == 'parrots':
    import parrots

    def get_compiler_version():
        """Return 'GCC ' followed by the compiler reported by parrots."""
        return 'GCC ' + parrots.version.compiler

    def get_compiling_cuda_version():
        """Return the CUDA version reported by parrots."""
        return parrots.version.cuda
else:
    from ..utils import ext_loader
    # Only the two query functions are requested from the compiled extension.
    ext_module = ext_loader.load_ext(
        '_ext', ['get_compiler_version', 'get_compiling_cuda_version'])

    def get_compiler_version():
        """Return the compiler version reported by the ``_ext`` extension."""
        return ext_module.get_compiler_version()

    def get_compiling_cuda_version():
        """Return the CUDA version reported by the ``_ext`` extension."""
        return ext_module.get_compiling_cuda_version()


def get_onnxruntime_op_path():
    """Locate the compiled ONNX Runtime custom-op library.

    Searches the directory one level above this file's directory for a file
    matching ``_ext_ort.*.so``.

    Returns:
        str: The first matching path reported by ``glob``, or an empty
        string when nothing matches.
    """
    package_root = os.path.abspath(
        os.path.dirname(os.path.dirname(__file__)))
    matches = glob.glob(os.path.join(package_root, '_ext_ort.*.so'))
    return matches[0] if matches else ''


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/iou3d.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import torch

from ..utils import ext_loader

ext_module = ext_loader.load_ext('_ext', [
    'iou3d_boxes_iou_bev_forward', 'iou3d_nms_forward',
    'iou3d_nms_normal_forward'
])


def boxes_iou_bev(boxes_a, boxes_b):
    """Calculate boxes IoU in the Bird's Eye View.

    Args:
        boxes_a (torch.Tensor): Input boxes a with shape (M, 5).
        boxes_b (torch.Tensor): Input boxes b with shape (N, 5).

    Returns:
        torch.Tensor: IoU result with shape (M, N).
    """
    num_a, num_b = boxes_a.shape[0], boxes_b.shape[0]
    # Result matrix allocated next to ``boxes_a``; the extension fills it in
    # place.
    ans_iou = boxes_a.new_zeros(torch.Size((num_a, num_b)))

    ext_module.iou3d_boxes_iou_bev_forward(
        boxes_a.contiguous(),
        boxes_b.contiguous(),
        ans_iou,
    )

    return ans_iou


def nms_bev(boxes, scores, thresh, pre_max_size=None, post_max_size=None):
    """NMS function GPU implementation (for BEV boxes). The overlap of two
    boxes for IoU calculation is defined as the exact overlapping area of the
    two boxes. In this function, one can also set ``pre_max_size`` and
    ``post_max_size``.

    Args:
        boxes (torch.Tensor): Input boxes with the shape of [N, 5]
            ([x1, y1, x2, y2, ry]).
        scores (torch.Tensor): Scores of boxes with the shape of [N].
        thresh (float): Overlap threshold of NMS.
        pre_max_size (int, optional): Max size of boxes before NMS.
            Default: None.
        post_max_size (int, optional): Max size of boxes after NMS.
            Default: None.

    Returns:
        torch.Tensor: Indexes after NMS.
    """
    assert boxes.size(1) == 5, 'Input boxes shape should be [N, 5]'

    # Indices of the boxes from highest to lowest score.
    _, order = scores.sort(0, descending=True)
    if pre_max_size is not None:
        order = order[:pre_max_size]
    sorted_boxes = boxes[order].contiguous()

    # Output buffers for the extension, created with default placement:
    # positions into ``sorted_boxes`` and the number of valid entries.
    keep = torch.zeros(sorted_boxes.size(0), dtype=torch.long)
    num_out = torch.zeros(size=(), dtype=torch.long)
    ext_module.iou3d_nms_forward(
        sorted_boxes, keep, num_out, nms_overlap_thresh=thresh)

    # Map positions in the sorted list back to original box indices.
    kept_positions = keep[:num_out].cuda(sorted_boxes.device)
    selected = order[kept_positions].contiguous()
    if post_max_size is not None:
        selected = selected[:post_max_size]
    return selected


def nms_normal_bev(boxes, scores, thresh):
    """Normal NMS function GPU implementation (for BEV boxes). The overlap of
    two boxes for IoU calculation is defined as the exact overlapping area of
    the two boxes WITH their yaw angle set to 0.

    Args:
        boxes (torch.Tensor): Input boxes with shape (N, 5).
        scores (torch.Tensor): Scores of predicted boxes with shape (N).
        thresh (float): Overlap threshold of NMS.

    Returns:
        torch.Tensor: Remaining indices with scores in descending order.
    """
    assert boxes.shape[1] == 5, 'Input boxes shape should be [N, 5]'

    # Indices of the boxes from highest to lowest score.
    _, order = scores.sort(0, descending=True)
    sorted_boxes = boxes[order].contiguous()

    # Output buffers for the extension, created with default placement:
    # positions into ``sorted_boxes`` and the number of valid entries.
    keep = torch.zeros(sorted_boxes.size(0), dtype=torch.long)
    num_out = torch.zeros(size=(), dtype=torch.long)
    ext_module.iou3d_nms_normal_forward(
        sorted_boxes, keep, num_out, nms_overlap_thresh=thresh)

    # Map positions in the sorted list back to original box indices.
    kept_positions = keep[:num_out].cuda(sorted_boxes.device)
    return order[kept_positions].contiguous()


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/knn.py
================================================
import torch
from torch.autograd import Function

from ..utils import ext_loader

ext_module = ext_loader.load_ext('_ext', ['knn_forward'])


class KNN(Function):
    r"""KNN (CUDA) based on heap data structure.

    Modified from `PAConv <https://github.com/CVMI-Lab/PAConv/tree/main/
    scene_seg/lib/pointops/src/knnquery_heap>`_.

    Find k-nearest points.
    """

    @staticmethod
    def forward(ctx,
                k: int,
                xyz: torch.Tensor,
                center_xyz: torch.Tensor = None,
                transposed: bool = False) -> torch.Tensor:
        """
        Args:
            k (int): number of nearest neighbors.
            xyz (torch.Tensor): (B, N, 3) if transposed == False, else
                (B, 3, N). xyz coordinates of the features.
            center_xyz (torch.Tensor, optional): (B, npoint, 3) if transposed
                is False, else (B, 3, npoint). centers of the knn query.
                Default: None.
            transposed (bool, optional): whether the input tensors are
                transposed. Should not explicitly use this keyword when
                calling knn (=KNN.apply), just add the fourth param.
                Default: False.

        Returns:
            torch.Tensor: (B, k, npoint) tensor with the indices of the
            features that form k-nearest neighbours.
        """
        assert 0 < k < 100, 'k should be in range(0, 100)'

        # Without explicit centers every point queries its own neighbours.
        if center_xyz is None:
            center_xyz = xyz

        if transposed:
            xyz = xyz.transpose(2, 1).contiguous()
            center_xyz = center_xyz.transpose(2, 1).contiguous()

        assert xyz.is_contiguous()  # [B, N, 3]
        assert center_xyz.is_contiguous()  # [B, npoint, 3]

        center_xyz_device = center_xyz.get_device()
        assert center_xyz_device == xyz.get_device(), \
            'center_xyz and xyz should be put on the same device'
        # Make the inputs' device the current CUDA device before the
        # extension call.
        if torch.cuda.current_device() != center_xyz_device:
            torch.cuda.set_device(center_xyz_device)

        B, npoint, _ = center_xyz.shape
        N = xyz.shape[1]

        # Output buffers filled in place by the extension.
        idx = center_xyz.new_zeros((B, npoint, k)).int()
        dist2 = center_xyz.new_zeros((B, npoint, k)).float()

        ext_module.knn_forward(
            xyz, center_xyz, idx, dist2, b=B, n=N, m=npoint, nsample=k)
        # idx shape to [B, k, npoint]
        idx = idx.transpose(2, 1).contiguous()
        if torch.__version__ != 'parrots':
            ctx.mark_non_differentiable(idx)
        return idx

    @staticmethod
    def backward(ctx, a=None):
        """Return no gradient for any ``forward`` input.

        ``forward`` takes four inputs (``k``, ``xyz``, ``center_xyz``,
        ``transposed``), so four placeholders are returned; autograd
        rejects a backward that returns fewer values than forward inputs.
        """
        return None, None, None, None


# Functional entry point: invokes KNN through autograd.
knn = KNN.apply


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/masked_conv.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import math

import torch
import torch.nn as nn
from torch.autograd import Function
from torch.autograd.function import once_differentiable
from torch.nn.modules.utils import _pair

from ..utils import ext_loader

ext_module = ext_loader.load_ext(
    '_ext', ['masked_im2col_forward', 'masked_col2im_forward'])


class MaskedConv2dFunction(Function):
    """Convolution evaluated only at the spatial positions selected by a mask.

    Only the forward pass is implemented; ``backward`` yields no gradients.
    """

    @staticmethod
    def symbolic(g, features, mask, weight, bias, padding, stride):
        # ONNX export: emit the custom ``mmcv::MMCVMaskedConv2d`` node.
        return g.op(
            'mmcv::MMCVMaskedConv2d',
            features,
            mask,
            weight,
            bias,
            padding_i=padding,
            stride_i=stride)

    @staticmethod
    def forward(ctx, features, mask, weight, bias, padding=0, stride=1):
        """
        Args:
            features (torch.Tensor): Input of shape (1, C_in, H, W).
            mask (torch.Tensor): Mask of shape (1, H, W); positions with a
                value greater than 0 are computed.
            weight (torch.Tensor): Kernel of shape
                (C_out, C_in, kernel_h, kernel_w).
            bias (torch.Tensor | None): Per-output-channel bias of shape
                (C_out, ), or None for no bias.
            padding (int | tuple[int]): Padding along height and width.
                Default: 0.
            stride (int | tuple[int]): Must be 1. Default: 1.

        Returns:
            torch.Tensor: Output of shape (1, C_out, out_h, out_w), zero at
            every position the mask does not select.

        Raises:
            ValueError: If ``stride`` is not 1 in both directions.
        """
        assert mask.dim() == 3 and mask.size(0) == 1
        assert features.dim() == 4 and features.size(0) == 1
        assert features.size()[2:] == mask.size()[1:]
        pad_h, pad_w = _pair(padding)
        stride_h, stride_w = _pair(stride)
        if stride_h != 1 or stride_w != 1:
            raise ValueError(
                'Stride could not only be 1 in masked_conv2d currently.')
        out_channel, in_channel, kernel_h, kernel_w = weight.size()

        batch_size = features.size(0)
        out_h = int(
            math.floor((features.size(2) + 2 * pad_h -
                        (kernel_h - 1) - 1) / stride_h + 1))
        # The output width depends on the kernel *width* (the height was
        # used here before, which is wrong for non-square kernels).
        out_w = int(
            math.floor((features.size(3) + 2 * pad_w -
                        (kernel_w - 1) - 1) / stride_w + 1))
        mask_inds = torch.nonzero(mask[0] > 0, as_tuple=False)
        output = features.new_zeros(batch_size, out_channel, out_h, out_w)
        if mask_inds.numel() > 0:
            mask_h_idx = mask_inds[:, 0].contiguous()
            mask_w_idx = mask_inds[:, 1].contiguous()
            # One column per selected position, filled by the extension.
            data_col = features.new_zeros(in_channel * kernel_h * kernel_w,
                                          mask_inds.size(0))
            ext_module.masked_im2col_forward(
                features,
                mask_h_idx,
                mask_w_idx,
                data_col,
                kernel_h=kernel_h,
                kernel_w=kernel_w,
                pad_h=pad_h,
                pad_w=pad_w)
            flat_weight = weight.view(out_channel, -1)
            if bias is None:
                # Convolution without bias: plain matrix product.
                masked_output = torch.mm(flat_weight, data_col)
            else:
                # bias + weight @ columns, using the keyword form of addmm
                # (the positional ``addmm(beta, input, alpha, ...)`` overload
                # is deprecated).
                masked_output = torch.addmm(
                    bias[:, None], flat_weight, data_col, beta=1, alpha=1)
            # Scatter the per-position results into the dense output.
            ext_module.masked_col2im_forward(
                masked_output,
                mask_h_idx,
                mask_w_idx,
                output,
                height=out_h,
                width=out_w,
                channels=out_channel)
        return output

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        # One placeholder per forward input: features, mask, weight, bias,
        # padding, stride.
        return (None, ) * 6


# Functional entry point: invokes MaskedConv2dFunction through autograd.
masked_conv2d = MaskedConv2dFunction.apply


class MaskedConv2d(nn.Conv2d):
    """A MaskedConv2d which inherits the official Conv2d.

    The masked forward doesn't implement the backward function and only
    supports the stride parameter to be 1 currently. Note that the masked
    path forwards only ``self.padding`` to ``masked_conv2d``; ``self.stride``
    is not passed along.

    Args:
        in_channels, out_channels, kernel_size, stride, padding, dilation,
        groups, bias: forwarded unchanged, in this order, to
            ``nn.Conv2d.__init__``.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 bias=True):
        super(MaskedConv2d,
              self).__init__(in_channels, out_channels, kernel_size, stride,
                             padding, dilation, groups, bias)

    def forward(self, input, mask=None):
        """Run a regular or a masked convolution.

        Args:
            input (Tensor): Input feature map.
            mask (Tensor, optional): When given, the computation is delegated
                to ``masked_conv2d``. When ``None``, the plain ``nn.Conv2d``
                forward is used.

        Returns:
            Tensor: The convolution output.
        """
        if mask is None:  # fallback to the normal Conv2d
            return super(MaskedConv2d, self).forward(input)

        bias = self.bias
        if bias is None:
            # The masked kernel indexes the bias (``bias[:, None]``), so a
            # layer built with ``bias=False`` would crash on ``None``.
            # A zero bias yields the same result as "no bias".
            bias = self.weight.new_zeros(self.out_channels)
        return masked_conv2d(input, mask, self.weight, bias, self.padding)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/merge_cells.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
from abc import abstractmethod

import torch
import torch.nn as nn
import torch.nn.functional as F

from ..cnn import ConvModule


class BaseMergeCell(nn.Module):
    """The basic class for cells used in NAS-FPN and NAS-FCOS.

    BaseMergeCell takes 2 inputs. After applying convolution
    on them, they are resized to the target size. Then,
    they go through binary_op, which depends on the type of cell.
    If with_out_conv is True, the result of output will go through
    another convolution layer.

    Args:
        fused_channels (int): number of input channels in out_conv layer.
        out_channels (int): number of output channels in out_conv layer.
            Also used as the channel count of input1_conv / input2_conv.
        with_out_conv (bool): Whether to use out_conv layer
        out_conv_cfg (dict): Config dict for convolution layer, which should
            contain "groups", "kernel_size", "padding", "bias" to build
            out_conv layer.
        out_norm_cfg (dict): Config dict for normalization layer in out_conv.
        out_conv_order (tuple): The order of conv/norm/activation layers in
            out_conv.
        with_input1_conv (bool): Whether to use convolution on input1.
        with_input2_conv (bool): Whether to use convolution on input2.
        input_conv_cfg (dict): Config dict for building input1_conv layer and
            input2_conv layer, which is expected to contain the type of
            convolution.
            Default: None, which means using conv2d.
        input_norm_cfg (dict): Config dict for normalization layer in
            input1_conv and input2_conv layer. Default: None.
        upsample_mode (str): Interpolation method used to resize the output
            of input1_conv and input2_conv to target size. Currently, we
            support ['nearest', 'bilinear']. Default: 'nearest'.
    """

    def __init__(self,
                 fused_channels=256,
                 out_channels=256,
                 with_out_conv=True,
                 out_conv_cfg=dict(
                     groups=1, kernel_size=3, padding=1, bias=True),
                 out_norm_cfg=None,
                 out_conv_order=('act', 'conv', 'norm'),
                 with_input1_conv=False,
                 with_input2_conv=False,
                 input_conv_cfg=None,
                 input_norm_cfg=None,
                 upsample_mode='nearest'):
        super(BaseMergeCell, self).__init__()
        assert upsample_mode in ['nearest', 'bilinear']
        self.with_out_conv = with_out_conv
        self.with_input1_conv = with_input1_conv
        self.with_input2_conv = with_input2_conv
        self.upsample_mode = upsample_mode

        if self.with_out_conv:
            # out_conv_cfg is only unpacked here, never mutated.
            self.out_conv = ConvModule(
                fused_channels,
                out_channels,
                **out_conv_cfg,
                norm_cfg=out_norm_cfg,
                order=out_conv_order)

        # An empty nn.Sequential acts as an identity when the optional input
        # convolution is disabled.
        self.input1_conv = self._build_input_conv(
            out_channels, input_conv_cfg,
            input_norm_cfg) if with_input1_conv else nn.Sequential()
        self.input2_conv = self._build_input_conv(
            out_channels, input_conv_cfg,
            input_norm_cfg) if with_input2_conv else nn.Sequential()

    def _build_input_conv(self, channel, conv_cfg, norm_cfg):
        """Build a 3x3, padding-1, channel-preserving ConvModule."""
        return ConvModule(
            channel,
            channel,
            3,
            padding=1,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            bias=True)

    @abstractmethod
    def _binary_op(self, x1, x2):
        """Combine the two resized inputs; implemented by subclasses."""
        pass

    def _resize(self, x, size):
        """Resize ``x`` so that its last two dimensions equal ``size``.

        Args:
            x (Tensor): Input whose last two dimensions are resized.
            size (sequence of 2 ints): Target (height, width).

        Returns:
            Tensor: ``x`` itself if it already has the target size, an
            interpolated tensor if it is smaller, otherwise a max-pooled one.

        Raises:
            AssertionError: If downsampling is required and the input size is
                not an integer multiple of the target size.
        """
        # Normalise so that list sizes compare correctly against torch.Size
        # (which is a tuple subclass).
        size = tuple(size)
        if x.shape[-2:] == size:
            return x
        elif x.shape[-2:] < size:
            # NOTE: tuple comparison is lexicographic (height first).
            return F.interpolate(x, size=size, mode=self.upsample_mode)
        else:
            assert x.shape[-2] % size[-2] == 0 and x.shape[-1] % size[-1] == 0
            # Use an independent pooling factor per dimension; deriving it
            # from the width alone gives a wrong output height whenever the
            # height and width ratios differ.
            kernel_size = (x.shape[-2] // size[-2], x.shape[-1] // size[-1])
            x = F.max_pool2d(x, kernel_size=kernel_size, stride=kernel_size)
            return x

    def forward(self, x1, x2, out_size=None):
        """Merge two feature maps.

        Args:
            x1 (Tensor): First input.
            x2 (Tensor): Second input; must share the first two dimensions
                with ``x1``.
            out_size (sequence of 2 ints, optional): Target spatial size.
                When ``None``, the larger of the two input sizes (by tuple
                comparison) is used.

        Returns:
            Tensor: Result of ``_binary_op`` on the resized inputs, passed
            through ``out_conv`` when ``with_out_conv`` is True.
        """
        assert x1.shape[:2] == x2.shape[:2]
        assert out_size is None or len(out_size) == 2
        if out_size is None:  # resize to larger one
            out_size = max(x1.size()[2:], x2.size()[2:])

        x1 = self.input1_conv(x1)
        x2 = self.input2_conv(x2)

        x1 = self._resize(x1, out_size)
        x2 = self._resize(x2, out_size)

        x = self._binary_op(x1, x2)
        if self.with_out_conv:
            x = self.out_conv(x)
        return x


class SumCell(BaseMergeCell):
    """Merge cell whose binary operation is element-wise addition."""

    def __init__(self, in_channels, out_channels, **kwargs):
        # in_channels is handed to the base class as its first positional
        # argument (the channel count fed to out_conv).
        super().__init__(in_channels, out_channels, **kwargs)

    def _binary_op(self, x1, x2):
        """Return the sum of the two inputs."""
        merged = x1 + x2
        return merged


class ConcatCell(BaseMergeCell):
    """Merge cell whose binary operation is concatenation along dim 1."""

    def __init__(self, in_channels, out_channels, **kwargs):
        # Concatenating two inputs doubles the channel count seen by the
        # base class's first positional argument.
        doubled_channels = in_channels * 2
        super().__init__(doubled_channels, out_channels, **kwargs)

    def _binary_op(self, x1, x2):
        """Concatenate ``x1`` and ``x2`` along dimension 1."""
        return torch.cat((x1, x2), dim=1)


class GlobalPoolingCell(BaseMergeCell):
    """Merge cell that gates ``x1`` with a globally pooled ``x2``."""

    def __init__(self, in_channels=None, out_channels=None, **kwargs):
        super(GlobalPoolingCell, self).__init__(in_channels, out_channels,
                                                **kwargs)
        # Pools each channel down to a single 1x1 value.
        self.global_pool = nn.AdaptiveAvgPool2d((1, 1))

    def _binary_op(self, x1, x2):
        """Return ``x2 + sigmoid(global_pool(x2)) * x1``."""
        pooled = self.global_pool(x2)
        gate = torch.sigmoid(pooled)
        return x2 + gate * x1


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/min_area_polygons.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
from ..utils import ext_loader

# Handle returned by ``ext_loader.load_ext`` for the extension named ``_ext``;
# the list names the function this file calls on it.
ext_module = ext_loader.load_ext('_ext', ['min_area_polygons'])


def min_area_polygons(pointsets):
    """Find the smallest polygons that surrounds all points in the point sets.

    A zero-filled ``(N, 8)`` buffer with the dtype/device of ``pointsets`` is
    allocated and handed to the extension op, which writes into it in place.

    Args:
        pointsets (Tensor): point sets with shape  (N, 18).

    Returns:
        torch.Tensor: Return the smallest polygons with shape (N, 8).
    """
    num_sets = pointsets.size(0)
    result = pointsets.new_zeros((num_sets, 8))
    ext_module.min_area_polygons(pointsets, result)
    return result


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/modulated_deform_conv.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import math

import torch
import torch.nn as nn
from torch.autograd import Function
from torch.autograd.function import once_differentiable
from torch.nn.modules.utils import _pair, _single

from mmcv.utils import deprecated_api_warning
from ..cnn import CONV_LAYERS
from ..utils import ext_loader, print_log

# Handle returned by ``ext_loader.load_ext`` for the extension named ``_ext``;
# the list names the forward/backward functions this file calls on it.
ext_module = ext_loader.load_ext(
    '_ext',
    ['modulated_deform_conv_forward', 'modulated_deform_conv_backward'])


class ModulatedDeformConv2dFunction(Function):
    """Autograd function wrapping the modulated deformable conv extension.

    ``forward`` and ``backward`` delegate the computation to
    ``ext_module.modulated_deform_conv_forward`` / ``..._backward``;
    ``symbolic`` emits the custom ONNX op ``mmcv::MMCVModulatedDeformConv2d``.
    """

    @staticmethod
    def symbolic(g, input, offset, mask, weight, bias, stride, padding,
                 dilation, groups, deform_groups):
        """Emit the ONNX node; ``bias`` is only appended when not None."""
        input_tensors = [input, offset, mask, weight]
        if bias is not None:
            input_tensors.append(bias)
        return g.op(
            'mmcv::MMCVModulatedDeformConv2d',
            *input_tensors,
            stride_i=stride,
            padding_i=padding,
            dilation_i=dilation,
            groups_i=groups,
            deform_groups_i=deform_groups)

    @staticmethod
    def forward(ctx,
                input,
                offset,
                mask,
                weight,
                bias=None,
                stride=1,
                padding=0,
                dilation=1,
                groups=1,
                deform_groups=1):
        """Run the forward extension kernel.

        Args:
            ctx: Autograd context; hyper-parameters and scratch buffers are
                stored on it for ``backward``.
            input (Tensor): 4D input tensor.
            offset (Tensor): Sampling offsets; its dtype decides the dtype
                that ``input``, ``weight`` and ``bias`` are cast to.
            mask (Tensor): Modulation mask.
            weight (Tensor): Convolution weight; dims 2 and 3 are the kernel
                height and width.
            bias (Tensor, optional): Bias; an empty placeholder tensor is
                passed to the kernel when None.
            stride, padding, dilation (int or tuple): Expanded with ``_pair``.
            groups (int): Passed to the kernel as ``group``.
            deform_groups (int): Passed to the kernel as ``deformable_group``.

        Returns:
            Tensor: Output of size given by ``_output_size``.

        Raises:
            ValueError: If ``input`` is not 4D, or the computed output size
                has a non-positive dimension.
        """
        if input is not None and input.dim() != 4:
            # Implicit string concatenation keeps the message on one line
            # without embedding source indentation into it.
            raise ValueError(
                f'Expected 4D tensor as input, got {input.dim()}D tensor '
                'instead.')
        ctx.stride = _pair(stride)
        ctx.padding = _pair(padding)
        ctx.dilation = _pair(dilation)
        ctx.groups = groups
        ctx.deform_groups = deform_groups
        ctx.with_bias = bias is not None
        if not ctx.with_bias:
            bias = input.new_empty(0)  # fake tensor
        # When pytorch version >= 1.6.0, amp is adopted for fp16 mode;
        # amp won't cast the type of model (float32), but "offset" is cast
        # to float16 by nn.Conv2d automatically, leading to the type
        # mismatch with input (when it is float32) or weight.
        # The flag for whether to use fp16 or amp is the type of "offset",
        # we cast weight and input to temporarily support fp16 and amp
        # whatever the pytorch version is.
        input = input.type_as(offset)
        weight = weight.type_as(input)
        bias = bias.type_as(input)
        ctx.save_for_backward(input, offset, mask, weight, bias)
        output = input.new_empty(
            ModulatedDeformConv2dFunction._output_size(ctx, input, weight))
        # Two empty scratch tensors handed to the kernel and reused in
        # backward.
        ctx._bufs = [input.new_empty(0), input.new_empty(0)]
        ext_module.modulated_deform_conv_forward(
            input,
            weight,
            bias,
            ctx._bufs[0],
            offset,
            mask,
            output,
            ctx._bufs[1],
            kernel_h=weight.size(2),
            kernel_w=weight.size(3),
            stride_h=ctx.stride[0],
            stride_w=ctx.stride[1],
            pad_h=ctx.padding[0],
            pad_w=ctx.padding[1],
            dilation_h=ctx.dilation[0],
            dilation_w=ctx.dilation[1],
            group=ctx.groups,
            deformable_group=ctx.deform_groups,
            with_bias=ctx.with_bias)
        return output

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        """Run the backward extension kernel.

        Returns:
            tuple: Gradients for (input, offset, mask, weight, bias) followed
            by five ``None`` entries for the non-tensor arguments. The bias
            gradient is ``None`` when forward was called without a bias.
        """
        input, offset, mask, weight, bias = ctx.saved_tensors
        # Zero-initialised buffers that the kernel fills in place.
        grad_input = torch.zeros_like(input)
        grad_offset = torch.zeros_like(offset)
        grad_mask = torch.zeros_like(mask)
        grad_weight = torch.zeros_like(weight)
        grad_bias = torch.zeros_like(bias)
        grad_output = grad_output.contiguous()
        ext_module.modulated_deform_conv_backward(
            input,
            weight,
            bias,
            ctx._bufs[0],
            offset,
            mask,
            ctx._bufs[1],
            grad_input,
            grad_weight,
            grad_bias,
            grad_offset,
            grad_mask,
            grad_output,
            kernel_h=weight.size(2),
            kernel_w=weight.size(3),
            stride_h=ctx.stride[0],
            stride_w=ctx.stride[1],
            pad_h=ctx.padding[0],
            pad_w=ctx.padding[1],
            dilation_h=ctx.dilation[0],
            dilation_w=ctx.dilation[1],
            group=ctx.groups,
            deformable_group=ctx.deform_groups,
            with_bias=ctx.with_bias)
        if not ctx.with_bias:
            grad_bias = None

        return (grad_input, grad_offset, grad_mask, grad_weight, grad_bias,
                None, None, None, None, None)

    @staticmethod
    def _output_size(ctx, input, weight):
        """Compute the output shape ``(N, C_out, *spatial)``.

        Each spatial size is
        ``(in + 2 * pad - (dilation * (k - 1) + 1)) // stride + 1``.

        Raises:
            ValueError: If any resulting dimension is not positive.
        """
        channels = weight.size(0)
        output_size = (input.size(0), channels)
        for d in range(input.dim() - 2):
            in_size = input.size(d + 2)
            pad = ctx.padding[d]
            kernel = ctx.dilation[d] * (weight.size(d + 2) - 1) + 1
            stride_ = ctx.stride[d]
            output_size += ((in_size + (2 * pad) - kernel) // stride_ + 1, )
        if not all(s > 0 for s in output_size):
            raise ValueError(
                'convolution input is too small (output would be ' +
                'x'.join(map(str, output_size)) + ')')
        return output_size


# Functional entry point: calling ``modulated_deform_conv2d(...)`` dispatches
# through ``ModulatedDeformConv2dFunction.apply``.
modulated_deform_conv2d = ModulatedDeformConv2dFunction.apply


class ModulatedDeformConv2d(nn.Module):
    """Modulated deformable convolution layer with external offset and mask.

    The layer owns the convolution ``weight`` (and optional ``bias``); the
    ``offset`` and ``mask`` tensors are supplied by the caller at forward
    time.
    """

    @deprecated_api_warning({'deformable_groups': 'deform_groups'},
                            cls_name='ModulatedDeformConv2d')
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 deform_groups=1,
                 bias=True):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = _pair(kernel_size)
        self.stride = _pair(stride)
        self.padding = _pair(padding)
        self.dilation = _pair(dilation)
        self.groups = groups
        self.deform_groups = deform_groups
        # enable compatibility with nn.Conv2d
        self.transposed = False
        self.output_padding = _single(0)

        # Uninitialised storage; values are set by init_weights() below.
        weight_storage = torch.Tensor(out_channels, in_channels // groups,
                                      *self.kernel_size)
        self.weight = nn.Parameter(weight_storage)
        if not bias:
            self.register_parameter('bias', None)
        else:
            self.bias = nn.Parameter(torch.Tensor(out_channels))
        self.init_weights()

    def init_weights(self):
        """Uniformly initialise the weight and zero the bias.

        The uniform bound is ``1 / sqrt(in_channels * prod(kernel_size))``.
        """
        fan = self.in_channels
        for extent in self.kernel_size:
            fan = fan * extent
        bound = 1. / math.sqrt(fan)
        self.weight.data.uniform_(-bound, bound)
        if self.bias is not None:
            self.bias.data.zero_()

    def forward(self, x, offset, mask):
        """Apply ``modulated_deform_conv2d`` with this layer's parameters."""
        conv_args = (self.stride, self.padding, self.dilation, self.groups,
                     self.deform_groups)
        return modulated_deform_conv2d(x, offset, mask, self.weight,
                                       self.bias, *conv_args)


@CONV_LAYERS.register_module('DCNv2')
class ModulatedDeformConv2dPack(ModulatedDeformConv2d):
    """A ModulatedDeformable Conv Encapsulation that acts as normal Conv
    layers.

    An internal ``conv_offset`` convolution predicts the offsets and the mask
    from the input, so ``forward`` only needs ``x``.

    Args:
        in_channels (int): Same as nn.Conv2d.
        out_channels (int): Same as nn.Conv2d.
        kernel_size (int or tuple[int]): Same as nn.Conv2d.
        stride (int): Same as nn.Conv2d, while tuple is not supported.
        padding (int): Same as nn.Conv2d, while tuple is not supported.
        dilation (int): Same as nn.Conv2d, while tuple is not supported.
        groups (int): Same as nn.Conv2d.
        bias (bool or str): If specified as `auto`, it will be decided by the
            norm_cfg. Bias will be set as True if norm_cfg is None, otherwise
            False.
    """

    _version = 2

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Three channel groups per kernel position and deform group: forward
        # splits the output into two offset chunks and one mask chunk.
        offset_channels = (
            self.deform_groups * 3 * self.kernel_size[0] *
            self.kernel_size[1])
        self.conv_offset = nn.Conv2d(
            self.in_channels,
            offset_channels,
            kernel_size=self.kernel_size,
            stride=self.stride,
            padding=self.padding,
            dilation=self.dilation,
            bias=True)
        self.init_weights()

    def init_weights(self):
        """Initialise the base weights and zero ``conv_offset``.

        The parent constructor calls this before ``conv_offset`` exists,
        hence the ``hasattr`` guard.
        """
        super().init_weights()
        if not hasattr(self, 'conv_offset'):
            return
        self.conv_offset.weight.data.zero_()
        self.conv_offset.bias.data.zero_()

    def forward(self, x):
        """Predict offset/mask from ``x`` and run the deformable conv."""
        predicted = self.conv_offset(x)
        o1, o2, raw_mask = torch.chunk(predicted, 3, dim=1)
        offset = torch.cat((o1, o2), dim=1)
        mask = torch.sigmoid(raw_mask)
        conv_args = (self.stride, self.padding, self.dilation, self.groups,
                     self.deform_groups)
        return modulated_deform_conv2d(x, offset, mask, self.weight,
                                       self.bias, *conv_args)

    def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict,
                              missing_keys, unexpected_keys, error_msgs):
        """Load parameters, renaming legacy ``*_offset.*`` keys if needed."""
        version = local_metadata.get('version', None)

        if version is None or version < 2:
            # the key is different in early versions
            # In version < 2, ModulatedDeformConvPack
            # loads previous benchmark models.
            for suffix in ('weight', 'bias'):
                new_key = prefix + 'conv_offset.' + suffix
                old_key = prefix[:-1] + '_offset.' + suffix
                if new_key not in state_dict and old_key in state_dict:
                    state_dict[new_key] = state_dict.pop(old_key)

        if version is not None and version > 1:
            print_log(
                f'ModulatedDeformConvPack {prefix.rstrip(".")} is upgraded to '
                'version 2.',
                logger='root')

        super()._load_from_state_dict(state_dict, prefix, local_metadata,
                                      strict, missing_keys, unexpected_keys,
                                      error_msgs)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/multi_scale_deform_attn.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import math
import warnings

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd.function import Function, once_differentiable

from mmcv import deprecated_api_warning
from mmcv.cnn import constant_init, xavier_init
from mmcv.cnn.bricks.registry import ATTENTION
from mmcv.runner import BaseModule
from ..utils import ext_loader

# Handle returned by ``ext_loader.load_ext`` for the extension named ``_ext``;
# the list names the forward/backward functions this file calls on it.
ext_module = ext_loader.load_ext(
    '_ext', ['ms_deform_attn_backward', 'ms_deform_attn_forward'])


class MultiScaleDeformableAttnFunction(Function):
    """Autograd function wrapping the multi-scale deformable attention ops
    exposed by ``ext_module``."""

    @staticmethod
    def forward(ctx, value, value_spatial_shapes, value_level_start_index,
                sampling_locations, attention_weights, im2col_step):
        """GPU version of multi-scale deformable attention.

        Args:
            value (torch.Tensor): The value has shape
                (bs, num_keys, num_heads, embed_dims//num_heads)
            value_spatial_shapes (torch.Tensor): Spatial shape of
                each feature map, has shape (num_levels, 2),
                last dimension 2 represent (h, w)
            value_level_start_index (torch.Tensor): Forwarded unchanged to
                the extension op and saved for backward.
            sampling_locations (torch.Tensor): The location of sampling points,
                has shape
                (bs ,num_queries, num_heads, num_levels, num_points, 2),
                the last dimension 2 represent (x, y).
            attention_weights (torch.Tensor): The weight of sampling points
                used when calculate the attention, has shape
                (bs ,num_queries, num_heads, num_levels, num_points),
            im2col_step (Tensor): The step used in image to column.

        Returns:
            torch.Tensor: has shape (bs, num_queries, embed_dims)
        """
        ctx.im2col_step = im2col_step
        tensors = (value, value_spatial_shapes, value_level_start_index,
                   sampling_locations, attention_weights)
        output = ext_module.ms_deform_attn_forward(
            *tensors, im2col_step=ctx.im2col_step)
        ctx.save_for_backward(*tensors)
        return output

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        """GPU version of backward function.

        Args:
            grad_output (torch.Tensor): Gradient of output tensor of forward.

        Returns:
            tuple[Tensor]: Gradient of input tensors in forward.
        """
        (value, spatial_shapes, level_start_index, sampling_locations,
         attention_weights) = ctx.saved_tensors

        # Zero-filled buffers that the extension op writes into.
        d_value = torch.zeros_like(value)
        d_sampling_loc = torch.zeros_like(sampling_locations)
        d_attn_weight = torch.zeros_like(attention_weights)

        ext_module.ms_deform_attn_backward(
            value,
            spatial_shapes,
            level_start_index,
            sampling_locations,
            attention_weights,
            grad_output.contiguous(),
            d_value,
            d_sampling_loc,
            d_attn_weight,
            im2col_step=ctx.im2col_step)

        # One entry per forward argument; shapes, start index and
        # im2col_step receive no gradient.
        return (d_value, None, None, d_sampling_loc, d_attn_weight, None)


def multi_scale_deformable_attn_pytorch(value, value_spatial_shapes,
                                        sampling_locations, attention_weights):
    """CPU version of multi-scale deformable attention.

    Args:
        value (torch.Tensor): The value has shape
            (bs, num_keys, num_heads, embed_dims//num_heads)
        value_spatial_shapes (torch.Tensor): Spatial shape of
            each feature map, has shape (num_levels, 2),
            last dimension 2 represent (h, w)
        sampling_locations (torch.Tensor): The location of sampling points,
            has shape
            (bs ,num_queries, num_heads, num_levels, num_points, 2),
            the last dimension 2 represent (x, y).
        attention_weights (torch.Tensor): The weight of sampling points used
            when calculate the attention, has shape
            (bs ,num_queries, num_heads, num_levels, num_points),

    Returns:
        torch.Tensor: has shape (bs, num_queries, embed_dims)
    """

    bs, _, num_heads, embed_dims = value.shape
    _, num_queries, num_heads, num_levels, num_points, _ =\
        sampling_locations.shape

    # Split the flattened keys back into one chunk per feature level.
    keys_per_level = [H_ * W_ for H_, W_ in value_spatial_shapes]
    level_values = value.split(keys_per_level, dim=1)

    # grid_sample expects coordinates in [-1, 1]; the sampling locations are
    # rescaled from their [0, 1] convention accordingly.
    grids = 2 * sampling_locations - 1

    sampled_per_level = []
    for lvl, (H_, W_) in enumerate(value_spatial_shapes):
        # bs, H_*W_, num_heads, embed_dims ->
        # bs, H_*W_, num_heads*embed_dims ->
        # bs, num_heads*embed_dims, H_*W_ ->
        # bs*num_heads, embed_dims, H_, W_
        feat = level_values[lvl].flatten(2).transpose(1, 2)
        feat = feat.reshape(bs * num_heads, embed_dims, H_, W_)
        # bs, num_queries, num_heads, num_points, 2 ->
        # bs, num_heads, num_queries, num_points, 2 ->
        # bs*num_heads, num_queries, num_points, 2
        grid = grids[:, :, :, lvl].transpose(1, 2).flatten(0, 1)
        # bs*num_heads, embed_dims, num_queries, num_points
        sampled = F.grid_sample(
            feat,
            grid,
            mode='bilinear',
            padding_mode='zeros',
            align_corners=False)
        sampled_per_level.append(sampled)

    # (bs, num_queries, num_heads, num_levels, num_points) ->
    # (bs, num_heads, num_queries, num_levels, num_points) ->
    # (bs*num_heads, 1, num_queries, num_levels*num_points)
    weights = attention_weights.transpose(1, 2).reshape(
        bs * num_heads, 1, num_queries, num_levels * num_points)

    # Stack levels next to the point axis, merge both, then take the
    # attention-weighted sum over all sampled points.
    stacked = torch.stack(sampled_per_level, dim=-2).flatten(-2)
    weighted = (stacked * weights).sum(-1)
    output = weighted.view(bs, num_heads * embed_dims, num_queries)
    return output.transpose(1, 2).contiguous()


@ATTENTION.register_module()
class MultiScaleDeformableAttention(BaseModule):
    """An attention module used in Deformable-Detr.

    `Deformable DETR: Deformable Transformers for End-to-End Object Detection.
    <https://arxiv.org/pdf/2010.04159.pdf>`_.

    Args:
        embed_dims (int): The embedding dimension of Attention.
            Must be divisible by ``num_heads``. Default: 256.
        num_heads (int): Parallel attention heads. Default: 8.
        num_levels (int): The number of feature map used in
            Attention. Default: 4.
        num_points (int): The number of sampling points for
            each query in each head. Default: 4.
        im2col_step (int): The step used in image_to_column.
            Default: 64.
        dropout (float): A Dropout layer on `inp_identity`.
            Default: 0.1.
        batch_first (bool): Key, Query and Value are shape of
            (batch, n, embed_dim)
            or (n, batch, embed_dim). Default to False.
        norm_cfg (dict): Config dict for normalization layer.
            Only stored on the instance in this class. Default: None.
        init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization.
            Default: None.

    Raises:
        ValueError: If ``embed_dims`` is not divisible by ``num_heads``.
    """

    def __init__(self,
                 embed_dims=256,
                 num_heads=8,
                 num_levels=4,
                 num_points=4,
                 im2col_step=64,
                 dropout=0.1,
                 batch_first=False,
                 norm_cfg=None,
                 init_cfg=None):
        super().__init__(init_cfg)
        if embed_dims % num_heads != 0:
            raise ValueError(f'embed_dims must be divisible by num_heads, '
                             f'but got {embed_dims} and {num_heads}')
        dim_per_head = embed_dims // num_heads
        self.norm_cfg = norm_cfg
        self.dropout = nn.Dropout(dropout)
        self.batch_first = batch_first

        # you'd better set dim_per_head to a power of 2
        # which is more efficient in the CUDA implementation
        def _is_power_of_2(n):
            # Rejects non-int or negative input; 0 is reported as "not a
            # power of 2" rather than raising.
            if (not isinstance(n, int)) or (n < 0):
                raise ValueError(
                    'invalid input for _is_power_of_2: {} (type: {})'.format(
                        n, type(n)))
            return (n & (n - 1) == 0) and n != 0

        # Only a warning: a non-power-of-2 head dimension is still accepted.
        if not _is_power_of_2(dim_per_head):
            warnings.warn(
                "You'd better set embed_dims in "
                'MultiScaleDeformAttention to make '
                'the dimension of each attention head a power of 2 '
                'which is more efficient in our CUDA implementation.')

        self.im2col_step = im2col_step
        self.embed_dims = embed_dims
        self.num_levels = num_levels
        self.num_heads = num_heads
        self.num_points = num_points
        # Predicts an (x, y) offset for every head / level / point.
        self.sampling_offsets = nn.Linear(
            embed_dims, num_heads * num_levels * num_points * 2)
        # Predicts one un-normalised weight for every head / level / point.
        self.attention_weights = nn.Linear(embed_dims,
                                           num_heads * num_levels * num_points)
        self.value_proj = nn.Linear(embed_dims, embed_dims)
        self.output_proj = nn.Linear(embed_dims, embed_dims)
        self.init_weights()

    def init_weights(self):
        """Default initialization for Parameters of Module."""
        constant_init(self.sampling_offsets, 0.)
        # One angle per head, evenly spaced over the full circle.
        thetas = torch.arange(
            self.num_heads,
            dtype=torch.float32) * (2.0 * math.pi / self.num_heads)
        grid_init = torch.stack([thetas.cos(), thetas.sin()], -1)
        # Scale each direction so its largest absolute component is 1, then
        # replicate it for every level and point.
        grid_init = (grid_init /
                     grid_init.abs().max(-1, keepdim=True)[0]).view(
                         self.num_heads, 1, 1,
                         2).repeat(1, self.num_levels, self.num_points, 1)
        # The i-th sampling point starts (i + 1) steps along the direction.
        for i in range(self.num_points):
            grid_init[:, :, i, :] *= i + 1

        self.sampling_offsets.bias.data = grid_init.view(-1)
        constant_init(self.attention_weights, val=0., bias=0.)
        xavier_init(self.value_proj, distribution='uniform', bias=0.)
        xavier_init(self.output_proj, distribution='uniform', bias=0.)
        self._is_init = True

    @deprecated_api_warning({'residual': 'identity'},
                            cls_name='MultiScaleDeformableAttention')
    def forward(self,
                query,
                key=None,
                value=None,
                identity=None,
                query_pos=None,
                key_padding_mask=None,
                reference_points=None,
                spatial_shapes=None,
                level_start_index=None,
                **kwargs):
        """Forward Function of MultiScaleDeformAttention.

        Args:
            query (torch.Tensor): Query of Transformer with shape
                (num_query, bs, embed_dims).
            key (torch.Tensor): The key tensor with shape
                `(num_key, bs, embed_dims)`. Accepted for interface
                compatibility; it is not read in this method.
            value (torch.Tensor): The value tensor with shape
                `(num_key, bs, embed_dims)`. If None, `query` (before
                `query_pos` is added) will be used.
            identity (torch.Tensor): The tensor used for addition, with the
                same shape as `query`. Default None. If None,
                `query` will be used.
            query_pos (torch.Tensor): The positional encoding for `query`.
                Default: None.
            reference_points (torch.Tensor):  The normalized reference
                points with shape (bs, num_query, num_levels, 2),
                all elements is range in [0, 1], top-left (0,0),
                bottom-right (1, 1), including padding area.
                or (N, Length_{query}, num_levels, 4), add
                additional two dimensions is (w, h) to
                form reference boxes.
            key_padding_mask (torch.Tensor): ByteTensor for `query`, with
                shape [bs, num_key]. Positions where it is set have their
                projected value filled with 0.
            spatial_shapes (torch.Tensor): Spatial shape of features in
                different levels. With shape (num_levels, 2),
                last dimension represents (h, w).
            level_start_index (torch.Tensor): The start index of each level.
                A tensor has shape ``(num_levels, )`` and can be represented
                as [0, h_0*w_0, h_0*w_0+h_1*w_1, ...].
            **kwargs: Extra keyword arguments are accepted and ignored
                (e.g. a `key_pos` passed by callers).

        Returns:
            torch.Tensor: forwarded results with shape
            [num_query, bs, embed_dims].

        Raises:
            ValueError: If the last dimension of `reference_points` is
                neither 2 nor 4.
        """

        if value is None:
            value = query

        if identity is None:
            identity = query
        if query_pos is not None:
            query = query + query_pos
        if not self.batch_first:
            # change to (bs, num_query ,embed_dims)
            query = query.permute(1, 0, 2)
            value = value.permute(1, 0, 2)

        bs, num_query, _ = query.shape
        bs, num_value, _ = value.shape
        # The flattened value length must equal the total number of
        # positions over all feature levels.
        assert (spatial_shapes[:, 0] * spatial_shapes[:, 1]).sum() == num_value

        value = self.value_proj(value)
        if key_padding_mask is not None:
            value = value.masked_fill(key_padding_mask[..., None], 0.0)
        value = value.view(bs, num_value, self.num_heads, -1)
        sampling_offsets = self.sampling_offsets(query).view(
            bs, num_query, self.num_heads, self.num_levels, self.num_points, 2)
        # Softmax is taken jointly over all levels and points of a head,
        # then the result is reshaped back to separate level/point axes.
        attention_weights = self.attention_weights(query).view(
            bs, num_query, self.num_heads, self.num_levels * self.num_points)
        attention_weights = attention_weights.softmax(-1)

        attention_weights = attention_weights.view(bs, num_query,
                                                   self.num_heads,
                                                   self.num_levels,
                                                   self.num_points)
        if reference_points.shape[-1] == 2:
            # Point references: offsets are divided by (w, h) of each level.
            offset_normalizer = torch.stack(
                [spatial_shapes[..., 1], spatial_shapes[..., 0]], -1)
            sampling_locations = reference_points[:, :, None, :, None, :] \
                + sampling_offsets \
                / offset_normalizer[None, None, None, :, None, :]
        elif reference_points.shape[-1] == 4:
            # Box references: offsets are scaled by half the last two
            # reference components and divided by num_points.
            sampling_locations = reference_points[:, :, None, :, None, :2] \
                + sampling_offsets / self.num_points \
                * reference_points[:, :, None, :, None, 2:] \
                * 0.5
        else:
            raise ValueError(
                f'Last dim of reference_points must be'
                f' 2 or 4, but get {reference_points.shape[-1]} instead.')
        # Use the extension-backed autograd function for CUDA tensors and
        # the pure-PyTorch implementation otherwise.
        if torch.cuda.is_available() and value.is_cuda:
            output = MultiScaleDeformableAttnFunction.apply(
                value, spatial_shapes, level_start_index, sampling_locations,
                attention_weights, self.im2col_step)
        else:
            output = multi_scale_deformable_attn_pytorch(
                value, spatial_shapes, sampling_locations, attention_weights)

        output = self.output_proj(output)

        if not self.batch_first:
            # (num_query, bs ,embed_dims)
            output = output.permute(1, 0, 2)

        # Residual connection: dropout is applied to the attention output
        # only, not to `identity`.
        return self.dropout(output) + identity


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/nms.py
================================================
import os

import numpy as np
import torch

from mmcv.utils import deprecated_api_warning
from ..utils import ext_loader

# Handle to the `_ext` extension obtained through `ext_loader`; the names
# listed here are the ops this file calls on it (`nms`, `softnms`,
# `nms_match`, `nms_rotated`).
ext_module = ext_loader.load_ext(
    '_ext', ['nms', 'softnms', 'nms_match', 'nms_rotated'])


# This function is modified from: https://github.com/pytorch/vision/
class NMSop(torch.autograd.Function):
    """Autograd-function wrapper around the extension ``nms`` op.

    ``forward`` runs NMS eagerly through ``ext_module.nms``; ``symbolic``
    describes the operation as ONNX graph nodes for export.
    """

    @staticmethod
    def forward(ctx, bboxes, scores, iou_threshold, offset, score_threshold,
                max_num):
        """Run NMS and return indices of the kept boxes.

        Args:
            ctx: autograd context (not used by this method).
            bboxes (torch.Tensor): candidate boxes.
            scores (torch.Tensor): one score per box.
            iou_threshold (float): forwarded to the op as a Python float.
            offset (int): forwarded unchanged to the op.
            score_threshold (float): when > 0, boxes whose score is not
                strictly greater than it are dropped before NMS.
            max_num (int): when > 0, only the first ``max_num`` indices
                returned by the op are kept.

        Returns:
            torch.Tensor: indices that refer to the *unfiltered* input
            ``bboxes`` (see the remapping at the end).
        """
        is_filtering_by_score = score_threshold > 0
        if is_filtering_by_score:
            valid_mask = scores > score_threshold
            bboxes, scores = bboxes[valid_mask], scores[valid_mask]
            # Positions of the surviving boxes in the original input; used
            # below to translate op indices back to input indices.
            valid_inds = torch.nonzero(
                valid_mask, as_tuple=False).squeeze(dim=1)

        inds = ext_module.nms(
            bboxes, scores, iou_threshold=float(iou_threshold), offset=offset)

        # Truncation keeps the leading entries of `inds` in the order the op
        # returned them (presumably descending score - confirm against the
        # extension implementation).
        if max_num > 0:
            inds = inds[:max_num]
        if is_filtering_by_score:
            # `inds` indexes the filtered subset; map back to input indices.
            inds = valid_inds[inds]
        return inds

    @staticmethod
    def symbolic(g, bboxes, scores, iou_threshold, offset, score_threshold,
                 max_num):
        """Emit the ONNX graph for NMS.

        Uses the custom ``mmcv::NonMaxSuppression`` node when the custom op
        library is loaded and the ``ONNX_BACKEND`` environment variable is
        not ``'MMCVTensorRT'``; otherwise builds the standard ONNX
        ``NonMaxSuppression`` node and extracts the box-index column.
        """
        from ..onnx import is_custom_op_loaded
        has_custom_op = is_custom_op_loaded()
        # TensorRT nms plugin is aligned with original nms in ONNXRuntime
        is_trt_backend = os.environ.get('ONNX_BACKEND') == 'MMCVTensorRT'
        if has_custom_op and (not is_trt_backend):
            # NOTE: score_threshold and max_num are not passed to the
            # custom node in this branch.
            return g.op(
                'mmcv::NonMaxSuppression',
                bboxes,
                scores,
                iou_threshold_f=float(iou_threshold),
                offset_i=int(offset))
        else:
            from torch.onnx.symbolic_opset9 import select, squeeze, unsqueeze
            from ..onnx.onnx_utils.symbolic_helper import _size_helper

            # Add a leading dim to boxes and two leading dims to scores
            # (presumably the batch / class axes the standard op expects -
            # confirm against the ONNX operator spec).
            boxes = unsqueeze(g, bboxes, 0)
            scores = unsqueeze(g, unsqueeze(g, scores, 0), 0)

            if max_num > 0:
                max_num = g.op(
                    'Constant',
                    value_t=torch.tensor(max_num, dtype=torch.long))
            else:
                # No explicit limit: use the size of dim 0 of `bboxes`
                # (the number of input boxes) as the per-class maximum.
                dim = g.op('Constant', value_t=torch.tensor(0))
                max_num = _size_helper(g, bboxes, dim)
            max_output_per_class = max_num
            iou_threshold = g.op(
                'Constant',
                value_t=torch.tensor([iou_threshold], dtype=torch.float))
            score_threshold = g.op(
                'Constant',
                value_t=torch.tensor([score_threshold], dtype=torch.float))
            nms_out = g.op('NonMaxSuppression', boxes, scores,
                           max_output_per_class, iou_threshold,
                           score_threshold)
            # Select column 2 along dim 1 and squeeze that dim away
            # (presumably the box-index column of the op output - confirm
            # against the targeted opset).
            return squeeze(
                g,
                select(
                    g, nms_out, 1,
                    g.op(
                        'Constant',
                        value_t=torch.tensor([2], dtype=torch.long))), 1)


class SoftNMSop(torch.autograd.Function):
    """Autograd-function wrapper around the extension ``softnms`` op."""

    @staticmethod
    def forward(ctx, boxes, scores, iou_threshold, sigma, min_score, method,
                offset):
        """Run Soft-NMS on CPU copies of the inputs.

        Returns:
            tuple: ``(dets, inds)`` where ``dets`` is the CPU buffer of shape
            ``(boxes.size(0), 5)`` handed to the op and ``inds`` is the
            value returned by the op.
        """
        num_boxes = boxes.size(0)
        # Output buffer passed to the extension op.
        dets = boxes.new_empty((num_boxes, 5), device='cpu')
        op_kwargs = dict(
            iou_threshold=float(iou_threshold),
            sigma=float(sigma),
            min_score=float(min_score),
            method=int(method),
            offset=int(offset))
        inds = ext_module.softnms(boxes.cpu(), scores.cpu(), dets.cpu(),
                                  **op_kwargs)
        return dets, inds

    @staticmethod
    def symbolic(g, boxes, scores, iou_threshold, sigma, min_score, method,
                 offset):
        """Emit the custom ``mmcv::SoftNonMaxSuppression`` ONNX node.

        Asserts that the running torch version is at least 1.7.0.
        """
        from packaging import version
        running = version.parse(torch.__version__)
        assert running >= version.parse('1.7.0')
        return g.op(
            'mmcv::SoftNonMaxSuppression',
            boxes,
            scores,
            iou_threshold_f=float(iou_threshold),
            sigma_f=float(sigma),
            min_score_f=float(min_score),
            method_i=int(method),
            offset_i=int(offset),
            outputs=2)


@deprecated_api_warning({'iou_thr': 'iou_threshold'})
def nms(boxes, scores, iou_threshold, offset=0, score_threshold=0, max_num=-1):
    """Run NMS on torch tensors or numpy arrays.

    Numpy inputs are converted to tensors for the computation and the
    results are converted back to numpy when ``boxes`` was a numpy array.

    Arguments:
        boxes (torch.Tensor or np.ndarray): boxes in shape (N, 4).
        scores (torch.Tensor or np.ndarray): scores in shape (N, ).
        iou_threshold (float): IoU threshold for NMS.
        offset (int, 0 or 1): boxes' width or height is (x2 - x1 + offset).
        score_threshold (float): score threshold for NMS. Not used when
            ``torch.__version__ == 'parrots'``.
        max_num (int): maximum number of boxes after NMS. Not used when
            ``torch.__version__ == 'parrots'``.

    Returns:
        tuple: ``(dets, inds)``; ``dets`` has shape (K, 5) holding the kept
        boxes with their score appended, ``inds`` holds the kept indices.

    Example:
        >>> boxes = np.array([[49.1, 32.4, 51.0, 35.9],
        >>>                   [49.3, 32.9, 51.0, 35.3],
        >>>                   [49.2, 31.8, 51.0, 35.4],
        >>>                   [35.1, 11.5, 39.1, 15.7],
        >>>                   [35.6, 11.8, 39.3, 14.2],
        >>>                   [35.3, 11.5, 39.9, 14.5],
        >>>                   [35.2, 11.7, 39.7, 15.7]], dtype=np.float32)
        >>> scores = np.array([0.9, 0.9, 0.5, 0.5, 0.5, 0.4, 0.3],\
               dtype=np.float32)
        >>> iou_threshold = 0.6
        >>> dets, inds = nms(boxes, scores, iou_threshold)
        >>> assert len(inds) == len(dets) == 3
    """
    accepted_types = (torch.Tensor, np.ndarray)
    assert isinstance(boxes, accepted_types)
    assert isinstance(scores, accepted_types)

    # The output container type follows the type of `boxes` only.
    is_numpy = isinstance(boxes, np.ndarray)
    if is_numpy:
        boxes = torch.from_numpy(boxes)
    if isinstance(scores, np.ndarray):
        scores = torch.from_numpy(scores)

    assert boxes.size(1) == 4
    assert boxes.size(0) == scores.size(0)
    assert offset in (0, 1)

    if torch.__version__ != 'parrots':
        inds = NMSop.apply(boxes, scores, iou_threshold, offset,
                           score_threshold, max_num)
    else:
        # parrots calls the extension op directly, bypassing NMSop.
        inds = ext_module.nms(
            boxes,
            scores,
            iou_threshold=float(iou_threshold),
            offset=int(offset))

    kept_scores = scores[inds].reshape(-1, 1)
    dets = torch.cat((boxes[inds], kept_scores), dim=1)
    if not is_numpy:
        return dets, inds
    return dets.cpu().numpy(), inds.cpu().numpy()


@deprecated_api_warning({'iou_thr': 'iou_threshold'})
def soft_nms(boxes,
             scores,
             iou_threshold=0.3,
             sigma=0.5,
             min_score=1e-3,
             method='linear',
             offset=0):
    """Run Soft-NMS; the computation is always performed on CPU copies.

    Inputs may be torch tensors or numpy arrays. Numpy results are returned
    when ``boxes`` was a numpy array; otherwise results are moved to the
    device of ``boxes``.

    Args:
        boxes (torch.Tensor or np.ndarray): boxes in shape (N, 4).
        scores (torch.Tensor or np.ndarray): scores in shape (N, ).
        iou_threshold (float): IoU threshold for NMS.
        sigma (float): hyperparameter for gaussian method
        min_score (float): score filter threshold
        method (str): one of 'naive', 'linear' or 'gaussian'
        offset (int, 0 or 1): boxes' width or height is (x2 - x1 + offset).

    Returns:
        tuple: kept dets (boxes and scores) and indice, which always have
        the same data type as the input.

    Example:
        >>> boxes = np.array([[4., 3., 5., 3.],
        >>>                   [4., 3., 5., 4.],
        >>>                   [3., 1., 3., 1.],
        >>>                   [3., 1., 3., 1.],
        >>>                   [3., 1., 3., 1.],
        >>>                   [3., 1., 3., 1.]], dtype=np.float32)
        >>> scores = np.array([0.9, 0.9, 0.5, 0.5, 0.4, 0.0], dtype=np.float32)
        >>> iou_threshold = 0.6
        >>> dets, inds = soft_nms(boxes, scores, iou_threshold, sigma=0.5)
        >>> assert len(inds) == len(dets) == 5
    """

    accepted_types = (torch.Tensor, np.ndarray)
    assert isinstance(boxes, accepted_types)
    assert isinstance(scores, accepted_types)

    # The output container type follows the type of `boxes` only.
    is_numpy = isinstance(boxes, np.ndarray)
    if is_numpy:
        boxes = torch.from_numpy(boxes)
    if isinstance(scores, np.ndarray):
        scores = torch.from_numpy(scores)

    assert boxes.size(1) == 4
    assert boxes.size(0) == scores.size(0)
    assert offset in (0, 1)
    method_dict = {'naive': 0, 'linear': 1, 'gaussian': 2}
    assert method in method_dict.keys()
    method_id = method_dict[method]

    if torch.__version__ != 'parrots':
        dets, inds = SoftNMSop.apply(boxes.cpu(), scores.cpu(),
                                     float(iou_threshold), float(sigma),
                                     float(min_score), method_id,
                                     int(offset))
    else:
        # parrots calls the extension op directly with a CPU output buffer.
        dets = boxes.new_empty((boxes.size(0), 5), device='cpu')
        inds = ext_module.softnms(
            boxes.cpu(),
            scores.cpu(),
            dets.cpu(),
            iou_threshold=float(iou_threshold),
            sigma=float(sigma),
            min_score=min_score,
            method=method_id,
            offset=int(offset))

    # Only the first len(inds) rows of the buffer are returned.
    dets = dets[:inds.size(0)]

    if is_numpy:
        return dets.cpu().numpy(), inds.cpu().numpy()
    target_device = boxes.device
    return dets.to(device=target_device), inds.to(device=target_device)


def batched_nms(boxes, scores, idxs, nms_cfg, class_agnostic=False):
    r"""Performs non-maximum suppression in a batched fashion.

    Modified from `torchvision/ops/boxes.py#L39
    <https://github.com/pytorch/vision/blob/
    505cd6957711af790211896d32b40291bea1bc21/torchvision/ops/boxes.py#L39>`_.
    In order to perform NMS independently per class, we add an offset to all
    the boxes. The offset is dependent only on the class idx, and is large
    enough so that boxes from different classes do not overlap.

    Note:
        In v1.4.1 and later, ``batched_nms`` supports skipping the NMS and
        returns sorted raw results when `nms_cfg` is None.

    Note:
        When ``boxes`` has no elements, an empty ``(0, 5)`` result and an
        empty ``long`` index tensor are returned without running NMS.

    Args:
        boxes (torch.Tensor): boxes in shape (N, 4).
        scores (torch.Tensor): scores in shape (N, ).
        idxs (torch.Tensor): each index value correspond to a bbox cluster,
            and NMS will not be applied between elements of different idxs,
            shape (N, ).
        nms_cfg (dict | None): Supports skipping the nms when `nms_cfg`
            is None, otherwise it should specify nms type and other
            parameters like `iou_thr`. Possible keys includes the following.

            - iou_thr (float): IoU threshold used for NMS.
            - split_thr (float): threshold number of boxes. In some cases the
              number of boxes is large (e.g., 200k). To avoid OOM during
              training, the users could set `split_thr` to a small value.
              If the number of boxes is greater than the threshold, it will
              perform NMS on each group of boxes separately and sequentially.
              Defaults to 10000.
        class_agnostic (bool): if true, nms is class agnostic,
            i.e. IoU thresholding happens over all boxes,
            regardless of the predicted class.

    Returns:
        tuple: kept dets and indice.

        - boxes (Tensor): Bboxes with score after nms, has shape
          (num_bboxes, 5). last dimension 5 arrange as
          (x1, y1, x2, y2, score)
        - keep (Tensor): The indices of remaining boxes in input
          boxes.
    """
    # skip nms when nms_cfg is None
    if nms_cfg is None:
        scores, inds = scores.sort(descending=True)
        boxes = boxes[inds]
        return torch.cat([boxes, scores[:, None]], -1), inds

    # Nothing to suppress. `boxes.max()` below raises on a zero-element
    # tensor, so return empty results explicitly instead of crashing.
    if boxes.numel() == 0:
        keep = boxes.new_zeros((0, ), dtype=torch.long)
        return torch.cat([boxes, scores[:, None]], -1), keep

    nms_cfg_ = nms_cfg.copy()
    class_agnostic = nms_cfg_.pop('class_agnostic', class_agnostic)
    if class_agnostic:
        boxes_for_nms = boxes
    else:
        # Shift every class by a multiple of (max coordinate + 1) so boxes
        # from different classes can never overlap.
        max_coordinate = boxes.max()
        offsets = idxs.to(boxes) * (max_coordinate + torch.tensor(1).to(boxes))
        boxes_for_nms = boxes + offsets[:, None]

    nms_type = nms_cfg_.pop('type', 'nms')
    # NOTE(security): `eval` resolves the op name taken from `nms_cfg`; this
    # executes arbitrary expressions, so `nms_cfg` must come from a trusted
    # source.
    nms_op = eval(nms_type)

    split_thr = nms_cfg_.pop('split_thr', 10000)
    # Won't split to multiple nms nodes when exporting to onnx
    if boxes_for_nms.shape[0] < split_thr or torch.onnx.is_in_onnx_export():
        dets, keep = nms_op(boxes_for_nms, scores, **nms_cfg_)
        boxes = boxes[keep]
        # -1 indexing works abnormal in TensorRT
        # This assumes `dets` has 5 dimensions where
        # the last dimension is score.
        # TODO: more elegant way to handle the dimension issue.
        # Some type of nms would reweight the score, such as SoftNMS
        scores = dets[:, 4]
    else:
        # In the split path `max_num` is applied after merging all groups
        # rather than being forwarded to the per-group NMS calls.
        max_num = nms_cfg_.pop('max_num', -1)
        total_mask = scores.new_zeros(scores.size(), dtype=torch.bool)
        # Some type of nms would reweight the score, such as SoftNMS
        scores_after_nms = scores.new_zeros(scores.size())
        for group_id in torch.unique(idxs):
            mask = (idxs == group_id).nonzero(as_tuple=False).view(-1)
            dets, keep = nms_op(boxes_for_nms[mask], scores[mask], **nms_cfg_)
            total_mask[mask[keep]] = True
            scores_after_nms[mask[keep]] = dets[:, -1]
        keep = total_mask.nonzero(as_tuple=False).view(-1)

        # Re-sort the merged survivors by their (possibly reweighted) score.
        scores, inds = scores_after_nms[keep].sort(descending=True)
        keep = keep[inds]
        boxes = boxes[keep]

        if max_num > 0:
            keep = keep[:max_num]
            boxes = boxes[:max_num]
            scores = scores[:max_num]

    boxes = torch.cat([boxes, scores[:, None]], -1)
    return boxes, keep


def nms_match(dets, iou_threshold):
    """Group detections by NMS.

    Works like NMS, but instead of discarding a suppressed bbox its index is
    recorded in the group of the bbox that was kept. In each group, indice
    is sorted as score order.

    Args:
        dets (torch.Tensor | np.ndarray): Det boxes with scores, shape (N, 5).
        iou_threshold (float): IoU thresh for NMS.

    Returns:
        list[torch.Tensor | np.ndarray]: One entry per matched group; each
        entry holds the indices of that group in score order. The element
        type follows the type of ``dets``. Empty input gives an empty list.
    """
    is_tensor = isinstance(dets, torch.Tensor)

    if dets.shape[0] == 0:
        matched = []
    else:
        assert dets.shape[-1] == 5, 'inputs dets.shape should be (N, 5), ' \
                                    f'but get {dets.shape}'
        # The extension op is always fed a CPU tensor.
        dets_t = dets.detach().cpu() if is_tensor else torch.from_numpy(dets)
        matched = ext_module.nms_match(
            dets_t, iou_threshold=float(iou_threshold))
        if torch.__version__ == 'parrots':
            matched = matched.tolist()

    if is_tensor:
        return [dets.new_tensor(group, dtype=torch.long) for group in matched]
    return [np.array(group, dtype=int) for group in matched]


def nms_rotated(dets, scores, iou_threshold, labels=None, clockwise=True):
    """Non-maximum suppression (NMS) for rotated boxes based on their
    intersection-over-union (IoU).

    Lower scoring rotated boxes whose IoU with a higher scoring rotated box
    exceeds ``iou_threshold`` are removed.

    Args:
        dets (Tensor):  Rotated boxes in shape (N, 5). They are expected to
            be in (x_ctr, y_ctr, width, height, angle_radian) format.
        scores (Tensor): scores in shape (N, ).
        iou_threshold (float): IoU thresh for NMS.
        labels (Tensor): boxes' label in shape (N,).
        clockwise (bool): flag indicating whether the positive angular
            orientation is clockwise. default True.
            `New in version 1.4.3.`

    Returns:
        tuple: kept dets(boxes and scores) and indice. For empty input the
        untouched ``dets`` and ``None`` are returned.
    """
    if dets.shape[0] == 0:
        return dets, None

    if clockwise:
        dets_cw = dets
    else:
        # Negate the last column (the angle) to convert to clockwise.
        angle_sign = dets.new_ones(dets.shape[-1])
        angle_sign[-1] = -1
        dets_cw = dets * angle_sign

    multi_label = labels is not None
    # With labels, append them as an extra trailing column for the op.
    dets_wl = (
        torch.cat((dets_cw, labels.unsqueeze(1)), 1)
        if multi_label else dets_cw)

    order = scores.sort(0, descending=True)[1]
    dets_sorted = dets_wl.index_select(0, order)

    if torch.__version__ != 'parrots':
        keep_inds = ext_module.nms_rotated(dets_wl, scores, order, dets_sorted,
                                           iou_threshold, multi_label)
    else:
        keep_inds = ext_module.nms_rotated(
            dets_wl,
            scores,
            order,
            dets_sorted,
            iou_threshold=iou_threshold,
            multi_label=multi_label)

    # The returned boxes come from the original (un-flipped) `dets`.
    kept_scores = scores[keep_inds].reshape(-1, 1)
    dets = torch.cat((dets[keep_inds], kept_scores), dim=1)
    return dets, keep_inds


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/pixel_group.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import numpy as np
import torch

from ..utils import ext_loader

# Handle to the `_ext` extension obtained through `ext_loader`;
# `pixel_group` is the only op this file calls on it.
ext_module = ext_loader.load_ext('_ext', ['pixel_group'])


def pixel_group(score, mask, embedding, kernel_label, kernel_contour,
                kernel_region_num, distance_threshold):
    """Group pixels into text instances, which is widely used text detection
    methods.

    Numpy inputs are converted to tensors before the extension op is called.

    Arguments:
        score (np.array or torch.Tensor): The foreground score with size hxw.
        mask (np.array or Tensor): The foreground mask with size hxw.
        embedding (np.array or torch.Tensor): The embedding with size hxwxc to
            distinguish instances.
        kernel_label (np.array or torch.Tensor): The instance kernel index with
            size hxw.
        kernel_contour (np.array or torch.Tensor): The kernel contour with
            size hxw.
        kernel_region_num (int): The instance kernel region number.
        distance_threshold (float): The embedding distance threshold between
            kernel and pixel in one instance.

    Returns:
        list[list[float]]: The instance coordinates and attributes list. Each
        element consists of averaged confidence, pixel number, and coordinates
        (x_i, y_i for all pixels) in order. Under parrots each element is a
        float64 ``np.ndarray``.

    Raises:
        AssertionError: if an argument has an unsupported type.
    """
    assert isinstance(score, (torch.Tensor, np.ndarray))
    assert isinstance(mask, (torch.Tensor, np.ndarray))
    assert isinstance(embedding, (torch.Tensor, np.ndarray))
    assert isinstance(kernel_label, (torch.Tensor, np.ndarray))
    assert isinstance(kernel_contour, (torch.Tensor, np.ndarray))
    assert isinstance(kernel_region_num, int)
    assert isinstance(distance_threshold, float)

    if isinstance(score, np.ndarray):
        score = torch.from_numpy(score)
    if isinstance(mask, np.ndarray):
        mask = torch.from_numpy(mask)
    if isinstance(embedding, np.ndarray):
        embedding = torch.from_numpy(embedding)
    if isinstance(kernel_label, np.ndarray):
        kernel_label = torch.from_numpy(kernel_label)
    if isinstance(kernel_contour, np.ndarray):
        kernel_contour = torch.from_numpy(kernel_contour)

    if torch.__version__ == 'parrots':
        label = ext_module.pixel_group(
            score,
            mask,
            embedding,
            kernel_label,
            kernel_contour,
            kernel_region_num=kernel_region_num,
            distance_threshold=distance_threshold)
        label = label.tolist()
        label = label[0]
        # Flat layout: the first `kernel_region_num` entries are per-region
        # lengths, followed by the concatenated per-region payloads.
        list_index = kernel_region_num
        pixel_assignment = []
        for x in range(kernel_region_num):
            region_len = int(label[x])
            # `np.float` was removed in NumPy 1.24; the builtin `float` is
            # the exact type it aliased (float64).
            pixel_assignment.append(
                np.array(
                    label[list_index:list_index + region_len], dtype=float))
            list_index = list_index + region_len
    else:
        pixel_assignment = ext_module.pixel_group(score, mask, embedding,
                                                  kernel_label, kernel_contour,
                                                  kernel_region_num,
                                                  distance_threshold)
    return pixel_assignment


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/point_sample.py
================================================
# Modified from https://github.com/facebookresearch/detectron2/tree/master/projects/PointRend  # noqa

from os import path as osp

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.modules.utils import _pair
from torch.onnx.operators import shape_as_tensor


def bilinear_grid_sample(im, grid, align_corners=False):
    """Given an input and a flow-field grid, computes the output using input
    values and pixel locations from grid. Supported only bilinear interpolation
    method to sample the input pixels.

    Locations falling outside the input read zeros (the input is padded by
    one zero pixel on every side and indices are clipped into that padding).

    Args:
        im (torch.Tensor): Input feature map, shape (N, C, H, W)
        grid (torch.Tensor): Point coordinates, shape (N, Hg, Wg, 2)
        align_corners (bool): If set to True, the extrema (-1 and 1) are
            considered as referring to the center points of the input's
            corner pixels. If set to False, they are instead considered as
            referring to the corner points of the input's corner pixels,
            making the sampling more resolution agnostic.

    Returns:
        torch.Tensor: A tensor with sampled points, shape (N, C, Hg, Wg)
    """
    n, c, h, w = im.shape
    gn, gh, gw, _ = grid.shape
    assert n == gn

    x = grid[:, :, :, 0]
    y = grid[:, :, :, 1]

    # Map normalized [-1, 1] coordinates to pixel coordinates.
    if align_corners:
        x = ((x + 1) / 2) * (w - 1)
        y = ((y + 1) / 2) * (h - 1)
    else:
        x = ((x + 1) * w - 1) / 2
        y = ((y + 1) * h - 1) / 2

    x = x.view(n, -1)
    y = y.view(n, -1)

    # Integer corners surrounding every sampling location.
    x0 = torch.floor(x).long()
    y0 = torch.floor(y).long()
    x1 = x0 + 1
    y1 = y0 + 1

    # Bilinear weights, computed from the un-clipped corners.
    wa = ((x1 - x) * (y1 - y)).unsqueeze(1)
    wb = ((x1 - x) * (y - y0)).unsqueeze(1)
    wc = ((x - x0) * (y1 - y)).unsqueeze(1)
    wd = ((x - x0) * (y - y0)).unsqueeze(1)

    # Apply default for grid_sample function zero padding
    im_padded = F.pad(im, pad=[1, 1, 1, 1], mode='constant', value=0)
    padded_h = h + 2
    padded_w = w + 2
    # save points positions after padding
    x0, x1, y0, y1 = x0 + 1, x1 + 1, y0 + 1, y1 + 1

    # Clip coordinates to padded image size. The clip bounds are created on
    # the same device as the index tensors so that `torch.where` never mixes
    # a CPU constant with indices living on another device.
    idx_device = x0.device
    lower = torch.tensor(0, device=idx_device)
    upper_x = torch.tensor(padded_w - 1, device=idx_device)
    upper_y = torch.tensor(padded_h - 1, device=idx_device)
    x0 = torch.where(x0 < 0, lower, x0)
    x0 = torch.where(x0 > padded_w - 1, upper_x, x0)
    x1 = torch.where(x1 < 0, lower, x1)
    x1 = torch.where(x1 > padded_w - 1, upper_x, x1)
    y0 = torch.where(y0 < 0, lower, y0)
    y0 = torch.where(y0 > padded_h - 1, upper_y, y0)
    y1 = torch.where(y1 < 0, lower, y1)
    y1 = torch.where(y1 > padded_h - 1, upper_y, y1)

    im_padded = im_padded.view(n, c, -1)

    # Flattened (row-major) indices of the four corners, repeated per channel.
    x0_y0 = (x0 + y0 * padded_w).unsqueeze(1).expand(-1, c, -1)
    x0_y1 = (x0 + y1 * padded_w).unsqueeze(1).expand(-1, c, -1)
    x1_y0 = (x1 + y0 * padded_w).unsqueeze(1).expand(-1, c, -1)
    x1_y1 = (x1 + y1 * padded_w).unsqueeze(1).expand(-1, c, -1)

    Ia = torch.gather(im_padded, 2, x0_y0)
    Ib = torch.gather(im_padded, 2, x0_y1)
    Ic = torch.gather(im_padded, 2, x1_y0)
    Id = torch.gather(im_padded, 2, x1_y1)

    return (Ia * wa + Ib * wb + Ic * wc + Id * wd).reshape(n, c, gh, gw)


def is_in_onnx_export_without_custom_ops():
    """Tell whether an ONNX export is running while the path reported by
    ``get_onnxruntime_op_path`` does not exist on disk."""
    from mmcv.ops import get_onnxruntime_op_path
    custom_op_lib = get_onnxruntime_op_path()
    exporting = torch.onnx.is_in_onnx_export()
    return exporting and not osp.exists(custom_op_lib)


def normalize(grid):
    """Map a grid from the range [-1, 1] to the range [0, 1].

    Args:
        grid (torch.Tensor): The grid to be normalize, range [-1, 1].

    Returns:
        torch.Tensor: Normalized grid, range [0, 1].
    """
    shifted = grid + 1.0
    return shifted / 2.0


def denormalize(grid):
    """Map a grid from the range [0, 1] to the range [-1, 1].

    Args:
        grid (torch.Tensor): The grid to be denormalize, range [0, 1].

    Returns:
        torch.Tensor: Denormalized grid, range [-1, 1].
    """
    doubled = grid * 2.0
    return doubled - 1.0


def generate_grid(num_grid, size, device):
    """Build a regular grid of points in [0, 1] x [0, 1] and repeat it
    ``num_grid`` times.

    Args:
        num_grid (int): The number of grids to sample, one for each region.
        size (tuple[int, int]): The side size of the regular grid.
        device (torch.device): Desired device of returned tensor.

    Returns:
        torch.Tensor: A tensor of shape (num_grid, size[0]*size[1], 2) that
        contains coordinates for the regular grids.
    """
    # Identity affine transform: affine_grid then yields the plain regular
    # grid in [-1, 1] coordinates.
    identity_theta = torch.tensor([[[1., 0., 0.], [0., 1., 0.]]],
                                  device=device)
    out_size = torch.Size((1, 1, *size))
    unit_grid = normalize(
        F.affine_grid(identity_theta, out_size, align_corners=False))
    flat_grid = unit_grid.view(1, -1, 2)
    # expand() repeats the single grid without copying.
    return flat_grid.expand(num_grid, -1, -1)


def rel_roi_point_to_abs_img_point(rois, rel_roi_points):
    """Turn points given relative to each RoI into absolute image
    coordinates.

    The whole computation runs under ``torch.no_grad()``.

    Args:
        rois (torch.Tensor): RoIs or BBoxes, shape (N, 4) or (N, 5)
        rel_roi_points (torch.Tensor): Point coordinates inside RoI, relative
            to RoI, location, range (0, 1), shape (N, P, 2)
    Returns:
        torch.Tensor: Image based absolute point coordinates, shape (N, P, 2)
    """

    with torch.no_grad():
        assert rel_roi_points.size(0) == rois.size(0)
        assert rois.dim() == 2
        assert rel_roi_points.dim() == 3
        assert rel_roi_points.size(2) == 2
        # A 5-column RoI carries the batch index first; drop it.
        if rois.size(1) == 5:
            rois = rois[:, 1:]
        points = rel_roi_points.clone()
        # Separate x / y tensors are used (instead of writing into `points`)
        # to avoid an error when exporting to onnx.
        roi_w = rois[:, None, 2] - rois[:, None, 0]
        roi_h = rois[:, None, 3] - rois[:, None, 1]
        xs = points[:, :, 0] * roi_w
        ys = points[:, :, 1] * roi_h
        xs += rois[:, None, 0]
        ys += rois[:, None, 1]
        abs_img_points = torch.stack([xs, ys], dim=2)
    return abs_img_points


def get_shape_from_feature_map(x):
    """Return the spatial size of a feature map as a float tensor, using a
    traceable shape op while exporting to onnx.

    Args:
        x (torch.Tensor): Input tensor, shape (N, C, H, W)

    Returns:
        torch.Tensor: Spatial resolution (width, height), shape (1, 1, 2)
    """
    if torch.onnx.is_in_onnx_export():
        spatial_dims = shape_as_tensor(x)[2:]
    else:
        spatial_dims = torch.tensor(x.shape[2:])
    # (H, W) -> (W, H), then reshape for broadcasting against (N, P, 2).
    width_height = spatial_dims.flip(0).view(1, 1, 2)
    return width_height.to(x.device).float()


def abs_img_point_to_rel_img_point(abs_img_points, img, spatial_scale=1.):
    """Divide absolute point coordinates by the image (width, height) to
    obtain relative coordinates for sampling.

    Args:
        abs_img_points (torch.Tensor): Image based absolute point coordinates,
            shape (N, P, 2)
        img (tuple or torch.Tensor): (height, width) of image or feature map.
        spatial_scale (float, optional): Scale points by this factor.
            Default: 1.

    Returns:
        Tensor: Image based relative point coordinates for sampling, shape
        (N, P, 2).
    """

    is_hw_tuple = isinstance(img, tuple) and len(img) == 2
    is_feature_map = isinstance(img, torch.Tensor) and len(img.shape) == 4
    assert is_hw_tuple or is_feature_map

    if not isinstance(img, tuple):
        scale = get_shape_from_feature_map(img)
    else:
        height, width = img
        # Points are stored as (x, y), hence the (width, height) order.
        scale = torch.tensor([width, height],
                             dtype=torch.float,
                             device=abs_img_points.device).view(1, 1, 2)

    return abs_img_points / scale * spatial_scale


def rel_roi_point_to_rel_img_point(rois,
                                   rel_roi_points,
                                   img,
                                   spatial_scale=1.):
    """Convert roi based relative point coordinates to image based relative
    point coordinates.

    This chains ``rel_roi_point_to_abs_img_point`` and
    ``abs_img_point_to_rel_img_point``.

    Args:
        rois (torch.Tensor): RoIs or BBoxes, shape (N, 4) or (N, 5)
        rel_roi_points (torch.Tensor): Point coordinates inside RoI, relative
            to RoI, location, range (0, 1), shape (N, P, 2)
        img (tuple or torch.Tensor): (height, width) of image or feature map.
        spatial_scale (float, optional): Scale points by this factor.
            Default: 1.

    Returns:
        torch.Tensor: Image based relative point coordinates for sampling,
        shape (N, P, 2).
    """

    absolute_points = rel_roi_point_to_abs_img_point(rois, rel_roi_points)
    return abs_img_point_to_rel_img_point(absolute_points, img, spatial_scale)


def point_sample(input, points, align_corners=False, **kwargs):
    """Sample features at point coordinates given in ``[0, 1] x [0, 1]``.

    Thin wrapper around :func:`grid_sample` that also accepts 3D point
    tensors. Unlike :func:`torch.nn.functional.grid_sample`, coordinates
    are expected in the unit square and are converted to [-1, 1] here.

    Args:
        input (torch.Tensor): Feature map, shape (N, C, H, W).
        points (torch.Tensor): Image based absolute point coordinates
            (normalized), range [0, 1] x [0, 1], shape (N, P, 2) or
            (N, Hgrid, Wgrid, 2).
        align_corners (bool, optional): Whether align_corners.
            Default: False
        **kwargs: forwarded to ``F.grid_sample`` (ignored by the pure-python
            fallback used during onnx export without custom ops).

    Returns:
        torch.Tensor: Features of `point` on `input`, shape (N, C, P) or
        (N, C, Hgrid, Wgrid).
    """

    # 3D points get a temporary singleton axis that is removed at the end.
    squeeze_back = points.dim() == 3
    if squeeze_back:
        points = points.unsqueeze(2)

    use_python_impl = is_in_onnx_export_without_custom_ops()
    sampling_grid = denormalize(points)
    if use_python_impl:
        # If custom ops for onnx runtime not compiled use python
        # implementation of grid_sample function to make onnx graph
        # with supported nodes
        output = bilinear_grid_sample(
            input, sampling_grid, align_corners=align_corners)
    else:
        output = F.grid_sample(
            input, sampling_grid, align_corners=align_corners, **kwargs)

    return output.squeeze(3) if squeeze_back else output


class SimpleRoIAlign(nn.Module):
    """RoI feature extractor that samples a regular grid of points in each
    RoI with :func:`point_sample`."""

    def __init__(self, output_size, spatial_scale, aligned=True):
        """Simple RoI align in PointRend, faster than standard RoIAlign.

        Args:
            output_size (tuple[int]): h, w
            spatial_scale (float): scale the input boxes by this number
            aligned (bool): if False, use the legacy implementation in
                MMDetection, align_corners=True will be used in F.grid_sample.
                If True, align the results more perfectly.
        """

        super(SimpleRoIAlign, self).__init__()
        self.output_size = _pair(output_size)
        self.spatial_scale = float(spatial_scale)
        # to be consistent with other RoI ops
        self.use_torchvision = False
        self.aligned = aligned

    def forward(self, features, rois):
        """Extract one ``output_size`` feature patch per RoI.

        Args:
            features (torch.Tensor): feature maps; dim 0 is the image index
                and dim 1 the channel count.
            rois (torch.Tensor): RoIs whose first column is the image index
                (it is compared against ``range(features.size(0))``).

        Returns:
            torch.Tensor: shape ``(num_rois, channels, *output_size)``. With
            zero RoIs (outside onnx export) an empty tensor of that shape is
            returned.

        Note:
            Outside onnx export, results are concatenated image by image, so
            the output rows line up with ``rois`` only when ``rois`` is
            ordered by image index.
        """
        num_imgs = features.size(0)
        num_rois = rois.size(0)
        channels = features.size(1)
        in_onnx_export = torch.onnx.is_in_onnx_export()

        # Without RoIs the per-image loop below collects nothing and
        # `torch.cat([])` would raise; return an empty result instead.
        if num_rois == 0 and not in_onnx_export:
            return features.new_zeros((0, channels, *self.output_size))

        rel_roi_points = generate_grid(
            num_rois, self.output_size, device=rois.device)

        if in_onnx_export:
            rel_img_points = rel_roi_point_to_rel_img_point(
                rois, rel_roi_points, features, self.spatial_scale)
            rel_img_points = rel_img_points.reshape(num_imgs, -1,
                                                    *rel_img_points.shape[1:])
            point_feats = point_sample(
                features, rel_img_points, align_corners=not self.aligned)
            point_feats = point_feats.transpose(1, 2)
        else:
            point_feats = []
            for batch_ind in range(num_imgs):
                # unravel batch dim
                feat = features[batch_ind].unsqueeze(0)
                inds = (rois[:, 0].long() == batch_ind)
                if inds.any():
                    rel_img_points = rel_roi_point_to_rel_img_point(
                        rois[inds], rel_roi_points[inds], feat,
                        self.spatial_scale).unsqueeze(0)
                    point_feat = point_sample(
                        feat, rel_img_points, align_corners=not self.aligned)
                    point_feat = point_feat.squeeze(0).transpose(0, 1)
                    point_feats.append(point_feat)

            point_feats = torch.cat(point_feats, dim=0)

        roi_feats = point_feats.reshape(num_rois, channels, *self.output_size)

        return roi_feats

    def __repr__(self):
        """Return ``ClassName(output_size=..., spatial_scale=...)``."""
        format_str = self.__class__.__name__
        # The closing parenthesis was missing in the original string.
        format_str += '(output_size={}, spatial_scale={})'.format(
            self.output_size, self.spatial_scale)
        return format_str


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/points_in_boxes.py
================================================
import torch

from ..utils import ext_loader

# Handle to the `_ext` extension module; the names listed are the kernels
# called by the three functions in this file.
ext_module = ext_loader.load_ext('_ext', [
    'points_in_boxes_part_forward', 'points_in_boxes_cpu_forward',
    'points_in_boxes_all_forward'
])


def points_in_boxes_part(points, boxes):
    """Assign each point the index of a box that contains it (CUDA).

    Args:
        points (torch.Tensor): [B, M, 3], [x, y, z] in LiDAR/DEPTH coordinate.
        boxes (torch.Tensor): [B, T, 7],
            num_valid_boxes <= T, [x, y, z, x_size, y_size, z_size, rz] in
            LiDAR/DEPTH coordinate, (x, y, z) is the bottom center.

    Returns:
        torch.Tensor: Box index per point, shape (B, M), dtype ``torch.int``.
        Points that belong to no box keep the background value -1.
    """
    assert points.shape[0] == boxes.shape[0], (
        'Points and boxes should have the same batch size, '
        f'but got {points.shape[0]} and {boxes.shape[0]}')
    assert boxes.shape[2] == 7, (
        'boxes dimension should be 7, '
        f'but got unexpected shape {boxes.shape[2]}')
    assert points.shape[2] == 3, (
        'points dimension should be 3, '
        f'but got unexpected shape {points.shape[2]}')
    n_batch, n_pts, _ = points.shape

    # Result buffer, pre-filled with the background label.
    result = points.new_full((n_batch, n_pts), -1, dtype=torch.int)

    # The CUDA op creates its temporaries on the *current* device. When the
    # inputs live on another device the output is wrong, so the current
    # device is switched to the inputs' device first. See
    # https://github.com/open-mmlab/mmdetection3d/issues/305 for the
    # incorrect output before this guard existed.
    device_index = points.get_device()
    assert device_index == boxes.get_device(), \
        'Points and boxes should be put on the same device'
    if torch.cuda.current_device() != device_index:
        torch.cuda.set_device(device_index)

    contiguous_boxes = boxes.contiguous()
    contiguous_points = points.contiguous()
    ext_module.points_in_boxes_part_forward(contiguous_boxes,
                                            contiguous_points, result)
    return result


def points_in_boxes_cpu(points, boxes):
    """Mark, for every point, each box that contains it (CPU).

    This is the CPU counterpart of :meth:`points_in_boxes_all`.

    Args:
        points (torch.Tensor): [B, M, 3], [x, y, z] in
            LiDAR/DEPTH coordinate
        boxes (torch.Tensor): [B, T, 7],
            num_valid_boxes <= T, [x, y, z, x_size, y_size, z_size, rz],
            (x, y, z) is the bottom center.

    Returns:
        torch.Tensor: Indicator tensor of shape (B, M, T) and dtype
        ``torch.int``. Entries not written by the kernel stay at the
        background value 0.
    """
    assert points.shape[0] == boxes.shape[0], (
        'Points and boxes should have the same batch size, '
        f'but got {points.shape[0]} and {boxes.shape[0]}')
    assert boxes.shape[2] == 7, (
        'boxes dimension should be 7, '
        f'but got unexpected shape {boxes.shape[2]}')
    assert points.shape[2] == 3, (
        'points dimension should be 3, '
        f'but got unexpected shape {points.shape[2]}')
    n_batch, n_pts, _ = points.shape
    n_boxes = boxes.shape[1]

    # The kernel fills a (T, M) slab per sample; it is transposed on return.
    box_major = points.new_zeros((n_batch, n_boxes, n_pts), dtype=torch.int)
    sample = 0
    while sample < n_batch:
        sample_boxes = boxes[sample].float().contiguous()
        sample_points = points[sample].float().contiguous()
        ext_module.points_in_boxes_cpu_forward(sample_boxes, sample_points,
                                               box_major[sample])
        sample += 1

    return box_major.transpose(1, 2)


def points_in_boxes_all(points, boxes):
    """Find all boxes in which each point is (CUDA).

    Args:
        points (torch.Tensor): [B, M, 3], [x, y, z] in LiDAR/DEPTH coordinate
        boxes (torch.Tensor): [B, T, 7],
            num_valid_boxes <= T, [x, y, z, x_size, y_size, z_size, rz],
            (x, y, z) is the bottom center.

    Returns:
        torch.Tensor: Return the box indices of points with the shape of
        (B, M, T). Default background = 0.
    """
    # The message used to print ``boxes.shape[0]`` twice, hiding the size of
    # ``points`` that actually caused the mismatch.
    assert boxes.shape[0] == points.shape[0], \
        'Points and boxes should have the same batch size, ' \
        f'but got {points.shape[0]} and {boxes.shape[0]}'
    assert boxes.shape[2] == 7, \
        'boxes dimension should be 7, ' \
        f'but got unexpected shape {boxes.shape[2]}'
    assert points.shape[2] == 3, \
        'points dimension should be 3, ' \
        f'but got unexpected shape {points.shape[2]}'
    batch_size, num_points, _ = points.shape
    num_boxes = boxes.shape[1]

    # Zero is the background value, so a zero-initialised buffer is enough.
    box_idxs_of_pts = points.new_zeros((batch_size, num_points, num_boxes),
                                       dtype=torch.int)

    # The CUDA op creates temporaries on the *current* device. If the inputs
    # were placed on another device the output would be incorrect, so the
    # current device is forced to match the inputs (same guard as in
    # `points_in_boxes_part`; see
    # https://github.com/open-mmlab/mmdetection3d/issues/305).
    points_device = points.get_device()
    assert points_device == boxes.get_device(), \
        'Points and boxes should be put on the same device'
    if torch.cuda.current_device() != points_device:
        torch.cuda.set_device(points_device)

    ext_module.points_in_boxes_all_forward(boxes.contiguous(),
                                           points.contiguous(),
                                           box_idxs_of_pts)

    return box_idxs_of_pts


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/points_in_polygons.py
================================================
import torch

from ..utils import ext_loader

# Handle to the `_ext` extension module; `points_in_polygons_forward` is the
# only kernel this file calls.
ext_module = ext_loader.load_ext('_ext', ['points_in_polygons_forward'])


def points_in_polygons(points, polygons):
    """Judging whether points are inside polygons, which is used in the ATSS
    assignment for the rotated boxes.

    It should be noted that when the point is just at the polygon boundary, the
    judgment will be inaccurate, but the effect on assignment is limited.

    Args:
        points (torch.Tensor): It has shape (B, 2), indicating (x, y).
            B means the number of predicted points.
        polygons (torch.Tensor): It has shape (M, 8), indicating
            (x1, y1, x2, y2, x3, y3, x4, y4). M means the number of
            ground truth polygons.

    Returns:
        torch.Tensor: Return the result with the shape of (B, M),
        1 indicates that the point is inside the polygon,
        0 indicates that the point is outside the polygon.
        The tensor is float32 and lives on the same device as ``points``.
    """
    assert points.shape[1] == 2, \
        'points dimension should be 2, ' \
        f'but got unexpected shape {points.shape[1]}'
    assert polygons.shape[1] == 8, \
        'polygons dimension should be 8, ' \
        f'but got unexpected shape {polygons.shape[1]}'
    # Allocate directly on the inputs' device. The former
    # ``torch.full(...).cuda()`` built the buffer on the CPU and then moved
    # it to the *current* CUDA device, which may differ from ``points.device``.
    output = torch.zeros((points.shape[0], polygons.shape[0]),
                         dtype=torch.float32,
                         device=points.device)
    ext_module.points_in_polygons_forward(points.contiguous(),
                                          polygons.contiguous(), output)
    return output


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/points_sampler.py
================================================
from typing import List

import torch
from torch import nn as nn

from mmcv.runner import force_fp32
from .furthest_point_sample import (furthest_point_sample,
                                    furthest_point_sample_with_dist)


def calc_square_dist(point_feat_a, point_feat_b, norm=True):
    """Pairwise distance between two batched sets of feature vectors.

    Uses the expansion ``|a - b|^2 = |a|^2 + |b|^2 - 2 * a.b``.

    Args:
        point_feat_a (torch.Tensor): (B, N, C) Feature vector of each point.
        point_feat_b (torch.Tensor): (B, M, C) Feature vector of each point.
        norm (bool, optional): If True, the squared distance is passed
            through ``sqrt`` and divided by the channel count C.
            Default: True.

    Returns:
        torch.Tensor: (B, N, M) distance for each point pair.
    """
    # |a|^2 as a column vector per batch: (B, N, 1)
    sq_norm_a = point_feat_a.pow(2).sum(dim=-1, keepdim=True)
    # |b|^2 as a row vector per batch: (B, 1, M)
    sq_norm_b = point_feat_b.pow(2).sum(dim=-1).unsqueeze(dim=1)
    # a.b for every pair: (B, N, M)
    inner = torch.matmul(point_feat_a, point_feat_b.transpose(1, 2))

    pairwise = sq_norm_a + sq_norm_b - 2 * inner
    if not norm:
        return pairwise
    return torch.sqrt(pairwise) / point_feat_a.shape[-1]


def get_sampler_cls(sampler_type):
    """Get the type and mode of points sampler.

    Args:
        sampler_type (str): The type of points sampler.
            The valid value are "D-FPS", "F-FPS", or "FS".

    Returns:
        class: Points sampler type.

    Raises:
        KeyError: If ``sampler_type`` is not one of the supported names.
    """
    sampler_mappings = {
        'D-FPS': DFPSSampler,
        'F-FPS': FFPSSampler,
        'FS': FSSampler,
    }
    try:
        return sampler_mappings[sampler_type]
    except KeyError:
        # Two adjacent f-strings instead of a backslash continuation: the
        # continuation used to embed the next line's indentation in the
        # message. ``list(...)`` prints the names rather than ``dict_keys``.
        raise KeyError(
            f'Supported `sampler_type` are {list(sampler_mappings)}, '
            f'but got {sampler_type}') from None


class PointsSampler(nn.Module):
    """Points sampling.

    Args:
        num_point (list[int]): Number of sample points.
        fps_mod_list (list[str], optional): Type of FPS method, valid mod
            ['F-FPS', 'D-FPS', 'FS'], Default: ['D-FPS'].
            F-FPS: using feature distances for FPS.
            D-FPS: using Euclidean distances of points for FPS.
            FS: using F-FPS and D-FPS simultaneously.
        fps_sample_range_list (list[int], optional):
            Range of points to apply FPS. Each entry is used as the end
            index (exclusive) of the slice handed to the matching sampler;
            -1 means "up to the last point". Default: [-1].
    """

    def __init__(self,
                 num_point: List[int],
                 fps_mod_list: List[str] = ['D-FPS'],
                 fps_sample_range_list: List[int] = [-1]):
        super().__init__()
        # FPS would be applied to different fps_mod in the list,
        # so the length of the num_point should be equal to
        # fps_mod_list and fps_sample_range_list.
        assert len(num_point) == len(fps_mod_list) == len(
            fps_sample_range_list)
        self.num_point = num_point
        self.fps_sample_range_list = fps_sample_range_list
        self.samplers = nn.ModuleList()
        for fps_mod in fps_mod_list:
            self.samplers.append(get_sampler_cls(fps_mod)())
        self.fp16_enabled = False

    @force_fp32()
    def forward(self, points_xyz, features):
        """
        Args:
            points_xyz (torch.Tensor): (B, N, 3) xyz coordinates of
                the points.
            features (torch.Tensor): (B, C, N) features of the points.
                May be None, in which case no features are passed on.

        Returns:
            torch.Tensor: Indices of the sampled points. The per-sampler
            index tensors are concatenated along dim 1.
        """
        indices = []
        last_fps_end_index = 0

        for fps_sample_range, sampler, npoint in zip(
                self.fps_sample_range_list, self.samplers, self.num_point):
            assert fps_sample_range < points_xyz.shape[1]

            if fps_sample_range == -1:
                sample_points_xyz = points_xyz[:, last_fps_end_index:]
                if features is not None:
                    sample_features = features[:, :, last_fps_end_index:]
                else:
                    sample_features = None
            else:
                sample_points_xyz = \
                    points_xyz[:, last_fps_end_index:fps_sample_range]
                if features is not None:
                    sample_features = features[:, :, last_fps_end_index:
                                               fps_sample_range]
                else:
                    sample_features = None

            fps_idx = sampler(sample_points_xyz.contiguous(), sample_features,
                              npoint)

            # Indices are relative to the slice; shift them back to positions
            # in the full point set.
            indices.append(fps_idx + last_fps_end_index)
            # ``fps_sample_range`` is an absolute end index (see the slicing
            # above), so the next slice starts exactly there. Accumulating
            # with ``+=`` was only right for the first range and pushed the
            # start of every later slice too far.
            last_fps_end_index = fps_sample_range
        indices = torch.cat(indices, dim=1)

        return indices


class DFPSSampler(nn.Module):
    """Furthest point sampling driven by the points themselves (D-FPS)."""

    def __init__(self):
        super(DFPSSampler, self).__init__()

    def forward(self, points, features, npoint):
        """Run D-FPS on ``points`` and return ``npoint`` indices.

        ``features`` is accepted only so that all samplers share one call
        signature; it is not used here.
        """
        return furthest_point_sample(points.contiguous(), npoint)


class FFPSSampler(nn.Module):
    """Furthest point sampling driven by feature distances (F-FPS)."""

    def __init__(self):
        super(FFPSSampler, self).__init__()

    def forward(self, points, features, npoint):
        """Run F-FPS and return ``npoint`` indices.

        The coordinates and the features (moved to point-major layout) are
        concatenated along the last dim, and the un-normalised pairwise
        distance of that combined vector drives the sampling.
        """
        assert features is not None, \
            'feature input to FFPS_Sampler should not be None'
        point_major_feats = features.transpose(1, 2)
        fps_input = torch.cat((points, point_major_feats), dim=2)
        pairwise_dist = calc_square_dist(fps_input, fps_input, norm=False)
        return furthest_point_sample_with_dist(pairwise_dist, npoint)


class FSSampler(nn.Module):
    """Fusion sampling: F-FPS and D-FPS results side by side."""

    def __init__(self):
        super(FSSampler, self).__init__()

    def forward(self, points, features, npoint):
        """Return the F-FPS indices followed by the D-FPS indices.

        Each sampler is asked for ``npoint`` indices and the two results are
        concatenated along dim 1.
        """
        assert features is not None, \
            'feature input to FS_Sampler should not be None'
        per_sampler = [
            sampler_cls()(points, features, npoint)
            for sampler_cls in (FFPSSampler, DFPSSampler)
        ]
        return torch.cat(per_sampler, dim=1)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/psa_mask.py
================================================
# Modified from https://github.com/hszhao/semseg/blob/master/lib/psa
from torch import nn
from torch.autograd import Function
from torch.nn.modules.utils import _pair

from ..utils import ext_loader

# Handle to the `_ext` extension module; the two names listed are the kernels
# called by `PSAMaskFunction.forward` and `PSAMaskFunction.backward`.
ext_module = ext_loader.load_ext('_ext',
                                 ['psamask_forward', 'psamask_backward'])


class PSAMaskFunction(Function):
    """Autograd function around the ``psamask`` extension kernels."""

    @staticmethod
    def symbolic(g, input, psa_type, mask_size):
        """Emit the ``mmcv::MMCVPSAMask`` node used for ONNX export."""
        return g.op(
            'mmcv::MMCVPSAMask',
            input,
            psa_type_i=psa_type,
            mask_size_i=mask_size)

    @staticmethod
    def forward(ctx, input, psa_type, mask_size):
        """Run the forward kernel.

        Args:
            input (torch.Tensor): 4-D tensor ``(N, C, H, W)`` whose channel
                count must equal ``h_mask * w_mask``.
            psa_type (int): Mode flag handed to the kernel unchanged.
            mask_size (int | tuple[int]): Mask size; expanded with ``_pair``.

        Returns:
            torch.Tensor: Tensor of shape ``(N, H * W, H, W)``.
        """
        ctx.psa_type = psa_type
        ctx.mask_size = _pair(mask_size)
        ctx.save_for_backward(input)

        h_mask, w_mask = ctx.mask_size
        batch_size, channels, h_feature, w_feature = input.size()
        assert channels == h_mask * w_mask
        output = input.new_zeros(
            (batch_size, h_feature * w_feature, h_feature, w_feature))

        ext_module.psamask_forward(
            input,
            output,
            psa_type=psa_type,
            num_=batch_size,
            h_feature=h_feature,
            w_feature=w_feature,
            h_mask=h_mask,
            w_mask=w_mask,
            half_h_mask=(h_mask - 1) // 2,
            half_w_mask=(w_mask - 1) // 2)
        return output

    @staticmethod
    def backward(ctx, grad_output):
        """Run the backward kernel and return one gradient per forward input.

        Only ``input`` is differentiable; ``psa_type`` and ``mask_size`` get
        ``None``.
        """
        input = ctx.saved_tensors[0]
        psa_type = ctx.psa_type
        h_mask, w_mask = ctx.mask_size
        batch_size, channels, h_feature, w_feature = input.size()
        grad_input = grad_output.new_zeros(
            (batch_size, channels, h_feature, w_feature))
        ext_module.psamask_backward(
            grad_output,
            grad_input,
            psa_type=psa_type,
            num_=batch_size,
            h_feature=h_feature,
            w_feature=w_feature,
            h_mask=h_mask,
            w_mask=w_mask,
            half_h_mask=(h_mask - 1) // 2,
            half_w_mask=(w_mask - 1) // 2)
        # ``forward`` takes three inputs (input, psa_type, mask_size), so
        # exactly three gradients are returned; a fourth ``None`` used to be
        # appended here.
        return grad_input, None, None


# Functional entry point; called as psa_mask(input, psa_type, mask_size).
psa_mask = PSAMaskFunction.apply


class PSAMask(nn.Module):
    """Module wrapper around :func:`psa_mask`.

    Args:
        psa_type (str): Either ``'collect'`` or ``'distribute'``.
        mask_size (int | tuple[int], optional): Mask size forwarded to
            :func:`psa_mask` on every call. Default: None.
    """

    def __init__(self, psa_type, mask_size=None):
        super().__init__()
        assert psa_type in ['collect', 'distribute']
        # The op receives the mode as an integer: 0 = collect, 1 = distribute.
        self.psa_type_enum = 0 if psa_type == 'collect' else 1
        self.mask_size = mask_size
        self.psa_type = psa_type

    def forward(self, input):
        return psa_mask(input, self.psa_type_enum, self.mask_size)

    def __repr__(self):
        cls_name = self.__class__.__name__
        return (f'{cls_name}(psa_type={self.psa_type}, '
                f'mask_size={self.mask_size})')


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/readme.md
================================================
test


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/riroi_align_rotated.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import torch.nn as nn
from torch.autograd import Function

from ..utils import ext_loader, is_tuple_of

# Handle to the `_ext` extension module; the two names listed are the kernels
# called by `RiRoIAlignRotatedFunction.forward` and `.backward`.
ext_module = ext_loader.load_ext(
    '_ext', ['riroi_align_rotated_forward', 'riroi_align_rotated_backward'])


class RiRoIAlignRotatedFunction(Function):
    """Autograd function around the ``riroi_align_rotated`` kernels."""

    @staticmethod
    def forward(ctx,
                features,
                rois,
                out_size,
                spatial_scale,
                num_samples=0,
                num_orientations=8,
                clockwise=False):
        """Pool rotated RoIs from ``features``.

        Args:
            features (torch.Tensor): 4-D feature map.
            rois (torch.Tensor): RoIs, one per row.
            out_size (int | tuple[int]): Output height/width. An int is used
                for both; a tuple must hold exactly two ints.
            spatial_scale (float): Forwarded to the kernel.
            num_samples (int): Forwarded to the kernel. Default: 0.
            num_orientations (int): Forwarded to the kernel. Default: 8.
            clockwise (bool): Forwarded to the kernel. Default: False.

        Returns:
            torch.Tensor: Tensor of shape
            ``(num_rois, num_channels, out_h, out_w)``.

        Raises:
            TypeError: If ``out_size`` is neither an int nor a tuple of ints.
        """
        if isinstance(out_size, int):
            out_h = out_size
            out_w = out_size
        elif is_tuple_of(out_size, int):
            assert len(out_size) == 2
            out_h, out_w = out_size
        else:
            raise TypeError(
                f'"out_size" should be an integer or tuple of integers,'
                f' but got {out_size}')
        ctx.spatial_scale = spatial_scale
        ctx.num_samples = num_samples
        ctx.num_orientations = num_orientations
        ctx.clockwise = clockwise
        ctx.save_for_backward(rois)
        ctx.feature_size = features.size()

        _, num_channels, _, _ = features.size()
        num_rois = rois.size(0)

        output = features.new_zeros(num_rois, num_channels, out_h, out_w)

        ext_module.riroi_align_rotated_forward(
            features,
            rois,
            output,
            pooled_height=out_h,
            pooled_width=out_w,
            spatial_scale=spatial_scale,
            num_samples=num_samples,
            num_orientations=num_orientations,
            clockwise=clockwise)
        return output

    @staticmethod
    def backward(ctx, grad_output):
        """Return one gradient per forward input (seven in total).

        Only ``features`` can receive a gradient; every other slot is
        ``None``.
        """
        feature_size = ctx.feature_size
        spatial_scale = ctx.spatial_scale
        num_orientations = ctx.num_orientations
        clockwise = ctx.clockwise
        num_samples = ctx.num_samples
        rois = ctx.saved_tensors[0]
        assert feature_size is not None
        batch_size, num_channels, feature_h, feature_w = feature_size

        out_w = grad_output.size(3)
        out_h = grad_output.size(2)

        grad_input = grad_rois = None

        if ctx.needs_input_grad[0]:
            # Allocate from ``grad_output`` (as RoIAlignFunction.backward
            # does) so the buffer has the gradient's dtype and device rather
            # than those of ``rois``.
            grad_input = grad_output.new_zeros(batch_size, num_channels,
                                               feature_h, feature_w)
            ext_module.riroi_align_rotated_backward(
                grad_output.contiguous(),
                rois,
                grad_input,
                pooled_height=out_h,
                pooled_width=out_w,
                spatial_scale=spatial_scale,
                num_samples=num_samples,
                num_orientations=num_orientations,
                clockwise=clockwise)

        # This return used to sit inside the ``if`` above, so the method
        # returned a bare ``None`` whenever ``features`` needed no gradient.
        return grad_input, grad_rois, None, None, None, None, None


# Functional entry point; takes the same arguments as
# RiRoIAlignRotatedFunction.forward (without ctx).
riroi_align_rotated = RiRoIAlignRotatedFunction.apply


class RiRoIAlignRotated(nn.Module):
    """Rotation-invariant RoI align pooling layer for rotated proposals.

    The layer takes a feature map of shape (N, C, H, W) together with rois
    of shape (n, 6), each roi being (batch_index, center_x, center_y,
    w, h, angle) with the angle given in radian.

    See `ReDet: A Rotation-equivariant Detector for Aerial Object Detection
    <https://arxiv.org/abs/2103.07733>`_ for details.

    Args:
        out_size (tuple): fixed dimensional RoI output with shape (h, w).
        spatial_scale (float): scale the input boxes by this number
        num_samples (int): number of inputs samples to take for each
            output sample. 0 to take samples densely for current models.
        num_orientations (int): number of oriented channels.
        clockwise (bool): If True, the angle in each proposal follows a
            clockwise fashion in image space, otherwise, the angle is
            counterclockwise. Default: False.
    """

    def __init__(self,
                 out_size,
                 spatial_scale,
                 num_samples=0,
                 num_orientations=8,
                 clockwise=False):
        super().__init__()

        # Numeric options are normalised once here so that forward() can
        # hand them to the op unchanged.
        self.out_size = out_size
        self.spatial_scale = float(spatial_scale)
        self.num_samples = int(num_samples)
        self.num_orientations = int(num_orientations)
        self.clockwise = clockwise

    def forward(self, features, rois):
        """Pool ``rois`` from ``features`` with the stored configuration."""
        op_config = (self.out_size, self.spatial_scale, self.num_samples,
                     self.num_orientations, self.clockwise)
        return RiRoIAlignRotatedFunction.apply(features, rois, *op_config)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/roi_align.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import torch
import torch.nn as nn
from torch.autograd import Function
from torch.autograd.function import once_differentiable
from torch.nn.modules.utils import _pair

from ..utils import deprecated_api_warning, ext_loader

# Handle to the `_ext` extension module; the two names listed are the kernels
# called by `RoIAlignFunction.forward` and `RoIAlignFunction.backward`.
ext_module = ext_loader.load_ext('_ext',
                                 ['roi_align_forward', 'roi_align_backward'])


class RoIAlignFunction(Function):
    """Autograd function around the ``roi_align`` extension kernels.

    ``symbolic`` provides the ONNX export, ``forward`` and ``backward`` call
    ``roi_align_forward`` and ``roi_align_backward`` from ``ext_module``.
    """

    @staticmethod
    def symbolic(g, input, rois, output_size, spatial_scale, sampling_ratio,
                 pool_mode, aligned):
        """Build the ONNX node(s) for this op.

        When the mmcv custom ops are loaded a single ``mmcv::MMCVRoiAlign``
        node is emitted. Otherwise the op is expressed with ONNX ``RoiAlign``:
        column 0 of ``rois`` is split off as int64 batch indices and columns
        1-4 are kept as the boxes.
        """
        from ..onnx import is_custom_op_loaded
        has_custom_op = is_custom_op_loaded()
        if has_custom_op:
            return g.op(
                'mmcv::MMCVRoiAlign',
                input,
                rois,
                output_height_i=output_size[0],
                output_width_i=output_size[1],
                spatial_scale_f=spatial_scale,
                sampling_ratio_i=sampling_ratio,
                mode_s=pool_mode,
                aligned_i=aligned)
        else:
            from torch.onnx.symbolic_opset9 import sub, squeeze
            from torch.onnx.symbolic_helper import _slice_helper
            from torch.onnx import TensorProtoDataType
            # batch_indices = rois[:, 0].long()
            batch_indices = _slice_helper(
                g, rois, axes=[1], starts=[0], ends=[1])
            batch_indices = squeeze(g, batch_indices, 1)
            batch_indices = g.op(
                'Cast', batch_indices, to_i=TensorProtoDataType.INT64)
            # rois = rois[:, 1:]
            rois = _slice_helper(g, rois, axes=[1], starts=[1], ends=[5])
            if aligned:
                # rois -= 0.5/spatial_scale
                # (no `aligned` attribute is passed to 'RoiAlign' below, so
                # the half-pixel shift is applied to the boxes instead)
                aligned_offset = g.op(
                    'Constant',
                    value_t=torch.tensor([0.5 / spatial_scale],
                                         dtype=torch.float32))
                rois = sub(g, rois, aligned_offset)
            # roi align
            # A negative sampling_ratio is clamped to 0 for the ONNX op.
            return g.op(
                'RoiAlign',
                input,
                rois,
                batch_indices,
                output_height_i=output_size[0],
                output_width_i=output_size[1],
                spatial_scale_f=spatial_scale,
                sampling_ratio_i=max(0, sampling_ratio),
                mode_s=pool_mode)

    @staticmethod
    def forward(ctx,
                input,
                rois,
                output_size,
                spatial_scale=1.0,
                sampling_ratio=0,
                pool_mode='avg',
                aligned=True):
        """Pool every RoI to ``output_size``.

        Args:
            input (torch.Tensor): Feature map; dim 1 is used as the channel
                count of the output.
            rois (torch.Tensor): (num_rois, 5) tensor of
                (idx, x1, y1, x2, y2).
            output_size (int | tuple[int]): Output size, expanded with
                ``_pair``.
            spatial_scale (float): Forwarded to the kernel. Default: 1.0.
            sampling_ratio (int): Forwarded to the kernel. Default: 0.
            pool_mode (str): ``'max'`` or ``'avg'``. Default: 'avg'.
            aligned (bool): Forwarded to the kernel. Default: True.

        Returns:
            torch.Tensor: Tensor of shape
            ``(num_rois, channels, output_size[0], output_size[1])``.
        """
        ctx.output_size = _pair(output_size)
        ctx.spatial_scale = spatial_scale
        ctx.sampling_ratio = sampling_ratio
        assert pool_mode in ('max', 'avg')
        # The kernel takes the mode as an integer: 0 = max, 1 = avg.
        ctx.pool_mode = 0 if pool_mode == 'max' else 1
        ctx.aligned = aligned
        ctx.input_shape = input.size()

        assert rois.size(1) == 5, 'RoI must be (idx, x1, y1, x2, y2)!'

        output_shape = (rois.size(0), input.size(1), ctx.output_size[0],
                        ctx.output_size[1])
        output = input.new_zeros(output_shape)
        if ctx.pool_mode == 0:
            # Max mode: full-size argmax buffers, saved for backward.
            argmax_y = input.new_zeros(output_shape)
            argmax_x = input.new_zeros(output_shape)
        else:
            # Avg mode: empty placeholders keep the kernel signature uniform.
            argmax_y = input.new_zeros(0)
            argmax_x = input.new_zeros(0)

        ext_module.roi_align_forward(
            input,
            rois,
            output,
            argmax_y,
            argmax_x,
            aligned_height=ctx.output_size[0],
            aligned_width=ctx.output_size[1],
            spatial_scale=ctx.spatial_scale,
            sampling_ratio=ctx.sampling_ratio,
            pool_mode=ctx.pool_mode,
            aligned=ctx.aligned)

        ctx.save_for_backward(rois, argmax_y, argmax_x)
        return output

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        """Return the gradient w.r.t. ``input``; all other slots are None."""
        rois, argmax_y, argmax_x = ctx.saved_tensors
        grad_input = grad_output.new_zeros(ctx.input_shape)
        # complex head architecture may cause grad_output uncontiguous.
        grad_output = grad_output.contiguous()
        ext_module.roi_align_backward(
            grad_output,
            rois,
            argmax_y,
            argmax_x,
            grad_input,
            aligned_height=ctx.output_size[0],
            aligned_width=ctx.output_size[1],
            spatial_scale=ctx.spatial_scale,
            sampling_ratio=ctx.sampling_ratio,
            pool_mode=ctx.pool_mode,
            aligned=ctx.aligned)
        # One entry per forward input (seven in total).
        return grad_input, None, None, None, None, None, None


# Functional entry point; takes the same arguments as
# RoIAlignFunction.forward (without ctx).
roi_align = RoIAlignFunction.apply


class RoIAlign(nn.Module):
    """RoI align pooling layer.

    Args:
        output_size (tuple): h, w
        spatial_scale (float): scale the input boxes by this number
        sampling_ratio (int): number of inputs samples to take for each
            output sample. 0 to take samples densely for current models.
        pool_mode (str, 'avg' or 'max'): pooling mode in each bin.
            It is not forwarded on the torchvision path.
        aligned (bool): if False, use the legacy implementation in
            MMDetection. If True, align the results more perfectly.
        use_torchvision (bool): whether to use roi_align from torchvision.

    Note:
        The implementation of RoIAlign when aligned=True is modified from
        https://github.com/facebookresearch/detectron2/

        The meaning of aligned=True:

        Given a continuous coordinate c, its two neighboring pixel
        indices (in our pixel model) are computed by floor(c - 0.5) and
        ceil(c - 0.5). For example, c=1.3 has pixel neighbors with discrete
        indices [0] and [1] (which are sampled from the underlying signal
        at continuous coordinates 0.5 and 1.5). But the original roi_align
        (aligned=False) does not subtract the 0.5 when computing
        neighboring pixel indices and therefore it uses pixels with a
        slightly incorrect alignment (relative to our pixel model) when
        performing bilinear interpolation.

        With `aligned=True`,
        we first appropriately scale the ROI and then shift it by -0.5
        prior to calling roi_align. This produces the correct neighbors;

        The difference does not make a difference to the model's
        performance if ROIAlign is used together with conv layers.
    """

    @deprecated_api_warning(
        {
            'out_size': 'output_size',
            'sample_num': 'sampling_ratio'
        },
        cls_name='RoIAlign')
    def __init__(self,
                 output_size,
                 spatial_scale=1.0,
                 sampling_ratio=0,
                 pool_mode='avg',
                 aligned=True,
                 use_torchvision=False):
        super(RoIAlign, self).__init__()

        self.output_size = _pair(output_size)
        self.spatial_scale = float(spatial_scale)
        self.sampling_ratio = int(sampling_ratio)
        self.pool_mode = pool_mode
        self.aligned = aligned
        self.use_torchvision = use_torchvision

    def forward(self, input, rois):
        """
        Args:
            input: NCHW images
            rois: Bx5 boxes. First column is the index into N.
                The other 4 columns are xyxy.

        Returns:
            The pooled RoI features. ``rois`` is never modified.
        """
        if not self.use_torchvision:
            return roi_align(input, rois, self.output_size, self.spatial_scale,
                             self.sampling_ratio, self.pool_mode, self.aligned)

        from torchvision.ops import roi_align as tv_roi_align
        if 'aligned' in tv_roi_align.__code__.co_varnames:
            return tv_roi_align(input, rois, self.output_size,
                                self.spatial_scale, self.sampling_ratio,
                                self.aligned)

        # This torchvision build has no ``aligned`` argument: emulate it by
        # shifting the box coordinates (not the batch index in column 0).
        if self.aligned:
            # Out-of-place on purpose. The former ``rois -= ...`` changed the
            # caller's tensor, shifting the boxes again on every call.
            rois = rois - rois.new_tensor([0.] +
                                          [0.5 / self.spatial_scale] * 4)
        return tv_roi_align(input, rois, self.output_size,
                            self.spatial_scale, self.sampling_ratio)

    def __repr__(self):
        s = self.__class__.__name__
        s += f'(output_size={self.output_size}, '
        s += f'spatial_scale={self.spatial_scale}, '
        s += f'sampling_ratio={self.sampling_ratio}, '
        s += f'pool_mode={self.pool_mode}, '
        s += f'aligned={self.aligned}, '
        s += f'use_torchvision={self.use_torchvision})'
        return s


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/roi_align_rotated.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import torch.nn as nn
from torch.autograd import Function

from ..utils import ext_loader

# Handle to the `_ext` extension module, requesting the rotated RoI align
# forward and backward kernels used by `RoIAlignRotatedFunction`.
ext_module = ext_loader.load_ext(
    '_ext', ['roi_align_rotated_forward', 'roi_align_rotated_backward'])


class RoIAlignRotatedFunction(Function):
    """Autograd ``Function`` around the ``roi_align_rotated`` extension ops.

    ``forward`` and ``backward`` delegate the numerical work to
    ``ext_module.roi_align_rotated_forward`` and
    ``ext_module.roi_align_rotated_backward``; ``symbolic`` emits the
    ``mmcv::MMCVRoIAlignRotated`` graph node.
    """

    @staticmethod
    def _parse_out_size(out_size):
        """Normalize ``out_size`` to an ``(out_h, out_w)`` pair.

        Args:
            out_size (int | tuple[int]): A single int (square output) or a
                2-tuple ``(h, w)`` of ints.

        Returns:
            tuple: ``(out_h, out_w)``.

        Raises:
            TypeError: If ``out_size`` is neither an int nor a tuple.
        """
        if isinstance(out_size, int):
            return out_size, out_size
        if isinstance(out_size, tuple):
            assert len(out_size) == 2
            assert isinstance(out_size[0], int)
            assert isinstance(out_size[1], int)
            out_h, out_w = out_size
            return out_h, out_w
        raise TypeError('"out_size" must be an integer or tuple of integers')

    @staticmethod
    def symbolic(g, features, rois, out_size, spatial_scale, sample_num,
                 aligned, clockwise):
        """Build the ``mmcv::MMCVRoIAlignRotated`` node on graph ``g``.

        The arguments mirror :meth:`forward`. The output height and width
        are exported separately so non-square ``out_size`` values survive
        the export (the width used to be exported as the height).
        """
        out_h, out_w = RoIAlignRotatedFunction._parse_out_size(out_size)
        return g.op(
            'mmcv::MMCVRoIAlignRotated',
            features,
            rois,
            output_height_i=out_h,
            output_width_i=out_w,
            spatial_scale_f=spatial_scale,
            sampling_ratio_i=sample_num,
            aligned_i=aligned,
            clockwise_i=clockwise)

    @staticmethod
    def forward(ctx,
                features,
                rois,
                out_size,
                spatial_scale,
                sample_num=0,
                aligned=True,
                clockwise=False):
        """Pool every roi into an ``(out_h, out_w)`` grid.

        Args:
            ctx: Autograd context; the settings and ``rois`` needed by
                :meth:`backward` are stored on it.
            features (torch.Tensor): 4-D feature map (unpacked below as
                batch, channels, height, width).
            rois (torch.Tensor): Rois; the first dimension is the number of
                rois.
            out_size (int | tuple[int]): Output size, int or ``(h, w)``.
            spatial_scale (float): Forwarded to the extension kernel.
            sample_num (int): Forwarded to the extension kernel.
            aligned (bool): Forwarded to the extension kernel.
            clockwise (bool): Forwarded to the extension kernel.

        Returns:
            torch.Tensor: Tensor of shape
            ``(num_rois, channels, out_h, out_w)`` filled by the kernel.

        Raises:
            TypeError: If ``out_size`` is neither an int nor a tuple.
        """
        out_h, out_w = RoIAlignRotatedFunction._parse_out_size(out_size)
        ctx.spatial_scale = spatial_scale
        ctx.sample_num = sample_num
        ctx.aligned = aligned
        ctx.clockwise = clockwise
        ctx.save_for_backward(rois)
        ctx.feature_size = features.size()

        # The 4-way unpack doubles as a check that ``features`` is 4-D.
        _, num_channels, _, _ = features.size()
        num_rois = rois.size(0)

        output = features.new_zeros(num_rois, num_channels, out_h, out_w)
        ext_module.roi_align_rotated_forward(
            features,
            rois,
            output,
            pooled_height=out_h,
            pooled_width=out_w,
            spatial_scale=spatial_scale,
            sample_num=sample_num,
            aligned=aligned,
            clockwise=clockwise)
        return output

    @staticmethod
    def backward(ctx, grad_output):
        """Back-propagate ``grad_output`` to the feature map.

        Returns:
            tuple: One entry per :meth:`forward` input. Only the first
            (gradient w.r.t. ``features``) can be a tensor, and only when
            that gradient is required; every other entry is ``None``.
        """
        feature_size = ctx.feature_size
        spatial_scale = ctx.spatial_scale
        aligned = ctx.aligned
        clockwise = ctx.clockwise
        sample_num = ctx.sample_num
        rois = ctx.saved_tensors[0]
        assert feature_size is not None
        batch_size, num_channels, data_height, data_width = feature_size

        out_w = grad_output.size(3)
        out_h = grad_output.size(2)

        grad_input = grad_rois = None

        if ctx.needs_input_grad[0]:
            # Allocate from ``grad_output`` so the buffer has the dtype and
            # device of the features' gradient, even if ``rois`` uses a
            # different dtype.
            grad_input = grad_output.new_zeros(batch_size, num_channels,
                                               data_height, data_width)
            ext_module.roi_align_rotated_backward(
                grad_output.contiguous(),
                rois,
                grad_input,
                pooled_height=out_h,
                pooled_width=out_w,
                spatial_scale=spatial_scale,
                sample_num=sample_num,
                aligned=aligned,
                clockwise=clockwise)
        return grad_input, grad_rois, None, None, None, None, None


# Functional entry point: calling `roi_align_rotated(...)` is the same as
# calling `RoIAlignRotatedFunction.apply(...)`.
roi_align_rotated = RoIAlignRotatedFunction.apply


class RoIAlignRotated(nn.Module):
    """Module form of RoI Align for rotated proposals.

    The layer takes a feature map of shape (N, C, H, W) together with rois
    of shape (n, 6). Each roi row is laid out as (batch_index, center_x,
    center_y, w, h, angle), where the angle is given in radians.

    Args:
        out_size (int | tuple): Output size. It is handed unchanged to
            ``RoIAlignRotatedFunction``, which accepts an int or an
            ``(h, w)`` tuple of ints.
        spatial_scale (float): Factor by which the input boxes are scaled.
        sample_num (int): Number of input samples taken for each output
            sample. 0 to take samples densely for current models.
        aligned (bool): ``False`` selects the legacy MMDetection
            implementation, ``True`` the more precisely aligned one.
            Default: True.
        clockwise (bool): Whether the angle of each proposal is measured
            clockwise in image space (``True``) or counterclockwise
            (``False``). Default: False.

    Note:
        The ``aligned=True`` implementation is adapted from
        https://github.com/facebookresearch/detectron2/

        What ``aligned=True`` means:

        In the pixel model used here, a continuous coordinate c has the two
        neighbouring pixel indices floor(c - 0.5) and ceil(c - 0.5). For
        instance c=1.3 lies between the discrete indices [0] and [1], which
        sample the underlying signal at continuous coordinates 0.5 and 1.5.
        The original roi_align (aligned=False) omits the 0.5 subtraction
        when it computes neighbouring indices, so its bilinear
        interpolation reads pixels that are slightly misaligned with
        respect to this pixel model.

        With ``aligned=True`` the ROI is first scaled appropriately and
        then shifted by -0.5 before roi_align is called, which yields the
        correct neighbours.

        The choice does not affect model performance when ROIAlign is used
        together with conv layers.
    """

    def __init__(self,
                 out_size,
                 spatial_scale,
                 sample_num=0,
                 aligned=True,
                 clockwise=False):
        super().__init__()

        # Numeric settings are coerced once here so that ``forward`` always
        # passes a float scale and an int sample count.
        scale = float(spatial_scale)
        samples = int(sample_num)

        self.out_size = out_size
        self.spatial_scale = scale
        self.sample_num = samples
        self.aligned = aligned
        self.clockwise = clockwise

    def forward(self, features, rois):
        """Run ``RoIAlignRotatedFunction`` with the stored settings."""
        settings = (self.out_size, self.spatial_scale, self.sample_num,
                    self.aligned, self.clockwise)
        return RoIAlignRotatedFunction.apply(features, rois, *settings)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/roi_pool.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import torch
import torch.nn as nn
from torch.autograd import Function
from torch.autograd.function import once_differentiable
from torch.nn.modules.utils import _pair

from ..utils import ext_loader

# Handle to the `_ext` extension (presumably the compiled C++/CUDA ops; see
# `..utils.ext_loader` to confirm). The two names listed here are the ones
# called below as `ext_module.<name>(...)`.
ext_module = ext_loader.load_ext('_ext',
                                 ['roi_pool_forward', 'roi_pool_backward'])


class RoIPoolFunction(Function):
    """Autograd ``Function`` around the ``roi_pool`` extension ops.

    ``forward`` and ``backward`` delegate to ``ext_module.roi_pool_forward``
    and ``ext_module.roi_pool_backward``; ``symbolic`` emits a ``MaxRoiPool``
    graph node.
    """

    @staticmethod
    def symbolic(g, input, rois, output_size, spatial_scale):
        """Build the ``MaxRoiPool`` node on graph ``g``."""
        return g.op(
            'MaxRoiPool',
            input,
            rois,
            pooled_shape_i=output_size,
            spatial_scale_f=spatial_scale)

    @staticmethod
    def forward(ctx, input, rois, output_size, spatial_scale=1.0):
        """Pool every roi into an ``output_size`` grid.

        Args:
            ctx: Autograd context; the settings, ``rois`` and the argmax
                buffer needed by :meth:`backward` are stored on it.
            input (torch.Tensor): Feature map; dimension 1 is used as the
                channel dimension of the output.
            rois (torch.Tensor): Rois with 5 columns,
                (idx, x1, y1, x2, y2).
            output_size (int | tuple): Output size, expanded with ``_pair``.
            spatial_scale (float): Forwarded to the extension kernel.
                Default: 1.0.

        Returns:
            torch.Tensor: Tensor of shape
            ``(num_rois, channels, out_h, out_w)`` filled by the kernel.
        """
        ctx.output_size = _pair(output_size)
        ctx.spatial_scale = spatial_scale
        ctx.input_shape = input.size()

        assert rois.size(1) == 5, 'RoI must be (idx, x1, y1, x2, y2)!'

        output_shape = (rois.size(0), input.size(1), ctx.output_size[0],
                        ctx.output_size[1])
        output = input.new_zeros(output_shape)
        # Integer buffer the kernel fills in; it is handed back to the
        # backward kernel unchanged.
        argmax = input.new_zeros(output_shape, dtype=torch.int)

        ext_module.roi_pool_forward(
            input,
            rois,
            output,
            argmax,
            pooled_height=ctx.output_size[0],
            pooled_width=ctx.output_size[1],
            spatial_scale=ctx.spatial_scale)

        ctx.save_for_backward(rois, argmax)
        return output

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        """Back-propagate ``grad_output`` to the feature map.

        Returns:
            tuple: ``(grad_input, None, None, None)``; only ``input``
            receives a gradient.
        """
        rois, argmax = ctx.saved_tensors
        grad_input = grad_output.new_zeros(ctx.input_shape)

        # Autograd may hand over a non-contiguous gradient (e.g. produced by
        # an expand or a transpose). Make it contiguous before it reaches the
        # extension, as the other ops in this package do; this is a no-op
        # for gradients that are already contiguous.
        ext_module.roi_pool_backward(
            grad_output.contiguous(),
            rois,
            argmax,
            grad_input,
            pooled_height=ctx.output_size[0],
            pooled_width=ctx.output_size[1],
            spatial_scale=ctx.spatial_scale)

        return grad_input, None, None, None


# Functional entry point: calling `roi_pool(...)` is the same as calling
# `RoIPoolFunction.apply(...)`.
roi_pool = RoIPoolFunction.apply


class RoIPool(nn.Module):
    """Module form of ``roi_pool`` with a fixed output size and scale.

    Args:
        output_size (int | tuple): Output size; expanded with ``_pair``.
        spatial_scale (float): Stored as a float and forwarded to
            ``roi_pool``. Default: 1.0.
    """

    def __init__(self, output_size, spatial_scale=1.0):
        super().__init__()

        size_hw = _pair(output_size)
        scale = float(spatial_scale)
        self.output_size = size_hw
        self.spatial_scale = scale

    def forward(self, input, rois):
        """Call ``roi_pool`` on ``input`` and ``rois`` with the settings."""
        settings = (self.output_size, self.spatial_scale)
        return roi_pool(input, rois, *settings)

    def __repr__(self):
        """Return ``ClassName(output_size=..., spatial_scale=...)``."""
        cls_name = self.__class__.__name__
        return (f'{cls_name}(output_size={self.output_size}, '
                f'spatial_scale={self.spatial_scale})')


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/roiaware_pool3d.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import torch
from torch import nn as nn
from torch.autograd import Function

import mmcv
from ..utils import ext_loader

# Handle to the `_ext` extension (presumably the compiled C++/CUDA ops; see
# `..utils.ext_loader` to confirm). The two names listed here are the ones
# called below as `ext_module.<name>(...)`.
ext_module = ext_loader.load_ext(
    '_ext', ['roiaware_pool3d_forward', 'roiaware_pool3d_backward'])


class RoIAwarePool3d(nn.Module):
    """Encode the geometry-specific features of each 3D proposal.

    See `PartA2 <https://arxiv.org/pdf/1907.03670.pdf>`_ for the method
    itself.

    Args:
        out_size (int or tuple): The size of output features. n or
            [n1, n2, n3].
        max_pts_per_voxel (int, optional): The maximum number of points per
            voxel. Default: 128.
        mode (str, optional): Pooling method of RoIAware, 'max' or 'avg'.
            Default: 'max'. Stored on the module as the integer code
            0 ('max') or 1 ('avg').
    """

    def __init__(self, out_size, max_pts_per_voxel=128, mode='max'):
        super().__init__()

        self.out_size = out_size
        self.max_pts_per_voxel = max_pts_per_voxel
        assert mode in ['max', 'avg']
        # The autograd function takes the pooling method as an integer.
        self.mode = {'max': 0, 'avg': 1}[mode]

    def forward(self, rois, pts, pts_feature):
        """Pool the point features that fall inside each roi.

        Args:
            rois (torch.Tensor): [N, 7], in LiDAR coordinate,
                (x, y, z) is the bottom center of rois.
            pts (torch.Tensor): [npoints, 3], coordinates of input points.
            pts_feature (torch.Tensor): [npoints, C], features of input points.

        Returns:
            torch.Tensor: Pooled features whose shape is
            [N, out_x, out_y, out_z, C].
        """
        settings = (self.out_size, self.max_pts_per_voxel, self.mode)
        return RoIAwarePool3dFunction.apply(rois, pts, pts_feature, *settings)


class RoIAwarePool3dFunction(Function):
    """Autograd ``Function`` around the ``roiaware_pool3d`` extension ops."""

    @staticmethod
    def forward(ctx, rois, pts, pts_feature, out_size, max_pts_per_voxel,
                mode):
        """Allocate the output buffers and run the forward kernel.

        Args:
            rois (torch.Tensor): [N, 7], in LiDAR coordinate,
                (x, y, z) is the bottom center of rois.
            pts (torch.Tensor): [npoints, 3], coordinates of input points.
            pts_feature (torch.Tensor): [npoints, C], features of input points.
            out_size (int or tuple): The size of output features. Either an
                int ``n`` or three ints (checked with
                ``mmcv.is_tuple_of``).
            max_pts_per_voxel (int): The maximum number of points per voxel.
            mode (int): Pooling method of RoIAware, 0 (max pool) or 1 (average
                pool).

        Returns:
            torch.Tensor: Pooled features whose shape is
            [N, out_x, out_y, out_z, C].
        """
        if isinstance(out_size, int):
            grid = (out_size, ) * 3
        else:
            assert len(out_size) == 3
            assert mmcv.is_tuple_of(out_size, int)
            out_x, out_y, out_z = out_size
            grid = (out_x, out_y, out_z)

        n_rois = rois.shape[0]
        n_channels = pts_feature.shape[-1]
        n_pts = pts.shape[0]

        # Three buffers written by the kernel: the pooled features, an
        # integer argmax buffer of the same shape, and an integer buffer with
        # ``max_pts_per_voxel`` slots per voxel.
        per_channel_shape = (n_rois, *grid, n_channels)
        pooled = pts_feature.new_zeros(per_channel_shape)
        argmax = pts_feature.new_zeros(per_channel_shape, dtype=torch.int)
        voxel_pt_ids = pts_feature.new_zeros(
            (n_rois, *grid, max_pts_per_voxel), dtype=torch.int)

        ext_module.roiaware_pool3d_forward(
            rois,
            pts,
            pts_feature,
            argmax,
            voxel_pt_ids,
            pooled,
            pool_method=mode)

        # Everything the backward pass needs travels on the context as one
        # tuple.
        ctx.roiaware_pool3d_for_backward = (voxel_pt_ids, argmax, mode, n_pts,
                                            n_channels)
        return pooled

    @staticmethod
    def backward(ctx, grad_out):
        """Back-propagate ``grad_out`` to ``pts_feature``.

        Returns:
            tuple: One entry per :meth:`forward` input; only the third
            (gradient w.r.t. ``pts_feature``, shape [npoints, C]) is a
            tensor.
        """
        (voxel_pt_ids, argmax, mode, n_pts,
         n_channels) = ctx.roiaware_pool3d_for_backward

        grad_feature = grad_out.new_zeros((n_pts, n_channels))
        ext_module.roiaware_pool3d_backward(
            voxel_pt_ids,
            argmax,
            grad_out.contiguous(),
            grad_feature,
            pool_method=mode)

        return None, None, grad_feature, None, None, None


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/roipoint_pool3d.py
================================================
from torch import nn as nn
from torch.autograd import Function

from ..utils import ext_loader

# Handle to the `_ext` extension (presumably the compiled C++/CUDA ops; see
# `..utils.ext_loader` to confirm). Only a forward op is requested; it is
# called below as `ext_module.roipoint_pool3d_forward(...)`.
ext_module = ext_loader.load_ext('_ext', ['roipoint_pool3d_forward'])


class RoIPointPool3d(nn.Module):
    """Encode the geometry-specific features of each 3D proposal.

    See the `Paper of PartA2 <https://arxiv.org/pdf/1907.03670.pdf>`_ for
    the method itself.

    Args:
        num_sampled_points (int, optional): Number of samples in each roi.
            Default: 512.
    """

    def __init__(self, num_sampled_points=512):
        super().__init__()
        self.num_sampled_points = num_sampled_points

    def forward(self, points, point_features, boxes3d):
        """Pool points and their features for every box.

        Args:
            points (torch.Tensor): Input points whose shape is (B, N, 3);
                ``RoIPointPool3dFunction`` asserts the last dimension is 3.
            point_features (torch.Tensor): Features of input points whose shape
                is (B, N, C).
            boxes3d (B, M, 7), Input bounding boxes whose shape is (B, M, 7).

        Returns:
            tuple[torch.Tensor]: A tuple contains two elements. The first one
            is the pooled features whose shape is
            (B, M, num_sampled_points, 3 + C). The second is an empty flag
            whose shape is (B, M).
        """
        n_samples = self.num_sampled_points
        return RoIPointPool3dFunction.apply(points, point_features, boxes3d,
                                            n_samples)


class RoIPointPool3dFunction(Function):
    """Autograd ``Function`` around the ``roipoint_pool3d`` forward op.

    Only the forward pass exists; :meth:`backward` raises.
    """

    @staticmethod
    def forward(ctx, points, point_features, boxes3d, num_sampled_points=512):
        """Allocate the output buffers and run the forward kernel.

        Args:
            points (torch.Tensor): Input points whose shape is (B, N, 3).
            point_features (torch.Tensor): Features of input points whose shape
                is (B, N, C).
            boxes3d (B, M, 7), Input bounding boxes whose shape is (B, M, 7).
            num_sampled_points (int, optional): The num of sampled points.
                Default: 512.

        Returns:
            tuple[torch.Tensor]: A tuple contains two elements. The first one
            is the pooled features whose shape is
            (B, M, num_sampled_points, 3 + C). The second is an empty flag
            whose shape is (B, M).
        """
        assert points.dim() == 3 and points.shape[2] == 3
        n_batch = points.shape[0]
        n_boxes = boxes3d.shape[1]
        n_feat = point_features.shape[2]

        boxes_per_batch = boxes3d.view(n_batch, -1, 7)
        # Each sample carries its 3 coordinates followed by its features.
        pooled_shape = (n_batch, n_boxes, num_sampled_points, 3 + n_feat)
        pooled = point_features.new_zeros(pooled_shape)
        empty_flag = point_features.new_zeros((n_batch, n_boxes)).int()

        ext_module.roipoint_pool3d_forward(points.contiguous(),
                                           boxes_per_batch.contiguous(),
                                           point_features.contiguous(),
                                           pooled, empty_flag)

        return pooled, empty_flag

    @staticmethod
    def backward(ctx, grad_out):
        """Not implemented: always raises ``NotImplementedError``."""
        raise NotImplementedError


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/rotated_feature_align.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import torch
from torch.autograd import Function
from torch.autograd.function import once_differentiable

from ..utils import ext_loader

# Handle to the `_ext` extension (presumably the compiled C++/CUDA ops; see
# `..utils.ext_loader` to confirm). The two names listed here are the ones
# called below as `ext_module.<name>(...)`.
ext_module = ext_loader.load_ext(
    '_ext',
    ['rotated_feature_align_forward', 'rotated_feature_align_backward'])


class RotatedFeatureAlignFunction(Function):
    """Feature alignment for refined rotated anchors.

    Feature interpolation is used to obtain the position information that
    corresponds to the refined rotated anchors, and the feature maps are
    reconstructed pixel by pixel to achieve feature alignment.

    Details are given in the paper
    `R3Det: Refined Single-Stage Detector with Feature Refinement for Rotating
    Object <https://arxiv.org/abs/1908.05612>`_.
    """

    @staticmethod
    def forward(ctx, features, best_rbboxes, spatial_scale, points):
        """Run the forward alignment kernel.

        Args:
            features (torch.Tensor): Input features with shape [N,C,H,W].
            best_rbboxes (torch.Tensor): Refined rotate anchors with
                shape [N,H,W,5]. Coordinate format (cx,cx,h,w,a) according
                to the original documentation. NOTE(review): the second
                entry is presumably cy; confirm against the kernel.
            spatial_scale (float): The scale of feature map size and
                input image size.
            points (int): The number of sample points.
                Only 1 and 5 are supported.

        Returns:
            torch.Tensor: Refined features with shape [N,C,H,W].
        """
        ctx.spatial_scale = spatial_scale
        ctx.points = points
        ctx.save_for_backward(best_rbboxes)
        assert points in [1, 5]
        aligned = torch.zeros_like(features)
        kernel_kwargs = dict(spatial_scale=spatial_scale, points=points)
        ext_module.rotated_feature_align_forward(features, best_rbboxes,
                                                 aligned, **kernel_kwargs)
        return aligned

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        """Back-propagate through the alignment.

        Args:
            grad_output (torch.Tensor): The gradient of output features
                with shape [N,C,H,W].

        Returns:
            tuple: One entry per :meth:`forward` input. The first is the
            gradient of input features with shape [N,C,H,W], or ``None``
            when that gradient is not required; the rest are ``None``.
        """
        rbboxes = ctx.saved_tensors[0]
        kernel_kwargs = dict(
            spatial_scale=ctx.spatial_scale, points=ctx.points)
        if not ctx.needs_input_grad[0]:
            return None, None, None, None

        grad_features = torch.zeros_like(grad_output)
        ext_module.rotated_feature_align_backward(grad_output.contiguous(),
                                                  rbboxes, grad_features,
                                                  **kernel_kwargs)
        return grad_features, None, None, None


def rotated_feature_align(features,
                          best_rbboxes,
                          spatial_scale=1 / 8,
                          points=1):
    """Functional wrapper around ``RotatedFeatureAlignFunction.apply``.

    Args:
        features: Passed through as the first argument.
        best_rbboxes: Passed through as the second argument.
        spatial_scale (float): Passed through. Default: 1 / 8.
        points (int): Passed through. Default: 1.

    Returns:
        The result of ``RotatedFeatureAlignFunction.apply``.
    """
    call_args = (features, best_rbboxes, spatial_scale, points)
    return RotatedFeatureAlignFunction.apply(*call_args)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/saconv.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import torch
import torch.nn as nn
import torch.nn.functional as F

from mmcv.cnn import CONV_LAYERS, ConvAWS2d, constant_init
from mmcv.ops.deform_conv import deform_conv2d
from mmcv.utils import TORCH_VERSION, digit_version


@CONV_LAYERS.register_module(name='SAC')
class SAConv2d(ConvAWS2d):
    """SAC (Switchable Atrous Convolution)

    This is an implementation of `DetectoRS: Detecting Objects with Recursive
    Feature Pyramid and Switchable Atrous Convolution
    <https://arxiv.org/abs/2006.02334>`_.

    Args:
        in_channels (int): Number of channels in the input image
        out_channels (int): Number of channels produced by the convolution
        kernel_size (int or tuple): Size of the convolving kernel
        stride (int or tuple, optional): Stride of the convolution. Default: 1
        padding (int or tuple, optional): Zero-padding added to both sides of
            the input. Default: 0
        dilation (int or tuple, optional): Spacing between kernel elements.
            Default: 1
        groups (int, optional): Number of blocked connections from input
            channels to output channels. Default: 1
        bias (bool, optional): If ``True``, adds a learnable bias to the
            output. Default: ``True``
        use_deform: If ``True``, replace convolution with deformable
            convolution. Default: ``False``.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 bias=True,
                 use_deform=False):
        super().__init__(
            in_channels,
            out_channels,
            kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            bias=bias)
        self.use_deform = use_deform
        # 1x1 conv producing the single-channel map that blends the two
        # branches in ``forward``.
        self.switch = nn.Conv2d(
            self.in_channels, 1, kernel_size=1, stride=stride, bias=True)
        # Delta added to the shared weight for the large-dilation branch;
        # allocated uninitialized here and zeroed in ``init_weights``.
        self.weight_diff = nn.Parameter(torch.Tensor(self.weight.size()))
        self.pre_context = nn.Conv2d(
            self.in_channels, self.in_channels, kernel_size=1, bias=True)
        self.post_context = nn.Conv2d(
            self.out_channels, self.out_channels, kernel_size=1, bias=True)
        if self.use_deform:
            # One offset predictor per branch (18 output channels each).
            self.offset_s = nn.Conv2d(
                self.in_channels,
                18,
                kernel_size=3,
                padding=1,
                stride=stride,
                bias=True)
            self.offset_l = nn.Conv2d(
                self.in_channels,
                18,
                kernel_size=3,
                padding=1,
                stride=stride,
                bias=True)
        self.init_weights()

    def init_weights(self):
        """Initialize the SAC-specific layers.

        The switch conv gets zero weights and bias 1, ``weight_diff`` is
        zeroed, and the context and offset convs are constant-initialized
        to 0.
        """
        constant_init(self.switch, 0, bias=1)
        self.weight_diff.data.zero_()
        constant_init(self.pre_context, 0)
        constant_init(self.post_context, 0)
        if self.use_deform:
            constant_init(self.offset_s, 0)
            constant_init(self.offset_l, 0)

    def _sac_branch(self, x, avg_x, weight, zero_bias, offset_name):
        """Run one branch with the current ``self.padding``/``self.dilation``.

        Args:
            x (torch.Tensor): Branch input.
            avg_x (torch.Tensor): Input of the offset conv; only used when
                ``self.use_deform`` is set.
            weight (torch.Tensor): Convolution weight for this branch.
            zero_bias (torch.Tensor): All-zero bias, passed where
                ``_conv_forward`` requires a bias argument.
            offset_name (str): Attribute name of the offset conv
                (``'offset_s'`` or ``'offset_l'``). It is looked up lazily
                because those layers only exist when ``use_deform`` is set.

        Returns:
            torch.Tensor: The branch output.
        """
        if self.use_deform:
            offset = getattr(self, offset_name)(avg_x)
            return deform_conv2d(x, offset, weight, self.stride, self.padding,
                                 self.dilation, self.groups, 1)
        if (TORCH_VERSION == 'parrots'
                or digit_version(TORCH_VERSION) < digit_version('1.5.0')):
            return super().conv2d_forward(x, weight)
        if digit_version(TORCH_VERSION) >= digit_version('1.8.0'):
            # bias is a required argument of _conv_forward in torch 1.8.0
            return super()._conv_forward(x, weight, zero_bias)
        return super()._conv_forward(x, weight)

    def forward(self, x):
        """Apply pre-context, the two switched branches and post-context.

        Args:
            x (torch.Tensor): Input feature map.

        Returns:
            torch.Tensor: ``switch * out_s + (1 - switch) * out_l`` plus the
            post-context term, where ``out_s`` uses the configured
            padding/dilation and ``out_l`` uses three times both.
        """
        # pre-context
        avg_x = F.adaptive_avg_pool2d(x, output_size=1)
        avg_x = self.pre_context(avg_x)
        avg_x = avg_x.expand_as(x)
        x = x + avg_x
        # switch
        avg_x = F.pad(x, pad=(2, 2, 2, 2), mode='reflect')
        avg_x = F.avg_pool2d(avg_x, kernel_size=5, stride=1, padding=0)
        switch = self.switch(avg_x)
        # sac
        weight = self._get_weight(self.weight)
        zero_bias = torch.zeros(
            self.out_channels, device=weight.device, dtype=weight.dtype)

        out_s = self._sac_branch(x, avg_x, weight, zero_bias, 'offset_s')

        # The large branch re-uses the parent conv machinery, which reads
        # ``self.padding``/``self.dilation``, so both are tripled
        # temporarily. The ``finally`` guarantees they are restored even if
        # the branch raises (e.g. out of memory); otherwise the module would
        # keep the tripled values for every later call.
        ori_p = self.padding
        ori_d = self.dilation
        large_p = tuple(3 * p for p in ori_p)
        large_d = tuple(3 * d for d in ori_d)
        try:
            self.padding = large_p
            self.dilation = large_d
            weight = weight + self.weight_diff
            out_l = self._sac_branch(x, avg_x, weight, zero_bias, 'offset_l')
        finally:
            self.padding = ori_p
            self.dilation = ori_d

        out = switch * out_s + (1 - switch) * out_l
        # post-context
        avg_x = F.adaptive_avg_pool2d(out, output_size=1)
        avg_x = self.post_context(avg_x)
        avg_x = avg_x.expand_as(out)
        out = out + avg_x
        return out


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/scatter_points.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import torch
import torch.nn.functional as F
from torch import nn
from torch.autograd import Function

from ..utils import ext_loader

# Handle to the `_ext` extension (presumably the compiled C++/CUDA ops; see
# `..utils.ext_loader` to confirm). The two names listed here are the ones
# called below as `ext_module.<name>(...)`.
ext_module = ext_loader.load_ext(
    '_ext',
    ['dynamic_point_to_voxel_forward', 'dynamic_point_to_voxel_backward'])


class _DynamicScatter(Function):
    """Autograd ``Function`` around the dynamic point-to-voxel ops."""

    @staticmethod
    def forward(ctx, feats, coors, reduce_type='max'):
        """convert kitti points(N, >=3) to voxels.

        Args:
            feats (torch.Tensor): [N, C]. Points features to be reduced
                into voxels.
            coors (torch.Tensor): [N, ndim]. Corresponding voxel coordinates
                (specifically multi-dim voxel index) of each points.
            reduce_type (str, optional): Reduce op. support 'max', 'sum' and
                'mean'. Default: 'max'.

        Returns:
            tuple[torch.Tensor]: A tuple contains two elements. The first one
            is the voxel features with shape [M, C] which are respectively
            reduced from input features that share the same voxel coordinates.
            The second is voxel coordinates with shape [M, ndim].
        """
        (reduced_feats, reduced_coors, pt2voxel,
         pts_per_voxel) = ext_module.dynamic_point_to_voxel_forward(
             feats, coors, reduce_type)
        ctx.reduce_type = reduce_type
        ctx.save_for_backward(feats, reduced_feats, pt2voxel, pts_per_voxel)
        # The voxel coordinates are returned without a gradient.
        ctx.mark_non_differentiable(reduced_coors)
        return reduced_feats, reduced_coors

    @staticmethod
    def backward(ctx, grad_voxel_feats, grad_voxel_coors=None):
        """Back-propagate the voxel-feature gradient to the point features.

        Returns:
            tuple: ``(grad_feats, None, None)``; only ``feats`` receives a
            gradient.
        """
        feats, reduced_feats, pt2voxel, pts_per_voxel = ctx.saved_tensors
        grad_feats = torch.zeros_like(feats)
        # TODO: whether to use index put or use cuda_backward
        # To use index put, need point to voxel index
        ext_module.dynamic_point_to_voxel_backward(
            grad_feats, grad_voxel_feats.contiguous(), feats, reduced_feats,
            pt2voxel, pts_per_voxel, ctx.reduce_type)
        return grad_feats, None, None


# Functional entry point: calling `dynamic_scatter(...)` is the same as
# calling `_DynamicScatter.apply(...)`.
dynamic_scatter = _DynamicScatter.apply


class DynamicScatter(nn.Module):
    """Scatters points into voxels, used in the voxel encoder with dynamic
    voxelization.

    Note:
        The CPU and GPU implementation get the same output, but have numerical
        difference after summation and division (e.g., 5e-7).

    Args:
        voxel_size (list): list [x, y, z] size of three dimension.
        point_cloud_range (list): The coordinate range of points, [x_min,
            y_min, z_min, x_max, y_max, z_max].
        average_points (bool): whether to use avg pooling to scatter points
            into voxel.
    """

    def __init__(self, voxel_size, point_cloud_range, average_points: bool):
        super().__init__()

        self.voxel_size = voxel_size
        self.point_cloud_range = point_cloud_range
        self.average_points = average_points

    def forward_single(self, points, coors):
        """Scatters points into voxels.

        Args:
            points (torch.Tensor): Points to be reduced into voxels.
            coors (torch.Tensor): Corresponding voxel coordinates (specifically
                multi-dim voxel index) of each points.

        Returns:
            tuple[torch.Tensor]: A tuple contains two elements. The first one
            is the voxel features with shape [M, C] which are respectively
            reduced from input features that share the same voxel coordinates.
            The second is voxel coordinates with shape [M, ndim].
        """
        reduce = 'mean' if self.average_points else 'max'
        return dynamic_scatter(points.contiguous(), coors.contiguous(), reduce)

    def forward(self, points, coors):
        """Scatters points/features into voxels.

        Args:
            points (torch.Tensor): Points to be reduced into voxels.
            coors (torch.Tensor): Corresponding voxel coordinates (specifically
                multi-dim voxel index) of each points. With 3 columns the
                input is treated as a single sample; otherwise column 0 is
                the batch index and the remaining columns are the voxel
                index. Rows do not have to be sorted by batch index.

        Returns:
            tuple[torch.Tensor]: A tuple contains two elements. The first one
            is the voxel features with shape [M, C] which are respectively
            reduced from input features that share the same voxel coordinates.
            The second is voxel coordinates with shape [M, ndim]. For batched
            input the results are concatenated in increasing batch index and
            the batch index is kept as column 0 of the coordinates.
        """
        if coors.size(-1) == 3:
            return self.forward_single(points, coors)

        batch_ids = coors[:, 0]
        # Use the largest batch index rather than the one in the last row:
        # the last row only holds the maximum when the rows are sorted by
        # batch index, and relying on it silently dropped samples otherwise.
        batch_size = int(batch_ids.max()) + 1
        voxels, voxel_coors = [], []
        for i in range(batch_size):
            inds = torch.where(batch_ids == i)
            voxel, voxel_coor = self.forward_single(points[inds],
                                                    coors[inds][:, 1:])
            # Put the batch index back in front of the voxel index.
            coor_pad = F.pad(voxel_coor, (1, 0), mode='constant', value=i)
            voxel_coors.append(coor_pad)
            voxels.append(voxel)
        features = torch.cat(voxels, dim=0)
        feature_coors = torch.cat(voxel_coors, dim=0)

        return features, feature_coors

    def __repr__(self):
        s = self.__class__.__name__ + '('
        s += 'voxel_size=' + str(self.voxel_size)
        s += ', point_cloud_range=' + str(self.point_cloud_range)
        s += ', average_points=' + str(self.average_points)
        s += ')'
        return s


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/sync_bn.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import torch
import torch.distributed as dist
import torch.nn.functional as F
from torch.autograd import Function
from torch.autograd.function import once_differentiable
from torch.nn.modules.module import Module
from torch.nn.parameter import Parameter

from mmcv.cnn import NORM_LAYERS
from ..utils import ext_loader

# Handle to the `_ext` extension (presumably the compiled C++/CUDA ops; see
# `..utils.ext_loader` to confirm). The five sync-BN ops requested here are
# accessed further down as `ext_module.<name>(...)`.
ext_module = ext_loader.load_ext('_ext', [
    'sync_bn_forward_mean', 'sync_bn_forward_var', 'sync_bn_forward_output',
    'sync_bn_backward_param', 'sync_bn_backward_data'
])


class SyncBatchNormFunction(Function):
    """Autograd function behind :class:`SyncBatchNorm`.

    The forward pass obtains per-channel statistics through the ``_ext``
    ops, all-reduces them over ``group`` when ``group_size > 1`` and then
    produces the normalized output. The backward pass all-reduces the
    affine-parameter gradients before computing the input gradient.
    """

    @staticmethod
    def symbolic(g, input, running_mean, running_var, weight, bias, momentum,
                 eps, group, group_size, stats_mode):
        """Emit a custom ``mmcv::MMCVSyncBatchNorm`` node for ONNX export.

        Args:
            g: Graph context supplied by the exporter; only ``g.op`` is used.
            input, running_mean, running_var, weight, bias: Node inputs.
            momentum (float): Exported as float attribute ``momentum``.
            eps (float): Exported as float attribute ``eps``.
            group: Exported as int attribute ``group``.
            group_size (int): Exported as int attribute ``group_size``.
            stats_mode (str): Exported as string attribute ``stats_mode``.

        Returns:
            The value produced by ``g.op``.
        """
        # torch.onnx derives the attribute type from the keyword suffix
        # (``_f`` float, ``_i`` int, ``_s`` string). A keyword without such a
        # suffix is rejected, so the string attribute must be spelled
        # ``stats_mode_s``.
        return g.op(
            'mmcv::MMCVSyncBatchNorm',
            input,
            running_mean,
            running_var,
            weight,
            bias,
            momentum_f=momentum,
            eps_f=eps,
            group_i=group,
            group_size_i=group_size,
            stats_mode_s=stats_mode)

    @staticmethod
    def forward(self, input, running_mean, running_var, weight, bias, momentum,
                eps, group, group_size, stats_mode):
        """Normalize ``input`` with statistics synchronized across ``group``.

        Note that the first parameter, named ``self`` here, is the autograd
        context object.

        Args:
            input (torch.Tensor): Half or float tensor; dimension 1 is treated
                as the channel dimension and trailing dimensions are
                flattened.
            running_mean, running_var: Forwarded to the output kernel.
            weight, bias: Affine parameters forwarded to the output kernel.
            momentum (float): Scaled by the update flag before being handed
                to the output kernel.
            eps (float): Forwarded to the output kernel.
            group: Process group used for ``dist.all_reduce``.
            group_size (int): All-reduce is skipped unless this is > 1.
            stats_mode (str): ``'default'`` or ``'N'``; anything else raises
                ``NotImplementedError``.

        Returns:
            torch.Tensor: Tensor with the same shape as ``input``.
        """
        self.momentum = momentum
        self.eps = eps
        self.group = group
        self.group_size = group_size
        self.stats_mode = stats_mode

        assert isinstance(
                   input, (torch.HalfTensor, torch.FloatTensor,
                           torch.cuda.HalfTensor, torch.cuda.FloatTensor)), \
               f'only support Half or Float Tensor, but {input.type()}'
        output = torch.zeros_like(input)
        # NOTE(review): ``flatten(start_dim=2)`` looks like it needs an input
        # with at least 3 dims - confirm 2-D inputs are never passed here.
        input3d = input.flatten(start_dim=2)
        # ``output3d`` shares storage with ``output`` so the kernel can fill
        # the result in place.
        output3d = output.view_as(input3d)
        num_channels = input3d.size(1)

        # ensure mean/var/norm/std are initialized as zeros
        # ``torch.empty()`` does not guarantee that
        mean = torch.zeros(
            num_channels, dtype=torch.float, device=input3d.device)
        var = torch.zeros(
            num_channels, dtype=torch.float, device=input3d.device)
        norm = torch.zeros_like(
            input3d, dtype=torch.float, device=input3d.device)
        std = torch.zeros(
            num_channels, dtype=torch.float, device=input3d.device)

        batch_size = input3d.size(0)
        if batch_size > 0:
            ext_module.sync_bn_forward_mean(input3d, mean)
            batch_flag = torch.ones([1], device=mean.device, dtype=mean.dtype)
        else:
            # skip updating mean and leave it as zeros when the input is empty
            batch_flag = torch.zeros([1], device=mean.device, dtype=mean.dtype)

        # synchronize mean and the batch flag
        vec = torch.cat([mean, batch_flag])
        if self.stats_mode == 'N':
            # weight this rank's contribution by its batch size; the flag
            # slot then accumulates the total batch size
            vec *= batch_size
        if self.group_size > 1:
            dist.all_reduce(vec, group=self.group)
        total_batch = vec[-1].detach()
        mean = vec[:num_channels]

        if self.stats_mode == 'default':
            mean = mean / self.group_size
        elif self.stats_mode == 'N':
            # clamp avoids dividing by zero when every rank had an empty batch
            mean = mean / total_batch.clamp(min=1)
        else:
            raise NotImplementedError

        # leave var as zeros when the input is empty
        if batch_size > 0:
            ext_module.sync_bn_forward_var(input3d, mean, var)

        if self.stats_mode == 'N':
            var *= batch_size
        if self.group_size > 1:
            dist.all_reduce(var, group=self.group)

        if self.stats_mode == 'default':
            var /= self.group_size
        elif self.stats_mode == 'N':
            var /= total_batch.clamp(min=1)
        else:
            raise NotImplementedError

        # if the total batch size over all the ranks is zero,
        # we should not update the statistics in the current batch
        update_flag = total_batch.clamp(max=1)
        momentum = update_flag * self.momentum
        ext_module.sync_bn_forward_output(
            input3d,
            mean,
            var,
            weight,
            bias,
            running_mean,
            running_var,
            norm,
            std,
            output3d,
            eps=self.eps,
            momentum=momentum,
            group_size=self.group_size)
        self.save_for_backward(norm, std, weight)
        return output

    @staticmethod
    @once_differentiable
    def backward(self, grad_output):
        """Compute gradients for ``input``, ``weight`` and ``bias``.

        Args:
            grad_output (torch.Tensor): Gradient w.r.t. the forward output.

        Returns:
            tuple: One entry per forward argument; only the positions of
            ``input``, ``weight`` and ``bias`` are not ``None``.
        """
        norm, std, weight = self.saved_tensors
        grad_weight = torch.zeros_like(weight)
        grad_bias = torch.zeros_like(weight)
        grad_input = torch.zeros_like(grad_output)
        grad_output3d = grad_output.flatten(start_dim=2)
        # shares storage with ``grad_input`` so the kernel fills it in place
        grad_input3d = grad_input.view_as(grad_output3d)

        batch_size = grad_input3d.size(0)
        if batch_size > 0:
            ext_module.sync_bn_backward_param(grad_output3d, norm, grad_weight,
                                              grad_bias)

        # all reduce
        if self.group_size > 1:
            dist.all_reduce(grad_weight, group=self.group)
            dist.all_reduce(grad_bias, group=self.group)
            grad_weight /= self.group_size
            grad_bias /= self.group_size

        if batch_size > 0:
            ext_module.sync_bn_backward_data(grad_output3d, weight,
                                             grad_weight, grad_bias, norm, std,
                                             grad_input3d)

        return grad_input, None, None, grad_weight, grad_bias, \
            None, None, None, None, None


@NORM_LAYERS.register_module(name='MMSyncBN')
class SyncBatchNorm(Module):
    """Synchronized Batch Normalization.

    Args:
        num_features (int): number of features/channels in input tensor
        eps (float, optional): a value added to the denominator for numerical
            stability. Defaults to 1e-5.
        momentum (float, optional): the value used for the running_mean and
            running_var computation. Defaults to 0.1.
        affine (bool, optional): whether to use learnable affine parameters.
            Defaults to True.
        track_running_stats (bool, optional): whether to track the running
            mean and variance during training. When set to False, this
            module does not track such statistics, and initializes statistics
            buffers ``running_mean`` and ``running_var`` as ``None``. When
            these buffers are ``None``, this module always uses batch
            statistics in both training and eval modes. Defaults to True.
        group (optional): process group within which the statistics are
            synchronized. By default (``None``) ``dist.group.WORLD`` is
            used, i.e. synchronization happens across the whole world.
        stats_mode (str, optional): The statistical mode. Available options
            include ``'default'`` and ``'N'``. Defaults to 'default'.
            When ``stats_mode=='default'``, it computes the overall statistics
            using those from each worker with equal weight, i.e., the
            statistics are synchronized and simply divided by ``group_size``.
            This mode will produce inaccurate statistics when empty tensors
            occur.
            When ``stats_mode=='N'``, it computes the overall statistics using
            the total number of batches in each worker ignoring the number of
            group, i.e., the statistics are synchronized and then divided by
            the total batch ``N``. This mode is beneficial when empty tensors
            occur during training, as it averages the total mean by the real
            number of batches.
    """

    def __init__(self,
                 num_features,
                 eps=1e-5,
                 momentum=0.1,
                 affine=True,
                 track_running_stats=True,
                 group=None,
                 stats_mode='default'):
        super(SyncBatchNorm, self).__init__()
        self.num_features = num_features
        self.eps = eps
        self.momentum = momentum
        self.affine = affine
        self.track_running_stats = track_running_stats
        group = dist.group.WORLD if group is None else group
        self.group = group
        self.group_size = dist.get_world_size(group)
        assert stats_mode in ['default', 'N'], \
            f'"stats_mode" only accepts "default" and "N", got "{stats_mode}"'
        self.stats_mode = stats_mode
        if self.affine:
            # uninitialized storage; values are set by reset_parameters()
            self.weight = Parameter(torch.Tensor(num_features))
            self.bias = Parameter(torch.Tensor(num_features))
        else:
            self.register_parameter('weight', None)
            self.register_parameter('bias', None)
        if self.track_running_stats:
            self.register_buffer('running_mean', torch.zeros(num_features))
            self.register_buffer('running_var', torch.ones(num_features))
            self.register_buffer('num_batches_tracked',
                                 torch.tensor(0, dtype=torch.long))
        else:
            self.register_buffer('running_mean', None)
            self.register_buffer('running_var', None)
            self.register_buffer('num_batches_tracked', None)
        self.reset_parameters()

    def reset_running_stats(self):
        """Reset the tracked statistics (mean 0, var 1, counter 0)."""
        if self.track_running_stats:
            self.running_mean.zero_()
            self.running_var.fill_(1)
            self.num_batches_tracked.zero_()

    def reset_parameters(self):
        """Reset running statistics and, if affine, the weight and bias."""
        self.reset_running_stats()
        if self.affine:
            self.weight.data.uniform_()  # pytorch use ones_()
            self.bias.data.zero_()

    def forward(self, input):
        """Apply synchronized batch norm, or plain BN with running stats.

        Args:
            input (torch.Tensor): Tensor with at least 2 dimensions.

        Returns:
            torch.Tensor: The normalized tensor.

        Raises:
            ValueError: If ``input`` has fewer than 2 dimensions.
        """
        if input.dim() < 2:
            raise ValueError(
                f'expected at least 2D input, got {input.dim()}D input')
        if self.momentum is None:
            exponential_average_factor = 0.0
        else:
            exponential_average_factor = self.momentum

        if self.training and self.track_running_stats:
            if self.num_batches_tracked is not None:
                self.num_batches_tracked += 1
                if self.momentum is None:  # use cumulative moving average
                    exponential_average_factor = 1.0 / float(
                        self.num_batches_tracked)
                else:  # use exponential moving average
                    exponential_average_factor = self.momentum

        if self.training or not self.track_running_stats:
            # batch statistics path: synchronized across the process group
            return SyncBatchNormFunction.apply(
                input, self.running_mean, self.running_var, self.weight,
                self.bias, exponential_average_factor, self.eps, self.group,
                self.group_size, self.stats_mode)
        else:
            # eval with tracked statistics: no synchronization required
            return F.batch_norm(input, self.running_mean, self.running_var,
                                self.weight, self.bias, False,
                                exponential_average_factor, self.eps)

    def __repr__(self):
        s = self.__class__.__name__
        s += f'({self.num_features}, '
        s += f'eps={self.eps}, '
        s += f'momentum={self.momentum}, '
        s += f'affine={self.affine}, '
        s += f'track_running_stats={self.track_running_stats}, '
        # separator is ", " like every other field (was "," with no space)
        s += f'group_size={self.group_size}, '
        s += f'stats_mode={self.stats_mode})'
        return s


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/three_interpolate.py
================================================
from typing import Tuple

import torch
from torch.autograd import Function

from ..utils import ext_loader

# Extension handle returned by ``ext_loader.load_ext``; the listed names are
# the interpolation ops that this file calls on it.
ext_module = ext_loader.load_ext(
    '_ext', ['three_interpolate_forward', 'three_interpolate_backward'])


class ThreeInterpolate(Function):
    """Performs weighted linear interpolation on 3 features.

    Please refer to `Paper of PointNet++ <https://arxiv.org/abs/1706.02413>`_
    for more details.
    """

    @staticmethod
    def forward(ctx, features: torch.Tensor, indices: torch.Tensor,
                weight: torch.Tensor) -> torch.Tensor:
        """
        Args:
            features (torch.Tensor): (B, C, M) Features descriptors to be
                interpolated.
            indices (torch.Tensor): (B, n, 3) indices of three nearest
                neighbor features for the target features.
            weight (torch.Tensor): (B, n, 3) weights of three nearest
                neighbor features for the target features.

        Returns:
            torch.Tensor: (B, C, n) float32 tensor of the interpolated
            features, located on the device of ``features``.
        """
        assert features.is_contiguous()
        assert indices.is_contiguous()
        assert weight.is_contiguous()

        B, c, m = features.size()
        n = indices.size(1)
        ctx.three_interpolate_for_backward = (indices, weight, m)
        # Allocate on the device of the input. ``torch.cuda.FloatTensor``
        # always uses the *current* CUDA device, which is wrong when the
        # inputs live on a different GPU. The dtype stays float32 as before.
        output = features.new_empty((B, c, n), dtype=torch.float)

        ext_module.three_interpolate_forward(
            features, indices, weight, output, b=B, c=c, m=m, n=n)
        return output

    @staticmethod
    def backward(
        ctx, grad_out: torch.Tensor
    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
        """
        Args:
            grad_out (torch.Tensor): (B, C, N) tensor with gradients of outputs

        Returns:
            tuple: ``(grad_features, None, None)`` where ``grad_features`` is
            a (B, C, M) tensor with gradients of features; ``indices`` and
            ``weight`` receive no gradient.
        """
        idx, weight, m = ctx.three_interpolate_for_backward
        B, c, n = grad_out.size()

        # zero-initialized float32 buffer on the same device as the incoming
        # gradient (see the allocation note in ``forward``)
        grad_features = grad_out.new_zeros((B, c, m), dtype=torch.float)
        grad_out_data = grad_out.data.contiguous()

        ext_module.three_interpolate_backward(
            grad_out_data, idx, weight, grad_features.data, b=B, c=c, n=n, m=m)
        return grad_features, None, None


# Functional entry point: ``three_interpolate(features, indices, weight)``.
three_interpolate = ThreeInterpolate.apply


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/three_nn.py
================================================
from typing import Tuple

import torch
from torch.autograd import Function

from ..utils import ext_loader

# Extension handle returned by ``ext_loader.load_ext``; ``three_nn_forward``
# is the only op this file calls on it.
ext_module = ext_loader.load_ext('_ext', ['three_nn_forward'])


class ThreeNN(Function):
    """Find the top-3 nearest neighbors of the target set from the source set.

    Please refer to `Paper of PointNet++ <https://arxiv.org/abs/1706.02413>`_
    for more details.
    """

    @staticmethod
    def forward(ctx, target: torch.Tensor,
                source: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Args:
            target (torch.Tensor): shape (B, N, 3), points set that needs to
                find the nearest neighbors.
            source (torch.Tensor): shape (B, M, 3), points set that is used
                to find the nearest neighbors of points in target set.

        Returns:
            tuple[torch.Tensor, torch.Tensor]: The first element has shape
            (B, N, 3) and holds the L2 distance of each point in target set
            to their corresponding top three nearest neighbors. The second
            element is an int32 tensor of shape (B, N, 3) filled by the
            extension op with the matching indices; it is marked
            non-differentiable.
        """
        target = target.contiguous()
        source = source.contiguous()

        B, N, _ = target.size()
        m = source.size(1)
        # Allocate on the device of the input. ``torch.cuda.FloatTensor`` and
        # ``torch.cuda.IntTensor`` always use the *current* CUDA device, which
        # is wrong when the inputs live on a different GPU. The dtypes
        # (float32 / int32) are the same as before.
        dist2 = target.new_empty((B, N, 3), dtype=torch.float)
        idx = target.new_empty((B, N, 3), dtype=torch.int32)

        ext_module.three_nn_forward(target, source, dist2, idx, b=B, n=N, m=m)
        if torch.__version__ != 'parrots':
            ctx.mark_non_differentiable(idx)

        # the buffer is named ``dist2`` and the square root is returned
        return torch.sqrt(dist2), idx

    @staticmethod
    def backward(ctx, a=None, b=None):
        """No gradient is propagated to ``target`` or ``source``."""
        return None, None


# Functional entry point: ``three_nn(target, source)``.
three_nn = ThreeNN.apply


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/tin_shift.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
# Code reference from "Temporal Interlacing Network"
# https://github.com/deepcs233/TIN/blob/master/cuda_shift/rtc_wrap.py
# Hao Shao, Shengju Qian, Yu Liu
# shaoh19@mails.tsinghua.edu.cn, sjqian@cse.cuhk.edu.hk, yuliu@ee.cuhk.edu.hk

import torch
import torch.nn as nn
from torch.autograd import Function

from ..utils import ext_loader

# Extension handle returned by ``ext_loader.load_ext``; the listed names are
# the TIN shift ops that this file calls on it.
ext_module = ext_loader.load_ext('_ext',
                                 ['tin_shift_forward', 'tin_shift_backward'])


class TINShiftFunction(Function):
    """Autograd wrapper around the ``tin_shift`` extension ops."""

    @staticmethod
    def forward(ctx, input, shift):
        """Shift ``input`` according to ``shift`` via the extension op.

        Raises:
            ValueError: If ``input.size(2)`` is not a positive multiple of
                ``shift.size(1)``.
        """
        C, num_segments = input.size(2), shift.size(1)
        is_multiple = C // num_segments > 0 and C % num_segments == 0
        if not is_multiple:
            raise ValueError('C should be a multiple of num_segments, '
                             f'but got C={C} and num_segments={num_segments}.')

        ctx.save_for_backward(shift)

        # the op writes into a zero-filled buffer shaped like the input
        shifted = torch.zeros_like(input)
        ext_module.tin_shift_forward(input, shift, shifted)
        return shifted

    @staticmethod
    def backward(ctx, grad_output):
        """Return the input gradient and an all-zero gradient for ``shift``."""
        (shift, ) = ctx.saved_tensors
        grad_for_input = grad_output.new_zeros(grad_output.size())
        grad_for_shift = shift.new_zeros(shift.size())
        ext_module.tin_shift_backward(grad_output, shift, grad_for_input)
        return grad_for_input, grad_for_shift


# Functional entry point: ``tin_shift(input, shift)``.
tin_shift = TINShiftFunction.apply


class TINShift(nn.Module):
    """Temporal Interlace Shift.

    A differentiable, temporal-wise frame shifting operation proposed in
    "Temporal Interlacing Network".

    Please refer to `Temporal Interlacing Network
    <https://arxiv.org/abs/2001.06499>`_ for more details.

    Code is modified from https://github.com/mit-han-lab/temporal-shift-module
    """

    def forward(self, input, shift):
        """Apply the temporal interlace shift to a feature map.

        Args:
            input (torch.Tensor): Feature map with shape
                [N, num_segments, C, H * W].
            shift (torch.Tensor): Shift tensor with shape [N, num_segments].

        Returns:
            Feature map after temporal interlace shift.
        """
        shifted = tin_shift(input, shift)
        return shifted


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/upfirdn2d.py
================================================
# modified from https://github.com/rosinality/stylegan2-pytorch/blob/master/op/upfirdn2d.py  # noqa:E501

# Copyright (c) 2021, NVIDIA Corporation. All rights reserved.
# NVIDIA Source Code License for StyleGAN2 with Adaptive Discriminator
# Augmentation (ADA)
# =======================================================================

# 1. Definitions

# "Licensor" means any person or entity that distributes its Work.

# "Software" means the original work of authorship made available under
# this License.

# "Work" means the Software and any additions to or derivative works of
# the Software that are made available under this License.

# The terms "reproduce," "reproduction," "derivative works," and
# "distribution" have the meaning as provided under U.S. copyright law;
# provided, however, that for the purposes of this License, derivative
# works shall not include works that remain separable from, or merely
# link (or bind by name) to the interfaces of, the Work.

# Works, including the Software, are "made available" under this License
# by including in or with the Work either (a) a copyright notice
# referencing the applicability of this License to the Work, or (b) a
# copy of this License.

# 2. License Grants

#     2.1 Copyright Grant. Subject to the terms and conditions of this
#     License, each Licensor grants to you a perpetual, worldwide,
#     non-exclusive, royalty-free, copyright license to reproduce,
#     prepare derivative works of, publicly display, publicly perform,
#     sublicense and distribute its Work and any resulting derivative
#     works in any form.

# 3. Limitations

#     3.1 Redistribution. You may reproduce or distribute the Work only
#     if (a) you do so under this License, (b) you include a complete
#     copy of this License with your distribution, and (c) you retain
#     without modification any copyright, patent, trademark, or
#     attribution notices that are present in the Work.

#     3.2 Derivative Works. You may specify that additional or different
#     terms apply to the use, reproduction, and distribution of your
#     derivative works of the Work ("Your Terms") only if (a) Your Terms
#     provide that the use limitation in Section 3.3 applies to your
#     derivative works, and (b) you identify the specific derivative
#     works that are subject to Your Terms. Notwithstanding Your Terms,
#     this License (including the redistribution requirements in Section
#     3.1) will continue to apply to the Work itself.

#     3.3 Use Limitation. The Work and any derivative works thereof only
#     may be used or intended for use non-commercially. Notwithstanding
#     the foregoing, NVIDIA and its affiliates may use the Work and any
#     derivative works commercially. As used herein, "non-commercially"
#     means for research or evaluation purposes only.

#     3.4 Patent Claims. If you bring or threaten to bring a patent claim
#     against any Licensor (including any claim, cross-claim or
#     counterclaim in a lawsuit) to enforce any patents that you allege
#     are infringed by any Work, then your rights under this License from
#     such Licensor (including the grant in Section 2.1) will terminate
#     immediately.

#     3.5 Trademarks. This License does not grant any rights to use any
#     Licensor’s or its affiliates’ names, logos, or trademarks, except
#     as necessary to reproduce the notices described in this License.

#     3.6 Termination. If you violate any term of this License, then your
#     rights under this License (including the grant in Section 2.1) will
#     terminate immediately.

# 4. Disclaimer of Warranty.

# THE WORK IS PROVIDED "AS IS" WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR
# NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER
# THIS LICENSE.

# 5. Limitation of Liability.

# EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL
# THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE
# SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT,
# INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF
# OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK
# (INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION,
# LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER
# COMMERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF
# THE POSSIBILITY OF SUCH DAMAGES.

# =======================================================================

import torch
from torch.autograd import Function
from torch.nn import functional as F

from mmcv.utils import to_2tuple
from ..utils import ext_loader

# Extension handle returned by ``ext_loader.load_ext``; ``upfirdn2d`` is the
# only op this file calls on it.
upfirdn2d_ext = ext_loader.load_ext('_ext', ['upfirdn2d'])


class UpFirDn2dBackward(Function):
    """Gradient of :class:`UpFirDn2d`, itself differentiable.

    The forward here runs the extension op with up/down factors swapped and
    the gradient kernel/padding; its backward re-applies the op with the
    original kernel and settings.
    """

    @staticmethod
    def forward(ctx, grad_output, kernel, grad_kernel, up, down, pad, g_pad,
                in_size, out_size):
        """Compute the input gradient, reshaped to ``in_size``."""
        (up_x, up_y), (down_x, down_y) = up, down
        g_pad_x0, g_pad_x1, g_pad_y0, g_pad_y1 = g_pad

        flat_grad = grad_output.reshape(-1, out_size[0], out_size[1], 1)

        # the up and down factors trade places for the gradient pass
        grad_input = upfirdn2d_ext.upfirdn2d(
            flat_grad,
            grad_kernel,
            up_x=down_x,
            up_y=down_y,
            down_x=up_x,
            down_y=up_y,
            pad_x0=g_pad_x0,
            pad_x1=g_pad_x1,
            pad_y0=g_pad_y0,
            pad_y1=g_pad_y1)
        grad_input = grad_input.view(in_size[0], in_size[1], in_size[2],
                                     in_size[3])

        ctx.save_for_backward(kernel)

        pad_x0, pad_x1, pad_y0, pad_y1 = pad

        # keep the forward-op settings around for the double-backward pass
        ctx.up_x, ctx.up_y = up_x, up_y
        ctx.down_x, ctx.down_y = down_x, down_y
        ctx.pad_x0, ctx.pad_x1 = pad_x0, pad_x1
        ctx.pad_y0, ctx.pad_y1 = pad_y0, pad_y1
        ctx.in_size = in_size
        ctx.out_size = out_size

        return grad_input

    @staticmethod
    def backward(ctx, gradgrad_input):
        """Propagate through the gradient op; only ``grad_output`` gets one."""
        (kernel, ) = ctx.saved_tensors
        in_size, out_size = ctx.in_size, ctx.out_size

        flat_gradgrad = gradgrad_input.reshape(-1, in_size[2], in_size[3], 1)

        gradgrad_out = upfirdn2d_ext.upfirdn2d(
            flat_gradgrad,
            kernel,
            up_x=ctx.up_x,
            up_y=ctx.up_y,
            down_x=ctx.down_x,
            down_y=ctx.down_y,
            pad_x0=ctx.pad_x0,
            pad_x1=ctx.pad_x1,
            pad_y0=ctx.pad_y0,
            pad_y1=ctx.pad_y1)
        gradgrad_out = gradgrad_out.view(in_size[0], in_size[1], out_size[0],
                                         out_size[1])

        # one slot per forward argument; the other eight get no gradient
        return (gradgrad_out, ) + (None, ) * 8


class UpFirDn2d(Function):
    """Autograd wrapper around the ``upfirdn2d`` extension op."""

    @staticmethod
    def forward(ctx, input, kernel, up, down, pad):
        """Run the op on an (n, c, h, w) tensor and return (n, c, h', w')."""
        (up_x, up_y), (down_x, down_y) = up, down
        pad_x0, pad_x1, pad_y0, pad_y1 = pad

        kernel_h, kernel_w = kernel.shape
        batch, channel, in_h, in_w = input.shape
        ctx.in_size = input.shape

        # every (sample, channel) plane is handed to the op separately
        planes = input.reshape(-1, in_h, in_w, 1)

        # the flipped kernel is the one used for the gradient pass
        ctx.save_for_backward(kernel, torch.flip(kernel, [0, 1]))

        out_h = (in_h * up_y + pad_y0 + pad_y1 - kernel_h) // down_y + 1
        out_w = (in_w * up_x + pad_x0 + pad_x1 - kernel_w) // down_x + 1

        ctx.out_size = (out_h, out_w)
        ctx.up = (up_x, up_y)
        ctx.down = (down_x, down_y)
        ctx.pad = (pad_x0, pad_x1, pad_y0, pad_y1)

        # padding to apply when running the op on the gradient
        ctx.g_pad = (
            kernel_w - pad_x0 - 1,
            in_w * up_x - out_w * down_x + pad_x0 - up_x + 1,
            kernel_h - pad_y0 - 1,
            in_h * up_y - out_h * down_y + pad_y0 - up_y + 1,
        )

        result = upfirdn2d_ext.upfirdn2d(
            planes,
            kernel,
            up_x=up_x,
            up_y=up_y,
            down_x=down_x,
            down_y=down_y,
            pad_x0=pad_x0,
            pad_x1=pad_x1,
            pad_y0=pad_y0,
            pad_y1=pad_y1)
        return result.view(-1, channel, out_h, out_w)

    @staticmethod
    def backward(ctx, grad_output):
        """Delegate to ``UpFirDn2dBackward``; only ``input`` gets a gradient."""
        kernel, grad_kernel = ctx.saved_tensors

        grad_input = UpFirDn2dBackward.apply(
            grad_output,
            kernel,
            grad_kernel,
            ctx.up,
            ctx.down,
            ctx.pad,
            ctx.g_pad,
            ctx.in_size,
            ctx.out_size,
        )

        # kernel, up, down and pad receive no gradient
        return (grad_input, ) + (None, ) * 4


def upfirdn2d(input, kernel, up=1, down=1, pad=(0, 0)):
    """UpFIRDn for 2d features.

    UpFIRDn is short for upsample, apply FIR filter and downsample. More
    details can be found in:
    https://www.mathworks.com/help/signal/ref/upfirdn.html

    Args:
        input (torch.Tensor): Tensor with shape of (n, c, h, w).
        kernel (torch.Tensor): Filter kernel.
        up (int | tuple[int], optional): Upsampling factor. If given a number,
            we will use this factor for the both height and width side.
            Defaults to 1.
        down (int | tuple[int], optional): Downsampling factor. If given a
            number, we will use this factor for the both height and width side.
            Defaults to 1.
        pad (tuple[int], optional): Padding for tensors, either
            (pad_0, pad_1), which is applied to both the x and the y axis,
            or (x_pad_0, x_pad_1, y_pad_0, y_pad_1). Defaults to (0, 0).

    Returns:
        torch.Tensor: Tensor after UpFIRDn.

    Raises:
        ValueError: If ``pad`` does not have 2 or 4 elements.
    """
    # Normalize ``pad`` once for both code paths. Previously a wrong length
    # surfaced as an IndexError or UnboundLocalError depending on the device.
    if len(pad) == 2:
        pad = (pad[0], pad[1], pad[0], pad[1])
    elif len(pad) == 4:
        pad = tuple(pad)
    else:
        raise ValueError(
            f'"pad" must contain 2 or 4 elements, but got {len(pad)}')

    up = to_2tuple(up)
    down = to_2tuple(down)

    if input.device.type == 'cpu':
        # tensors on CPU go through the pure-PyTorch implementation
        out = upfirdn2d_native(input, kernel, up[0], up[1], down[0], down[1],
                               pad[0], pad[1], pad[2], pad[3])
    else:
        out = UpFirDn2d.apply(input, kernel, up, down, pad)

    return out


def upfirdn2d_native(input, kernel, up_x, up_y, down_x, down_y, pad_x0, pad_x1,
                     pad_y0, pad_y1):
    """Pure-PyTorch upsample / FIR filter / downsample on (n, c, h, w) input.

    Steps: zero-insertion upsampling, padding (negative padding crops),
    convolution with the flipped kernel, then strided subsampling.
    """
    _, channel, in_h, in_w = input.shape
    kernel_h, kernel_w = kernel.shape

    # treat every (sample, channel) plane as its own single-channel image
    planes = input.reshape(-1, in_h, in_w, 1)
    minor = planes.shape[3]

    # upsample: append (up - 1) zeros after every element along each axis
    up_h, up_w = in_h * up_y, in_w * up_x
    out = planes.view(-1, in_h, 1, in_w, 1, minor)
    out = F.pad(out, [0, 0, 0, up_x - 1, 0, 0, 0, up_y - 1])
    out = out.view(-1, up_h, up_w, minor)

    # positive padding adds zeros, negative padding is applied as a crop
    grow = [max(amount, 0) for amount in (pad_x0, pad_x1, pad_y0, pad_y1)]
    out = F.pad(out, [0, 0] + grow)
    top, bottom = max(-pad_y0, 0), max(-pad_y1, 0)
    left, right = max(-pad_x0, 0), max(-pad_x1, 0)
    out = out[:, top:out.shape[1] - bottom, left:out.shape[2] - right, :]

    padded_h = up_h + pad_y0 + pad_y1
    padded_w = up_w + pad_x0 + pad_x1

    # FIR filtering; the kernel is flipped before being passed to conv2d
    out = out.permute(0, 3, 1, 2).reshape([-1, 1, padded_h, padded_w])
    flipped = torch.flip(kernel, [0, 1]).view(1, 1, kernel_h, kernel_w)
    out = F.conv2d(out, flipped)
    out = out.reshape(-1, minor, padded_h - kernel_h + 1,
                      padded_w - kernel_w + 1)

    # downsample by keeping every ``down``-th element
    out = out.permute(0, 2, 3, 1)[:, ::down_y, ::down_x, :]

    out_h = (padded_h - kernel_h) // down_y + 1
    out_w = (padded_w - kernel_w) // down_x + 1
    return out.view(-1, channel, out_h, out_w)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/ops/voxelize.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import torch
from torch import nn
from torch.autograd import Function
from torch.nn.modules.utils import _pair

from ..utils import ext_loader

# Extension handle returned by ``ext_loader.load_ext``; the listed names are
# the voxelization ops that this file calls on it.
ext_module = ext_loader.load_ext(
    '_ext', ['dynamic_voxelize_forward', 'hard_voxelize_forward'])


class _Voxelization(Function):
    """Function wrapper around the dynamic / hard voxelization ops."""

    @staticmethod
    def forward(ctx,
                points,
                voxel_size,
                coors_range,
                max_points=35,
                max_voxels=20000):
        """Convert kitti points(N, >=3) to voxels.

        Args:
            points (torch.Tensor): [N, ndim]. Points[:, :3] contain xyz points
                and points[:, 3:] contain other information like reflectivity.
            voxel_size (tuple or float): The size of voxel with the shape of
                [3].
            coors_range (tuple or float): The coordinate range of voxel with
                the shape of [6].
            max_points (int, optional): maximum points contained in a voxel. if
                max_points=-1, it means using dynamic_voxelize. Default: 35.
            max_voxels (int, optional): maximum voxels this function create.
                for second, 20000 is a good choice. Users should shuffle points
                before call this function because max_voxels may drop points.
                Default: 20000.

        Returns:
            torch.Tensor | tuple[torch.Tensor]: When ``max_points`` or
            ``max_voxels`` is -1, a single int tensor of per-point voxel
            coordinates with shape [N, 3]. Otherwise a tuple of three
            elements: the voxels with shape [M, max_points, ndim], the
            voxel coordinates with shape [M, 3] and the number of points
            per voxel with shape [M].
        """
        size_t = torch.tensor(voxel_size, dtype=torch.float)
        range_t = torch.tensor(coors_range, dtype=torch.float)

        use_dynamic = max_points == -1 or max_voxels == -1
        if use_dynamic:
            # dynamic voxelization: one coordinate triple for every point
            point_coors = points.new_zeros(
                size=(points.size(0), 3), dtype=torch.int)
            ext_module.dynamic_voxelize_forward(
                points, size_t, range_t, point_coors, NDim=3)
            return point_coors

        # hard voxelization: buffers sized for the worst case, filled by the op
        voxel_buf = points.new_zeros(
            size=(max_voxels, max_points, points.size(1)))
        coor_buf = points.new_zeros(size=(max_voxels, 3), dtype=torch.int)
        count_buf = points.new_zeros(size=(max_voxels, ), dtype=torch.int)
        voxel_num = torch.zeros(size=(), dtype=torch.long)
        ext_module.hard_voxelize_forward(
            points,
            size_t,
            range_t,
            voxel_buf,
            coor_buf,
            count_buf,
            voxel_num,
            max_points=max_points,
            max_voxels=max_voxels,
            NDim=3)
        # keep only the first ``voxel_num`` entries of each buffer
        return (voxel_buf[:voxel_num], coor_buf[:voxel_num],
                count_buf[:voxel_num])


# Functional entry point:
# ``voxelization(points, voxel_size, coors_range, max_points, max_voxels)``.
voxelization = _Voxelization.apply


class Voxelization(nn.Module):
    """Convert kitti points(N, >=3) to voxels.

    Thin ``nn.Module`` wrapper around the functional ``voxelization`` op.
    Please refer to `Point-Voxel CNN for Efficient 3D Deep Learning
    <https://arxiv.org/abs/1907.03739>`_ for more details.

    Args:
        voxel_size (tuple or float): The size of voxel with the shape of [3].
        point_cloud_range (tuple or float): The coordinate range of voxel with
            the shape of [6].
        max_num_points (int): maximum points contained in a voxel. It is
            forwarded unchanged to ``voxelization``; ``-1`` means using
            dynamic_voxelize.
        max_voxels (int or tuple, optional): maximum voxels this function
            create. A tuple is stored as is and read as
            ``(training value, evaluation value)``; any other value is
            expanded with ``_pair``. For second, 20000 is a good choice.
            Users should shuffle points before call this function because
            max_voxels may drop points. Default: 20000.
    """

    def __init__(self,
                 voxel_size,
                 point_cloud_range,
                 max_num_points,
                 max_voxels=20000):
        super().__init__()

        # The raw arguments are kept untouched: they are handed to the op in
        # ``forward`` and printed by ``__repr__``.
        self.voxel_size = voxel_size
        self.point_cloud_range = point_cloud_range
        self.max_num_points = max_num_points
        self.max_voxels = (
            max_voxels if isinstance(max_voxels, tuple) else _pair(max_voxels))

        # Number of voxels along each axis: (upper - lower) / voxel size,
        # rounded to the nearest integer.
        range_t = torch.tensor(point_cloud_range, dtype=torch.float32)
        size_t = torch.tensor(voxel_size, dtype=torch.float32)
        lower, upper = range_t[:3], range_t[3:]
        cells = torch.round((upper - lower) / size_t).long()
        self.grid_size = cells
        # the origin shape is as [x-len, y-len, z-len]
        # [w, h, d] -> [d, h, w]
        self.pcd_shape = [*cells[:2], 1][::-1]

    def forward(self, input):
        # Index 0 holds the training budget, index 1 the evaluation budget.
        budget_idx = 0 if self.training else 1
        return voxelization(input, self.voxel_size, self.point_cloud_range,
                            self.max_num_points, self.max_voxels[budget_idx])

    def __repr__(self):
        fields = ', '.join([
            'voxel_size=' + str(self.voxel_size),
            'point_cloud_range=' + str(self.point_cloud_range),
            'max_num_points=' + str(self.max_num_points),
            'max_voxels=' + str(self.max_voxels),
        ])
        return self.__class__.__name__ + '(' + fields + ')'


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/parallel/__init__.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
from .collate import collate
from .data_container import DataContainer
from .data_parallel import MMDataParallel
from .distributed import MMDistributedDataParallel
from .registry import MODULE_WRAPPERS
from .scatter_gather import scatter, scatter_kwargs
from .utils import is_module_wrapper

# Names exported by ``from <this package> import *``; every entry is imported
# at the top of this file.
__all__ = [
    'collate', 'DataContainer', 'MMDataParallel', 'MMDistributedDataParallel',
    'scatter', 'scatter_kwargs', 'is_module_wrapper', 'MODULE_WRAPPERS'
]


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/parallel/_functions.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import torch
from torch.nn.parallel._functions import _get_stream


def scatter(input, devices, streams=None):
    """Scatters tensor across multiple GPUs.

    Args:
        input (list | torch.Tensor): A tensor, or a (possibly nested) list of
            tensors. List items are dealt out to ``devices`` in contiguous
            chunks of ``ceil(len(input) / len(devices))`` items.
        devices (list[int]): Target device ids. ``[-1]`` means the tensor is
            only made contiguous and is not copied to a GPU.
        streams (list, optional): One entry per device, used as the CUDA
            stream for the copy. Defaults to ``None`` for every device.

    Returns:
        list | torch.Tensor: Same nesting as ``input``.

    Raises:
        Exception: If ``input`` is neither a list nor a tensor.
    """
    if streams is None:
        streams = [None] * len(devices)

    if isinstance(input, torch.Tensor):
        output = input.contiguous()
        # TODO: copy to a pinned buffer first (if copying from CPU)
        if output.numel() > 0:
            stream = streams[0]
        else:
            # Empty tensors are copied without a dedicated stream.
            stream = None
        if devices != [-1]:
            with torch.cuda.device(devices[0]), torch.cuda.stream(stream):
                output = output.cuda(devices[0], non_blocking=True)
        return output

    if isinstance(input, list):
        per_device = (len(input) - 1) // len(devices) + 1
        scattered = []
        for idx, item in enumerate(input):
            slot = idx // per_device
            scattered.append(scatter(item, [devices[slot]], [streams[slot]]))
        return scattered

    raise Exception(f'Unknown type {type(input)}.')


def synchronize_stream(output, devices, streams):
    """Make each device's current stream wait for its background copy stream.

    The list case mirrors the chunking used by ``scatter`` in this module:
    item ``i`` belongs to device ``i // ceil(len(output) / len(devices))``.
    Using the same ceil-based chunk size guarantees that every item is visited
    and paired with the device/stream it was scattered to, including when
    ``len(output)`` is not a multiple of ``len(devices)``.

    Args:
        output (list | torch.Tensor): Result of ``scatter``; a tensor or a
            (possibly nested) list of tensors.
        devices (list[int]): Device ids, aligned with ``streams``.
        streams (list): Copy streams, one per device.

    Raises:
        Exception: If ``output`` (or a nested item) is neither a list nor a
            tensor.
    """
    if isinstance(output, list):
        # Same ceil division as ``scatter`` so both functions agree on which
        # device owns which item.
        chunk_size = (len(output) - 1) // len(devices) + 1
        for i, item in enumerate(output):
            device_idx = i // chunk_size
            synchronize_stream(item, [devices[device_idx]],
                               [streams[device_idx]])
    elif isinstance(output, torch.Tensor):
        # Empty tensors were scattered without a copy stream; nothing to wait
        # for.
        if output.numel() != 0:
            with torch.cuda.device(devices[0]):
                main_stream = torch.cuda.current_stream()
                main_stream.wait_stream(streams[0])
                output.record_stream(main_stream)
    else:
        raise Exception(f'Unknown type {type(output)}.')


def get_input_device(input):
    """Return the CUDA device index of the first CUDA tensor in ``input``.

    Args:
        input (list | torch.Tensor): A tensor or a (possibly nested) list of
            tensors.

    Returns:
        int: ``tensor.get_device()`` of the first CUDA tensor found in
        depth-first order, or ``-1`` when there is none.

    Raises:
        Exception: If ``input`` (or a nested item) is neither a list nor a
            tensor.
    """
    if isinstance(input, torch.Tensor):
        return input.get_device() if input.is_cuda else -1
    if not isinstance(input, list):
        raise Exception(f'Unknown type {type(input)}.')
    # Lazily walk the items and stop at the first one that lives on a GPU.
    found = (device for device in map(get_input_device, input)
             if device != -1)
    return next(found, -1)


class Scatter:
    """Namespace for the scatter entry point used on ``DataContainer`` data."""

    @staticmethod
    def forward(target_gpus, input):
        """Scatter ``input`` to ``target_gpus`` and return a tuple of results.

        Args:
            target_gpus (list[int]): Device ids; ``[-1]`` means no GPU copy.
            input (list | torch.Tensor): Data accepted by ``scatter``.

        Returns:
            tuple: ``tuple(outputs)`` when ``scatter`` returned a list,
            otherwise a one-element tuple holding the single output.
        """
        source_device = get_input_device(input)
        copy_streams = None
        if source_device == -1 and target_gpus != [-1]:
            # Perform CPU to GPU copies in a background stream
            copy_streams = [_get_stream(gpu) for gpu in target_gpus]

        scattered = scatter(input, target_gpus, copy_streams)
        # Synchronize with the copy stream
        if copy_streams is not None:
            synchronize_stream(scattered, target_gpus, copy_streams)

        if isinstance(scattered, list):
            return tuple(scattered)
        return (scattered, )


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/parallel/collate.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
from collections.abc import Mapping, Sequence

import torch
import torch.nn.functional as F
from torch.utils.data.dataloader import default_collate

from .data_container import DataContainer


def collate(batch, samples_per_gpu=1):
    """Puts each data field into a tensor/DataContainer with outer dimension
    batch size.

    Extend default_collate to add support for
    :type:`~mmcv.parallel.DataContainer`. There are 3 cases.

    1. cpu_only = True, e.g., meta data
    2. cpu_only = False, stack = True, e.g., images tensors
    3. cpu_only = False, stack = False, e.g., gt bboxes

    Args:
        batch (Sequence): Samples to collate. The type of ``batch[0]`` decides
            how the whole batch is handled.
        samples_per_gpu (int): Size of the groups the batch is cut into when
            the samples are ``DataContainer`` objects. Defaults to 1.

    Returns:
        DataContainer | list | dict | object: A ``DataContainer`` holding one
        entry per group for ``DataContainer`` samples, a list / dict of
        recursively collated fields for sequence / mapping samples, and the
        result of ``default_collate`` for everything else.

    Raises:
        TypeError: If ``batch`` is not a ``Sequence``.
    """

    if not isinstance(batch, Sequence):
        # Not every non-sequence object has a ``dtype`` (e.g. an int or a
        # generator); fall back to its type so a TypeError is always raised.
        described = getattr(batch, 'dtype', type(batch))
        raise TypeError(f'{described} is not supported.')

    first = batch[0]
    if isinstance(first, DataContainer):
        stacked = []
        if first.cpu_only:
            for i in range(0, len(batch), samples_per_gpu):
                stacked.append(
                    [sample.data for sample in batch[i:i + samples_per_gpu]])
            return DataContainer(
                stacked, first.stack, first.padding_value, cpu_only=True)
        elif first.stack:
            for i in range(0, len(batch), samples_per_gpu):
                head = batch[i]
                group = batch[i:i + samples_per_gpu]
                assert isinstance(head.data, torch.Tensor)

                if head.pad_dims is not None:
                    ndim = head.dim()
                    assert ndim > head.pad_dims
                    # max_shape[k] is the largest size seen in the group for
                    # the (k + 1)-th dimension counted from the end.
                    max_shape = [0 for _ in range(head.pad_dims)]
                    for dim in range(1, head.pad_dims + 1):
                        max_shape[dim - 1] = head.size(-dim)
                    for sample in group:
                        # Leading (non-padded) dimensions must match exactly.
                        for dim in range(0, ndim - head.pad_dims):
                            assert head.size(dim) == sample.size(dim)
                        for dim in range(1, head.pad_dims + 1):
                            max_shape[dim - 1] = max(max_shape[dim - 1],
                                                     sample.size(-dim))
                    padded_samples = []
                    for sample in group:
                        # Only the odd slots are filled, i.e. padding is
                        # appended at the end of each padded dimension.
                        pad = [0 for _ in range(head.pad_dims * 2)]
                        for dim in range(1, head.pad_dims + 1):
                            pad[2 * dim -
                                1] = max_shape[dim - 1] - sample.size(-dim)
                        padded_samples.append(
                            F.pad(
                                sample.data, pad, value=sample.padding_value))
                    stacked.append(default_collate(padded_samples))
                else:
                    # pad_dims is None: stack the tensors without padding.
                    stacked.append(
                        default_collate([sample.data for sample in group]))
        else:
            for i in range(0, len(batch), samples_per_gpu):
                stacked.append(
                    [sample.data for sample in batch[i:i + samples_per_gpu]])
        return DataContainer(stacked, first.stack, first.padding_value)
    elif isinstance(first, str):
        # ``str`` is itself a Sequence of ``str``; sending it through the
        # Sequence branch below would recurse without end. Defer to
        # default_collate, as for any other leaf value.
        return default_collate(batch)
    elif isinstance(first, Sequence):
        transposed = zip(*batch)
        return [collate(samples, samples_per_gpu) for samples in transposed]
    elif isinstance(first, Mapping):
        return {
            key: collate([d[key] for d in batch], samples_per_gpu)
            for key in first
        }
    else:
        return default_collate(batch)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/parallel/data_container.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import functools

import torch


def assert_tensor_type(func):
    """Decorator restricting a method to objects whose ``data`` is a tensor.

    The wrapped callable reads its first positional argument and raises
    ``AttributeError`` unless that object's ``data`` attribute is a
    ``torch.Tensor``; otherwise the call is forwarded unchanged.
    """

    @functools.wraps(func)
    def guarded(*args, **kwargs):
        owner = args[0]
        if isinstance(owner.data, torch.Tensor):
            return func(*args, **kwargs)
        raise AttributeError(
            f'{owner.__class__.__name__} has no attribute '
            f'{func.__name__} for type {owner.datatype}')

    return guarded


class DataContainer:
    """A container for any type of objects.

    Typically tensors will be stacked in the collate function and sliced along
    some dimension in the scatter function. This behavior has some limitations.
    1. All tensors have to be the same size.
    2. Types are limited (numpy array or Tensor).

    We design `DataContainer` and `MMDataParallel` to overcome these
    limitations. The behavior can be either of the following.

    - copy to GPU, pad all tensors to the same size and stack them
    - copy to GPU without stacking
    - leave the objects as is and pass it to the model
    - pad_dims specifies the number of last few dimensions to do padding

    Args:
        data: The wrapped object.
        stack (bool): Stored flag, exposed as ``stack``. Defaults to False.
        padding_value: Stored value, exposed as ``padding_value``.
            Defaults to 0.
        cpu_only (bool): Stored flag, exposed as ``cpu_only``.
            Defaults to False.
        pad_dims (int | None): One of ``None``, 1, 2 or 3. Defaults to 2.
    """

    def __init__(self,
                 data,
                 stack=False,
                 padding_value=0,
                 cpu_only=False,
                 pad_dims=2):
        self._data = data
        self._cpu_only = cpu_only
        self._stack = stack
        self._padding_value = padding_value
        assert pad_dims in [None, 1, 2, 3]
        self._pad_dims = pad_dims

    def __repr__(self):
        return f'{self.__class__.__name__}({self.data!r})'

    def __len__(self):
        return len(self._data)

    @property
    def data(self):
        """The wrapped object, exactly as passed to the constructor."""
        return self._data

    @property
    def datatype(self):
        """``data.type()`` for tensors, ``type(data)`` for anything else."""
        payload = self.data
        if not isinstance(payload, torch.Tensor):
            return type(payload)
        return payload.type()

    @property
    def cpu_only(self):
        """The ``cpu_only`` flag given at construction."""
        return self._cpu_only

    @property
    def stack(self):
        """The ``stack`` flag given at construction."""
        return self._stack

    @property
    def padding_value(self):
        """The ``padding_value`` given at construction."""
        return self._padding_value

    @property
    def pad_dims(self):
        """The ``pad_dims`` value given at construction."""
        return self._pad_dims

    @assert_tensor_type
    def size(self, *args, **kwargs):
        """Forward to ``data.size``; only valid when ``data`` is a tensor."""
        return self.data.size(*args, **kwargs)

    @assert_tensor_type
    def dim(self):
        """Forward to ``data.dim``; only valid when ``data`` is a tensor."""
        return self.data.dim()


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/parallel/data_parallel.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
from itertools import chain

from torch.nn.parallel import DataParallel

from .scatter_gather import scatter_kwargs


class MMDataParallel(DataParallel):
    """The DataParallel module that supports DataContainer.

    MMDataParallel has two main differences with PyTorch DataParallel:

    - It supports a custom type :class:`DataContainer` which allows more
      flexible control of input data during both GPU and CPU inference.
    - It implement two more APIs ``train_step()`` and ``val_step()``.

    .. warning::
        MMDataParallel only supports single GPU training, if you need to
        train with multiple GPUs, please use MMDistributedDataParallel
        instead. If you have multiple GPUs and you just want to use
        MMDataParallel, you can set the environment variable
        ``CUDA_VISIBLE_DEVICES=0`` or instantiate ``MMDataParallel`` with
        ``device_ids=[0]``.

    Args:
        module (:class:`nn.Module`): Module to be encapsulated.
        device_ids (list[int]): Device IDS of modules to be scattered to.
            Defaults to None when GPU is not available.
        output_device (str | int): Device ID for output. Defaults to None.
        dim (int): Dimension used to scatter the data. Defaults to 0.
    """

    def __init__(self, *args, dim=0, **kwargs):
        super(MMDataParallel, self).__init__(*args, dim=dim, **kwargs)
        self.dim = dim
        wraps_dict = isinstance(self.module, dict)
        if wraps_dict and len(self.device_ids) == 1:
            # A dict of modules is moved entry by entry onto the source
            # device.
            for name, m in self.module.items():
                self.module[name] = m.to(self.src_device_obj)

    def forward(self, *inputs, **kwargs):
        """Override the original forward function.

        The main difference lies in the CPU inference where the data in
        :class:`DataContainers` will still be gathered.
        """
        if self.device_ids:
            return super().forward(*inputs, **kwargs)
        # We add the following line thus the module could gather and
        # convert data containers as those in GPU inference
        inputs, kwargs = self.scatter(inputs, kwargs, [-1])
        return self.module(*inputs[0], **kwargs[0])

    def scatter(self, inputs, kwargs, device_ids):
        return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim)

    def _run_step(self, step_name, inputs, kwargs):
        """Scatter the arguments and call ``self.module.<step_name>``.

        Shared implementation of ``train_step`` and ``val_step``.
        """
        if not self.device_ids:
            # We add the following line thus the module could gather and
            # convert data containers as those in GPU inference
            inputs, kwargs = self.scatter(inputs, kwargs, [-1])
            return getattr(self.module, step_name)(*inputs[0], **kwargs[0])

        assert len(self.device_ids) == 1, \
            ('MMDataParallel only supports single GPU training, if you need to'
             ' train with multiple GPUs, please use MMDistributedDataParallel'
             ' instead.')

        # Every parameter and buffer has to live on the source device.
        for t in chain(self.module.parameters(), self.module.buffers()):
            if t.device != self.src_device_obj:
                raise RuntimeError(
                    'module must have its parameters and buffers '
                    f'on device {self.src_device_obj} (device_ids[0]) but '
                    f'found one of them on device: {t.device}')

        inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids)
        return getattr(self.module, step_name)(*inputs[0], **kwargs[0])

    def train_step(self, *inputs, **kwargs):
        """Scatter the arguments and run ``self.module.train_step``."""
        return self._run_step('train_step', inputs, kwargs)

    def val_step(self, *inputs, **kwargs):
        """Scatter the arguments and run ``self.module.val_step``."""
        return self._run_step('val_step', inputs, kwargs)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/parallel/distributed.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import torch
from torch.nn.parallel.distributed import (DistributedDataParallel,
                                           _find_tensors)

from mmcv import print_log
from mmcv.utils import TORCH_VERSION, digit_version
from .scatter_gather import scatter_kwargs


class MMDistributedDataParallel(DistributedDataParallel):
    """The DDP module that supports DataContainer.

    MMDDP has two main differences with PyTorch DDP:

    - It supports a custom type :class:`DataContainer` which allows more
      flexible control of input data.
    - It implement two APIs ``train_step()`` and ``val_step()``.

    ``scatter`` and ``to_kwargs`` are overridden so that inputs are moved to
    the target device(s) through ``scatter_kwargs``.
    """

    def to_kwargs(self, inputs, kwargs, device_id):
        """Move ``inputs``/``kwargs`` to the single device ``device_id``."""
        # Use `self.to_kwargs` instead of `self.scatter` in pytorch1.8
        # to move all tensors to device_id
        return scatter_kwargs(inputs, kwargs, [device_id], dim=self.dim)

    def scatter(self, inputs, kwargs, device_ids):
        """Scatter ``inputs``/``kwargs`` to ``device_ids`` along ``self.dim``."""
        return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim)

    def train_step(self, *inputs, **kwargs):
        """train_step() API for module wrapped by DistributedDataParallel.

        This method is basically the same as
        ``DistributedDataParallel.forward()``, while replacing
        ``self.module.forward()`` with ``self.module.train_step()``.
        It is compatible with PyTorch 1.1 - 1.5.

        NOTE(review): the body also contains branches for PyTorch >= 1.7, so
        the version range stated above looks outdated -- confirm.

        Returns:
            The value returned by ``self.module.train_step`` (or, with more
            than one device id, the gathered ``parallel_apply`` outputs).
        """

        # In PyTorch >= 1.7, ``reducer._rebuild_buckets()`` is moved from the
        # end of backward to the beginning of forward.
        # The call is part of the condition: it only runs when both version
        # checks pass, and the message is logged when it returns a truthy
        # value.
        if ('parrots' not in TORCH_VERSION
                and digit_version(TORCH_VERSION) >= digit_version('1.7')
                and self.reducer._rebuild_buckets()):
            print_log(
                'Reducer buckets have been rebuilt in this iteration.',
                logger='mmcv')

        # A missing ``require_forward_param_sync`` attribute is treated as
        # True, i.e. parameters are synchronised.
        if getattr(self, 'require_forward_param_sync', True):
            self._sync_params()
        if self.device_ids:
            inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids)
            if len(self.device_ids) == 1:
                output = self.module.train_step(*inputs[0], **kwargs[0])
            else:
                # NOTE(review): this path does not visibly call
                # ``train_step``; what the replicas execute is decided by
                # ``parallel_apply`` -- confirm.
                outputs = self.parallel_apply(
                    self._module_copies[:len(inputs)], inputs, kwargs)
                output = self.gather(outputs, self.output_device)
        else:
            # No device ids: call the module directly without scattering.
            output = self.module.train_step(*inputs, **kwargs)

        if torch.is_grad_enabled() and getattr(
                self, 'require_backward_grad_sync', True):
            # Hand the reducer either the tensors found in ``output`` or an
            # empty list, depending on ``find_unused_parameters``.
            if self.find_unused_parameters:
                self.reducer.prepare_for_backward(list(_find_tensors(output)))
            else:
                self.reducer.prepare_for_backward([])
        else:
            if ('parrots' not in TORCH_VERSION
                    and digit_version(TORCH_VERSION) > digit_version('1.2')):
                self.require_forward_param_sync = False
        return output

    def val_step(self, *inputs, **kwargs):
        """val_step() API for module wrapped by DistributedDataParallel.

        This method is basically the same as
        ``DistributedDataParallel.forward()``, while replacing
        ``self.module.forward()`` with ``self.module.val_step()``.
        It is compatible with PyTorch 1.1 - 1.5.

        NOTE(review): the body also contains branches for PyTorch >= 1.7, so
        the version range stated above looks outdated -- confirm.

        Returns:
            The value returned by ``self.module.val_step`` (or, with more
            than one device id, the gathered ``parallel_apply`` outputs).
        """
        # In PyTorch >= 1.7, ``reducer._rebuild_buckets()`` is moved from the
        # end of backward to the beginning of forward.
        # The call is part of the condition: it only runs when both version
        # checks pass, and the message is logged when it returns a truthy
        # value.
        if ('parrots' not in TORCH_VERSION
                and digit_version(TORCH_VERSION) >= digit_version('1.7')
                and self.reducer._rebuild_buckets()):
            print_log(
                'Reducer buckets have been rebuilt in this iteration.',
                logger='mmcv')

        # A missing ``require_forward_param_sync`` attribute is treated as
        # True, i.e. parameters are synchronised.
        if getattr(self, 'require_forward_param_sync', True):
            self._sync_params()
        if self.device_ids:
            inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids)
            if len(self.device_ids) == 1:
                output = self.module.val_step(*inputs[0], **kwargs[0])
            else:
                # NOTE(review): this path does not visibly call ``val_step``;
                # what the replicas execute is decided by ``parallel_apply``
                # -- confirm.
                outputs = self.parallel_apply(
                    self._module_copies[:len(inputs)], inputs, kwargs)
                output = self.gather(outputs, self.output_device)
        else:
            # No device ids: call the module directly without scattering.
            output = self.module.val_step(*inputs, **kwargs)

        if torch.is_grad_enabled() and getattr(
                self, 'require_backward_grad_sync', True):
            # Hand the reducer either the tensors found in ``output`` or an
            # empty list, depending on ``find_unused_parameters``.
            if self.find_unused_parameters:
                self.reducer.prepare_for_backward(list(_find_tensors(output)))
            else:
                self.reducer.prepare_for_backward([])
        else:
            if ('parrots' not in TORCH_VERSION
                    and digit_version(TORCH_VERSION) > digit_version('1.2')):
                self.require_forward_param_sync = False
        return output


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/parallel/distributed_deprecated.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import torch
import torch.distributed as dist
import torch.nn as nn
from torch._utils import (_flatten_dense_tensors, _take_tensors,
                          _unflatten_dense_tensors)

from mmcv.utils import TORCH_VERSION, digit_version
from .registry import MODULE_WRAPPERS
from .scatter_gather import scatter_kwargs


@MODULE_WRAPPERS.register_module()
class MMDistributedDataParallel(nn.Module):
    """Module wrapper that broadcasts module state from rank 0.

    On construction the wrapped module's state (and optionally its buffers)
    is broadcast with ``torch.distributed``. ``forward``, ``train_step`` and
    ``val_step`` scatter their arguments to the current CUDA device and then
    call the matching method of the wrapped module.

    Args:
        module (nn.Module): Module to wrap.
        dim (int): Dimension passed to ``scatter_kwargs``. Defaults to 0.
        broadcast_buffers (bool): Whether buffers are broadcast as well.
            Defaults to True.
        bucket_cap_mb (int): Broadcast bucket size in MiB. Defaults to 25.
    """

    def __init__(self,
                 module,
                 dim=0,
                 broadcast_buffers=True,
                 bucket_cap_mb=25):
        super(MMDistributedDataParallel, self).__init__()
        self.module = module
        self.dim = dim
        self.broadcast_buffers = broadcast_buffers

        # MiB -> bytes
        self.broadcast_bucket_size = bucket_cap_mb * 1024 * 1024
        self._sync_params()

    def _dist_broadcast_coalesced(self, tensors, buffer_size):
        """Broadcast ``tensors`` from rank 0 in buckets of ``buffer_size``."""
        for bucket in _take_tensors(tensors, buffer_size):
            flat = _flatten_dense_tensors(bucket)
            dist.broadcast(flat, 0)
            restored = _unflatten_dense_tensors(flat, bucket)
            # Write the received values back into the original tensors.
            for tensor, synced in zip(bucket, restored):
                tensor.copy_(synced)

    def _sync_params(self):
        """Broadcast the state dict values, then (optionally) the buffers."""
        states = list(self.module.state_dict().values())
        if states:
            self._dist_broadcast_coalesced(states,
                                           self.broadcast_bucket_size)
        if not self.broadcast_buffers:
            return

        legacy_torch = (
            TORCH_VERSION != 'parrots'
            and digit_version(TORCH_VERSION) < digit_version('1.0'))
        if legacy_torch:
            buffer_iter = self.module._all_buffers()
        else:
            buffer_iter = self.module.buffers()
        buffers = [b.data for b in buffer_iter]
        if buffers:
            self._dist_broadcast_coalesced(buffers,
                                           self.broadcast_bucket_size)

    def scatter(self, inputs, kwargs, device_ids):
        return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim)

    def _scatter_to_current_device(self, inputs, kwargs):
        """Scatter the arguments to ``torch.cuda.current_device()``."""
        return self.scatter(inputs, kwargs, [torch.cuda.current_device()])

    def forward(self, *inputs, **kwargs):
        inputs, kwargs = self._scatter_to_current_device(inputs, kwargs)
        return self.module(*inputs[0], **kwargs[0])

    def train_step(self, *inputs, **kwargs):
        inputs, kwargs = self._scatter_to_current_device(inputs, kwargs)
        return self.module.train_step(*inputs[0], **kwargs[0])

    def val_step(self, *inputs, **kwargs):
        inputs, kwargs = self._scatter_to_current_device(inputs, kwargs)
        return self.module.val_step(*inputs[0], **kwargs[0])


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/parallel/registry.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
from torch.nn.parallel import DataParallel, DistributedDataParallel

from mmcv.utils import Registry

# Registry of classes that count as "module wrappers". PyTorch's own
# DataParallel and DistributedDataParallel are registered up front.
MODULE_WRAPPERS = Registry('module wrapper')
MODULE_WRAPPERS.register_module(module=DataParallel)
MODULE_WRAPPERS.register_module(module=DistributedDataParallel)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/parallel/scatter_gather.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import torch
from torch.nn.parallel._functions import Scatter as OrigScatter

from ._functions import Scatter
from .data_container import DataContainer


def scatter(inputs, target_gpus, dim=0):
    """Scatter inputs to target gpus.

    The only difference from original :func:`scatter` is to add support for
    :type:`~mmcv.parallel.DataContainer`.

    Args:
        inputs: Object to scatter; tensors, ``DataContainer`` objects and
            non-empty tuples / lists / dicts are handled recursively, any
            other object is repeated once per target.
        target_gpus (list[int]): Target device ids; ``[-1]`` selects the
            self-implemented scatter for tensors.
        dim (int): Dimension handed to the original PyTorch scatter.
            Defaults to 0.

    Returns:
        list: One entry per target.
    """

    def scatter_map(obj):
        if isinstance(obj, torch.Tensor):
            if target_gpus == [-1]:
                # for CPU inference we use self-implemented scatter
                return Scatter.forward(target_gpus, obj)
            return OrigScatter.apply(target_gpus, None, dim, obj)
        if isinstance(obj, DataContainer):
            # cpu_only containers are passed through without any copy.
            if obj.cpu_only:
                return obj.data
            return Scatter.forward(target_gpus, obj.data)
        if isinstance(obj, tuple) and len(obj) > 0:
            per_item = [scatter_map(item) for item in obj]
            return list(zip(*per_item))
        if isinstance(obj, list) and len(obj) > 0:
            per_item = [scatter_map(item) for item in obj]
            return [list(group) for group in zip(*per_item)]
        if isinstance(obj, dict) and len(obj) > 0:
            container = type(obj)
            per_item = [scatter_map(pair) for pair in obj.items()]
            return [container(group) for group in zip(*per_item)]
        # Anything else (including empty containers) is repeated per target.
        return [obj for _ in target_gpus]

    # After scatter_map is called, a scatter_map cell will exist. This cell
    # has a reference to the actual function scatter_map, which has references
    # to a closure that has a reference to the scatter_map cell (because the
    # fn is recursive). To avoid this reference cycle, we set the function to
    # None, clearing the cell
    try:
        return scatter_map(inputs)
    finally:
        scatter_map = None


def scatter_kwargs(inputs, kwargs, target_gpus, dim=0):
    """Scatter with support for kwargs dictionary.

    Falsy ``inputs`` / ``kwargs`` are not scattered and count as an empty
    list. The shorter of the two results is padded with empty tuples
    (positional) or empty dicts (keyword) so both have the same length.

    Returns:
        tuple[tuple, tuple]: The scattered positional and keyword arguments.
    """
    scattered_args = scatter(inputs, target_gpus, dim) if inputs else []
    scattered_kwargs = scatter(kwargs, target_gpus, dim) if kwargs else []

    shortfall = len(scattered_kwargs) - len(scattered_args)
    if shortfall > 0:
        scattered_args.extend(() for _ in range(shortfall))
    elif shortfall < 0:
        scattered_kwargs.extend({} for _ in range(-shortfall))

    return tuple(scattered_args), tuple(scattered_kwargs)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/parallel/utils.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
from .registry import MODULE_WRAPPERS


def is_module_wrapper(module):
    """Check if a module is a module wrapper.

    A module counts as a wrapper when it is an instance of any class
    currently registered in ``MODULE_WRAPPERS`` (subclasses included). The
    following 3 modules in MMCV (and their subclasses) are regarded as
    module wrappers: DataParallel, DistributedDataParallel,
    MMDistributedDataParallel (the deprecated version). You may add you own
    module wrapper by registering it to mmcv.parallel.MODULE_WRAPPERS.

    Args:
        module (nn.Module): The module to be checked.

    Returns:
        bool: True if the input module is a module wrapper.
    """
    registered = MODULE_WRAPPERS.module_dict.values()
    return isinstance(module, tuple(registered))


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/readme.md
================================================
test


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/__init__.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
from .base_module import BaseModel, BaseModule, ModuleDict, ModuleList, Sequential, BaseBackbone, BaseNecks, BaseLosses, BaseNecksV2
from .base_runner import BaseRunner
from .builder import RUNNERS, build_runner
from .checkpoint import (CheckpointLoader, _load_checkpoint,
                         _load_checkpoint_with_prefix, load_checkpoint,
                         load_state_dict, save_checkpoint, weights_to_cpu)
from .default_constructor import DefaultRunnerConstructor
from .dist_utils import (allreduce_grads, allreduce_params, get_dist_info,
                         init_dist, master_only)
from .epoch_based_runner import EpochBasedRunner, Runner
from .fp16_utils import LossScaler, auto_fp16, force_fp32, wrap_fp16_model
from .hooks import (HOOKS, CheckpointHook, ClosureHook, DistEvalHook,
                    DistSamplerSeedHook, DvcliveLoggerHook, EMAHook, EvalHook,
                    Fp16OptimizerHook, GradientCumulativeFp16OptimizerHook,
                    GradientCumulativeOptimizerHook, Hook, IterTimerHook,
                    LoggerHook, MlflowLoggerHook, NeptuneLoggerHook,
                    OptimizerHook, PaviLoggerHook, SyncBuffersHook,
                    TensorboardLoggerHook, TextLoggerHook, WandbLoggerHook)
from .hooks.lr_updater import StepLrUpdaterHook  # noqa
from .hooks.lr_updater import (CosineAnnealingLrUpdaterHook,
                               CosineRestartLrUpdaterHook, CyclicLrUpdaterHook,
                               ExpLrUpdaterHook, FixedLrUpdaterHook,
                               FlatCosineAnnealingLrUpdaterHook,
                               InvLrUpdaterHook, LrUpdaterHook,
                               OneCycleLrUpdaterHook, PolyLrUpdaterHook)
from .hooks.momentum_updater import (CosineAnnealingMomentumUpdaterHook,
                                     CyclicMomentumUpdaterHook,
                                     MomentumUpdaterHook,
                                     OneCycleMomentumUpdaterHook,
                                     StepMomentumUpdaterHook)
from .iter_based_runner import IterBasedRunner, IterLoader
from .log_buffer import LogBuffer
from .optimizer import (OPTIMIZER_BUILDERS, OPTIMIZERS,
                        DefaultOptimizerConstructor, build_optimizer,
                        build_optimizer_constructor)
from .priority import Priority, get_priority
from .utils import get_host_info, get_time_str, obj_from_dict, set_random_seed
from .record import MetricLogger
from .hooks.nni_hook import NNIHook
from .misc import find_latest_checkpoint

# Names exported by ``from <this package> import *``.
# NOTE(review): `MetricLogger` and `NNIHook` are imported above but are not
# listed here, so a star-import does not re-export them - confirm intended.
__all__ = [
    'BaseRunner', 'Runner', 'EpochBasedRunner', 'IterBasedRunner', 'LogBuffer',
    'HOOKS', 'Hook', 'CheckpointHook', 'ClosureHook', 'LrUpdaterHook',
    'FixedLrUpdaterHook', 'StepLrUpdaterHook', 'ExpLrUpdaterHook',
    'PolyLrUpdaterHook', 'InvLrUpdaterHook', 'CosineAnnealingLrUpdaterHook',
    'FlatCosineAnnealingLrUpdaterHook', 'CosineRestartLrUpdaterHook',
    'CyclicLrUpdaterHook', 'OneCycleLrUpdaterHook', 'MomentumUpdaterHook',
    'StepMomentumUpdaterHook', 'CosineAnnealingMomentumUpdaterHook',
    'CyclicMomentumUpdaterHook', 'OneCycleMomentumUpdaterHook',
    'OptimizerHook', 'IterTimerHook', 'DistSamplerSeedHook', 'LoggerHook',
    'PaviLoggerHook', 'TextLoggerHook', 'TensorboardLoggerHook',
    'NeptuneLoggerHook', 'WandbLoggerHook', 'MlflowLoggerHook',
    'DvcliveLoggerHook', '_load_checkpoint', 'load_state_dict',
    'load_checkpoint', 'weights_to_cpu', 'save_checkpoint', 'Priority',
    'get_priority', 'get_host_info', 'get_time_str', 'obj_from_dict',
    'init_dist', 'get_dist_info', 'master_only', 'OPTIMIZER_BUILDERS',
    'OPTIMIZERS', 'DefaultOptimizerConstructor', 'build_optimizer',
    'build_optimizer_constructor', 'IterLoader', 'set_random_seed',
    'auto_fp16', 'force_fp32', 'wrap_fp16_model', 'Fp16OptimizerHook',
    'SyncBuffersHook', 'EMAHook', 'build_runner', 'RUNNERS', 'allreduce_grads',
    'allreduce_params', 'LossScaler', 'CheckpointLoader', 'BaseModule', 'BaseBackbone', 'BaseNecks', 'BaseLosses', 'BaseNecksV2',
    '_load_checkpoint_with_prefix', 'EvalHook', 'DistEvalHook', 'Sequential',
    'ModuleDict', 'ModuleList', 'GradientCumulativeOptimizerHook',
    'GradientCumulativeFp16OptimizerHook', 'DefaultRunnerConstructor', 'find_latest_checkpoint'
]


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/base_module.py
================================================
    # Copyright (c) OpenMMLab. All rights reserved.
import copy
import warnings
from abc import ABCMeta
from collections import defaultdict
from logging import FileHandler

import torch.nn as nn

from mmcv.runner.dist_utils import master_only
from mmcv.utils.logging import print_log, get_logger, logger_initialized

class BaseModel(nn.Module):
    """``nn.Module`` root that records its subclasses in a name -> class map.

    Every subclass is stored in the shared class-level ``_task`` dict when it
    is defined, keyed by the ``name`` class keyword (or by the class name
    when ``name`` is omitted or empty). The same key is stored on the
    subclass as ``_name``. A later subclass using an existing key silently
    replaces the earlier entry.
    """
    # Shared by all subclasses that do not define their own ``_task``.
    _task = {}

    def __init_subclass__(cls, name='', **kwargs):
        """Register ``cls`` in ``_task``.

        Args:
            name (str): Registry key; falls back to ``cls.__name__`` when
                it is the empty string.
            **kwargs: Accepted but not used (they are not forwarded).
        """
        key = name if name != '' else cls.__name__
        cls._task[key] = cls
        cls._name = key

    @classmethod
    def build_model(cls, *args, **kwargs):
        """Instantiate the class registered under ``model`` in ``_models``.

        ``_models`` is not defined by ``BaseModel`` itself; it is looked up
        on ``cls`` (e.g. provided by a subclass or a sibling base class).
        When no such registry exists the lookup is treated as a miss, so the
        caller gets the same ``ValueError`` as for an unknown name instead
        of an ``AttributeError``.

        Args:
            *args: Unused.
            **kwargs: Must contain ``model``, the registry key.

        Returns:
            An instance of the selected class, built as ``cls(None, None)``.

        Raises:
            KeyError: If ``model`` is not passed.
            ValueError: If ``model`` is not a registered key.
        """
        model = kwargs.pop('model')
        registry = getattr(cls, '_models', {})
        try:
            cls = registry[model]
        except KeyError:
            raise ValueError(f'Got model={model} but expected '
                             f'one of {registry.keys()}')

        return cls(None, None)

    @classmethod
    def new(cls, *args, **kwargs):
        """Instantiate the class registered under ``task`` in ``_task``.

        Args:
            *args: Positional arguments forwarded to the selected class.
            **kwargs: Must contain ``task`` (the registry key); the rest is
                forwarded to the selected class.

        Returns:
            An instance of the selected class.

        Raises:
            KeyError: If ``task`` is not passed.
            ValueError: If ``task`` is not a registered key.
        """
        task = kwargs.pop('task')
        try:
            cls = cls._task[task]
        except KeyError:
            raise ValueError(f'Got task={task} but expected '
                             f'one of {cls._task.keys()}')

        return cls(*args, **kwargs)

class BaseModule(BaseModel, name='BaseModule'):  # was: nn.Module, metaclass=ABCMeta
    """Base module for all modules in openmmlab.

    ``BaseModule`` is a wrapper of ``torch.nn.Module`` with additional
    functionality of parameter initialization. Compared with
    ``torch.nn.Module``, ``BaseModule`` mainly adds three attributes.

    - ``init_cfg``: the config to control the initialization.
    - ``init_weights``: The function of parameter initialization and recording
      initialization information.
    - ``_params_init_info``: Used to track the parameter initialization
      information. This attribute only exists during executing the
      ``init_weights``.

    In this file the class derives from ``BaseModel``, so subclass
    registration (``_task``) and the ``new`` / ``build_model`` class methods
    come from there.

    Args:
        init_cfg (dict, optional): Initialization config dict.
    """
    # A commented-out copy of the `_task` registry (`__init_subclass__` and
    # `new`) used to live here; that logic is now inherited from `BaseModel`.

    def __init__(self, init_cfg=None):
        """Initialize BaseModule, inherited from `torch.nn.Module`.

        Args:
            init_cfg (dict, optional): Initialization config; a deep copy is
                stored on ``self.init_cfg``.
        """

        # NOTE init_cfg can be defined in different levels, but init_cfg
        # in low levels has a higher priority.

        super(BaseModule, self).__init__()
        # define default value of init_cfg instead of hard code
        # in init_weights() function
        # Flag flipped to True at the end of a completed `init_weights` call.
        self._is_init = False

        # Deep copy so later changes to the caller's object do not leak in.
        self.init_cfg = copy.deepcopy(init_cfg)

        # Backward compatibility in derived classes
        # if pretrained is not None:
        #     warnings.warn('DeprecationWarning: pretrained is a deprecated \
        #         key, please consider using init_cfg')
        #     self.init_cfg = dict(type='Pretrained', checkpoint=pretrained)

    @property
    def is_init(self):
        """bool: Whether ``init_weights`` has completed for this module."""
        return self._is_init

    def init_weights(self):
        """Initialize the weights.

        Applies ``self.init_cfg`` (if any) through ``mmcv.cnn.initialize``,
        then calls ``init_weights`` on every direct child that has one. The
        call that creates ``_params_init_info`` is treated as the top-level
        call: it shares that dict with all submodules, dumps it through
        ``_dump_init_info`` at the end and removes it again.

        Calling this a second time on an already initialized module only
        emits a warning.
        """

        is_top_level_module = False
        # check if it is top-level module
        if not hasattr(self, '_params_init_info'):
            # The `_params_init_info` is used to record the initialization
            # information of the parameters
            # the key should be the obj:`nn.Parameter` of model and the value
            # should be a dict containing
            # - init_info (str): The string that describes the initialization.
            # - tmp_mean_value (FloatTensor): The mean of the parameter,
            #       which indicates whether the parameter has been modified.
            # this attribute would be deleted after all parameters
            # is initialized.
            self._params_init_info = defaultdict(dict)
            is_top_level_module = True

            # Initialize the `_params_init_info`,
            # When detecting the `tmp_mean_value` of
            # the corresponding parameter is changed, update related
            # initialization information
            for name, param in self.named_parameters():
                self._params_init_info[param][
                    'init_info'] = f'The value is the same before and ' \
                                   f'after calling `init_weights` ' \
                                   f'of {self.__class__.__name__} '
                self._params_init_info[param][
                    'tmp_mean_value'] = param.data.mean()

            # pass `params_init_info` to all submodules
            # All submodules share the same `params_init_info`,
            # so it will be updated when parameters are
            # modified at any level of the model.
            for sub_module in self.modules():
                sub_module._params_init_info = self._params_init_info

        # Get the initialized logger, if not exist,
        # create a logger named `mmcv`
        logger_names = list(logger_initialized.keys())
        logger_name = logger_names[0] if logger_names else 'mmcv'

        # Imported inside the method rather than at module level
        # (presumably to avoid a circular import - TODO confirm).
        from ..cnn import initialize
        from ..cnn.utils.weight_init import update_init_info
        module_name = self.__class__.__name__
        if not self._is_init:
            if self.init_cfg:
                print_log(
                    f'initialize {module_name} with init_cfg {self.init_cfg}',
                    logger=logger_name)
                initialize(self, self.init_cfg)
                if isinstance(self.init_cfg, dict):
                    # prevent the parameters of
                    # the pre-trained model
                    # from being overwritten by
                    # the `init_weights`
                    # NOTE(review): this early return leaves `_is_init` False
                    # and, on a top-level call, skips the dump and the removal
                    # of `_params_init_info` below - confirm this is intended.
                    if self.init_cfg['type'] == 'Pretrained':
                        return

            # Recurse into direct children only; each child handles its own
            # children in its `init_weights`.
            for m in self.children():
                if hasattr(m, 'init_weights'):
                    m.init_weights()
                    # users may overload the `init_weights`
                    update_init_info(
                        m,
                        init_info=f'Initialized by '
                        f'user-defined `init_weights`'
                        f' in {m.__class__.__name__} ')

            self._is_init = True
        else:
            warnings.warn(f'init_weights of {self.__class__.__name__} has '
                          f'been called more than once.')

        if is_top_level_module:
            self._dump_init_info(logger_name)

            # The bookkeeping dict is only meant to exist while
            # `init_weights` runs, so drop it from every submodule.
            for sub_module in self.modules():
                del sub_module._params_init_info

    @master_only
    def _dump_init_info(self, logger_name):
        """Write the per-parameter initialization information to the log.

        For every ``FileHandler`` attached to the logger, a header line and
        one entry per parameter are written directly to the handler's
        stream. If the logger has no ``FileHandler``, the entries are
        emitted through ``print_log`` instead.

        Args:
            logger_name (str): The name of logger.
        """

        logger = get_logger(logger_name)

        with_file_handler = False
        # dump the information to the logger file if there is a `FileHandler`
        for handler in logger.handlers:
            if isinstance(handler, FileHandler):
                handler.stream.write(
                    'Name of parameter - Initialization information\n')
                for name, param in self.named_parameters():
                    handler.stream.write(
                        f'\n{name} - {param.shape}: '
                        f"\n{self._params_init_info[param]['init_info']} \n")
                handler.stream.flush()
                with_file_handler = True
        # Fallback when nothing was written to a file.
        if not with_file_handler:
            for name, param in self.named_parameters():
                print_log(
                    f'\n{name} - {param.shape}: '
                    f"\n{self._params_init_info[param]['init_info']} \n ",
                    logger=logger_name)

    def __repr__(self):
        """Return the parent ``repr`` plus an ``init_cfg=...`` line if set."""
        s = super().__repr__()
        if self.init_cfg:
            s += f'\ninit_cfg={self.init_cfg}'
        return s


class Sequential(BaseModule, nn.Sequential, name='Sequential'):
    """``nn.Sequential`` that also carries the ``BaseModule`` init machinery.

    Args:
        *args: Forwarded unchanged to ``nn.Sequential``.
        init_cfg (dict, optional): Initialization config dict
            (keyword-only).
    """

    def __init__(self, *args, init_cfg=None):
        # BaseModule first, then the container, matching the base order.
        BaseModule.__init__(self, init_cfg=init_cfg)
        nn.Sequential.__init__(self, *args)


class ModuleList(BaseModule, nn.ModuleList, name='ModuleList'):
    """``nn.ModuleList`` that also carries the ``BaseModule`` init machinery.

    Args:
        modules (iterable, optional): Modules forwarded to
            ``nn.ModuleList``.
        init_cfg (dict, optional): Initialization config dict.
    """

    def __init__(self, modules=None, init_cfg=None):
        # BaseModule first, then the container, matching the base order.
        BaseModule.__init__(self, init_cfg=init_cfg)
        nn.ModuleList.__init__(self, modules)


class ModuleDict(BaseModule, nn.ModuleDict, name='ModuleDict'):
    """``nn.ModuleDict`` that also carries the ``BaseModule`` init machinery.

    Args:
        modules (dict, optional): Mapping of (string: module), or an
            iterable of (string, module) pairs, forwarded to
            ``nn.ModuleDict``.
        init_cfg (dict, optional): Initialization config dict.
    """

    def __init__(self, modules=None, init_cfg=None):
        # BaseModule first, then the container, matching the base order.
        BaseModule.__init__(self, init_cfg=init_cfg)
        nn.ModuleDict.__init__(self, modules)


class BaseBackbone(BaseModule, name='BaseBackbone'):
    """``BaseModule`` root whose subclasses are recorded in ``_models``.

    Subclasses are stored in the class-level ``_models`` dict under the
    ``name`` class keyword, or under the class name when ``name`` is empty.
    The chosen key is also stored on the subclass as ``_name``.
    """
    _models = {}

    def __init_subclass__(cls, name='', **kwargs):
        # Empty name means "use the class name as the registry key".
        registry_key = cls.__name__ if name == '' else name
        cls._models[registry_key] = cls
        cls._name = registry_key

    @classmethod
    def build_model(cls, *args, **kwargs):
        """Return the class (not an instance) registered under ``model``.

        Raises:
            KeyError: If the ``model`` keyword is missing.
            ValueError: If ``model`` is not a registered key.
        """
        model = kwargs.pop('model')
        try:
            return cls._models[model]
        except KeyError:
            raise ValueError(f'Got models={model} but expected '
                             f'one of {cls._models.keys()}')

class BaseLosses(nn.Module):
    """``nn.Module`` root whose subclasses are recorded in ``_models``.

    Subclasses are stored in the class-level ``_models`` dict under the
    ``name`` class keyword, or under the class name when ``name`` is empty.
    The chosen key is also stored on the subclass as ``_name``.
    """
    _models = {}

    def __init_subclass__(cls, name='', **kwargs):
        # Empty name means "use the class name as the registry key".
        registry_key = cls.__name__ if name == '' else name
        cls._models[registry_key] = cls
        cls._name = registry_key

    @classmethod
    def build_model(cls, *args, **kwargs):
        """Return the class (not an instance) registered under ``model``.

        Raises:
            KeyError: If the ``model`` keyword is missing.
            ValueError: If ``model`` is not a registered key.
        """
        model = kwargs.pop('model')
        try:
            return cls._models[model]
        except KeyError:
            raise ValueError(f'Got models={model} but expected '
                             f'one of {cls._models.keys()}')

class BaseNecks(nn.Module):
    """``nn.Module`` root whose subclasses are recorded in ``_models``.

    Subclasses are stored in the class-level ``_models`` dict under the
    ``name`` class keyword, or under the class name when ``name`` is empty.
    The chosen key is also stored on the subclass as ``_name``.
    """
    _models = {}

    def __init_subclass__(cls, name='', **kwargs):
        # Empty name means "use the class name as the registry key".
        registry_key = cls.__name__ if name == '' else name
        cls._models[registry_key] = cls
        cls._name = registry_key

    @classmethod
    def build_model(cls, *args, **kwargs):
        """Return the class (not an instance) registered under ``model``.

        Raises:
            KeyError: If the ``model`` keyword is missing.
            ValueError: If ``model`` is not a registered key.
        """
        model = kwargs.pop('model')
        try:
            return cls._models[model]
        except KeyError:
            raise ValueError(f'Got models={model} but expected '
                             f'one of {cls._models.keys()}')

class BaseNecksV2(BaseModule, BaseNecks, name='BaseNecksV2'):
    '''
    Neck base class that combines ``BaseModule`` and ``BaseNecks``.

    Original author note (translated): the parent classes' ``_models`` and
    ``__init_subclass__`` are both inherited.

    NOTE(review): with the bases ordered ``(BaseModule, BaseNecks)`` the MRO
    places ``BaseModel`` ahead of ``BaseNecks``, so ``__init_subclass__`` and
    ``build_model`` resolve to the ``BaseModel`` versions (subclasses are
    recorded in ``_task``), while ``_models`` resolves to
    ``BaseNecks._models`` - confirm this is the intended behavior.
    '''
    ...


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/base_runner.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import copy
import logging
import os.path as osp
import warnings
from abc import ABCMeta, abstractmethod

import torch
from torch.optim import Optimizer

import mmcv
from ..parallel import is_module_wrapper
from .checkpoint import load_checkpoint
from .dist_utils import get_dist_info
from .hooks import HOOKS, Hook
from .log_buffer import LogBuffer
from .priority import Priority, get_priority
from .utils import get_time_str
from .record import MetricLogger


class BaseRunner(metaclass=ABCMeta):
    """The base class of Runner, a training helper for PyTorch.

    All subclasses should implement the following APIs:

    - ``run()``
    - ``train()``
    - ``val()``
    - ``save_checkpoint()``

    Args:
        model (:obj:`torch.nn.Module`): The model to be run.
        batch_processor (callable): A callable method that process a data
            batch. The interface of this method should be
            `batch_processor(model, data, train_mode) -> dict`
        optimizer (dict or :obj:`torch.optim.Optimizer`): It can be either an
            optimizer (in most cases) or a dict of optimizers (in models that
            requires more than one optimizer, e.g., GAN).
        work_dir (str, optional): The working directory to save checkpoints
            and logs. Defaults to None.
        logger (:obj:`logging.Logger`): Logger used during training.
             Defaults to None. (The default value is just for backward
             compatibility)
        meta (dict | None): A dict records some import information such as
            environment info and seed, which will be logged in logger hook.
            Defaults to None.
        max_epochs (int, optional): Total training epochs.
        max_iters (int, optional): Total training iterations.
    """

    def __init__(self,
                 model,
                 batch_processor=None,
                 optimizer=None,
                 work_dir=None,
                 logger=None,
                 meta=None,
                 max_iters=None,
                 max_epochs=None,
                 opt_cfg=None):
        """Validate the arguments and set up the runner state.

        Args:
            model: The model to be run.
            batch_processor (callable, optional): Deprecated; must be
                callable and cannot be combined with a model that defines
                ``train_step``/``val_step``.
            optimizer (Optimizer | dict | None): A single optimizer or a
                dict of optimizers.
            work_dir (str, optional): Working directory; created if given.
            logger (logging.Logger): Required despite the ``None`` default.
            meta (dict, optional): Extra information kept on the runner.
            max_iters (int, optional): Total training iterations.
            max_epochs (int, optional): Total training epochs.
            opt_cfg (mapping, optional): Option container. ``'save_dir'`` is
                read here; ``'eval'`` is read in ``resume``. When it is
                ``None`` (or its ``'save_dir'`` is ``None``) ``save_dir``
                falls back to ``work_dir``.

        Raises:
            TypeError: On a non-callable ``batch_processor``, a wrong
                ``optimizer``/``logger``/``meta``/``work_dir`` type.
            RuntimeError: If ``batch_processor`` is given together with a
                model providing ``train_step``/``val_step``.
            ValueError: If both ``max_epochs`` and ``max_iters`` are set.
        """
        if batch_processor is not None:
            if not callable(batch_processor):
                raise TypeError('batch_processor must be callable, '
                                f'but got {type(batch_processor)}')
            warnings.warn(
                'batch_processor is deprecated, please implement '
                'train_step() and val_step() in the model instead.',
                DeprecationWarning)
            # raise an error is `batch_processor` is not None and
            # `model.train_step()` exists.
            if is_module_wrapper(model):
                _model = model.module
            else:
                _model = model
            if hasattr(_model, 'train_step') or hasattr(_model, 'val_step'):
                raise RuntimeError(
                    'batch_processor and model.train_step()/model.val_step() '
                    'cannot be both available.')

        # check the type of `optimizer`
        if isinstance(optimizer, dict):
            for name, optim in optimizer.items():
                if not isinstance(optim, Optimizer):
                    raise TypeError(
                        f'optimizer must be a dict of torch.optim.Optimizers, '
                        f'but optimizer["{name}"] is a {type(optim)}')
        elif not isinstance(optimizer, Optimizer) and optimizer is not None:
            raise TypeError(
                f'optimizer must be a torch.optim.Optimizer object '
                f'or dict or None, but got {type(optimizer)}')

        # check the type of `logger`
        if not isinstance(logger, logging.Logger):
            raise TypeError(f'logger must be a logging.Logger object, '
                            f'but got {type(logger)}')

        # check the type of `meta`
        if meta is not None and not isinstance(meta, dict):
            raise TypeError(
                f'meta must be a dict or None, but got {type(meta)}')

        self.model = model
        self.batch_processor = batch_processor
        self.optimizer = optimizer
        self.logger = logger
        self.meta = meta
        self.opt_cfg = opt_cfg
        self.earlyStop = False

        # create work_dir / save_dir
        # `opt_cfg` defaults to None, so it must not be subscripted blindly.
        save_dir = opt_cfg['save_dir'] if opt_cfg is not None else None
        if mmcv.is_str(work_dir):
            self.work_dir = osp.abspath(work_dir)
            # Without a configured save_dir, keep outputs in work_dir.
            if save_dir is None:
                self.save_dir = self.work_dir
            else:
                self.save_dir = osp.abspath(save_dir)
            mmcv.mkdir_or_exist(self.save_dir)
            mmcv.mkdir_or_exist(self.work_dir)
        elif work_dir is None:
            self.work_dir = None
            self.save_dir = None
        else:
            raise TypeError(f'"work_dir: {work_dir}" must be a str or None')

        # get model name from the model class
        if hasattr(self.model, 'module'):
            self._model_name = self.model.module.__class__.__name__
        else:
            self._model_name = self.model.__class__.__name__

        self._rank, self._world_size = get_dist_info()
        self.timestamp = get_time_str()
        self.mode = None
        self._hooks = []
        self._epoch = 0
        self._iter = 0
        self._inner_iter = 0
        self.outputs = {}

        if max_epochs is not None and max_iters is not None:
            raise ValueError(
                'Only one of `max_epochs` or `max_iters` can be set.')

        self._max_epochs = max_epochs
        self._max_iters = max_iters
        # TODO: Redesign LogBuffer, it is not flexible and elegant enough
        # MetricLogger is used here in place of the original LogBuffer().
        self.log_buffer = MetricLogger(logger=logger, delimiter="  ")

    @property
    def model_name(self) -> str:
        """str: Name of the model, usually the module class name.

        Taken in ``__init__`` from ``model.module`` when the model has a
        ``module`` attribute, otherwise from the model itself.
        """
        return self._model_name

    @property
    def rank(self):
        """int: Rank of current process. (distributed training)

        Obtained once from ``get_dist_info()`` in ``__init__``.
        """
        return self._rank

    @property
    def world_size(self):
        """int: Number of processes participating in the job.
        (distributed training)

        Obtained once from ``get_dist_info()`` in ``__init__``.
        """
        return self._world_size

    @property
    def hooks(self) -> list:
        """list[:obj:`Hook`]: A list of registered hooks.

        This is the internal list itself (not a copy), kept ordered by
        ``register_hook``.
        """
        return self._hooks

    @property
    def epoch(self):
        """int: Current epoch.

        Starts at 0; ``resume`` overwrites it with the checkpoint's value.
        """
        return self._epoch

    @property
    def iter(self):
        """int: Current iteration.

        Starts at 0; ``resume`` overwrites it with the checkpoint's value
        (rescaled there when the number of GPUs changed).
        """
        return self._iter

    @property
    def inner_iter(self):
        """int: Iteration in an epoch.

        Initialized to 0 in ``__init__``.
        """
        return self._inner_iter

    @property
    def max_epochs(self):
        """int: Maximum training epochs.

        The value passed to ``__init__`` (may be ``None``); ``resume`` sets
        it to the resumed epoch when ``opt_cfg['eval']`` is truthy.
        """
        return self._max_epochs

    @property
    def max_iters(self):
        """int: Maximum training iterations.

        The value passed to ``__init__`` (may be ``None``).
        """
        return self._max_iters

    @abstractmethod
    def train(self):
        """Abstract training step/loop; concrete runners must override it."""
        pass

    @abstractmethod
    def val(self):
        """Abstract validation step/loop; concrete runners must override it."""
        pass

    @abstractmethod
    def run(self, data_loaders, workflow, **kwargs):
        """Abstract entry point that drives the run; subclasses override it.

        Args:
            data_loaders: Presumably the data loaders, one per workflow
                stage - TODO confirm against the concrete runners.
            workflow: Presumably the sequence of stages to execute -
                TODO confirm against the concrete runners.
            **kwargs: Extra options defined by the concrete runner.
        """
        pass

    @abstractmethod
    def save_checkpoint(self,
                        out_dir,
                        filename_tmpl,
                        save_optimizer=True,
                        meta=None,
                        create_symlink=True):
        """Abstract checkpoint writer; concrete runners must override it.

        Args:
            out_dir: Presumably the directory to write into - TODO confirm.
            filename_tmpl: Presumably a filename template - TODO confirm.
            save_optimizer (bool): Defaults to True; presumably whether the
                optimizer state is stored as well - TODO confirm.
            meta (dict, optional): Defaults to None; presumably extra
                metadata to store - TODO confirm.
            create_symlink (bool): Defaults to True; presumably whether a
                "latest" link is created - TODO confirm.
        """
        pass

    def current_lr(self):
        """Report the learning rate of every parameter group.

        Returns:
            list[float] | dict[str, list[float]]: One value per param group
            for a single optimizer; for a dict of optimizers, a dict with
            the same keys mapping to such lists.

        Raises:
            RuntimeError: If the runner has neither an optimizer nor a dict
                of optimizers.
        """
        optimizer = self.optimizer
        if isinstance(optimizer, torch.optim.Optimizer):
            return [group['lr'] for group in optimizer.param_groups]
        if isinstance(optimizer, dict):
            return {
                name: [group['lr'] for group in optim.param_groups]
                for name, optim in optimizer.items()
            }
        raise RuntimeError(
            'lr is not applicable because optimizer does not exist.')

    def current_momentum(self):
        """Get current momentums.

        For each param group the value is ``group['momentum']`` if present,
        otherwise the first entry of ``group['betas']`` if present,
        otherwise 0.

        Returns:
            list[float] | dict[str, list[float]]: Current momentums of all
            param groups. If the runner has a dict of optimizers, this method
            will return a dict.

        Raises:
            RuntimeError: If the runner has neither an optimizer nor a dict
                of optimizers (same contract as ``current_lr``).
        """

        def _get_momentum(optimizer):
            # One momentum-like value per param group.
            momentums = []
            for group in optimizer.param_groups:
                if 'momentum' in group.keys():
                    momentums.append(group['momentum'])
                elif 'betas' in group.keys():
                    momentums.append(group['betas'][0])
                else:
                    momentums.append(0)
            return momentums

        if isinstance(self.optimizer, torch.optim.Optimizer):
            momentums = _get_momentum(self.optimizer)
        elif isinstance(self.optimizer, dict):
            momentums = dict()
            for name, optim in self.optimizer.items():
                momentums[name] = _get_momentum(optim)
        else:
            # Covers None and any unexpected type; previously an unexpected
            # type fell through and hit an unbound local.
            raise RuntimeError(
                'momentum is not applicable because optimizer does not exist.')
        return momentums

    def register_hook(self, hook, priority='NORMAL'):
        """Add ``hook`` to the runner's priority-ordered hook list.

        The list is kept sorted by ascending priority value (lower value
        means higher priority). A new hook is placed after all existing
        hooks whose priority value is less than or equal to its own, so
        hooks of equal priority keep their registration order.

        Args:
            hook (:obj:`Hook`): The hook to be registered.
            priority (int or str or :obj:`Priority`): Hook priority.

        Raises:
            ValueError: If ``hook`` already has a ``priority`` attribute.
        """
        assert isinstance(hook, Hook)
        if hasattr(hook, 'priority'):
            raise ValueError('"priority" is a reserved attribute for hooks')
        priority = get_priority(priority)
        hook.priority = priority

        # Scan from the back for the last hook that should run before the
        # new one; insert right behind it, or at the front if none exists.
        position = 0
        for idx in reversed(range(len(self._hooks))):
            if priority >= self._hooks[idx].priority:
                position = idx + 1
                break
        self._hooks.insert(position, hook)

    def register_hook_from_cfg(self, hook_cfg):
        """Build a hook from a config dict and register it.

        Args:
            hook_cfg (dict): Hook config. ``'priority'`` is optional and
              defaults to ``'NORMAL'``; the remaining keys are passed to
              ``mmcv.build_from_cfg`` with the ``HOOKS`` registry.

        Note:
            The config is copied first, so the caller's dict is not
            modified. The hook class itself should not take ``type`` or
            ``priority`` arguments in its initializer.
        """
        cfg = hook_cfg.copy()
        hook_priority = cfg.pop('priority', 'NORMAL')
        built_hook = mmcv.build_from_cfg(cfg, HOOKS)
        self.register_hook(built_hook, priority=hook_priority)

    def call_hook(self, fn_name):
        """Invoke one stage callback on every registered hook, in order.

        Args:
            fn_name (str): Name of the hook method to call, such as
                "before_train_epoch". Each hook receives the runner as its
                only argument.
        """
        for registered in self._hooks:
            callback = getattr(registered, fn_name)
            callback(self)

    def get_hook_info(self):
        """Describe which hooks fire at each stage.

        Returns:
            str: For every stage in ``Hook.stages`` that has at least one
            hook, a ``"<stage>:"`` header, one ``(priority) classname`` line
            per hook, and a separator line; sections are joined by newlines.
        """
        per_stage = {stage: [] for stage in Hook.stages}
        for hook in self.hooks:
            # Prefer the symbolic priority name; keep the raw value when it
            # is not a member of `Priority`.
            try:
                priority = Priority(hook.priority).name
            except ValueError:
                priority = hook.priority
            line = f'({priority:<12}) {hook.__class__.__name__:<35}'
            for stage in hook.get_triggered_stages():
                per_stage[stage].append(line)

        sections = []
        for stage in Hook.stages:
            lines = per_stage[stage]
            if not lines:
                continue
            sections.append(
                f'{stage}:\n' + '\n'.join(lines) + '\n -------------------- ')
        return '\n'.join(sections)

    def load_checkpoint(self,
                        filename,
                        resume_mode,
                        map_location='cpu',
                        strict=False,
                        revise_keys=[(r'^module.', '')]):
        """Load a checkpoint into ``self.model`` via the module-level helper.

        Forwards, in this order: ``resume_mode``, ``self.work_dir``,
        ``self.model``, ``filename``, ``map_location``, ``strict``,
        ``self.logger`` and ``revise_keys`` to ``load_checkpoint`` and
        returns its result.

        Args:
            filename: Checkpoint identifier passed through unchanged.
            resume_mode: Passed through unchanged; its meaning is defined
                by the module-level ``load_checkpoint``.
            map_location: Passed through; defaults to ``'cpu'``.
            strict (bool): Passed through; defaults to False.
            revise_keys (list): (pattern, replacement) pairs passed through.

        NOTE(review): the default pattern ``r'^module.'`` has an unescaped
        dot, so it matches any character after ``module`` - confirm.
        NOTE(review): the default list is a shared mutable default; it is
        not mutated in this method.
        """

        return load_checkpoint(
            resume_mode,
            self.work_dir,
            self.model,
            filename,
            map_location,
            strict,
            self.logger,
            revise_keys=revise_keys)

    def resume(self,
               resume, resume_mode,
               reset_lr, lr,
               resume_optimizer=True,
               map_location='default'):
        """Restore epoch, iteration, meta and optimizer state from a checkpoint.

        Args:
            resume: Checkpoint identifier forwarded to ``load_checkpoint``.
            resume_mode: Forwarded to ``load_checkpoint``.
            reset_lr (bool): Whether to overwrite the restored learning
                rates with ``lr``.
            lr (float): New learning rate; only applied when ``lr > 0`` and
                ``reset_lr`` is truthy.
            resume_optimizer (bool): Whether to restore optimizer state.
            map_location: ``'default'`` maps storages to the current CUDA
                device when CUDA is available (otherwise the
                ``load_checkpoint`` default is used); any other value is
                forwarded as is.

        Raises:
            TypeError: If optimizer state should be restored but
                ``self.optimizer`` is neither an ``Optimizer`` nor a dict.
        """
        if map_location == 'default':
            if torch.cuda.is_available():
                device_id = torch.cuda.current_device()
                checkpoint = self.load_checkpoint(
                    resume, resume_mode,
                    map_location=lambda storage, loc: storage.cuda(device_id))
            else:
                checkpoint = self.load_checkpoint(resume, resume_mode)
        else:
            checkpoint = self.load_checkpoint(
                resume, resume_mode, map_location=map_location)

        self._epoch = checkpoint['meta']['epoch']
        # In eval mode no further epochs should run after resuming.
        # `opt_cfg` may legitimately be None (it is the constructor default).
        if self.opt_cfg is not None and self.opt_cfg['eval']:
            self._max_epochs = self._epoch
        self._iter = checkpoint['meta']['iter']
        if self.meta is None:
            self.meta = {}
        self.meta.setdefault('hook_msgs', {})
        # load `last_ckpt`, `best_score`, `best_ckpt`, etc. for hook messages
        self.meta['hook_msgs'].update(checkpoint['meta'].get('hook_msgs', {}))

        # Re-calculate the number of iterations when resuming
        # models with different number of GPUs
        if 'config' in checkpoint['meta']:
            config = mmcv.Config.fromstring(
                checkpoint['meta']['config'], file_format='.py')
            previous_gpu_ids = config.get('gpu_ids', None)
            if previous_gpu_ids and len(previous_gpu_ids) > 0 and len(
                    previous_gpu_ids) != self.world_size:
                self._iter = int(self._iter * len(previous_gpu_ids) /
                                 self.world_size)
                self.logger.info('the iteration number is changed due to '
                                 'change of GPU number')

        # resume meta information meta
        self.meta = checkpoint['meta']

        if 'optimizer' in checkpoint and resume_optimizer:
            if isinstance(self.optimizer, Optimizer):
                self.optimizer.load_state_dict(checkpoint['optimizer'])
                restored = [self.optimizer]
            elif isinstance(self.optimizer, dict):
                for k in self.optimizer.keys():
                    self.optimizer[k].load_state_dict(
                        checkpoint['optimizer'][k])
                restored = list(self.optimizer.values())
            else:
                raise TypeError(
                    'Optimizer should be dict or torch.optim.Optimizer '
                    f'but got {type(self.optimizer)}')

            # Apply the learning-rate override to EVERY restored optimizer;
            # previously only the last entry of a dict was updated.
            if lr > 0 and reset_lr:
                for optim in restored:
                    for param_group in optim.param_groups:
                        param_group['lr'] = lr
                self.logger.info("loaded checkpoint.optimizer")

        self.logger.info('resumed epoch %d, iter %d', self.epoch, self.iter)

    def register_lr_hook(self, lr_config):
        """Register the learning-rate updater hook with priority VERY_HIGH.

        Args:
            lr_config (dict | object | None): ``None`` registers nothing.
                A dict must contain a ``'policy'`` key; the hook type
                ``<Policy>LrUpdaterHook`` is derived from it and the
                remaining keys are passed to ``mmcv.build_from_cfg``.
                Anything else is registered as the hook itself.

        The caller's dict is left untouched, so the same config can be
        registered more than once.
        """
        if lr_config is None:
            return
        elif isinstance(lr_config, dict):
            assert 'policy' in lr_config
            # Work on a shallow copy: popping 'policy' from the caller's dict
            # would make a second registration with the same config fail the
            # assertion above.
            lr_config = lr_config.copy()
            policy_type = lr_config.pop('policy')
            # If the type of policy is all in lower case, e.g., 'cyclic',
            # then its first letter will be capitalized, e.g., to be 'Cyclic'.
            # This is for the convenient usage of Lr updater.
            # Since this is not applicable for `CosineAnnealingLrUpdater`,
            # the string will not be changed if it contains capital letters.
            if policy_type == policy_type.lower():
                policy_type = policy_type.title()
            lr_config['type'] = policy_type + 'LrUpdaterHook'
            hook = mmcv.build_from_cfg(lr_config, HOOKS)
        else:
            hook = lr_config
        self.register_hook(hook, priority='VERY_HIGH')

    def register_momentum_hook(self, momentum_config):
        """Register the momentum updater hook with priority HIGH.

        Args:
            momentum_config (dict | object | None): ``None`` registers
                nothing. A dict must contain a ``'policy'`` key; the hook
                type ``<Policy>MomentumUpdaterHook`` is derived from it and
                the remaining keys are passed to ``mmcv.build_from_cfg``.
                Anything else is registered as the hook itself.

        The caller's dict is left untouched, so the same config can be
        registered more than once.
        """
        if momentum_config is None:
            return
        if isinstance(momentum_config, dict):
            assert 'policy' in momentum_config
            # Work on a shallow copy: popping 'policy' from the caller's dict
            # would make a second registration with the same config fail the
            # assertion above.
            momentum_config = momentum_config.copy()
            policy_type = momentum_config.pop('policy')
            # If the type of policy is all in lower case, e.g., 'cyclic',
            # then its first letter will be capitalized, e.g., to be 'Cyclic'.
            # This is for the convenient usage of momentum updater.
            # Since this is not applicable for
            # `CosineAnnealingMomentumUpdater`,
            # the string will not be changed if it contains capital letters.
            if policy_type == policy_type.lower():
                policy_type = policy_type.title()
            momentum_config['type'] = policy_type + 'MomentumUpdaterHook'
            hook = mmcv.build_from_cfg(momentum_config, HOOKS)
        else:
            hook = momentum_config
        self.register_hook(hook, priority='HIGH')

    def register_optimizer_hook(self, optimizer_config):
        """Register the optimizer hook with priority ABOVE_NORMAL.

        ``None`` registers nothing. A dict gets ``'type'`` defaulted to
        ``'OptimizerHook'`` (in place) and is built through
        ``mmcv.build_from_cfg``; any other object is registered as is.
        """
        if optimizer_config is None:
            return
        hook = optimizer_config
        if isinstance(hook, dict):
            hook.setdefault('type', 'OptimizerHook')
            hook = mmcv.build_from_cfg(hook, HOOKS)
        self.register_hook(hook, priority='ABOVE_NORMAL')

    def register_checkpoint_hook(self, checkpoint_config):
        """Register the checkpoint hook with priority NORMAL.

        ``None`` registers nothing. A dict gets ``'type'`` defaulted to
        ``'CheckpointHook'`` (in place) and is built through
        ``mmcv.build_from_cfg``; any other object is registered as is.
        """
        if checkpoint_config is None:
            return
        hook = checkpoint_config
        if isinstance(hook, dict):
            hook.setdefault('type', 'CheckpointHook')
            hook = mmcv.build_from_cfg(hook, HOOKS)
        self.register_hook(hook, priority='NORMAL')

    def register_logger_hooks(self, log_config):
        """Build and register every logger hook listed in ``log_config``.

        ``None`` registers nothing. Otherwise ``log_config['interval']`` is
        supplied as the default ``interval`` argument for each entry of
        ``log_config['hooks']``, and each built hook is registered with
        priority VERY_LOW.
        """
        if log_config is None:
            return
        interval = log_config['interval']
        hook_cfgs = log_config['hooks']
        for hook_cfg in hook_cfgs:
            built = mmcv.build_from_cfg(
                hook_cfg, HOOKS, default_args={'interval': interval})
            self.register_hook(built, priority='VERY_LOW')

    def register_timer_hook(self, timer_config):
        """Register the timer hook with priority LOW.

        ``None`` registers nothing. A dict is deep-copied before being built
        through ``mmcv.build_from_cfg``, so the passed config (possibly a
        shared default) is never handed to the builder directly; any other
        object is registered as is.
        """
        if timer_config is None:
            return
        if not isinstance(timer_config, dict):
            hook = timer_config
        else:
            private_cfg = copy.deepcopy(timer_config)
            hook = mmcv.build_from_cfg(private_cfg, HOOKS)
        self.register_hook(hook, priority='LOW')

    def register_custom_hooks(self, custom_config):
        """Register user supplied hooks.

        ``None`` registers nothing. A single item is treated as a one-element
        list. Dict items are passed to ``self.register_hook_from_cfg``; every
        other item is registered directly with priority NORMAL.
        """
        if custom_config is None:
            return

        if isinstance(custom_config, list):
            entries = custom_config
        else:
            entries = [custom_config]

        for entry in entries:
            if not isinstance(entry, dict):
                self.register_hook(entry, priority='NORMAL')
            else:
                self.register_hook_from_cfg(entry)

    def register_profiler_hook(self, profiler_config):
        """Register the profiler hook using ``register_hook``'s default priority.

        ``None`` registers nothing. A dict gets ``'type'`` defaulted to
        ``'ProfilerHook'`` (in place) and is built through
        ``mmcv.build_from_cfg``; any other object is registered as is.
        """
        if profiler_config is None:
            return
        hook = profiler_config
        if isinstance(hook, dict):
            hook.setdefault('type', 'ProfilerHook')
            hook = mmcv.build_from_cfg(hook, HOOKS)
        self.register_hook(hook)

    def register_training_hooks(self,
                                lr_config,
                                optimizer_config=None,
                                checkpoint_config=None,
                                log_config=None,
                                momentum_config=None,
                                timer_config=dict(type='IterTimerHook'),
                                custom_hooks_config=None):
        """Register the default training hooks followed by the custom ones.

        Each config is forwarded to its dedicated ``register_*`` method, in
        this order:

        +---------------------+--------------------------+
        | Config              | Registration method      |
        +=====================+==========================+
        | lr_config           | register_lr_hook         |
        +---------------------+--------------------------+
        | momentum_config     | register_momentum_hook   |
        +---------------------+--------------------------+
        | optimizer_config    | register_optimizer_hook  |
        +---------------------+--------------------------+
        | checkpoint_config   | register_checkpoint_hook |
        +---------------------+--------------------------+
        | timer_config        | register_timer_hook      |
        +---------------------+--------------------------+
        | log_config          | register_logger_hooks    |
        +---------------------+--------------------------+
        | custom_hooks_config | register_custom_hooks    |
        +---------------------+--------------------------+

        The priority of each hook is chosen by the respective registration
        method. Custom hooks are registered last.
        """
        registration_plan = (
            (self.register_lr_hook, lr_config),
            (self.register_momentum_hook, momentum_config),
            (self.register_optimizer_hook, optimizer_config),
            (self.register_checkpoint_hook, checkpoint_config),
            (self.register_timer_hook, timer_config),
            (self.register_logger_hooks, log_config),
            (self.register_custom_hooks, custom_hooks_config),
        )
        for register, config in registration_plan:
            register(config)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/builder.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import copy

from ..utils import Registry

RUNNERS = Registry('runner')
RUNNER_BUILDERS = Registry('runner builder')


def build_runner_constructor(cfg):
    """Build a runner constructor from ``cfg`` via the ``RUNNER_BUILDERS`` registry."""
    constructor = RUNNER_BUILDERS.build(cfg)
    return constructor


def build_runner(cfg, default_args=None):
    """Build a runner from ``cfg``.

    ``cfg`` is deep-copied first, so the caller's config is not modified.
    The optional ``'constructor'`` entry (default
    ``'DefaultRunnerConstructor'``) selects the runner constructor, which
    receives the remaining config as ``runner_cfg`` together with
    ``default_args``; calling the constructor yields the runner.
    """
    remaining_cfg = copy.deepcopy(cfg)
    constructor_name = remaining_cfg.pop('constructor',
                                         'DefaultRunnerConstructor')
    constructor_cfg = {
        'type': constructor_name,
        'runner_cfg': remaining_cfg,
        'default_args': default_args,
    }
    return build_runner_constructor(constructor_cfg)()


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/checkpoint.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import io
import os
import os.path as osp
import pkgutil
import re
import time
import warnings
from collections import OrderedDict
from importlib import import_module
from tempfile import TemporaryDirectory
from glob import glob
import torch
import torchvision
from torch.optim import Optimizer

import mmcv
from ..fileio import FileClient
from ..fileio import load as load_file
from ..parallel import is_module_wrapper
from ..utils import load_url, mkdir_or_exist, print_log
from .dist_utils import get_dist_info

ENV_MMCV_HOME = 'MMCV_HOME'
ENV_XDG_CACHE_HOME = 'XDG_CACHE_HOME'
DEFAULT_CACHE_DIR = '~/.cache'


def _get_mmcv_home():
    mmcv_home = os.path.expanduser(
        os.getenv(
            ENV_MMCV_HOME,
            os.path.join(
                os.getenv(ENV_XDG_CACHE_HOME, DEFAULT_CACHE_DIR), 'mmcv')))

    mkdir_or_exist(mmcv_home)
    return mmcv_home


def load_state_dict(module, state_dict, strict=False, logger=None):
    """Load state_dict to a module.

    This method is modified from :meth:`torch.nn.Module.load_state_dict`.
    Default value for ``strict`` is set to ``False`` and the message for
    param mismatch will be shown (on rank 0) even if strict is False.

    Two layouts are supported:

    * ``module`` provides ``_load_from_state_dict`` (e.g. an ``nn.Module``):
      ``state_dict`` is the flat mapping of parameter names to weights.
    * otherwise ``module.model`` must be a mapping ``name -> sub-module`` and
      ``state_dict`` a mapping ``name -> flat state dict``; every sub-module
      is loaded, together with all of its children, from its own entry.

    Args:
        module (Module): Module that receives the state_dict.
        state_dict (OrderedDict): Weights.
        strict (bool): whether to strictly enforce that the keys
            in :attr:`state_dict` match the keys returned by this module's
            :meth:`~torch.nn.Module.state_dict` function. Default: ``False``.
        logger (:obj:`logging.Logger`, optional): Logger to log the error
            message. If not specified, print function will be used.

    Raises:
        RuntimeError: if ``strict`` is True and the keys do not match
            (raised on rank 0 only).
    """
    unexpected_keys = []
    all_missing_keys = []
    err_msg = []

    def _copy_keep_metadata(weights):
        # Load from a shallow copy so the caller's mapping is never the one
        # handed to `_load_from_state_dict`; `_metadata` is an attribute and
        # is not carried over by `copy()`, hence re-attached.
        metadata = getattr(weights, '_metadata', None)
        weights = weights.copy()
        if metadata is not None:
            weights._metadata = metadata
        return weights, metadata

    # use _load_from_state_dict to enable checkpoint version control
    def load(mod, weights, metadata, prefix=''):
        # recursively check parallel module in case that the model has a
        # complicated structure, e.g., nn.Module(nn.Module(DDP))
        if is_module_wrapper(mod):
            mod = mod.module
        local_metadata = {} if metadata is None else metadata.get(
            prefix[:-1], {})
        mod._load_from_state_dict(weights, prefix, local_metadata, True,
                                  all_missing_keys, unexpected_keys,
                                  err_msg)
        for child_name, child in mod._modules.items():
            if child is not None:
                # Children must read from the same flat state dict (and
                # metadata) as their parent, not from the outer mapping.
                load(child, weights, metadata, prefix + child_name + '.')

    if hasattr(module, '_load_from_state_dict'):
        weights, metadata = _copy_keep_metadata(state_dict)
        load(module, weights, metadata)
    else:
        for name, sub_module in module.model.items():
            weights, metadata = _copy_keep_metadata(state_dict[name])
            load(sub_module, weights, metadata)
    load = None  # break load->load reference cycle

    # ignore "num_batches_tracked" of BN layers
    missing_keys = [
        key for key in all_missing_keys if 'num_batches_tracked' not in key
    ]

    if unexpected_keys:
        err_msg.append('unexpected key in source '
                       f'state_dict: {", ".join(unexpected_keys)}\n')
    if missing_keys:
        err_msg.append(
            f'missing keys in source state_dict: {", ".join(missing_keys)}\n')

    rank, _ = get_dist_info()
    if len(err_msg) > 0 and rank == 0:
        err_msg.insert(
            0, 'The model and loaded state dict do not match exactly\n')
        err_msg = '\n'.join(err_msg)
        if strict:
            raise RuntimeError(err_msg)
        elif logger is not None:
            logger.warning(err_msg)
        else:
            print(err_msg)


def get_torchvision_models():
    """Collect the ``model_urls`` dicts exposed by ``torchvision.models``.

    Every non-package module found under ``torchvision.models`` is imported;
    modules that define a ``model_urls`` attribute contribute its entries.

    Returns:
        dict: merged ``model_urls`` of all scanned modules.
    """
    collected = dict()
    for _, module_name, is_package in pkgutil.walk_packages(
            torchvision.models.__path__):
        if not is_package:
            zoo_module = import_module(f'torchvision.models.{module_name}')
            if hasattr(zoo_module, 'model_urls'):
                collected.update(zoo_module.model_urls)
    return collected


def get_external_models():
    """Return the open-mmlab model name -> URL/path table.

    The table shipped with the package (``model_zoo/open_mmlab.json``) is
    loaded first; entries from ``open_mmlab.json`` in the mmcv home
    directory, if that file exists, override or extend it.

    Returns:
        dict: the merged table.
    """
    home = _get_mmcv_home()
    urls = load_file(osp.join(mmcv.__path__[0], 'model_zoo/open_mmlab.json'))
    assert isinstance(urls, dict)
    user_json = osp.join(home, 'open_mmlab.json')
    if not osp.exists(user_json):
        return urls

    user_urls = load_file(user_json)
    assert isinstance(user_urls, dict)
    urls.update(user_urls)
    return urls


def get_mmcls_models():
    """Load and return the content of the packaged ``model_zoo/mmcls.json``."""
    package_root = mmcv.__path__[0]
    return load_file(osp.join(package_root, 'model_zoo/mmcls.json'))


def get_deprecated_model_names():
    """Load the packaged ``model_zoo/deprecated.json`` table.

    Returns:
        dict: the loaded table (asserted to be a dict).
    """
    json_path = osp.join(mmcv.__path__[0], 'model_zoo/deprecated.json')
    table = load_file(json_path)
    assert isinstance(table, dict)
    return table


def _process_mmcls_checkpoint(checkpoint):
    if 'state_dict' in checkpoint:
        state_dict = checkpoint['state_dict']
    else:
        # Some checkpoints converted from 3rd-party repo don't
        # have the "state_dict" key.
        state_dict = checkpoint
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        if k.startswith('backbone.'):
            new_state_dict[k[9:]] = v
    new_checkpoint = dict(state_dict=new_state_dict)

    return new_checkpoint


class CheckpointLoader:
    """Registry that maps path prefixes (schemes) to checkpoint loaders."""

    # prefix pattern -> loader callable, shared by the whole class
    _schemes = {}

    @classmethod
    def _register_scheme(cls, prefixes, loader, force=False):
        """Store ``loader`` under each prefix and re-sort the registry.

        Raises:
            KeyError: if a prefix is already registered and ``force`` is
                False.
        """
        if isinstance(prefixes, str):
            prefixes = [prefixes]
        else:
            assert isinstance(prefixes, (list, tuple))
        for prefix in prefixes:
            if prefix in cls._schemes and not force:
                raise KeyError(
                    f'{prefix} is already registered as a loader backend, '
                    'add "force=True" if you want to override it')
            cls._schemes[prefix] = loader
        # Keep the registry in reverse lexicographic order of the prefixes;
        # this puts the empty (match-everything) prefix last.
        ordered = sorted(
            cls._schemes.items(), key=lambda item: item[0], reverse=True)
        cls._schemes = OrderedDict(ordered)

    @classmethod
    def register_scheme(cls, prefixes, loader=None, force=False):
        """Register a loader to CheckpointLoader.

        Usable as a plain class method (pass ``loader``) or as a decorator
        (omit ``loader``).

        Args:
            prefixes (str or list[str] or tuple[str]):
                The prefix(es) of the registered loader.
            loader (function, optional): The loader function to be registered.
                When this method is used as a decorator, loader is None.
                Defaults to None.
            force (bool, optional): Whether to override the loader
                if the prefix has already been registered. Defaults to False.
        """
        if loader is None:

            def _register(loader_cls):
                cls._register_scheme(prefixes, loader_cls, force=force)
                return loader_cls

            return _register

        cls._register_scheme(prefixes, loader, force=force)
        return None

    @classmethod
    def _get_checkpoint_loader(cls, path):
        """Find the first registered loader whose prefix matches ``path``.

        Args:
            path (str): checkpoint path

        Returns:
            callable or None: the matching loader; ``None`` when no
            registered prefix matches.
        """
        # Prefixes are regular expressions anchored at the start of the
        # path, so that both 's3://path' and 'open-mmlab:s3://path' can
        # resolve to the same loader.
        for pattern, loader in cls._schemes.items():
            if re.match(pattern, path) is not None:
                return loader
        return None

    @classmethod
    def load_checkpoint(cls, filename, map_location=None, logger=None):
        """Load a checkpoint with the loader selected by its path prefix.

        Args:
            filename (str): checkpoint file name with given prefix
            map_location (str, optional): Same as :func:`torch.load`.
                Default: None
            logger (:mod:`logging.Logger`, optional): The logger for message.
                Default: None

        Returns:
            dict or OrderedDict: The loaded checkpoint.
        """
        loader = cls._get_checkpoint_loader(filename)
        # Drop the first 10 characters of the loader name (the length of
        # 'load_from_') for the log message.
        scheme_name = loader.__name__[10:]
        print_log(
            f'load checkpoint from {scheme_name} path: {filename}',
            logger=logger)
        return loader(filename, map_location)


@CheckpointLoader.register_scheme(prefixes='')
def load_from_local(filename, map_location):
    """Load a checkpoint from a local file path.

    Registered under the empty prefix. The file is read with
    :func:`torch.load`, so only checkpoints from trusted sources should be
    loaded.

    Args:
        filename (str): local checkpoint file path (``~`` is expanded)
        map_location (str, optional): Same as :func:`torch.load`.

    Returns:
        dict or OrderedDict: The loaded checkpoint.

    Raises:
        FileNotFoundError: if the expanded path is not an existing file.
    """
    local_path = osp.expanduser(filename)
    if osp.isfile(local_path):
        return torch.load(local_path, map_location=map_location)
    raise FileNotFoundError(f'{local_path} can not be found.')


@CheckpointLoader.register_scheme(prefixes=('http://', 'https://'))
def load_from_http(filename, map_location=None, model_dir=None):
    """Load a checkpoint through an HTTP or HTTPS URL.

    Rank 0 calls ``load_url`` first. When more than one process is running,
    all ranks then synchronize on a barrier and the remaining ranks call
    ``load_url`` afterwards (presumably served from the files rank 0 already
    downloaded -- depends on ``load_url``).

    Args:
        filename (str): checkpoint URL
        map_location (str, optional): Same as :func:`torch.load`.
        model_dir (string, optional): directory in which to save the object,
            Default: None

    Returns:
        dict or OrderedDict: The loaded checkpoint.
    """
    rank, world_size = get_dist_info()

    def _fetch():
        return load_url(
            filename, model_dir=model_dir, map_location=map_location)

    if rank == 0:
        checkpoint = _fetch()
    if world_size > 1:
        torch.distributed.barrier()
        if rank > 0:
            checkpoint = _fetch()
    return checkpoint


@CheckpointLoader.register_scheme(prefixes='pavi://')
def load_from_pavi(filename, map_location=None):
    """Load a checkpoint addressed by a ``pavi://`` path.

    The model is downloaded into a fresh temporary directory on every call
    and read from there with :func:`torch.load`.

    Args:
        filename (str): checkpoint file path with pavi prefix
        map_location (str, optional): Same as :func:`torch.load`.
          Default: None

    Returns:
        dict or OrderedDict: The loaded checkpoint.

    Raises:
        ImportError: if the ``pavi`` package is not installed.
    """
    scheme = 'pavi://'
    assert filename.startswith(scheme), \
        f'Expected filename startswith `pavi://`, but get {filename}'
    model_path = filename[len(scheme):]

    try:
        from pavi import modelcloud
    except ImportError:
        raise ImportError(
            'Please install pavi to load checkpoint from modelcloud.')

    cloud_model = modelcloud.get(model_path)
    with TemporaryDirectory() as scratch_dir:
        local_copy = osp.join(scratch_dir, cloud_model.name)
        cloud_model.download(local_copy)
        # Must be read before the temporary directory is removed.
        return torch.load(local_copy, map_location=map_location)


@CheckpointLoader.register_scheme(prefixes=r'(\S+\:)?s3://')
def load_from_ceph(filename, map_location=None, backend='petrel'):
    """Load a checkpoint addressed by an ``s3://`` path.

    The file content is fetched through a ``FileClient`` and deserialized
    from memory with :func:`torch.load`.

    Note:
        Since v1.4.1, the registered scheme prefixes have been enhanced to
        support bucket names in the path prefix, e.g. 's3://xx.xx/xx.path',
        'bucket1:s3://xx.xx/xx.path'.

    Args:
        filename (str): checkpoint file path with s3 prefix
        map_location (str, optional): Same as :func:`torch.load`.
        backend (str, optional): The storage backend type. Options are 'ceph',
            'petrel'. Default: 'petrel'.

    .. warning::
        :class:`mmcv.fileio.file_client.CephBackend` will be deprecated,
        please use :class:`mmcv.fileio.file_client.PetrelBackend` instead.

    Returns:
        dict or OrderedDict: The loaded checkpoint.

    Raises:
        ValueError: if ``backend`` is neither 'ceph' nor 'petrel'.
    """
    supported = ['ceph', 'petrel']
    if backend not in supported:
        raise ValueError(f'Load from Backend {backend} is not supported.')

    if backend == 'ceph':
        warnings.warn(
            'CephBackend will be deprecated, please use PetrelBackend instead',
            DeprecationWarning)

    # Both backends serve the 's3://' prefix. If the requested one cannot be
    # instantiated because of a missing import, fall back to the other one.
    try:
        client = FileClient(backend=backend)
    except ImportError:
        alternative = [name for name in supported if name != backend][0]
        client = FileClient(backend=alternative)

    with io.BytesIO(client.get(filename)) as stream:
        return torch.load(stream, map_location=map_location)


@CheckpointLoader.register_scheme(prefixes=('modelzoo://', 'torchvision://'))
def load_from_torchvision(filename, map_location=None):
    """Load a torchvision checkpoint named by a ``torchvision://`` path.

    The deprecated ``modelzoo://`` prefix is still accepted and triggers a
    ``DeprecationWarning``. The model name after the prefix is looked up in
    the table from ``get_torchvision_models`` and downloaded over HTTP.

    Args:
        filename (str): checkpoint file path with modelzoo or
            torchvision prefix
        map_location (str, optional): Same as :func:`torch.load`.

    Returns:
        dict or OrderedDict: The loaded checkpoint.
    """
    model_urls = get_torchvision_models()
    legacy_scheme = 'modelzoo://'
    if not filename.startswith(legacy_scheme):
        model_name = filename[len('torchvision://'):]
    else:
        warnings.warn(
            'The URL scheme of "modelzoo://" is deprecated, please '
            'use "torchvision://" instead', DeprecationWarning)
        model_name = filename[len(legacy_scheme):]
    return load_from_http(model_urls[model_name], map_location=map_location)


@CheckpointLoader.register_scheme(prefixes=('open-mmlab://', 'openmmlab://'))
def load_from_openmmlab(filename, map_location=None):
    """Load a checkpoint named by an ``open-mmlab://`` or ``openmmlab://`` path.

    The model name after the prefix is first mapped through the deprecated
    names table (with a ``DeprecationWarning`` when it is listed there) and
    then resolved through the table from ``get_external_models``. An HTTP(S)
    entry is downloaded; any other entry is treated as a path relative to
    the mmcv home directory.

    Args:
        filename (str): checkpoint file path with open-mmlab or
            openmmlab prefix
        map_location (str, optional): Same as :func:`torch.load`.
          Default: None

    Returns:
        dict or OrderedDict: The loaded checkpoint.

    Raises:
        FileNotFoundError: if the resolved local file does not exist.
    """
    model_urls = get_external_models()
    if filename.startswith('open-mmlab://'):
        prefix_str = 'open-mmlab://'
    else:
        prefix_str = 'openmmlab://'
    model_name = filename[len(prefix_str):]

    deprecated_urls = get_deprecated_model_names()
    if model_name in deprecated_urls:
        successor = deprecated_urls[model_name]
        warnings.warn(
            f'{prefix_str}{model_name} is deprecated in favor '
            f'of {prefix_str}{successor}',
            DeprecationWarning)
        model_name = successor

    model_url = model_urls[model_name]
    # check if is url
    if model_url.startswith(('http://', 'https://')):
        return load_from_http(model_url, map_location=map_location)

    local_path = osp.join(_get_mmcv_home(), model_url)
    if not osp.isfile(local_path):
        raise FileNotFoundError(f'{local_path} can not be found.')
    return torch.load(local_path, map_location=map_location)


@CheckpointLoader.register_scheme(prefixes='mmcls://')
def load_from_mmcls(filename, map_location=None):
    """Load an mmcls checkpoint named by an ``mmcls://`` path.

    The model name after the prefix is looked up in the table from
    ``get_mmcls_models``, downloaded over HTTP and post-processed by
    ``_process_mmcls_checkpoint``.

    Args:
        filename (str): checkpoint file path with mmcls prefix
        map_location (str, optional): Same as :func:`torch.load`.

    Returns:
        dict or OrderedDict: The loaded checkpoint.
    """
    model_urls = get_mmcls_models()
    model_name = filename[len('mmcls://'):]
    raw_checkpoint = load_from_http(
        model_urls[model_name], map_location=map_location)
    return _process_mmcls_checkpoint(raw_checkpoint)


def _load_checkpoint(filename, map_location=None, logger=None):
    """Load checkpoint from somewhere (modelzoo, file, url).

    Args:
        filename (str): Accept local filepath, URL, ``torchvision://xxx``,
            ``open-mmlab://xxx``. Please refer to ``docs/model_zoo.md`` for
            details.
        map_location (str, optional): Same as :func:`torch.load`.
           Default: None.
        logger (:mod:`logging.Logger`, optional): The logger for error message.
           Default: None

    Returns:
        dict or OrderedDict: The loaded checkpoint. It can be either an
           OrderedDict storing model weights or a dict containing other
           information, which depends on the checkpoint.
    """
    return CheckpointLoader.load_checkpoint(filename, map_location, logger)


def _load_checkpoint_with_prefix(prefix, filename, map_location=None):
    """Load partial pretrained model with specific prefix.

    Args:
        prefix (str): The prefix of sub-module.
        filename (str): Accept local filepath, URL, ``torchvision://xxx``,
            ``open-mmlab://xxx``. Please refer to ``docs/model_zoo.md`` for
            details.
        map_location (str | None): Same as :func:`torch.load`. Default: None.

    Returns:
        dict or OrderedDict: The loaded checkpoint.
    """

    checkpoint = _load_checkpoint(filename, map_location=map_location)

    if 'state_dict' in checkpoint:
        state_dict = checkpoint['state_dict']
    else:
        state_dict = checkpoint
    if not prefix.endswith('.'):
        prefix += '.'
    prefix_len = len(prefix)

    state_dict = {
        k[prefix_len:]: v
        for k, v in state_dict.items() if k.startswith(prefix)
    }

    assert state_dict, f'{prefix} is not in the pretrained model'
    return state_dict


def get_checkpoint_dir(OUT_DIR):
    """Return the directory used for storing checkpoints (``OUT_DIR`` itself)."""
    checkpoint_dir = os.path.join(OUT_DIR)
    return checkpoint_dir


def get_last_checkpoint(OUT_DIR, NAME_PREFIX="amp_model_best", logger=None):
    """Return the path of the last checkpoint stored in ``OUT_DIR``.

    All entries of the checkpoint directory whose name starts with
    ``NAME_PREFIX`` are collected and sorted lexicographically; the last one
    is returned. Note that a lexicographic order only matches the epoch
    order when the epoch numbers in the file names have the same width.

    Args:
        OUT_DIR (str): directory that is searched for checkpoints.
        NAME_PREFIX (str): file name prefix of the checkpoints.
            Default: "amp_model_best".
        logger (:mod:`logging.Logger`, optional): logger for the message
            announcing the selected file. Default: None.

    Returns:
        str or None: path of the selected checkpoint, or ``None`` when no
        file matches.
    """
    checkpoint_dir = get_checkpoint_dir(OUT_DIR)
    checkpoints = glob(os.path.join(checkpoint_dir, f"{NAME_PREFIX}*"))
    if len(checkpoints) == 0:
        return None
    # glob already returns paths that include `checkpoint_dir`; joining the
    # directory a second time would yield 'dir/dir/name' for relative
    # directories.
    last_checkpoint = sorted(checkpoints)[-1]
    print_log(f"loading last_checkpoint file: {last_checkpoint}", logger=logger)
    return last_checkpoint


def get_best_k_model(OUT_DIR, indicator, _NAME_PREFIX="ckpt_ep_"):
    """Parse the record file that lists the best checkpoints.

    Every non-blank line is expected to look like
    ``<file name>, {'<metric>': <value>, ...}``: the first comma separated
    field is the checkpoint file name, each further field a ``key:value``
    pair (braces, quotes and spaces are ignored).

    Args:
        OUT_DIR (str): path of the record file (a file, despite the name).
        indicator: unused; kept for interface compatibility.
        _NAME_PREFIX (str): unused; kept for interface compatibility.

    Returns:
        tuple(list, list): ``best_k_models`` with one
        ``[epoch, metrics, file_name]`` entry per record, where ``epoch`` is
        the file name without ``.pth.tar`` and ``model_best_`` and
        ``metrics`` maps metric names to their (string) values, and
        ``best_fname`` with the file names only. Both are empty, and a
        warning is issued, when the file is missing or holds no record.

    Raises:
        ValueError: if a non-empty metric field contains no ``:``.
    """
    best_k_models = []
    best_fname = []
    if os.path.isfile(OUT_DIR):
        with open(OUT_DIR, 'r') as f:
            # Iterate lazily instead of reading the whole file at once.
            for raw_line in f:
                if not raw_line.strip():
                    # A blank line carries no record; do not turn it into
                    # an empty entry.
                    continue
                fields = raw_line.strip('\n').split(',')
                metric = {}
                for field in fields[1:]:
                    field = re.sub(r"[{}'' ]", "", field)
                    if not field:
                        # e.g. produced by a trailing comma
                        continue
                    k, v = field.split(':', 1)
                    metric[str(k)] = v
                epoch = fields[0].replace('.pth.tar', '')
                epoch = epoch.replace('model_best_', '')

                # The file name is stored twice on purpose: inside the entry
                # it is used to delete surplus models when save_top_k
                # triggers; the separate flat list is used to load the best
                # model, because indexing the nested list is inconvenient.
                best_k_models.append([epoch, metric, fields[0]])
                best_fname.append(fields[0])

    if len(best_k_models) == 0:
        msg = f"checkpoint in directory {OUT_DIR} don't exist or is empty"
        warnings.warn(msg)

    return best_k_models, best_fname


def load_checkpoint(resume_mode,
                    work_dir,
                    model,
                    filename,
                    map_location=None,
                    strict=False,
                    logger=None,
                    revise_keys=[(r'^module\.', '')]):
    """Load checkpoint from a file and copy its weights into ``model``.

    Args:
        resume_mode (str): Case-insensitive. ``'best'`` replaces ``filename``
            by the lexicographically last file name recorded in
            ``<work_dir>/checkpoint`` (joined with ``work_dir``); ``'auto'``
            replaces it by the result of ``get_last_checkpoint(work_dir)``
            when one is found. Any other value keeps ``filename``.
        work_dir (str): Directory searched in the ``'best'`` and ``'auto'``
            modes.
        model (Module): Module to load checkpoint. Either an object with a
            ``train`` attribute (treated as a single module), or a wrapper
            whose ``model`` attribute is a module or a dict
            ``name -> module``.
        filename (str): Checkpoint path. It must be an existing local file;
            otherwise a default meta dict is returned (see Returns).
        map_location (str): Same as :func:`torch.load`.
        strict (bool): Whether to allow different params for the model and
            checkpoint.
        logger (:mod:`logging.Logger` or None): The logger for error message.
        revise_keys (list): A list of customized keywords to modify the
            state_dict in checkpoint. Each item is a (pattern, replacement)
            pair of the regular expression operations. Default: strip
            the prefix 'module.' by [(r'^module\\.', '')].

    Returns:
        dict or OrderedDict: The loaded checkpoint. When ``filename`` is not
        an existing file, ``{'meta': {...}}`` with ``epoch``, ``iter`` and
        ``best_epoch`` set to 1 and ``best_metric`` set to None. When
        ``model`` has a ``train`` attribute, the loaded checkpoint wrapped as
        ``{'model': checkpoint}``.

    Raises:
        RuntimeError: if the loaded checkpoint is not a dict.
    """
    ################
    # Only read the checkpoint from work_dir; used to resume model training.
    resume_mode = resume_mode.lower()
    if resume_mode == 'best':
        _, best_k_fname = get_best_k_model(os.path.join(work_dir, "checkpoint"), None)
        if len(best_k_fname) > 0:
            best_k_model = sorted(best_k_fname)[-1]
            filename = os.path.join(work_dir, best_k_model)
        else:
            print_log("loading best model failed, maybe it's from scratch currently.", logger=logger)

    elif resume_mode == 'auto':
        ckpt = get_last_checkpoint(work_dir)
        if ckpt is not None:
            filename = ckpt
    ################
    # NOTE(review): this local-file check runs before the scheme based
    # loaders, so URL-style filenames (http://, s3://, ...) take the
    # "no checkpoint" path below -- confirm this is intended.
    if not os.path.isfile(filename):
        print_log(f"no checkpoint found at {filename}", logger=logger)
        return {'meta': {'epoch': 1,
                         'iter': 1,
                         'best_epoch': 1,
                         'best_metric': None}}
    ################

    checkpoint = _load_checkpoint(filename, map_location, logger)
    # OrderedDict is a subclass of dict
    if not isinstance(checkpoint, dict):
        raise RuntimeError(
            f'No state_dict found in checkpoint file {filename}')

    if 'meta' not in checkpoint.keys():
        checkpoint['meta'] = {}
    # Normalize to a "name -> module" / "name -> checkpoint" layout: a plain
    # module is wrapped under the single name 'model'.
    # NOTE(review): after this wrapping the returned dict has no top-level
    # 'meta' key any more -- verify against the callers.
    if hasattr(model, 'train'):
        mod = {'model': model}
        checkpoint = {'model': checkpoint}
    else:
        mod = model.model
    if isinstance(mod, dict):
        for name, m in mod.items():
            if 'state_dict' in checkpoint[name]:
                state_dict = checkpoint[name]['state_dict']
            else:
                state_dict = checkpoint[name]

            # strip prefix of state_dict
            metadata = getattr(state_dict, '_metadata', OrderedDict())
            for p, r in revise_keys:
                state_dict = OrderedDict(
                    {re.sub(p, r, k): v
                     for k, v in state_dict.items()})
            # Keep metadata in state_dict
            state_dict._metadata = metadata
            load_state_dict(m, state_dict, strict, logger)
    else:
        if 'state_dict' in checkpoint:
            state_dict = checkpoint['state_dict']
        else:
            state_dict = checkpoint

        # strip prefix of state_dict
        metadata = getattr(state_dict, '_metadata', OrderedDict())
        for p, r in revise_keys:
            state_dict = OrderedDict(
                {re.sub(p, r, k): v
                 for k, v in state_dict.items()})
        # Keep metadata in state_dict
        state_dict._metadata = metadata
        load_state_dict(mod, state_dict, strict, logger)
        # if optimizer is not None:
        #     if checkpoint.get('optimizer') is not None:
        #         optimizer.load_state_dict(checkpoint['optimizer'])
        #
        #     if lr > 0 and reset_lr:
        #         for param_group in optimizer.param_groups:
        #             param_group['lr'] = lr
        #     print_log("loaded checkpoint.optimizer")

        # load state_dict

        # NOTE(review): these defaults are only filled in on this branch
        # (``model.model`` is not a dict); the dict branch above returns
        # 'meta' as found -- confirm whether that asymmetry is intended.
        checkpoint['meta'].setdefault('epoch', 1)
        checkpoint['meta'].setdefault('iter', 1)
        checkpoint['meta'].setdefault('best_epoch', 1)
        checkpoint['meta'].setdefault('best_metric', None)

    return checkpoint


def weights_to_cpu(state_dict):
    """Build a CPU copy of a model state_dict.

    Every value of ``state_dict`` has ``.cpu()`` called on it and is stored
    under the same key, in the same order.

    Args:
        state_dict (OrderedDict): Model weights, typically living on GPU.

    Returns:
        OrderedDict: Model weights on CPU. Its ``_metadata`` attribute is
        taken from ``state_dict`` when present, otherwise it is a new empty
        ``OrderedDict``.
    """
    cpu_weights = OrderedDict(
        (name, tensor.cpu()) for name, tensor in state_dict.items())
    # Carry the (optional) metadata over to the copy.
    cpu_weights._metadata = getattr(state_dict, '_metadata', OrderedDict())
    return cpu_weights


def _save_to_state_dict(module, destination, prefix, keep_vars):
    """Saves module state to `destination` dictionary.

    This method is modified from :meth:`torch.nn.Module._save_to_state_dict`.

    Args:
        module (nn.Module): The module to generate state_dict.
        destination (dict): A dict where state will be stored.
        prefix (str): The prefix for parameters and buffers used in this
            module.
    """
    for name, param in module._parameters.items():
        if param is not None:
            destination[prefix + name] = param if keep_vars else param.detach()
    for name, buf in module._buffers.items():
        # remove check of _non_persistent_buffers_set to allow nn.BatchNorm2d
        if buf is not None:
            destination[prefix + name] = buf if keep_vars else buf.detach()


def get_state_dict(module, destination=None, prefix='', keep_vars=False):
    """Collect the whole state of ``module`` into a dictionary.

    Parameters and buffers (e.g. running averages) are included, keyed by
    their prefixed names.

    This method is modified from :meth:`torch.nn.Module.state_dict` so that
    every visited module is first unwrapped when ``is_module_wrapper``
    reports it as a wrapper. That covers nested layouts such as
    nn.Module(nn.Module(DDP)).

    Args:
        module (nn.Module): The module to generate state_dict.
        destination (OrderedDict): Returned dict for the state of the
            module. A new one (with an empty ``_metadata``) is created when
            this is None.
        prefix (str): Prefix of the key.
        keep_vars (bool): Whether to keep the variable property of the
            parameters. Default: False.

    Returns:
        dict: A dictionary containing a whole state of the module.
    """
    # Unwrap at every recursion level, not only at the root.
    if is_module_wrapper(module):
        module = module.module

    # From here on the flow mirrors torch.nn.Module.state_dict().
    if destination is None:
        destination = OrderedDict()
        destination._metadata = OrderedDict()

    local_metadata = dict(version=module._version)
    # The metadata key is the prefix without its trailing dot.
    destination._metadata[prefix[:-1]] = local_metadata

    _save_to_state_dict(module, destination, prefix, keep_vars)

    for child_name, submodule in module._modules.items():
        if submodule is None:
            continue
        get_state_dict(
            submodule,
            destination,
            prefix + child_name + '.',
            keep_vars=keep_vars)

    # A hook may hand back a replacement for the destination dict.
    for state_hook in module._state_dict_hooks.values():
        replacement = state_hook(module, destination, prefix, local_metadata)
        if replacement is not None:
            destination = replacement
    return destination


def save_checkpoint(filename, meta=None, file_client_args=None):
    """Save checkpoint to file.

    The model and the optimizer are not separate arguments; they are carried
    inside ``meta``. Two layouts are recognised:

    * Single model: ``meta`` has a ``'model'`` key and optionally an
      ``'optimizer'`` key. The saved checkpoint has the fields ``meta``,
      ``state_dict`` (taken from ``model.model``) and, when an optimizer or a
      dict of optimizers is given, ``optimizer``.
    * Several models: ``meta`` has no ``'model'`` key and maps a name to a
      dict that itself holds ``'model'`` and optionally ``'optimizer'``. The
      saved checkpoint maps each name to its own ``meta`` / ``state_dict`` /
      ``optimizer`` fields.

    ``'model'`` and ``'optimizer'`` are stripped from the metadata that is
    written. The dicts passed in by the caller are left unmodified.

    Args:
        filename (str): Checkpoint filename.
        meta (dict, optional): Metadata to be saved in checkpoint, including
            the model(s) and optimizer(s) as described above. When it is
            None or empty nothing is written.
        file_client_args (dict, optional): Arguments to instantiate a
            FileClient. See :class:`mmcv.fileio.FileClient` for details.
            Default: None.
            `New in version 1.3.16.`

    Raises:
        TypeError: If ``meta`` is neither a dict nor None.
        ValueError: If ``filename`` starts with ``pavi://`` and
            ``file_client_args`` is given (single-model layout only).
        ImportError: If a ``pavi://`` filename is used without pavi
            installed.
    """
    if meta is None:
        meta = {}
    elif not isinstance(meta, dict):
        raise TypeError(f'meta must be a dict or None, but got {type(meta)}')
    # meta.update(mmcv_version=mmcv.__version__, time=time.asctime())
    if 'model' not in meta:
        checkpoint = {}
        for name, sub_meta in meta.items():
            # Work on a shallow copy so the caller's dict keeps its entries.
            sub_meta = dict(sub_meta)
            model = sub_meta.pop('model')
            # The optimizer is optional, so its absence must not raise.
            optimizer = sub_meta.pop('optimizer', None)
            if is_module_wrapper(model):
                model = model.module

            if hasattr(model, 'CLASSES') and model.CLASSES is not None:
                # save class name to the meta
                sub_meta.update(CLASSES=model.CLASSES)
            checkpoint[name] = {
                'meta': sub_meta,
                'state_dict': weights_to_cpu(get_state_dict(model))
            }

            # save optimizer state dict in the checkpoint
            if isinstance(optimizer, Optimizer):
                checkpoint[name]['optimizer'] = optimizer.state_dict()

        # Write once, after every sub-model has been collected, so the file
        # is never left holding only part of the models. An empty ``meta``
        # writes nothing.
        if checkpoint:
            file_client = FileClient.infer_client(file_client_args, filename)
            with io.BytesIO() as f:
                torch.save(checkpoint, f)
                file_client.put(f.getvalue(), filename)

    else:
        # Shallow copy: popping must not alter the caller's dict.
        meta = dict(meta)
        model = meta.pop('model')
        optimizer = meta.pop('optimizer', None)
        if is_module_wrapper(model):
            model = model.module

        if hasattr(model, 'CLASSES') and model.CLASSES is not None:
            # save class name to the meta
            meta.update(CLASSES=model.CLASSES)

        checkpoint = {
            'meta': meta,
            # NOTE(review): this branch reads ``model.model`` while the
            # multi-model branch reads ``model`` directly - confirm intended.
            'state_dict': weights_to_cpu(get_state_dict(model.model))
        }
        # save optimizer state dict in the checkpoint
        if isinstance(optimizer, Optimizer):
            checkpoint['optimizer'] = optimizer.state_dict()
        elif isinstance(optimizer, dict):
            checkpoint['optimizer'] = {}
            for name, optim in optimizer.items():
                checkpoint['optimizer'][name] = optim.state_dict()

        if filename.startswith('pavi://'):
            if file_client_args is not None:
                raise ValueError(
                    'file_client_args should be "None" if filename starts with'
                    f'"pavi://", but got {file_client_args}')
            try:
                from pavi import exception, modelcloud
            except ImportError:
                raise ImportError(
                    'Please install pavi to load checkpoint from modelcloud.')
            # Drop the 'pavi://' scheme prefix.
            model_path = filename[7:]
            root = modelcloud.Folder()
            model_dir, model_name = osp.split(model_path)
            try:
                cloud_model = modelcloud.get(model_dir)
            except exception.NodeNotFoundError:
                cloud_model = root.create_training_model(model_dir)
            with TemporaryDirectory() as tmp_dir:
                checkpoint_file = osp.join(tmp_dir, model_name)
                with open(checkpoint_file, 'wb') as f:
                    torch.save(checkpoint, f)
                    f.flush()
                cloud_model.create_file(checkpoint_file, name=model_name)
        else:
            file_client = FileClient.infer_client(file_client_args, filename)
            with io.BytesIO() as f:
                torch.save(checkpoint, f)
                file_client.put(f.getvalue(), filename)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/default_constructor.py
================================================
from .builder import RUNNER_BUILDERS, RUNNERS


@RUNNER_BUILDERS.register_module()
class DefaultRunnerConstructor:
    """Default constructor for runners.

    Customize an existing `Runner` like `EpochBasedRunner` through a
    `RunnerConstructor`. For example, we can inject some new properties and
    functions for `Runner`.

    Example:
        >>> from mmcv.runner import RUNNER_BUILDERS, build_runner
        >>> # Define a new RunnerConstructor
        >>> @RUNNER_BUILDERS.register_module()
        >>> class MyRunnerConstructor:
        ...     def __init__(self, runner_cfg, default_args=None):
        ...         if not isinstance(runner_cfg, dict):
        ...             raise TypeError('runner_cfg should be a dict, '
        ...                             f'but got {type(runner_cfg)}')
        ...         self.runner_cfg = runner_cfg
        ...         self.default_args = default_args
        ...
        ...     def __call__(self):
        ...         runner = RUNNERS.build(self.runner_cfg,
        ...                                default_args=self.default_args)
        ...         # Add new properties for existing runner
        ...         runner.my_name = 'my_runner'
        ...         runner.my_function = lambda self: print(self.my_name)
        ...         ...
        >>> # build your runner
        >>> runner_cfg = dict(type='EpochBasedRunner', max_epochs=40,
        ...                   constructor='MyRunnerConstructor')
        >>> runner = build_runner(runner_cfg)
    """

    def __init__(self, runner_cfg, default_args=None):
        """Store the runner config and the default build arguments.

        Args:
            runner_cfg (dict): Config passed to ``RUNNERS.build``.
            default_args (optional): Forwarded to ``RUNNERS.build`` as
                ``default_args``. Defaults to None.

        Raises:
            TypeError: If ``runner_cfg`` is not a dict.
        """
        if not isinstance(runner_cfg, dict):
            # One concatenated message; two separate arguments would make
            # the exception text render as a tuple.
            raise TypeError('runner_cfg should be a dict, '
                            f'but got {type(runner_cfg)}')
        self.runner_cfg = runner_cfg
        self.default_args = default_args

    def __call__(self):
        """Build and return the runner from the stored config."""
        return RUNNERS.build(self.runner_cfg, default_args=self.default_args)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/dist_utils.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import functools
import os
import subprocess
from collections import OrderedDict

import torch
import torch.multiprocessing as mp
from torch import distributed as dist
from torch._utils import (_flatten_dense_tensors, _take_tensors,
                          _unflatten_dense_tensors)


def init_dist(launcher, backend='nccl', **kwargs):
    """Initialise distributed training for the given launcher.

    When no multiprocessing start method has been chosen yet, ``'spawn'`` is
    selected before the launcher-specific initialiser runs.

    Args:
        launcher (str): One of ``'pytorch'``, ``'mpi'`` or ``'slurm'``.
        backend (str): Backend of torch.distributed. Defaults to ``'nccl'``.
        **kwargs: Forwarded to the launcher-specific initialiser.

    Raises:
        ValueError: If ``launcher`` is not one of the supported names.
    """
    current_method = mp.get_start_method(allow_none=True)
    if current_method is None:
        mp.set_start_method('spawn')

    if launcher == 'pytorch':
        _init_dist_pytorch(backend, **kwargs)
        return
    if launcher == 'mpi':
        _init_dist_mpi(backend, **kwargs)
        return
    if launcher == 'slurm':
        _init_dist_slurm(backend, **kwargs)
        return
    raise ValueError(f'Invalid launcher type: {launcher}')


def _init_dist_pytorch(backend, **kwargs):
    """Select a CUDA device from ``RANK`` and start the process group.

    Args:
        backend (str): Backend of torch.distributed.
        **kwargs: Forwarded to ``dist.init_process_group``.
    """
    # TODO: use local_rank instead of rank % num_gpus
    global_rank = int(os.environ['RANK'])
    device_index = global_rank % torch.cuda.device_count()
    torch.cuda.set_device(device_index)
    dist.init_process_group(backend=backend, **kwargs)


def _init_dist_mpi(backend, **kwargs):
    """Select a CUDA device from ``OMPI_COMM_WORLD_RANK`` and start the group.

    Args:
        backend (str): Backend of torch.distributed.
        **kwargs: Forwarded to ``dist.init_process_group``.
    """
    # TODO: use local_rank instead of rank % num_gpus
    mpi_rank = int(os.environ['OMPI_COMM_WORLD_RANK'])
    device_index = mpi_rank % torch.cuda.device_count()
    torch.cuda.set_device(device_index)
    dist.init_process_group(backend=backend, **kwargs)


def _init_dist_slurm(backend, port=None):
    """Set up distributed training from SLURM environment variables.

    The master port is ``port`` when given; otherwise an existing
    ``MASTER_PORT`` environment variable is kept, and if there is none the
    port ``29500`` is used. An existing ``MASTER_ADDR`` is kept as well;
    otherwise it is set to the first host name reported by ``scontrol`` for
    ``SLURM_NODELIST``.

    Args:
        backend (str): Backend of torch.distributed.
        port (int, optional): Master port. Defaults to None.
    """
    env = os.environ
    proc_id = int(env['SLURM_PROCID'])
    ntasks = int(env['SLURM_NTASKS'])
    node_list = env['SLURM_NODELIST']

    num_gpus = torch.cuda.device_count()
    local_rank = proc_id % num_gpus
    torch.cuda.set_device(local_rank)

    first_host = subprocess.getoutput(
        f'scontrol show hostname {node_list} | head -n1')

    # An explicit port wins; otherwise keep MASTER_PORT from the environment
    # and fall back to 29500, the torch.distributed default port.
    if port is not None:
        env['MASTER_PORT'] = str(port)
    else:
        env.setdefault('MASTER_PORT', '29500')

    # use MASTER_ADDR in the environment variable if it already exists
    env.setdefault('MASTER_ADDR', first_host)

    env['WORLD_SIZE'] = str(ntasks)
    env['LOCAL_RANK'] = str(local_rank)
    env['RANK'] = str(proc_id)
    dist.init_process_group(backend=backend)


def get_dist_info():
    """Return ``(rank, world_size)`` of the current process.

    Falls back to ``(0, 1)`` when torch.distributed is unavailable or no
    process group has been initialised.
    """
    distributed_ready = dist.is_available() and dist.is_initialized()
    if not distributed_ready:
        return 0, 1
    return dist.get_rank(), dist.get_world_size()


def master_only(func):
    """Decorator that runs ``func`` only on the rank-0 process.

    On any other rank the wrapped call does nothing and returns None.
    """

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        rank = get_dist_info()[0]
        if rank != 0:
            return None
        return func(*args, **kwargs)

    return wrapper


def allreduce_params(params, coalesce=True, bucket_size_mb=-1):
    """Average parameters across all processes.

    Nothing happens when the world size is 1.

    Args:
        params (list[torch.Parameters]): List of parameters or buffers of a
            model.
        coalesce (bool, optional): Whether allreduce parameters as a whole.
            Defaults to True.
        bucket_size_mb (int, optional): Size of bucket, the unit is MB.
            Defaults to -1.
    """
    world_size = get_dist_info()[1]
    if world_size == 1:
        return

    raw_tensors = [p.data for p in params]
    if not coalesce:
        # One collective per tensor: pre-divide in place, then sum.
        for raw in raw_tensors:
            dist.all_reduce(raw.div_(world_size))
        return
    _allreduce_coalesced(raw_tensors, world_size, bucket_size_mb)


def allreduce_grads(params, coalesce=True, bucket_size_mb=-1):
    """Average gradients across all processes.

    Only parameters that require grad and currently hold a gradient take
    part. Nothing is reduced when the world size is 1.

    Args:
        params (list[torch.Parameters]): List of parameters of a model
        coalesce (bool, optional): Whether allreduce parameters as a whole.
            Defaults to True.
        bucket_size_mb (int, optional): Size of bucket, the unit is MB.
            Defaults to -1.
    """
    # Gradients are gathered before the world-size check, as in the
    # unrestyled flow, so ``params`` is always consumed.
    grads = []
    for p in params:
        if p.requires_grad and p.grad is not None:
            grads.append(p.grad.data)

    world_size = get_dist_info()[1]
    if world_size == 1:
        return

    if not coalesce:
        for grad in grads:
            dist.all_reduce(grad.div_(world_size))
        return
    _allreduce_coalesced(grads, world_size, bucket_size_mb)


def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1):
    if bucket_size_mb > 0:
        bucket_size_bytes = bucket_size_mb * 1024 * 1024
        buckets = _take_tensors(tensors, bucket_size_bytes)
    else:
        buckets = OrderedDict()
        for tensor in tensors:
            tp = tensor.type()
            if tp not in buckets:
                buckets[tp] = []
            buckets[tp].append(tensor)
        buckets = buckets.values()

    for bucket in buckets:
        flat_tensors = _flatten_dense_tensors(bucket)
        dist.all_reduce(flat_tensors)
        flat_tensors.div_(world_size)
        for tensor, synced in zip(
                bucket, _unflatten_dense_tensors(flat_tensors, bucket)):
            tensor.copy_(synced)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/epoch_based_runner.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import os
import platform
import shutil
import time
import warnings
import time
import datetime
import torch
import mmcv
from .base_runner import BaseRunner
from .builder import RUNNERS
from .checkpoint import save_checkpoint
from .utils import get_host_info
from mmcv.utils.logging import print_log
from .record import MetricLogger, get_grad_norm


@RUNNERS.register_module()
class EpochBasedRunner(BaseRunner):
    """Epoch-based Runner.

    This runner trains models epoch by epoch. ``self.model`` is either an
    object exposing ``train``/``eval`` itself, or an object whose ``model``
    attribute is a module or a dict of modules.
    """

    def run_iter(self, data_batch, train_mode, **kwargs):
        """Run one iteration and keep its result in ``self.outputs``.

        Args:
            data_batch: One batch produced by the data loader.
            train_mode (bool): Selects ``model.train_step`` (True) or
                ``model.val_step`` (False) when no ``batch_processor`` is
                set.
            **kwargs: Forwarded to the step function.

        Raises:
            TypeError: If the step function does not return a dict.
        """
        if self.batch_processor is not None:
            outputs = self.batch_processor(
                self.model, data_batch, train_mode=train_mode, **kwargs)
        elif train_mode:
            outputs = self.model.train_step(data_batch, self.optimizer,
                                            **kwargs)

            # if not isinstance(self.model, dict):
            #     outputs = self.model.train_step(data_batch, self.optimizer,
            #                                     **kwargs)
            # else:
            #     outputs = {}
            #     for name in self.model.keys():
            #         outputs.update(self.model[name].train_step(data_batch, self.optimizer,
            #                                         **kwargs))

        else:
            outputs = self.model.val_step(data_batch, self.optimizer, **kwargs)
        if not isinstance(outputs, dict):
            raise TypeError('"batch_processor()" or "model.train_step()"'
                            'and "model.val_step()" must return a dict')
        if 'log_vars' in outputs:
            self.log_buffer.update_dict(outputs['log_vars'])
        # {'loss': loss, 'log_vars': {'loss': loss, 'metric_1': ..., 'metric_2': ....} }
        self.outputs = outputs

    def train(self, data_loader, **kwargs):
        """Train for one epoch, firing the train hooks around each iteration.

        Args:
            data_loader: Iterable of training batches; must support ``len``.
            **kwargs: Forwarded to :meth:`run_iter`.
        """
        # Switch every underlying module to training mode.
        if hasattr(self.model, 'train'):
            self.model.train()
        elif isinstance(self.model.model, dict):
            for name in self.model.model.keys():
                self.model.model[name].train()
        else:
            self.model.model.train()
        # if not isinstance(self.model, dict):
        #     self.model.train()
        # else:
        #     for name in self.model.keys():
        #         self.model[name].train()

        self.mode = 'train'
        self.data_loader = data_loader
        self._max_iters = self._max_epochs * len(self.data_loader)
        self.call_hook('before_train_epoch')
        time.sleep(2)  # Prevent possible deadlock during epoch transition
        for i, data_batch in enumerate(self.data_loader):
            self._inner_iter = i
            self.call_hook('before_train_iter')
            self.run_iter(data_batch, train_mode=True, **kwargs)
            self.call_hook('after_train_iter')
            self._iter += 1

        # Per-epoch averages of everything recorded in the log buffer.
        self.metrics = {k: meter.avg for k, meter in self.log_buffer.meters.items()}
        self.call_hook('after_train_epoch')
        self._epoch += 1

    def simple_train(self, data_loader, **kwargs):
        """Train for one epoch with an inline backward/step loop.

        Unlike :meth:`train`, the backward pass, optional gradient clipping
        and the optimizer step are done here rather than in hooks; only the
        ``after_train_epoch`` hook is fired. Settings are read from
        ``self.opt_cfg`` (``accumulated_step``, ``clip_max_norm``,
        ``print_freq``, ``nni``).

        Args:
            data_loader: Iterable of training batches; must support ``len``.
            **kwargs: Forwarded to :meth:`run_iter`.
        """
        optimizer = self.optimizer
        accumulated_step = self.opt_cfg.get('accumulated_step', 1)
        clip_max_norm = self.opt_cfg.get('clip_max_norm', 0)
        print_freq = self.opt_cfg.get('print_freq', 1)
        nni = self.opt_cfg.get('nni', None)
        self.model.train()
        self.mode = 'train'
        self.data_loader = data_loader
        self._max_iters = self._max_epochs * len(self.data_loader)
        # metric_logger = MetricLogger(delimiter="  ", dist_print=0, logger=self.logger)
        header = 'Epoch: [{}]'.format(self._epoch)
        # A non-positive print_freq means "print once per epoch".
        print_freq = len(data_loader) if print_freq <= 0 else print_freq
        metric_logger = self.log_buffer
        for data_batch, idx in metric_logger.log_every(data_loader, print_freq, header):
            self._inner_iter = idx
            self.run_iter(data_batch, train_mode=True, **kwargs)
            # Scale the loss so accumulated gradients average over the steps.
            losses = self.outputs['loss'] / accumulated_step
            losses.backward()
            if clip_max_norm > 0:
                grad_norm = torch.nn.utils.clip_grad_norm_(self.model.parameters(), clip_max_norm)
            else:
                grad_norm = get_grad_norm(self.model.parameters())
            if idx % accumulated_step == 0:
                optimizer.step()
                optimizer.zero_grad()

            metric_logger.update(lr=optimizer.param_groups[0]["lr"])
            metric_logger.update(grad_norm=grad_norm)
            metric_logger.update_dict(self.outputs['log_vars'])
            self._iter += 1


        self.metrics = {k: meter.avg for k, meter in metric_logger.meters.items()}
        self.call_hook('after_train_epoch')
        metric_logger.clear()
        self._epoch += 1
        if nni is not None:
            nni.report_intermediate_result(
                {name: value for name, value in self.metrics.items() if self.opt_cfg.metrics in name})

    @torch.no_grad()
    def simple_val(self, data_loader, **kwargs):
        """Validate for one epoch by calling ``model.val_step`` directly.

        Args:
            data_loader: Iterable of validation batches.
            **kwargs: Accepted for interface parity; not used here.
        """
        # Would using IterBasedRunner be more unified?
        # For further integration this should become an eval_hook, but this
        # is a simple case.
        self.model.eval()
        self.mode = 'val'
        opt_cfg = self.opt_cfg
        save_fmt = opt_cfg['save_fmt']
        # metric_logger = MetricLogger(dist_print=0, delimiter="  ", logger=self.logger)
        metric_logger = self.log_buffer
        header = 'TestEpoch: [{0}]'.format(self.epoch - 1)
        save_dir = os.path.join(self.work_dir, f"{opt_cfg['dataset']}")
        if save_fmt and self._epoch == 1:
            os.makedirs(save_dir, exist_ok=True)
        for batch, idx in metric_logger.log_every(data_loader, 1, header):
            metrics = self.model.val_step(batch, save_dir,
                                          idx=idx, save_fmt=save_fmt, filename=batch.get('filename', None))
            # self.run_iter()
            metric_logger.update_dict(metrics)
        stats = {k: meter.avg for k, meter in metric_logger.meters.items()}
        if opt_cfg['mode'] == 'nni':
            self.nni.report_final_result({name: value for name, value in stats.items() if opt_cfg['metrics'] in name})
        # Triggered only when running validation alone; advancing the epoch
        # lets the while loop in ``run`` terminate.
        metric_logger.clear()
        if not self.flag:
            self._epoch += 1

    @torch.no_grad()
    def val(self, data_loader, **kwargs):
        """Validate for one epoch, firing the val hooks around each iteration.

        Args:
            data_loader: Iterable of validation batches; each batch must
                support ``.get('filename', ...)``.
            **kwargs: Accepted for interface parity; not forwarded.
        """
        # Switch every underlying module to evaluation mode.
        if hasattr(self.model, 'eval'):
            self.model.eval()
        elif isinstance(self.model.model, dict):
            for name in self.model.model.keys():
                self.model.model[name].eval()
        else:
            self.model.model.eval()
        self.mode = 'val'
        self.data_loader = data_loader
        self.call_hook('before_val_epoch')
        time.sleep(2)  # Prevent possible deadlock during epoch transition
        tic = time.time()
        for i, data_batch in enumerate(self.data_loader):
            self._inner_iter = i
            self.call_hook('before_val_iter')
            self.run_iter(data_batch, train_mode=False, idx=i,
                          img_range=self.opt_cfg['img_range'],
                          save_fmt=self.opt_cfg['save_fmt'], filename=data_batch.get('filename', [None])[0], save_dir=self.save_dir)
            self.call_hook('after_val_iter')
        print("test time:", time.time() - tic)
        self.call_hook('after_val_epoch')
        if self.opt_cfg['eval']:
            self._epoch += 1

    def run(self, data_loaders, workflow, max_epochs=None, **kwargs):
        """Start running.

        Args:
            data_loaders (dict): Maps a workflow mode name (e.g. ``'train'``,
                ``'val'``) to the data loader used for that mode.
            workflow (list[tuple]): A list of (phase, epochs) to specify the
                running order and epochs. E.g, [('train', 2), ('val', 1)] means
                running 2 epochs for training and 1 epoch for validation,
                iteratively.
            max_epochs (int, optional): Deprecated; set ``max_epochs`` in the
                runner config instead.
            **kwargs: Forwarded to the per-epoch method of each mode.

        Raises:
            ValueError: If the runner has no method named after a mode.
            TypeError: If a mode in ``workflow`` is not a str.
        """
        assert isinstance(data_loaders, dict)
        assert mmcv.is_list_of(workflow, tuple)
        assert len(data_loaders) == len(workflow), print_log(f"{len(data_loaders)} == {len(workflow)}")
        if max_epochs is not None:
            warnings.warn(
                'setting max_epochs in run is deprecated, '
                'please set max_epochs in runner_config', DeprecationWarning)
            self._max_epochs = max_epochs

        assert self._max_epochs is not None, (
            'max_epochs must be specified during instantiation')
        # True when the workflow contains a training phase.
        self.flag = any('train' in mode for mode, _ in workflow)
        self.workflow = workflow
        self.data_length = 1
        for i, flow in enumerate(workflow):
            mode, epochs = flow
            if mode == 'train':
                self._max_iters = self._max_epochs * len(data_loaders[mode])
                self.data_length = len(data_loaders[mode])
                break


        work_dir = self.work_dir if self.work_dir is not None else 'NONE'
        print_log(f'Start running, host: {get_host_info()}, work_dir: {work_dir}',
                  logger=self.logger)
        print_log(f'Hooks will be executed in the following order:\n{self.get_hook_info()}',
                  logger=self.logger)
        print_log(f'workflow: {workflow}, max: {self._max_epochs} epochs',
                  logger=self.logger)
        self.call_hook('before_run')
        tic = time.time()
        print_freq = self.opt_cfg.get('print_freq', 1)
        # from 1 to self._max_epochs, not from 0
        while self.epoch <= self._max_epochs:
            for i, flow in enumerate(workflow):
                mode, epochs = flow
                if isinstance(mode, str):  # self.train()
                    if not hasattr(self, mode):
                        raise ValueError(
                            f'runner has no method named "{mode}" to run an '
                            'epoch')
                    epoch_runner = getattr(self, mode)
                else:
                    raise TypeError(
                        'mode in workflow must be a str, but got {}'.format(
                            type(mode)))

                for epoch in range(epochs):
                    if mode == 'train' and self.epoch >= self._max_epochs:
                        break
                    epoch_runner(data_loaders[mode], **kwargs)
            if self.earlyStop:
                print_log("model train has diverged, python will stop training", logger=self.logger)
                break
        time.sleep(1)  # wait for some hooks like loggers to finish
        self.call_hook('after_run')
        total_time = time.time() - tic
        total_time_str = str(datetime.timedelta(seconds=int(total_time)))
        print_log('Training time {}'.format(total_time_str), logger=self.logger)

    def save_checkpoint(self,
                        out_dir,
                        filename_tmpl='epoch_{}.pth',
                        save_optimizer=True,
                        meta=None,
                        create_symlink=True):
        """Save the checkpoint.

        Args:
            out_dir (str): The directory that checkpoints are saved.
            filename_tmpl (str, optional): The checkpoint filename template,
                which contains a placeholder for the epoch number.
                Defaults to 'epoch_{}.pth'.
            save_optimizer (bool, optional): Whether to save the optimizer to
                the checkpoint. Defaults to True.
            meta (dict, optional): The meta information to be saved in the
                checkpoint. Defaults to None.
            create_symlink (bool, optional): Whether to create a symlink
                "latest.pth" to point to the latest checkpoint.
                Defaults to True.

        Raises:
            TypeError: If ``meta`` is neither a dict nor None.
        """
        if meta is None:
            meta = {}
        elif not isinstance(meta, dict):
            raise TypeError(
                f'meta should be a dict or None, but got {type(meta)}')
        if self.meta is not None:
            meta.update(self.meta)
            # Note: meta.update(self.meta) should be done before
            # meta.update(epoch=self.epoch + 1, iter=self.iter) otherwise
            # there will be problems with resumed checkpoints.
            # More details in https://github.com/open-mmlab/mmcv/pull/1108
        meta.update(epoch=self.epoch + 1, iter=self.iter)

        filename = filename_tmpl.format(self.epoch + 1)
        filepath = os.path.join(out_dir, filename)
        optimizer = self.optimizer if save_optimizer else None
        # The module-level save_checkpoint takes (filename, meta,
        # file_client_args) and expects the model and optimizer inside
        # ``meta``; passing them as arguments raises TypeError. A separate
        # dict keeps those objects out of the caller's ``meta``.
        # NOTE(review): the single-model path of save_checkpoint reads
        # ``model.model`` - confirm self.model always exposes ``.model`` here.
        checkpoint_meta = dict(meta, model=self.model, optimizer=optimizer)
        save_checkpoint(filepath, meta=checkpoint_meta)
        # in some environments, `os.symlink` is not supported, you may need to
        # set `create_symlink` to False
        if create_symlink:
            dst_file = os.path.join(out_dir, 'latest.pth')
            if platform.system() != 'Windows':
                mmcv.symlink(filename, dst_file)
            else:
                shutil.copy(filepath, dst_file)


@RUNNERS.register_module()
class Runner(EpochBasedRunner):
    """Deprecated name of EpochBasedRunner.

    Instantiating it emits a ``DeprecationWarning`` and otherwise behaves
    like :class:`EpochBasedRunner`.
    """

    def __init__(self, *args, **kwargs):
        deprecation_note = (
            'Runner was deprecated, please use EpochBasedRunner instead')
        warnings.warn(deprecation_note, DeprecationWarning)
        super().__init__(*args, **kwargs)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/fp16_utils.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import functools
import warnings
from collections import abc
from inspect import getfullargspec

import numpy as np
import torch
import torch.nn as nn

from mmcv.utils import TORCH_VERSION, digit_version
from .dist_utils import allreduce_grads as _allreduce_grads

try:
    # If PyTorch version >= 1.6.0, torch.cuda.amp.autocast would be imported
    # and used; otherwise, auto fp16 will adopt mmcv's implementation.
    # Note that when PyTorch >= 1.6.0, we still cast tensor types to fp16
    # manually, so the behavior may not be consistent with real amp.
    from torch.cuda.amp import autocast
except ImportError:
    pass


def cast_tensor_type(inputs, src_type, dst_type):
    """Recursively convert Tensors in ``inputs`` from src_type to dst_type.

    Note:
        In v1.4.4 and later, ``cast_tensor_type`` only converts a
        torch.Tensor whose dtype equals ``src_type``. Before v1.4.4 the
        ``src_type`` argument was ignored, so for example
        ``cast_tensor_type(inputs, torch.float, torch.half)`` also converted
        tensors originally in ``torch.Int`` or other types, which is not
        expected.

    Args:
        inputs: Inputs that are to be cast. Modules, strings, numpy arrays
            and any non-container value are returned unchanged; mappings
            and other iterables are rebuilt with their own type.
        src_type (torch.dtype): Source type.
        dst_type (torch.dtype): Destination type.

    Returns:
        The same type as ``inputs``, with all contained Tensors of
        ``src_type`` cast to ``dst_type``.
    """
    if isinstance(inputs, nn.Module):
        return inputs

    if isinstance(inputs, torch.Tensor):
        # Only tensors whose dtype matches `src_type` are converted.
        if inputs.dtype == src_type:
            return inputs.to(dst_type)
        return inputs

    # Strings and arrays are iterable but must not be walked element-wise.
    if isinstance(inputs, (str, np.ndarray)):
        return inputs

    if isinstance(inputs, abc.Mapping):
        converted = {}
        for key, value in inputs.items():
            converted[key] = cast_tensor_type(value, src_type, dst_type)
        return type(inputs)(converted)

    if isinstance(inputs, abc.Iterable):
        container = type(inputs)
        return container(
            cast_tensor_type(element, src_type, dst_type)
            for element in inputs)

    return inputs


def auto_fp16(apply_to=None, out_fp32=False):
    """Decorator to enable fp16 training automatically.

    This decorator is useful when you write custom modules and want to support
    mixed precision training. If inputs arguments are fp32 tensors, they will
    be converted to fp16 automatically. Arguments other than fp32 tensors are
    ignored. If you are using PyTorch >= 1.6, torch.cuda.amp is used as the
    backend, otherwise, original mmcv implementation will be adopted.

    The conversion only happens when the module instance has a truthy
    ``fp16_enabled`` attribute; otherwise the decorated method is called
    unchanged.

    Args:
        apply_to (Iterable, optional): The argument names to be converted.
            `None` indicates all arguments.
        out_fp32 (bool): Whether to convert the output back to fp32.

    Raises:
        TypeError: If the decorated callable is invoked with a first
            positional argument that is not an ``nn.Module``.

    Example:

        >>> import torch.nn as nn
        >>> class MyModule1(nn.Module):
        >>>
        >>>     # Convert x and y to fp16
        >>>     @auto_fp16()
        >>>     def forward(self, x, y):
        >>>         pass

        >>> import torch.nn as nn
        >>> class MyModule2(nn.Module):
        >>>
        >>>     # convert pred to fp16
        >>>     @auto_fp16(apply_to=('pred', ))
        >>>     def do_something(self, pred, others):
        >>>         pass
    """

    def auto_fp16_wrapper(old_func):

        @functools.wraps(old_func)
        def new_func(*args, **kwargs):
            # check if the module has set the attribute `fp16_enabled`, if not,
            # just fallback to the original method.
            if not isinstance(args[0], torch.nn.Module):
                raise TypeError('@auto_fp16 can only be used to decorate the '
                                'method of nn.Module')
            if not (hasattr(args[0], 'fp16_enabled') and args[0].fp16_enabled):
                return old_func(*args, **kwargs)

            # get the arg spec of the decorated method
            args_info = getfullargspec(old_func)
            # get the argument names to be casted
            args_to_cast = args_info.args if apply_to is None else apply_to
            # convert the positional args that need to be processed
            # NOTE: default args are not taken into consideration
            arg_names = args_info.args[:len(args)]
            new_args = [
                cast_tensor_type(value, torch.float, torch.half)
                if name in args_to_cast else value
                for name, value in zip(arg_names, args)
            ]
            # Positional values beyond the named parameters belong to
            # ``*varargs``; forward them untouched instead of dropping them.
            new_args.extend(args[len(arg_names):])
            # convert the kwargs that need to be processed
            new_kwargs = {
                name: (cast_tensor_type(value, torch.float, torch.half)
                       if name in args_to_cast else value)
                for name, value in kwargs.items()
            }
            # apply converted arguments to the decorated method
            if (TORCH_VERSION != 'parrots' and
                    digit_version(TORCH_VERSION) >= digit_version('1.6.0')):
                with autocast(enabled=True):
                    output = old_func(*new_args, **new_kwargs)
            else:
                output = old_func(*new_args, **new_kwargs)
            # cast the results back to fp32 if necessary
            if out_fp32:
                output = cast_tensor_type(output, torch.half, torch.float)
            return output

        return new_func

    return auto_fp16_wrapper


def force_fp32(apply_to=None, out_fp16=False):
    """Decorator to convert input arguments to fp32 in force.

    This decorator is useful when you write custom modules and want to support
    mixed precision training. If there are some inputs that must be processed
    in fp32 mode, then this decorator can handle it. If inputs arguments are
    fp16 tensors, they will be converted to fp32 automatically. Arguments other
    than fp16 tensors are ignored. If you are using PyTorch >= 1.6,
    torch.cuda.amp is used as the backend, otherwise, original mmcv
    implementation will be adopted.

    The conversion only happens when the module instance has a truthy
    ``fp16_enabled`` attribute; otherwise the decorated method is called
    unchanged.

    Args:
        apply_to (Iterable, optional): The argument names to be converted.
            `None` indicates all arguments.
        out_fp16 (bool): Whether to convert the output back to fp16.

    Raises:
        TypeError: If the decorated callable is invoked with a first
            positional argument that is not an ``nn.Module``.

    Example:

        >>> import torch.nn as nn
        >>> class MyModule1(nn.Module):
        >>>
        >>>     # Convert x and y to fp32
        >>>     @force_fp32()
        >>>     def loss(self, x, y):
        >>>         pass

        >>> import torch.nn as nn
        >>> class MyModule2(nn.Module):
        >>>
        >>>     # convert pred to fp32
        >>>     @force_fp32(apply_to=('pred', ))
        >>>     def post_process(self, pred, others):
        >>>         pass
    """

    def force_fp32_wrapper(old_func):

        @functools.wraps(old_func)
        def new_func(*args, **kwargs):
            # check if the module has set the attribute `fp16_enabled`, if not,
            # just fallback to the original method.
            if not isinstance(args[0], torch.nn.Module):
                raise TypeError('@force_fp32 can only be used to decorate the '
                                'method of nn.Module')
            if not (hasattr(args[0], 'fp16_enabled') and args[0].fp16_enabled):
                return old_func(*args, **kwargs)
            # get the arg spec of the decorated method
            args_info = getfullargspec(old_func)
            # get the argument names to be casted
            args_to_cast = args_info.args if apply_to is None else apply_to
            # convert the positional args that need to be processed
            arg_names = args_info.args[:len(args)]
            new_args = [
                cast_tensor_type(value, torch.half, torch.float)
                if name in args_to_cast else value
                for name, value in zip(arg_names, args)
            ]
            # Positional values beyond the named parameters belong to
            # ``*varargs``; forward them untouched instead of dropping them.
            new_args.extend(args[len(arg_names):])
            # convert the kwargs that need to be processed
            new_kwargs = {
                name: (cast_tensor_type(value, torch.half, torch.float)
                       if name in args_to_cast else value)
                for name, value in kwargs.items()
            }
            # apply converted arguments to the decorated method; autocast is
            # explicitly disabled so the body really runs in fp32
            if (TORCH_VERSION != 'parrots' and
                    digit_version(TORCH_VERSION) >= digit_version('1.6.0')):
                with autocast(enabled=False):
                    output = old_func(*new_args, **new_kwargs)
            else:
                output = old_func(*new_args, **new_kwargs)
            # cast the results back to fp16 if necessary
            if out_fp16:
                output = cast_tensor_type(output, torch.float, torch.half)
            return output

        return new_func

    return force_fp32_wrapper


def allreduce_grads(params, coalesce=True, bucket_size_mb=-1):
    """Deprecated alias that forwards to ``mmcv.runner.allreduce_grads``.

    Args:
        params: Forwarded unchanged to the underlying implementation.
        coalesce (bool): Forwarded unchanged. Default: True.
        bucket_size_mb (int): Forwarded unchanged. Default: -1.
    """
    # ``warnings.warn`` is the real API; ``warnings.warning`` does not exist
    # and raised AttributeError on every call.
    warnings.warn(
        '"mmcv.runner.fp16_utils.allreduce_grads" is deprecated, and will be '
        'removed in v2.8. Please switch to "mmcv.runner.allreduce_grads"',
        DeprecationWarning,
        stacklevel=2)
    _allreduce_grads(params, coalesce=coalesce, bucket_size_mb=bucket_size_mb)


def wrap_fp16_model(model):
    """Prepare an FP32 model for mixed precision training.

    With PyTorch >= 1.6 ``torch.cuda.amp`` is the backend and this function
    only switches on the ``fp16_enabled`` flag of the sub-modules that
    define it.

    With parrots or PyTorch < 1.6 the original mmcv implementation is used:
    1. Convert the FP32 model to FP16.
    2. Keep some necessary layers in FP32, e.g., normalization layers.
    3. Set the ``fp16_enabled`` flag inside the model to True.

    Args:
        model (nn.Module): Model in FP32.
    """
    uses_legacy_backend = (
        TORCH_VERSION == 'parrots'
        or digit_version(TORCH_VERSION) < digit_version('1.6.0'))
    if uses_legacy_backend:
        # cast all weights to half precision ...
        model.half()
        # ... then restore the normalization layers to fp32
        patch_norm_fp32(model)

    # switch the flag on for every sub-module that declares it
    for submodule in model.modules():
        if hasattr(submodule, 'fp16_enabled'):
            submodule.fp16_enabled = True


def patch_norm_fp32(module):
    """Recursively convert normalization layers from FP16 to FP32.

    Batch-norm and group-norm layers are cast back to fp32. For group norm
    (and for batch norm on PyTorch older than 1.3) the layer's ``forward`` is
    additionally wrapped so half inputs are cast to float before the call and
    the output is cast back to half.

    Args:
        module (nn.Module): The modules to be converted in FP16.

    Returns:
        nn.Module: The converted module, the normalization layers have been
            converted to FP32.
    """
    if isinstance(module, (nn.modules.batchnorm._BatchNorm, nn.GroupNorm)):
        module.float()
        # Compare parsed versions: a plain string comparison ranks
        # "1.10" below "1.3". parrots reports no numeric version and was
        # never treated as "older than 1.3" by the string comparison either.
        legacy_torch = (
            TORCH_VERSION != 'parrots'
            and digit_version(TORCH_VERSION) < digit_version('1.3'))
        if isinstance(module, nn.GroupNorm) or legacy_torch:
            module.forward = patch_forward_method(module.forward, torch.half,
                                                  torch.float)
    for child in module.children():
        patch_norm_fp32(child)
    return module


def patch_forward_method(func, src_type, dst_type, convert_output=True):
    """Wrap a forward method so that its tensor arguments are cast first.

    Args:
        func (callable): The original forward method.
        src_type (torch.dtype): Dtype of the input tensors to convert from.
        dst_type (torch.dtype): Dtype the input tensors are converted to.
        convert_output (bool): Whether to convert the output back to
            ``src_type``.

    Returns:
        callable: The patched forward method.
    """

    def new_forward(*args, **kwargs):
        # cast positional and keyword inputs src_type -> dst_type
        cast_args = cast_tensor_type(args, src_type, dst_type)
        cast_kwargs = cast_tensor_type(kwargs, src_type, dst_type)
        result = func(*cast_args, **cast_kwargs)
        if not convert_output:
            return result
        # restore the caller's dtype on the way out
        return cast_tensor_type(result, dst_type, src_type)

    return new_forward


class LossScaler:
    """Class that manages loss scaling in mixed precision training which
    supports both dynamic or static mode.

    The implementation refers to
    https://github.com/NVIDIA/apex/blob/master/apex/fp16_utils/loss_scaler.py.
    Indirectly, by supplying ``mode='dynamic'`` for dynamic loss scaling.
    It's important to understand how :class:`LossScaler` operates.
    Loss scaling is designed to combat the problem of underflowing
    gradients encountered at long times when training fp16 networks.
    Dynamic loss scaling begins by attempting a very high loss
    scale.  Ironically, this may result in OVERflowing gradients.
    If overflowing gradients are encountered, :class:`FP16_Optimizer` then
    skips the update step for this particular iteration/minibatch,
    and :class:`LossScaler` adjusts the loss scale to a lower value.
    If a certain number of iterations occur without overflowing gradients
    detected, :class:`LossScaler` increases the loss scale once more.
    In this way :class:`LossScaler` attempts to "ride the edge" of always
    using the highest loss scale possible without incurring overflow.

    Args:
        init_scale (float): Initial loss scale value, default: 2**32.
        mode (str): Loss scaling mode. 'dynamic' or 'static'
        scale_factor (float): Factor used when adjusting the loss scale.
            Default: 2.
        scale_window (int): Number of consecutive iterations without an
            overflow to wait before increasing the loss scale. Default: 1000.
    """

    def __init__(self,
                 init_scale=2**32,
                 mode='dynamic',
                 scale_factor=2.,
                 scale_window=1000):
        self.cur_scale = init_scale
        self.cur_iter = 0
        assert mode in ('dynamic',
                        'static'), 'mode can only be dynamic or static'
        self.mode = mode
        self.last_overflow_iter = -1
        self.scale_factor = scale_factor
        self.scale_window = scale_window

    def has_overflow(self, params):
        """Check if the gradients of ``params`` contain inf or NaN.

        Always returns False in static mode. Parameters whose ``grad`` is
        None are skipped.
        """
        if self.mode != 'dynamic':
            return False
        for p in params:
            if p.grad is not None and LossScaler._has_inf_or_nan(p.grad.data):
                return True
        return False

    @staticmethod
    def _has_inf_or_nan(x):
        """Return True if the sum of ``x`` is +inf, -inf or NaN.

        Declared as a static method so that it can be called both through the
        class and through an instance.
        """
        try:
            cpu_sum = float(x.float().sum())
        except RuntimeError as instance:
            # Only the "value cannot be converted" error is treated as an
            # overflow; any other RuntimeError is re-raised.
            if 'value cannot be converted' not in instance.args[0]:
                raise
            return True
        else:
            # ``cpu_sum != cpu_sum`` is the NaN test
            if cpu_sum == float('inf') or cpu_sum == -float('inf') \
                    or cpu_sum != cpu_sum:
                return True
            return False

    def update_scale(self, overflow):
        """Update the current loss scale after one iteration.

        In dynamic mode the scale is divided by ``scale_factor`` (never going
        below 1) when ``overflow`` is truthy; otherwise it is multiplied by
        ``scale_factor`` whenever the number of iterations since the last
        overflow is a multiple of ``scale_window``. Static mode is a no-op.

        Args:
            overflow (bool): Whether the current iteration overflowed.
        """
        if self.mode != 'dynamic':
            return
        if overflow:
            self.cur_scale = max(self.cur_scale / self.scale_factor, 1)
            self.last_overflow_iter = self.cur_iter
        else:
            if (self.cur_iter - self.last_overflow_iter) % \
                    self.scale_window == 0:
                self.cur_scale *= self.scale_factor
        self.cur_iter += 1

    def state_dict(self):
        """Returns the state of the scaler as a :class:`dict`."""
        return dict(
            cur_scale=self.cur_scale,
            cur_iter=self.cur_iter,
            mode=self.mode,
            last_overflow_iter=self.last_overflow_iter,
            scale_factor=self.scale_factor,
            scale_window=self.scale_window)

    def load_state_dict(self, state_dict):
        """Loads the loss_scaler state dict.

        Args:
           state_dict (dict): scaler state.
        """
        self.cur_scale = state_dict['cur_scale']
        self.cur_iter = state_dict['cur_iter']
        self.mode = state_dict['mode']
        self.last_overflow_iter = state_dict['last_overflow_iter']
        self.scale_factor = state_dict['scale_factor']
        self.scale_window = state_dict['scale_window']

    @property
    def loss_scale(self):
        """Current loss scale value."""
        return self.cur_scale


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/hooks/__init__.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
from .checkpoint import CheckpointHook
from .closure import ClosureHook
from .ema import EMAHook
from .evaluation import DistEvalHook, EvalHook
from .hook import HOOKS, Hook
from .iter_timer import IterTimerHook
from .logger import (DvcliveLoggerHook, LoggerHook, MlflowLoggerHook,
                     NeptuneLoggerHook, PaviLoggerHook, TensorboardLoggerHook,
                     TextLoggerHook, WandbLoggerHook)
from .lr_updater import (CosineAnnealingLrUpdaterHook,
                         CosineRestartLrUpdaterHook, CyclicLrUpdaterHook,
                         ExpLrUpdaterHook, FixedLrUpdaterHook,
                         FlatCosineAnnealingLrUpdaterHook, InvLrUpdaterHook,
                         LrUpdaterHook, OneCycleLrUpdaterHook,
                         PolyLrUpdaterHook, StepLrUpdaterHook)
from .memory import EmptyCacheHook
from .momentum_updater import (CosineAnnealingMomentumUpdaterHook,
                               CyclicMomentumUpdaterHook, MomentumUpdaterHook,
                               OneCycleMomentumUpdaterHook,
                               StepMomentumUpdaterHook)
from .optimizer import (Fp16OptimizerHook, GradientCumulativeFp16OptimizerHook,
                        GradientCumulativeOptimizerHook, OptimizerHook)
from .profiler import ProfilerHook
from .sampler_seed import DistSamplerSeedHook
from .sync_buffer import SyncBuffersHook

__all__ = [
    'HOOKS', 'Hook', 'CheckpointHook', 'ClosureHook', 'LrUpdaterHook',
    'FixedLrUpdaterHook', 'StepLrUpdaterHook', 'ExpLrUpdaterHook',
    'PolyLrUpdaterHook', 'InvLrUpdaterHook', 'CosineAnnealingLrUpdaterHook',
    'FlatCosineAnnealingLrUpdaterHook', 'CosineRestartLrUpdaterHook',
    'CyclicLrUpdaterHook', 'OneCycleLrUpdaterHook', 'OptimizerHook',
    'Fp16OptimizerHook', 'IterTimerHook', 'DistSamplerSeedHook',
    'EmptyCacheHook', 'LoggerHook', 'MlflowLoggerHook', 'PaviLoggerHook',
    'TextLoggerHook', 'TensorboardLoggerHook', 'NeptuneLoggerHook',
    'WandbLoggerHook', 'DvcliveLoggerHook', 'MomentumUpdaterHook',
    'StepMomentumUpdaterHook', 'CosineAnnealingMomentumUpdaterHook',
    'CyclicMomentumUpdaterHook', 'OneCycleMomentumUpdaterHook',
    'SyncBuffersHook', 'EMAHook', 'EvalHook', 'DistEvalHook', 'ProfilerHook',
    'GradientCumulativeOptimizerHook', 'GradientCumulativeFp16OptimizerHook'
]


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/hooks/checkpoint.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
import warnings
from mmcv.utils.logging import print_log
from mmcv.fileio import FileClient
from ..dist_utils import allreduce_params, master_only
from .hook import HOOKS, Hook
from math import inf
import os
import re
from ..checkpoint import save_checkpoint, get_best_k_model
import platform
import mmcv
import shutil

@HOOKS.register_module()
class CheckpointHook(Hook):
    """Save checkpoints periodically.

    Args:
        interval (int): The saving period. If ``by_epoch=True``, interval
            indicates epochs, otherwise it indicates iterations.
            Default: -1, which means "never".
        by_epoch (bool): Saving checkpoints by epoch or by iteration.
            Default: True.
        save_optimizer (bool): Whether to save optimizer state_dict in the
            checkpoint. It is usually used for resuming experiments.
            Default: True.
        out_dir (str, optional): The root directory to save checkpoints. If not
            specified, ``runner.work_dir`` will be used by default. If
            specified, the ``out_dir`` will be the concatenation of ``out_dir``
            and the last level directory of ``runner.work_dir``.
            `Changed in version 1.3.16.`
        max_keep_ckpts (int, optional): The maximum checkpoints to keep.
            In some cases we want only the latest few checkpoints and would
            like to delete old ones to save the disk space.
            Default: -1, which means unlimited.
        save_last (bool, optional): Whether to force the last checkpoint to be
            saved regardless of interval. Default: True.
        sync_buffer (bool, optional): Whether to synchronize buffers in
            different gpus. Default: False.
        file_client_args (dict, optional): Arguments to instantiate a
            FileClient. See :class:`mmcv.fileio.FileClient` for details.
            Default: None.
            `New in version 1.3.16.`
        **kwargs: Extra keyword arguments forwarded to
            ``runner.save_checkpoint``. The keys ``create_symlink`` and
            ``filename_tmpl`` are also read by this hook.

    .. warning::
        Before v1.3.16, the ``out_dir`` argument indicates the path where the
        checkpoint is stored. However, since v1.3.16, ``out_dir`` indicates the
        root directory and the final path to save checkpoint is the
        concatenation of ``out_dir`` and the last level directory of
        ``runner.work_dir``. Suppose the value of ``out_dir`` is "/path/of/A"
        and the value of ``runner.work_dir`` is "/path/of/B", then the final
        path will be "/path/of/A/B".
    """

    def __init__(self,
                 interval=-1,
                 by_epoch=True,
                 save_optimizer=True,
                 out_dir=None,
                 max_keep_ckpts=-1,
                 save_last=True,
                 sync_buffer=False,
                 file_client_args=None,
                 **kwargs):
        self.interval = interval
        self.by_epoch = by_epoch
        self.save_optimizer = save_optimizer
        self.out_dir = out_dir
        self.max_keep_ckpts = max_keep_ckpts
        self.save_last = save_last
        self.args = kwargs
        self.sync_buffer = sync_buffer
        self.file_client_args = file_client_args

    def before_run(self, runner):
        """Resolve the output directory, file client and symlink option."""
        if not self.out_dir:
            self.out_dir = runner.work_dir

        self.file_client = FileClient.infer_client(self.file_client_args,
                                                   self.out_dir)

        # if `self.out_dir` is not equal to `runner.work_dir`, it means that
        # `self.out_dir` is set so the final `self.out_dir` is the
        # concatenation of `self.out_dir` and the last level directory of
        # `runner.work_dir`
        if self.out_dir != runner.work_dir:
            basename = osp.basename(runner.work_dir.rstrip(osp.sep))
            self.out_dir = self.file_client.join_path(self.out_dir, basename)

        print_log((f'Checkpoints will be saved to {self.out_dir} by '
                   f'{self.file_client.name}.'), logger=runner.logger)

        # disable the create_symlink option because some file backends do not
        # allow to create a symlink
        if 'create_symlink' in self.args:
            if self.args[
                'create_symlink'] and not self.file_client.allow_symlink:
                self.args['create_symlink'] = False
                # NOTE: the literals are concatenated, so each fragment must
                # end with a space (the original text read "changedto").
                warnings.warn(
                    ('create_symlink is set as True by the user but is changed '
                     'to be False because creating symbolic link is not '
                     f'allowed in {self.file_client.name}'))
        else:
            self.args['create_symlink'] = self.file_client.allow_symlink

    def after_train_epoch(self, runner):
        """Save a checkpoint at the end of an epoch when ``by_epoch`` is set."""
        if not self.by_epoch:
            return

        # save checkpoint for following cases:
        # 1. every ``self.interval`` epochs
        # 2. reach the last epoch of training
        if self.every_n_epochs(
                runner, self.interval) or (self.save_last
                                           and self.is_last_epoch(runner)):
            print_log(
                f'Saving checkpoint at {runner.epoch + 1} epochs', logger=runner.logger)
            if self.sync_buffer:
                allreduce_params(runner.model.buffers())
            self._save_checkpoint(runner)

    @master_only
    def _save_checkpoint(self, runner):
        """Save the current checkpoint and delete unwanted checkpoint."""
        runner.save_checkpoint(
            self.out_dir, save_optimizer=self.save_optimizer, **self.args)
        # record the path of the checkpoint just written in the runner meta
        if runner.meta is not None:
            if self.by_epoch:
                cur_ckpt_filename = self.args.get(
                    'filename_tmpl', 'epoch_{}.pth').format(runner.epoch + 1)
            else:
                cur_ckpt_filename = self.args.get(
                    'filename_tmpl', 'iter_{}.pth').format(runner.iter + 1)
            runner.meta.setdefault('hook_msgs', dict())
            runner.meta['hook_msgs']['last_ckpt'] = self.file_client.join_path(
                self.out_dir, cur_ckpt_filename)
        # remove other checkpoints
        if self.max_keep_ckpts > 0:
            if self.by_epoch:
                name = 'epoch_{}.pth'
                current_ckpt = runner.epoch + 1
            else:
                name = 'iter_{}.pth'
                current_ckpt = runner.iter + 1
            # walk backwards from the newest checkpoint that falls outside
            # the keep window and stop at the first file that is missing
            redundant_ckpts = range(
                current_ckpt - self.max_keep_ckpts * self.interval, 0,
                -self.interval)
            filename_tmpl = self.args.get('filename_tmpl', name)
            for _step in redundant_ckpts:
                ckpt_path = self.file_client.join_path(
                    self.out_dir, filename_tmpl.format(_step))
                if self.file_client.isfile(ckpt_path):
                    self.file_client.remove(ckpt_path)
                else:
                    break

    def after_train_iter(self, runner):
        """Save a checkpoint after an iteration when ``by_epoch`` is False."""
        if self.by_epoch:
            return

        # save checkpoint for following cases:
        # 1. every ``self.interval`` iterations
        # 2. reach the last iteration of training
        if self.every_n_iters(
                runner, self.interval) or (self.save_last
                                           and self.is_last_iter(runner)):
            print_log(
                f'Saving checkpoint at {runner.iter + 1} iterations', logger=runner.logger)
            if self.sync_buffer:
                allreduce_params(runner.model.buffers())
            self._save_checkpoint(runner)


@HOOKS.register_module()
class ModelCheckpoint(Hook):
    """Hook that saves checkpoints ranked by a single monitored metric.

    After every training epoch the hook reads ``runner.metrics``, sets
    ``runner.earlyStop`` from the ``grad_norm`` entry and calls
    :meth:`save_checkpoint`, which maintains a text file named ``checkpoint``
    inside ``runner.work_dir`` and writes ``<epoch>.pth.tar`` files.
    """

    # comparison used to decide whether an entry is at least as good as another
    rule_map = {'greater': lambda x, y: x >= y, 'less': lambda x, y: x <= y}
    # reduction that keeps the better of two values under each rule
    indicator_rule_map = {'greater': lambda x, y: max(x, y), 'less': lambda x, y: min(x, y)}
    # metric names for which a larger value is considered better
    _default_greater_keys = [
        'acc', 'top', 'AR@', 'auc', 'precision', 'mAP', 'mDice', 'mIoU',
        'mAcc', 'aAcc', 'psnr', 'ssim', 'q'
    ]
    # starting "best" value for each rule
    _default_best_prec1 = {'greater': -inf, 'less': inf}
    # metric names for which a smaller value is considered better
    _default_less_keys = ['loss', 'sam', 'ergas']

    def __init__(self, indicator: str, formatter_filename="model_best_{epoch},{best_metric}", print_freq=1, save_top_k: int=1,
                 greater_keys=None, less_keys=None, best_prec1=None, best_epoch=0, sync_buffer=False):
        '''Configure the monitored metric and the comparison rule.

        Args:
            indicator (str): Name of the metric used for ranking. ``'top'`` is
                stored as ``'top-1'`` in ``self.indicator``; the rule is
                inferred from the value passed in.
            formatter_filename (str): Format string used for each line
                written to the ``checkpoint`` file.
            print_freq (int): A checkpoint is written when
                ``runner.epoch % print_freq == 0`` (or when the epoch is
                flagged as best).
            save_top_k (int): Number of best entries to track.
                If ``save_top_k == 0``, no ranking is kept.
                NOTE(review): this docstring used to state that
                ``save_top_k == -1`` saves all models, but
                ``save_checkpoint`` raises ``ValueError`` for any negative
                value.
            greater_keys (str | list | tuple, optional): Metric names for
                which larger is better; custom values are lower-cased.
                Defaults to ``_default_greater_keys``.
            less_keys (str | list | tuple, optional): Metric names for which
                smaller is better; custom values are lower-cased. Defaults to
                ``_default_less_keys``.
            best_prec1 (float, optional): Initial best value; defaults to
                ``-inf`` / ``inf`` depending on the inferred rule.
            best_epoch (int): Initial best epoch.
            sync_buffer (bool): Whether to all-reduce the model buffers
                before saving.

        Raises:
            ValueError: If neither an exact nor a substring match of
                ``indicator`` is found in the greater/less key lists.
        '''
        self.best_epoch = best_epoch
        self.print_freq = print_freq
        self.save_top_k = save_top_k
        self.sync_buffer = sync_buffer
        self.indicator = 'top-1' if indicator == 'top' else indicator
        self.formatter_filename = formatter_filename

        # indicator_lc = indicator.lower()

        if greater_keys is None:
            greater_keys = ModelCheckpoint._default_greater_keys
        else:
            if not isinstance(greater_keys, (list, tuple)):
                greater_keys = (greater_keys,)
            # assert is_seq_of(greater_keys, str)
            greater_keys = [key.lower() for key in greater_keys]

        if less_keys is None:
            less_keys = self._default_less_keys
        else:
            if not isinstance(less_keys, (list, tuple)):
                less_keys = (less_keys,)
            # assert is_seq_of(less_keys, str)
            less_keys = [key.lower() for key in less_keys]

        # exact matches take precedence over substring matches
        if indicator in greater_keys:
            rule = 'greater'
        elif indicator in less_keys:
            rule = 'less'
        elif any(key in indicator for key in greater_keys):
            rule = 'greater'
        elif any(key in indicator for key in less_keys):
            rule = 'less'
        else:
            raise ValueError(f'Cannot infer the rule for key '
                             f'{indicator}, thus a specific rule '
                             f'must be specified.')
        self.best_prec1 = self._default_best_prec1[rule] if best_prec1 is None else best_prec1
        self.compare_func = self.rule_map[rule]
        self.indicator_func = self.indicator_rule_map[rule]
        self.rule = rule

    def before_run(self, runner):
        """Remember the output directory and create it if necessary."""
        self.save_model_path = runner.work_dir
        # text file that lists the tracked best entries
        self.ckpt = os.path.join(self.save_model_path, 'checkpoint')
        os.makedirs(self.save_model_path, exist_ok=True)
        print_log(f'Checkpoints will be saved to {self.save_model_path}', logger=runner.logger)


    def earlyStopping(self, avg_grad_norm):
        """Return True when ``avg_grad_norm`` exceeds 100.

        NOTE(review): falls through and returns ``None`` (not ``False``)
        otherwise.
        """

        if avg_grad_norm > 100:
            return True


    def after_train_epoch(self, runner):
        """Update ``runner.earlyStop`` and save a checkpoint for this epoch."""
        if self.sync_buffer:
            allreduce_params(runner.model.buffers())
        metrics = runner.metrics# metrics = {k: meter.avg for k, meter in runner.log_buffer.meters.items()}
        # a missing 'grad_norm' entry is treated as 0
        runner.earlyStop = self.earlyStopping(metrics.get('grad_norm', 0))
        self.save_checkpoint(runner, metrics)

        # print_log(' * Best training metrics so far@ {best_metric} in epoch {best_epoch}'.format(
        #     best_metric=metrics['best_metric'], best_epoch=metrics['best_epoch']), logger=runner.logger)

    def _save_checkpoint(self, meta, out_dir, filename, is_best, create_symlink=True):
        """Write ``meta`` to ``out_dir/filename`` and refresh ``model_best_.pth``.

        Args:
            meta (dict | None): State passed to ``save_checkpoint`` as
                ``meta``; ``None`` is replaced by an empty dict.
            out_dir (str): Directory that receives the file.
            filename (str): Name of the checkpoint file.
            is_best (bool): Whether this checkpoint is flagged as best.
            create_symlink (bool): With the default ``True`` the
                ``model_best_.pth`` link/copy is refreshed on every call,
                regardless of ``is_best``.

        Raises:
            TypeError: If ``meta`` is neither a dict nor ``None``.
        """
        if meta is None:
            meta = {}
        elif not isinstance(meta, dict):
            raise TypeError(
                f'meta should be a dict or None, but got {type(meta)}')
        # meta.update(epoch=meta.pop('epoch') + 1, iter=meta.pop('iter'))
        filepath = os.path.join(out_dir, filename)
        # save_checkpoint(meta.pop('model'), filepath, optimizer=meta.pop('optimizer'), meta=meta)
        save_checkpoint(filepath, meta=meta)
        if create_symlink or is_best:
            dst_file = os.path.join(out_dir, 'model_best_.pth')
            # symlink where the platform is not Windows, plain copy on Windows
            if platform.system() != 'Windows':
                mmcv.symlink(filename, dst_file)
            else:
                shutil.copy(filepath, dst_file)

    @master_only
    def save_checkpoint(self, runner, metrics):
        """Build the checkpoint state for this epoch and update the top-k list.

        Args:
            runner: The runner; its ``model``, ``optimizer``, ``epoch``,
                ``iter``, ``_epoch`` and ``metrics`` attributes are read.
            metrics (dict): Metric values of the epoch; must contain
                ``'loss'``.

        Returns:
            dict | None: The saved state when ``save_top_k >= 1``; nothing is
            returned explicitly when ``save_top_k == 0``.

        Raises:
            ValueError: If ``save_top_k`` is negative.
        """
        # `flag` marks the case where runner.model wraps a dict of models
        flag = False
        if not hasattr(runner.model, 'train') and isinstance(runner.model.model, dict):
            flag = True
            stats = {}
            for k, m in runner.model.model.items():
                stats[k] = {
                    'epoch': runner.epoch,
                    'iter': runner.iter,
                    'model': m,
                    'best_metric': {name: value for name, value in metrics.items() if
                                    name not in ['grad_norm', 'lr', 'time', 'data_time']},
                    # values of several metrics are stored, but only one of
                    # them is actually used for the comparison
                    'loss': metrics['loss'],
                    'best_epoch': runner._epoch,
                    'optimizer': runner.optimizer[k]
                }
                runner.metrics.update(
                    {'best_metric': {k: stats[k]['best_metric']}, 'best_epoch': {k: stats[k]['best_epoch']}})
        else:
            stats = {
                'epoch': runner.epoch,
                'iter': runner.iter,
                'model': runner.model,
                'best_metric': {name: value for name, value in metrics.items() if
                                name not in ['grad_norm', 'lr', 'time', 'data_time']},
                # values of several metrics are stored, but only one of them
                # is actually used for the comparison
                'loss': metrics['loss'],
                'best_epoch': runner._epoch,
                'optimizer': runner.optimizer
            }
            runner.metrics.update(best_metric=stats['best_metric'], best_epoch=stats['best_epoch'])

        new_best_k_model_flag = []
        indicator = self.indicator
        save_top_k = self.save_top_k
        # stats should be {epoch: X, score: Y} -> [epoch, score]
        assert isinstance(stats, dict), print(f"stats in model_checkpoint should be dict but be {type(stats)}")
        # stats = list(stats.values())
        # NOTE(review): presumably returns a list of [epoch, metrics, filename]
        # entries parsed from the `checkpoint` file -- confirm against
        # get_best_k_model
        best_k_model, _ = get_best_k_model(self.save_model_path + "/checkpoint", indicator)

        # print(best_k_model)
        if save_top_k < 0:
            raise ValueError(f"Invalid value for save_top_k={save_top_k}. Must be >= 0")
        if save_top_k == 0:
            stats['best_metric'] = self._default_best_prec1[self.rule]
            stats['best_epoch'] = 0
            self._save_checkpoint(stats, self.save_model_path, is_best=False, filename=f"{stats['epoch']}.pth.tar")

        if save_top_k >= 1:
            # self.best_prec1 = self.indicator_func(self.best_prec1, stats[self.indicator])
            if len(best_k_model) >= save_top_k:
                # reverse=True means descending order; default: False
                # best_k_model is sorted through indices, so it has to be a
                # list for the indices to be returned
                best_k_model.append([stats['epoch'], stats['best_metric'], None])
                sortedIndex_best_k_model = sorted(range(len(best_k_model)),
                                                  key=lambda k: float(best_k_model[k][1][indicator]),
                                                  reverse=self.rule == "less")
                # print(sortedIndex_best_k_model)
                # True for every tracked entry that is strictly worse than
                # the current epoch under the active rule
                new_best_k_model_flag = [
                    not self.compare_func(float(query_score[indicator]), stats['best_metric'][indicator]) for
                    _, query_score, _ in best_k_model]
                # print(new_best_k_model_flag)

                # ckpt_stats = [] # {}
                # keys would collide and make popitem fail
                # ckpt_stats[str(stats['epoch'])] = stats[indicator]
                ckpt_stats = [stats['epoch'], stats['best_metric']]

                # replace the first flagged entry (in sorted order) with the
                # current epoch and delete the file of the replaced entry
                for index in sortedIndex_best_k_model:
                    if new_best_k_model_flag[index]:
                        # top_k_count += 1
                        # best_k_model[indicator][index] = stats[indicator]
                        # best_k_model['epoch'][index] = stats['epoch']
                        # best_k_model.pop(str(index))
                        # best_k_model.update(ckpt_stats)
                        # best_k_model[index] = list(ckpt_stats.popitem())
                        fname = self.save_model_path + "/" + best_k_model[index][2]
                        ckpt_stats.append(None)
                        best_k_model[index] = ckpt_stats

                        if os.path.isfile(fname):
                            os.remove(fname)
                        break
                # the last index in sorted order is reported as the best entry
                stats['best_epoch'], stats['best_metric'] = best_k_model[sortedIndex_best_k_model[-1]][:2]
                # drop the temporary entry appended above
                best_k_model = best_k_model[:-1]
                # best_k_model = [{'epoch': k, 'score': v} for k, v in best_k_model.items()]
                best_k_model = [{'epoch': epoch, 'best_metric': score} for (epoch, score, _) in best_k_model]
                # rewrite the whole `checkpoint` file with the updated list
                with open(self.ckpt, 'w') as f:
                    outs = [self.formatter_filename.format(**line) + "\n" for line in best_k_model]
                    f.writelines(outs)
            else:
                # fewer than save_top_k entries so far: just append a line
                if not flag:
                    with open(self.ckpt, 'a') as f:
                        outs = self.formatter_filename.format(**stats) + "\n"
                        f.writelines(outs)
                # early in training, while fewer than top-k entries exist:
                # should the model be saved?
                # if save_top_k == 1:
                # if len(best_k_model) < save_top_k:
                #     new_best_k_model_flag = [True]

            is_best = any(new_best_k_model_flag)
            if runner.epoch % self.print_freq == 0 or is_best:
                self._save_checkpoint(
                        stats, out_dir=self.save_model_path, is_best=is_best, filename=f"{runner.epoch}.pth.tar")

                if not flag:
                    print_log(' * Best training metrics so far@ {best_metric} in epoch {best_epoch}'.format(
                        best_metric=stats['best_metric'], best_epoch=stats['best_epoch']), logger=runner.logger
                    )

            return stats

    def after_train_iter(self, runner):
        """Call ``free()`` on specific wrapped models after each iteration.

        NOTE(review): the purpose of ``free()`` is not visible here; it is
        invoked on an ``INN`` model, or on the ``'PAN2MS'`` entry when the
        runner model wraps a dict of models.
        """
        if hasattr(runner.model, 'train'):
            if type(runner.model.module.model).__name__ == 'INN':
                runner.model.module.model.free()
        else:
            if isinstance(runner.model.model, dict):
                runner.model.model['PAN2MS'].module.free()
    # raise NotImplementedError("after_train_iter is not implemented by ModelCheckpoint (customed)")


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/hooks/closure.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
from .hook import HOOKS, Hook


@HOOKS.register_module()
class ClosureHook(Hook):
    """Hook that replaces one of its own attributes with a given callable.

    Args:
        fn_name (str): Name of an attribute that already exists on the hook
            (for example a stage method inherited from ``Hook``).
        fn (callable): Object stored under ``fn_name`` on this instance.
            It is set on the instance, not on the class, so a plain function
            is later called without an implicit ``self`` argument.

    Raises:
        AssertionError: If the hook has no attribute ``fn_name`` or ``fn`` is
            not callable.
    """

    def __init__(self, fn_name, fn):
        # Only attributes the hook already has may be overridden.
        assert hasattr(self, fn_name)
        assert callable(fn)
        setattr(self, fn_name, fn)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/hooks/ema.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
from ...parallel import is_module_wrapper
from ..hooks.hook import HOOKS, Hook


@HOOKS.register_module()
class EMAHook(Hook):
    r"""Exponential Moving Average Hook.

    Apply an Exponential Moving Average to all parameters of the model
    during training. Every parameter has an EMA backup, which is updated by
    the formula below. EMAHook takes priority over EvalHook and
    CheckpointSaverHook.

        .. math::

            Xema\_{t+1} = (1 - \text{momentum}) \times
            Xema\_{t} +  \text{momentum} \times X_t

    Args:
        momentum (float): The momentum used for updating ema parameter.
            Defaults to 0.0002.
        interval (int): Update ema parameter every interval iteration.
            Defaults to 1.
        warm_up (int): During first warm_up steps, we may use smaller momentum
            to update ema parameters more slowly. Defaults to 100.
        resume_from (str): The checkpoint path. Defaults to None.
    """

    def __init__(self,
                 momentum=0.0002,
                 interval=1,
                 warm_up=100,
                 resume_from=None):
        assert isinstance(interval, int) and interval > 0
        self.warm_up = warm_up
        self.interval = interval
        assert momentum > 0 and momentum < 1
        self.momentum = momentum**interval
        self.checkpoint = resume_from

    def before_run(self, runner):
        """Create one EMA buffer per model parameter before training starts.

        Each parameter gets a buffer named ``ema_<name>`` (dots in the
        parameter name replaced by underscores) that is registered on the
        model and initialised with a copy of the parameter. Registering the
        EMA values as buffers lets them travel with the model state when
        resuming.

        Args:
            runner: The runner whose model is instrumented. If
                ``self.checkpoint`` is set, ``runner.resume`` is called with
                it once the buffers exist.
        """
        net = runner.model
        if is_module_wrapper(net):
            # Work on the wrapped module, not the parallel wrapper.
            net = net.module

        self.param_ema_buffer = name_to_buffer = {}
        self.model_parameters = dict(net.named_parameters(recurse=True))

        for param_name, param in self.model_parameters.items():
            # "." is not allowed in a module's buffer name.
            ema_name = 'ema_' + param_name.replace('.', '_')
            name_to_buffer[param_name] = ema_name
            net.register_buffer(ema_name, param.data.clone())

        self.model_buffers = dict(net.named_buffers(recurse=True))

        if self.checkpoint is None:
            return
        runner.resume(self.checkpoint)

    def after_train_iter(self, runner):
        """Update ema parameter every self.interval iterations."""
        curr_step = runner.iter
        # We warm up the momentum considering the instability at beginning
        momentum = min(self.momentum,
                       (1 + curr_step) / (self.warm_up + curr_step))
        if curr_step % self.interval != 0:
            return
        for name, parameter in self.model_parameters.items():
            buffer_name = self.param_ema_buffer[name]
            buffer_parameter = self.model_buffers[buffer_name]
            buffer_parameter.mul_(1 - momentum).add_(momentum, parameter.data)

    def after_train_epoch(self, runner):
        """Swap the EMA values into the model at the end of a training epoch.

        After the swap the model parameters hold the EMA values and the EMA
        buffers hold the raw trained values. The intent stated by the original
        authors is that this happens before the EvalHook runs.

        Args:
            runner: The training runner (not used here).
        """
        self._swap_ema_parameters()

    def before_train_epoch(self, runner):
        """Swap model parameters and EMA buffers at the start of an epoch.

        This is the counterpart of ``after_train_epoch``: it exchanges the two
        sets of values again, which restores the raw trained parameters after
        the previous epoch's swap.

        Args:
            runner: The training runner (not used here).
        """
        self._swap_ema_parameters()

    def _swap_ema_parameters(self):
        """Swap the parameter of model with parameter in ema_buffer."""
        for name, value in self.model_parameters.items():
            temp = value.data.clone()
            ema_buffer = self.model_buffers[self.param_ema_buffer[name]]
            value.data.copy_(ema_buffer.data)
            ema_buffer.data.copy_(temp)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/hooks/evaluation.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
import warnings
from math import inf

import torch.distributed as dist
from torch.nn.modules.batchnorm import _BatchNorm
from torch.utils.data import DataLoader

from mmcv.fileio import FileClient
from mmcv.utils import is_seq_of
from .hook import Hook
from .logger import LoggerHook


class EvalHook(Hook):
    """Non-Distributed evaluation hook.

    This hook will regularly perform evaluation in a given interval when
    performing in non-distributed environment.

    Args:
        dataloader (DataLoader): A PyTorch dataloader, whose dataset has
            implemented ``evaluate`` function.
        start (int | None, optional): Evaluation starting epoch. It enables
            evaluation before the training starts if ``start`` <= the resuming
            epoch. If None, whether to evaluate is merely decided by
            ``interval``. Default: None.
        interval (int): Evaluation interval. Default: 1.
        by_epoch (bool): Determine perform evaluation by epoch or by iteration.
            If set to True, it will perform by epoch. Otherwise, by iteration.
            Default: True.
        save_best (str, optional): If a metric is specified, it would measure
            the best checkpoint during evaluation. The information about best
            checkpoint would be saved in ``runner.meta['hook_msgs']`` to keep
            best score value and best checkpoint path, which will be also
            loaded when resume checkpoint. Options are the evaluation metrics
            on the test dataset. e.g., ``bbox_mAP``, ``segm_mAP`` for bbox
            detection and instance segmentation. ``AR@100`` for proposal
            recall. If ``save_best`` is ``auto``, the first key of the returned
            ``OrderedDict`` result will be used. Default: None.
        rule (str | None, optional): Comparison rule for best score. If set to
            None, a reasonable rule is inferred. Keys such as 'acc', 'top',
            etc. are inferred to use the 'greater' rule. Keys containing
            'loss' are inferred to use the 'less' rule. Options are
            'greater', 'less', None. Default: None.
        test_fn (callable, optional): test a model with samples from a
            dataloader, and return the test results. If ``None``, the default
            test function ``mmcv.engine.single_gpu_test`` will be used.
            (default: ``None``)
        greater_keys (List[str] | None, optional): Metric keys that will be
            inferred by 'greater' comparison rule. If ``None``,
            _default_greater_keys will be used. (default: ``None``)
        less_keys (List[str] | None, optional): Metric keys that will be
            inferred by 'less' comparison rule. If ``None``, _default_less_keys
            will be used. (default: ``None``)
        out_dir (str, optional): The root directory to save checkpoints. If not
            specified, `runner.work_dir` will be used by default. If specified,
            the `out_dir` will be the concatenation of `out_dir` and the last
            level directory of `runner.work_dir`.
            `New in version 1.3.16.`
        file_client_args (dict): Arguments to instantiate a FileClient.
            See :class:`mmcv.fileio.FileClient` for details. Default: None.
            `New in version 1.3.16.`
        **eval_kwargs: Evaluation arguments fed into the evaluate function of
            the dataset.

    Note:
        If new arguments are added for EvalHook, tools/test.py,
        tools/eval_metric.py may be affected.
    """

    # Which metric keys count as "greater is better" or "less is better"
    # depends on the downstream task, so downstream repos may need to
    # override the following class variables accordingly.

    rule_map = {'greater': lambda x, y: x > y, 'less': lambda x, y: x < y}
    init_value_map = {'greater': -inf, 'less': inf}
    _default_greater_keys = [
        'acc', 'top', 'AR@', 'auc', 'precision', 'mAP', 'mDice', 'mIoU',
        'mAcc', 'aAcc'
    ]
    _default_less_keys = ['loss']

    def __init__(self,
                 dataloader,
                 start=None,
                 interval=1,
                 by_epoch=True,
                 save_best=None,
                 rule=None,
                 test_fn=None,
                 greater_keys=None,
                 less_keys=None,
                 out_dir=None,
                 file_client_args=None,
                 **eval_kwargs):
        if not isinstance(dataloader, DataLoader):
            raise TypeError(f'dataloader must be a pytorch DataLoader, '
                            f'but got {type(dataloader)}')

        if interval <= 0:
            raise ValueError(f'interval must be a positive number, '
                             f'but got {interval}')

        assert isinstance(by_epoch, bool), '``by_epoch`` should be a boolean'

        if start is not None and start < 0:
            raise ValueError(f'The evaluation start epoch {start} is smaller '
                             f'than 0')

        self.dataloader = dataloader
        self.interval = interval
        self.start = start
        self.by_epoch = by_epoch

        assert isinstance(save_best, str) or save_best is None, \
            '""save_best"" should be a str or None ' \
            f'rather than {type(save_best)}'
        self.save_best = save_best
        self.eval_kwargs = eval_kwargs
        self.initial_flag = True

        if test_fn is None:
            from mmcv.engine import single_gpu_test
            self.test_fn = single_gpu_test
        else:
            self.test_fn = test_fn

        if greater_keys is None:
            self.greater_keys = self._default_greater_keys
        else:
            if not isinstance(greater_keys, (list, tuple)):
                greater_keys = (greater_keys, )
            assert is_seq_of(greater_keys, str)
            self.greater_keys = greater_keys

        if less_keys is None:
            self.less_keys = self._default_less_keys
        else:
            if not isinstance(less_keys, (list, tuple)):
                less_keys = (less_keys, )
            assert is_seq_of(less_keys, str)
            self.less_keys = less_keys

        if self.save_best is not None:
            self.best_ckpt_path = None
            self._init_rule(rule, self.save_best)

        self.out_dir = out_dir
        self.file_client_args = file_client_args

    def _init_rule(self, rule, key_indicator):
        """Initialize rule, key_indicator, comparison_func, and best score.

        Here is the rule to determine which rule is used for key indicator
        when the rule is not specific (note that the key indicator matching
        is case-insensitive):
        1. If the key indicator is in ``self.greater_keys``, the rule will be
           specified as 'greater'.
        2. Or if the key indicator is in ``self.less_keys``, the rule will be
           specified as 'less'.
        3. Or if the key indicator is equal to the substring in any one item
           in ``self.greater_keys``, the rule will be specified as 'greater'.
        4. Or if the key indicator is equal to the substring in any one item
           in ``self.less_keys``, the rule will be specified as 'less'.

        Args:
            rule (str | None): Comparison rule for best score.
            key_indicator (str | None): Key indicator to determine the
                comparison rule.
        """
        if rule not in self.rule_map and rule is not None:
            raise KeyError(f'rule must be greater, less or None, '
                           f'but got {rule}.')

        if rule is None:
            if key_indicator != 'auto':
                # `_lc` here means we use the lower case of keys for
                # case-insensitive matching
                key_indicator_lc = key_indicator.lower()
                greater_keys = [key.lower() for key in self.greater_keys]
                less_keys = [key.lower() for key in self.less_keys]

                if key_indicator_lc in greater_keys:
                    rule = 'greater'
                elif key_indicator_lc in less_keys:
                    rule = 'less'
                elif any(key in key_indicator_lc for key in greater_keys):
                    rule = 'greater'
                elif any(key in key_indicator_lc for key in less_keys):
                    rule = 'less'
                else:
                    raise ValueError(f'Cannot infer the rule for key '
                                     f'{key_indicator}, thus a specific rule '
                                     f'must be specified.')
        self.rule = rule
        self.key_indicator = key_indicator
        if self.rule is not None:
            self.compare_func = self.rule_map[self.rule]

    def before_run(self, runner):
        """Resolve the output directory, file client and best-ckpt state.

        * ``self.out_dir`` falls back to ``runner.work_dir`` when unset.
        * A file client is inferred from ``self.file_client_args`` and
          ``self.out_dir``.
        * When ``self.out_dir`` differs from ``runner.work_dir``, the last
          path component of ``runner.work_dir`` is appended to it.
        * When ``save_best`` is set, ``runner.meta['hook_msgs']`` is created
          if needed and the previously recorded best checkpoint path is read
          from it.

        Args:
            runner: The runner about to start.
        """
        work_dir = runner.work_dir
        if not self.out_dir:
            self.out_dir = work_dir

        self.file_client = FileClient.infer_client(self.file_client_args,
                                                   self.out_dir)

        # A user-supplied ``out_dir`` acts as a root directory: the run's own
        # directory name is appended to it.
        if self.out_dir != work_dir:
            run_name = osp.basename(work_dir.rstrip(osp.sep))
            self.out_dir = self.file_client.join_path(self.out_dir, run_name)
            runner.logger.info(
                (f'The best checkpoint will be saved to {self.out_dir} by '
                 f'{self.file_client.name}'))

        if self.save_best is None:
            return

        if runner.meta is None:
            warnings.warn('runner.meta is None. Creating an empty one.')
            runner.meta = dict()
        hook_msgs = runner.meta.setdefault('hook_msgs', dict())
        self.best_ckpt_path = hook_msgs.get('best_ckpt', None)

    def before_train_iter(self, runner):
        """Evaluate the model only at the start of training by iteration."""
        if self.by_epoch or not self.initial_flag:
            return
        if self.start is not None and runner.iter >= self.start:
            self.after_train_iter(runner)
        self.initial_flag = False

    def before_train_epoch(self, runner):
        """Evaluate the model only at the start of training by epoch."""
        if not (self.by_epoch and self.initial_flag):
            return
        if self.start is not None and runner.epoch >= self.start:
            self.after_train_epoch(runner)
        self.initial_flag = False

    def after_train_iter(self, runner):
        """Called after every training iter to evaluate the results."""
        if not self.by_epoch and self._should_evaluate(runner):
            # Because the priority of EvalHook is higher than LoggerHook, the
            # training log and the evaluating log are mixed. Therefore,
            # we need to dump the training log and clear it before evaluating
            # log is generated. In addition, this problem will only appear in
            # `IterBasedRunner` whose `self.by_epoch` is False, because
            # `EpochBasedRunner` whose `self.by_epoch` is True calls
            # `_do_evaluate` in `after_train_epoch` stage, and at this stage
            # the training log has been printed, so it will not cause any
            # problem. more details at
            # https://github.com/open-mmlab/mmsegmentation/issues/694
            for hook in runner._hooks:
                if isinstance(hook, LoggerHook):
                    hook.after_train_iter(runner)
            runner.log_buffer.clear()

            self._do_evaluate(runner)

    def after_train_epoch(self, runner):
        """Called after every training epoch; currently a no-op.

        The epoch-based evaluation below is commented out in this copy of the
        hook, so nothing is evaluated here. As a consequence the call made
        from ``before_train_epoch`` has no effect either.

        NOTE(review): presumably epoch-level evaluation is handled elsewhere
        in this toolbox -- confirm before re-enabling.
        """
        # Disabled epoch-based evaluation, kept for reference:
        # if self.by_epoch and self._should_evaluate(runner):
        #     self._do_evaluate(runner)
        ...

    def _do_evaluate(self, runner):
        """perform evaluation and save ckpt."""
        results = self.test_fn(runner.model, self.dataloader)
        runner.log_buffer.output['eval_iter_num'] = len(self.dataloader)
        key_score = self.evaluate(runner, results)
        # the key_score may be `None` so it needs to skip the action to save
        # the best checkpoint
        if self.save_best and key_score:
            self._save_ckpt(runner, key_score)

    def _should_evaluate(self, runner):
        """Judge whether to perform evaluation.

        Here is the rule to judge whether to perform evaluation:
        1. It will not perform evaluation during the epoch/iteration interval,
           which is determined by ``self.interval``.
        2. It will not perform evaluation if the start time is larger than
           current time.
        3. It will not perform evaluation when current time is larger than
           the start time but during epoch/iteration interval.

        Returns:
            bool: The flag indicating whether to perform evaluation.
        """
        if self.by_epoch:
            current = runner.epoch
            check_time = self.every_n_epochs
        else:
            current = runner.iter
            check_time = self.every_n_iters

        if self.start is None:
            if not check_time(runner, self.interval):
                # No evaluation during the interval.
                return False
        elif (current + 1) < self.start:
            # No evaluation if start is larger than the current time.
            return False
        else:
            # Evaluation only at epochs/iters 3, 5, 7...
            # if start==3 and interval==2
            if (current + 1 - self.start) % self.interval:
                return False
        return True

    def _save_ckpt(self, runner, key_score):
        """Save the best checkpoint.

        It will compare the score according to the compare function, write
        related information (best score, best checkpoint path) and save the
        best checkpoint into ``work_dir``.
        """
        if self.by_epoch:
            current = f'epoch_{runner.epoch + 1}'
            cur_type, cur_time = 'epoch', runner.epoch + 1
        else:
            current = f'iter_{runner.iter + 1}'
            cur_type, cur_time = 'iter', runner.iter + 1

        best_score = runner.meta['hook_msgs'].get(
            'best_score', self.init_value_map[self.rule])
        if self.compare_func(key_score, best_score):
            best_score = key_score
            runner.meta['hook_msgs']['best_score'] = best_score

            if self.best_ckpt_path and self.file_client.isfile(
                    self.best_ckpt_path):
                self.file_client.remove(self.best_ckpt_path)
                runner.logger.info(
                    (f'The previous best checkpoint {self.best_ckpt_path} was '
                     'removed'))

            best_ckpt_name = f'best_{self.key_indicator}_{current}.pth'
            self.best_ckpt_path = self.file_client.join_path(
                self.out_dir, best_ckpt_name)
            runner.meta['hook_msgs']['best_ckpt'] = self.best_ckpt_path

            runner.save_checkpoint(
                self.out_dir, best_ckpt_name, create_symlink=False)
            runner.logger.info(
                f'Now best checkpoint is saved as {best_ckpt_name}.')
            runner.logger.info(
                f'Best {self.key_indicator} is {best_score:0.4f} '
                f'at {cur_time} {cur_type}.')

    def evaluate(self, runner, results):
        """Evaluate the results.

        Args:
            runner (:obj:`mmcv.Runner`): The underlined training runner.
            results (list): Output results.
        """
        eval_res = self.dataloader.dataset.evaluate(
            results, logger=runner.logger, **self.eval_kwargs)

        for name, val in eval_res.items():
            runner.log_buffer.output[name] = val
        runner.log_buffer.ready = True

        if self.save_best is not None:
            # If the performance of model is pool, the `eval_res` may be an
            # empty dict and it will raise exception when `self.save_best` is
            # not None. More details at
            # https://github.com/open-mmlab/mmdetection/issues/6265.
            if not eval_res:
                warnings.warn(
                    'Since `eval_res` is an empty dict, the behavior to save '
                    'the best checkpoint will be skipped in this evaluation.')
                return None

            if self.key_indicator == 'auto':
                # infer from eval_results
                self._init_rule(self.rule, list(eval_res.keys())[0])
            return eval_res[self.key_indicator]

        return None


class DistEvalHook(EvalHook):
    """Distributed evaluation hook.

    This hook will regularly perform evaluation in a given interval when
    performing in distributed environment.

    Args:
        dataloader (DataLoader): A PyTorch dataloader, whose dataset has
            implemented ``evaluate`` function.
        start (int | None, optional): Evaluation starting epoch. It enables
            evaluation before the training starts if ``start`` <= the resuming
            epoch. If None, whether to evaluate is merely decided by
            ``interval``. Default: None.
        interval (int): Evaluation interval. Default: 1.
        by_epoch (bool): Determine perform evaluation by epoch or by iteration.
            If set to True, it will perform by epoch. Otherwise, by iteration.
            default: True.
        save_best (str, optional): If a metric is specified, it would measure
            the best checkpoint during evaluation. The information about best
            checkpoint would be saved in ``runner.meta['hook_msgs']`` to keep
            best score value and best checkpoint path, which will be also
            loaded when resume checkpoint. Options are the evaluation metrics
            on the test dataset. e.g., ``bbox_mAP``, ``segm_mAP`` for bbox
            detection and instance segmentation. ``AR@100`` for proposal
            recall. If ``save_best`` is ``auto``, the first key of the returned
            ``OrderedDict`` result will be used. Default: None.
        rule (str | None, optional): Comparison rule for best score. If set to
            None, a reasonable rule is inferred. Keys such as 'acc', 'top',
            etc. are inferred to use the 'greater' rule. Keys containing
            'loss' are inferred to use the 'less' rule. Options are
            'greater', 'less', None. Default: None.
        test_fn (callable, optional): test a model with samples from a
            dataloader in a multi-gpu manner, and return the test results. If
            ``None``, the default test function ``mmcv.engine.multi_gpu_test``
            will be used. (default: ``None``)
        tmpdir (str | None): Temporary directory to save the results of all
            processes. Default: None.
        gpu_collect (bool): Whether to use gpu or cpu to collect results.
            Default: False.
        broadcast_bn_buffer (bool): Whether to broadcast the
            buffer(running_mean and running_var) of rank 0 to other rank
            before evaluation. Default: True.
        out_dir (str, optional): The root directory to save checkpoints. If not
            specified, `runner.work_dir` will be used by default. If specified,
            the `out_dir` will be the concatenation of `out_dir` and the last
            level directory of `runner.work_dir`.
        file_client_args (dict): Arguments to instantiate a FileClient.
            See :class:`mmcv.fileio.FileClient` for details. Default: None.
        **eval_kwargs: Evaluation arguments fed into the evaluate function of
            the dataset.
    """

    def __init__(self,
                 dataloader,
                 start=None,
                 interval=1,
                 by_epoch=True,
                 save_best=None,
                 rule=None,
                 test_fn=None,
                 greater_keys=None,
                 less_keys=None,
                 broadcast_bn_buffer=True,
                 tmpdir=None,
                 gpu_collect=False,
                 out_dir=None,
                 file_client_args=None,
                 **eval_kwargs):
        """Configure the base hook, then store the distributed-only options.

        All arguments other than ``broadcast_bn_buffer``, ``tmpdir`` and
        ``gpu_collect`` are forwarded to ``EvalHook.__init__``; see the class
        docstring for their meaning.
        """
        if test_fn is None:
            # Default to the multi-GPU test function from mmcv.engine.
            from mmcv.engine import multi_gpu_test
            test_fn = multi_gpu_test

        base_options = dict(
            start=start,
            interval=interval,
            by_epoch=by_epoch,
            save_best=save_best,
            rule=rule,
            test_fn=test_fn,
            greater_keys=greater_keys,
            less_keys=less_keys,
            out_dir=out_dir,
            file_client_args=file_client_args)
        # Extra keyword arguments are passed through to dataset.evaluate.
        base_options.update(eval_kwargs)
        super().__init__(dataloader, **base_options)

        self.broadcast_bn_buffer = broadcast_bn_buffer
        self.tmpdir, self.gpu_collect = tmpdir, gpu_collect

    def _do_evaluate(self, runner):
        """perform evaluation and save ckpt."""
        # Synchronization of BatchNorm's buffer (running_mean
        # and running_var) is not supported in the DDP of pytorch,
        # which may cause the inconsistent performance of models in
        # different ranks, so we broadcast BatchNorm's buffers
        # of rank 0 to other ranks to avoid this.
        if self.broadcast_bn_buffer:
            model = runner.model
            for name, module in model.named_modules():
                if isinstance(module,
                              _BatchNorm) and module.track_running_stats:
                    dist.broadcast(module.running_var, 0)
                    dist.broadcast(module.running_mean, 0)

        tmpdir = self.tmpdir
        if tmpdir is None:
            tmpdir = osp.join(runner.work_dir, '.eval_hook')

        results = self.test_fn(
            runner.model,
            self.dataloader,
            tmpdir=tmpdir,
            gpu_collect=self.gpu_collect)
        if runner.rank == 0:
            print('\n')
            runner.log_buffer.output['eval_iter_num'] = len(self.dataloader)
            key_score = self.evaluate(runner, results)
            # the key_score may be `None` so it needs to skip the action to
            # save the best checkpoint
            if self.save_best and key_score:
                self._save_ckpt(runner, key_score)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/hooks/hook.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
from mmcv.utils import Registry, is_method_overridden

HOOKS = Registry('hook')


class Hook:
    """Base class for runner hooks.

    ``stages`` lists the stage methods a runner may call. The four generic
    callbacks (``before_epoch``, ``after_epoch``, ``before_iter``,
    ``after_iter``) do nothing by default; every train/val stage method
    delegates to the matching generic callback, so overriding a generic
    callback covers both the train and the val stage.
    """

    stages = ('before_run', 'before_train_epoch', 'before_train_iter',
              'after_train_iter', 'after_train_epoch', 'before_val_epoch',
              'before_val_iter', 'after_val_iter', 'after_val_epoch',
              'after_run')

    # ---- callbacks that are no-ops unless overridden ---------------------

    def before_run(self, runner):
        """Stage callback; does nothing by default."""

    def after_run(self, runner):
        """Stage callback; does nothing by default."""

    def before_epoch(self, runner):
        """Generic callback shared by the train and val epoch starts."""

    def after_epoch(self, runner):
        """Generic callback shared by the train and val epoch ends."""

    def before_iter(self, runner):
        """Generic callback shared by the train and val iteration starts."""

    def after_iter(self, runner):
        """Generic callback shared by the train and val iteration ends."""

    # ---- mode-specific stages delegating to the generic callbacks --------

    def before_train_epoch(self, runner):
        """Delegate to ``before_epoch``."""
        self.before_epoch(runner)

    def before_val_epoch(self, runner):
        """Delegate to ``before_epoch``."""
        self.before_epoch(runner)

    def after_train_epoch(self, runner):
        """Delegate to ``after_epoch``."""
        self.after_epoch(runner)

    def after_val_epoch(self, runner):
        """Delegate to ``after_epoch``."""
        self.after_epoch(runner)

    def before_train_iter(self, runner):
        """Delegate to ``before_iter``."""
        self.before_iter(runner)

    def before_val_iter(self, runner):
        """Delegate to ``before_iter``."""
        self.before_iter(runner)

    def after_train_iter(self, runner):
        """Delegate to ``after_iter``."""
        self.after_iter(runner)

    def after_val_iter(self, runner):
        """Delegate to ``after_iter``."""
        self.after_iter(runner)

    # ---- periodicity helpers (runner counters are 0-based) ---------------

    def every_n_epochs(self, runner, n):
        """Return True when ``runner.epoch + 1`` is a multiple of ``n > 0``."""
        if n > 0:
            return (runner.epoch + 1) % n == 0
        return False

    def every_n_inner_iters(self, runner, n):
        """Return True when ``runner.inner_iter + 1`` is a multiple of
        ``n > 0``."""
        if n > 0:
            return (runner.inner_iter + 1) % n == 0
        return False

    def every_n_iters(self, runner, n):
        """Return True when ``runner.iter + 1`` is a multiple of ``n > 0``."""
        if n > 0:
            return (runner.iter + 1) % n == 0
        return False

    def end_of_epoch(self, runner):
        """Return True on the last inner iteration of the data loader."""
        return len(runner.data_loader) == runner.inner_iter + 1

    def is_last_epoch(self, runner):
        """Return True when the current epoch is the final one."""
        return runner._max_epochs == runner.epoch + 1

    def is_last_iter(self, runner):
        """Return True when the current iteration is the final one."""
        return runner._max_iters == runner.iter + 1

    def get_triggered_stages(self):
        """List, in ``Hook.stages`` order, the stages this hook overrides.

        A stage counts as triggered when its own method is overridden, or
        when the generic callback it delegates to is overridden.
        """
        triggered = {
            stage
            for stage in Hook.stages
            if is_method_overridden(stage, Hook, self)
        }

        # Overriding a generic callback triggers every stage that delegates
        # to it.
        generic_to_stages = (
            ('before_epoch', ('before_train_epoch', 'before_val_epoch')),
            ('after_epoch', ('after_train_epoch', 'after_val_epoch')),
            ('before_iter', ('before_train_iter', 'before_val_iter')),
            ('after_iter', ('after_train_iter', 'after_val_iter')),
        )
        for generic, delegating in generic_to_stages:
            if is_method_overridden(generic, Hook, self):
                triggered.update(delegating)

        return [stage for stage in Hook.stages if stage in triggered]


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/hooks/iter_timer.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import time

from .hook import HOOKS, Hook


@HOOKS.register_module()
class IterTimerHook(Hook):
    """Record per-iteration timings in the runner's log buffer.

    ``data_time`` is the time from the end of the previous iteration (or the
    start of the epoch) to the start of the current one; ``time`` is the
    time from that same reference point to the end of the current iteration.
    """

    def before_epoch(self, runner):
        """Start the timer for the first iteration of the epoch."""
        self.t = time.time()

    def before_iter(self, runner):
        """Log the time elapsed since the timer was last reset."""
        waited = time.time() - self.t
        runner.log_buffer.update(data_time=waited)

    def after_iter(self, runner):
        """Log the total iteration time, then reset the timer."""
        elapsed = time.time() - self.t
        runner.log_buffer.update(time=elapsed)
        self.t = time.time()


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/hooks/logger/__init__.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
from .base import LoggerHook
from .dvclive import DvcliveLoggerHook
from .mlflow import MlflowLoggerHook
from .neptune import NeptuneLoggerHook
from .pavi import PaviLoggerHook
from .tensorboard import TensorboardLoggerHook
from .text import TextLoggerHook
from .wandb import WandbLoggerHook

__all__ = [
    'LoggerHook', 'MlflowLoggerHook', 'PaviLoggerHook',
    'TensorboardLoggerHook', 'TextLoggerHook', 'WandbLoggerHook',
    'NeptuneLoggerHook', 'DvcliveLoggerHook'
]


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/hooks/logger/base.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import numbers
from abc import ABCMeta, abstractmethod

import numpy as np
import torch

from ..hook import Hook


class LoggerHook(Hook):
    """Base class for logger hooks.

    Subclasses implement :meth:`log`. This class decides when ``log`` is
    invoked during training/validation and offers helpers that collect the
    values to be logged.

    Args:
        interval (int): Logging interval (every k iterations). Default 10.
        ignore_last (bool): Ignore the log of last iterations in each epoch
            if less than `interval`. Default True. The value is stored on the
            instance; the methods of this base class do not consult it.
        reset_flag (bool): Whether to clear the output buffer after logging.
            Default False.
        by_epoch (bool): Whether EpochBasedRunner is used. Default True.
    """

    # Python 2 style metaclass declaration. On Python 3 this is only a plain
    # class attribute, so it does not by itself enforce ``@abstractmethod``.
    __metaclass__ = ABCMeta

    def __init__(self,
                 interval=10,
                 ignore_last=True,
                 reset_flag=False,
                 by_epoch=True):
        self.interval = interval
        self.ignore_last = ignore_last
        self.reset_flag = reset_flag
        self.by_epoch = by_epoch

    @abstractmethod
    def log(self, runner):
        """Emit the current log record. Implemented by subclasses."""
        pass

    @staticmethod
    def is_scalar(val, include_np=True, include_torch=True):
        """Tell the input variable is a scalar or not.

        Args:
            val: Input variable.
            include_np (bool): Whether include 0-d np.ndarray as a scalar.
            include_torch (bool): Whether include a single-element
                torch.Tensor (0-d, or any shape holding exactly one element)
                as a scalar.

        Returns:
            bool: True or False.
        """
        if isinstance(val, numbers.Number):
            return True
        if include_np and isinstance(val, np.ndarray) and val.ndim == 0:
            return True
        # ``len()`` raises TypeError on a 0-d tensor and only inspects the
        # first dimension, so count the elements instead.
        if (include_torch and isinstance(val, torch.Tensor)
                and val.numel() == 1):
            return True
        return False

    def get_mode(self, runner):
        """Return ``'train'`` or ``'val'`` for the current log record.

        While the runner is in train mode, the record is only treated as a
        training record if a ``'time'`` meter is present in the log buffer;
        otherwise it is reported as ``'val'``.

        Raises:
            ValueError: If ``runner.mode`` is neither 'train' nor 'val'.
        """
        if runner.mode == 'train':
            if 'time' in runner.log_buffer.meters:
                mode = 'train'
            else:
                mode = 'val'
        elif runner.mode == 'val':
            mode = 'val'
        else:
            raise ValueError(f"runner mode should be 'train' or 'val', "
                             f'but got {runner.mode}')
        return mode

    def get_epoch(self, runner):
        """Return ``runner.epoch`` after validating ``runner.mode``.

        Raises:
            ValueError: If ``runner.mode`` is neither 'train' nor 'val'.
        """
        if runner.mode == 'train':
            # No ``+ 1`` offset is applied in train mode.
            epoch = runner.epoch
        elif runner.mode == 'val':
            # normal val mode
            # runner.epoch += 1 has been done before val workflow
            epoch = runner.epoch
        else:
            raise ValueError(f"runner mode should be 'train' or 'val', "
                             f'but got {runner.mode}')
        return epoch

    def get_iter(self, runner, inner_iter=False):
        """Get the current training iteration step (1-based).

        Args:
            runner: The runner providing ``iter`` and ``inner_iter``.
            inner_iter (bool): If True and ``by_epoch`` is set, use
                ``runner.inner_iter`` instead of ``runner.iter``.
        """
        if self.by_epoch and inner_iter:
            current_iter = runner.inner_iter + 1
        else:
            current_iter = runner.iter + 1
        return current_iter

    def get_lr_tags(self, runner):
        """Build learning-rate tags from ``runner.current_lr()``.

        Only the first entry of each learning-rate sequence is kept.
        """
        tags = {}
        lrs = runner.current_lr()
        if isinstance(lrs, dict):
            for name, value in lrs.items():
                tags[f'learning_rate/{name}'] = value[0]
        else:
            tags['learning_rate'] = lrs[0]
        return tags

    def get_momentum_tags(self, runner):
        """Build momentum tags from ``runner.current_momentum()``.

        Only the first entry of each momentum sequence is kept.
        """
        tags = {}
        momentums = runner.current_momentum()
        if isinstance(momentums, dict):
            for name, value in momentums.items():
                tags[f'momentum/{name}'] = value[0]
        else:
            tags['momentum'] = momentums[0]
        return tags

    def get_loggable_tags(self,
                          runner,
                          allow_scalar=True,
                          allow_text=False,
                          add_mode=True,
                          tags_to_skip=('time', 'data_time', 'learning_rate', 'pan2ms', 'grad_norm', 'lr', 'memory')):
        """Collect the entries of ``runner.metrics`` that should be logged.

        Args:
            runner: The runner; its ``metrics`` mapping is read.
            allow_scalar (bool): Keep scalar values. Default True.
            allow_text (bool): Keep ``str`` values. Default False.
            add_mode (bool): Prefix each key with ``'<mode>/'``. Default True.
            tags_to_skip (tuple[str]): Keys that are never returned.

        Returns:
            dict: Selected metrics plus learning-rate and momentum tags.
        """
        tags = {}
        for var, val in runner.metrics.items():
            if var in tags_to_skip:
                continue
            if self.is_scalar(val) and not allow_scalar:
                continue
            if isinstance(val, str) and not allow_text:
                continue
            if add_mode:
                var = f'{self.get_mode(runner)}/{var}'
            tags[var] = val
        tags.update(self.get_lr_tags(runner))
        tags.update(self.get_momentum_tags(runner))
        return tags

    def before_run(self, runner):
        # Only the last registered LoggerHook gets ``reset_flag`` switched on.
        for hook in runner.hooks[::-1]:
            if isinstance(hook, LoggerHook):
                hook.reset_flag = True
                break

    def before_epoch(self, runner):
        runner.log_buffer.clear()  # clear logs of last epoch

    def after_train_iter(self, runner):
        # Logging is triggered only for epoch-based runs, every ``interval``
        # inner iterations. No buffer averaging or clearing happens here.
        if self.by_epoch and self.every_n_inner_iters(runner, self.interval):
            self.log(runner)

    def after_train_epoch(self, runner):
        if self.every_n_epochs(runner, self.interval):
            self.log(runner)
            if self.reset_flag:
                runner.log_buffer.clear_output()

    def after_val_epoch(self, runner):
        self.log(runner)
        if self.reset_flag:
            runner.log_buffer.clear_output()

    def after_val_iter(self, runner):
        self.log(runner)

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/hooks/logger/dvclive.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
from pathlib import Path

from ...dist_utils import master_only
from ..hook import HOOKS
from .base import LoggerHook


@HOOKS.register_module()
class DvcliveLoggerHook(LoggerHook):
    """Logger hook that forwards metrics to `dvclive`_.

    The ``dvclive`` package must be installed.

    Args:
        model_file (str): Default None. If not None, after each epoch the
            model will be saved to {model_file}.
        interval (int): Logging interval (every k iterations). Default 10.
        ignore_last (bool): Ignore the log of last iterations in each epoch
            if less than `interval`. Default: True.
        reset_flag (bool): Whether to clear the output buffer after logging.
            Default: False.
        by_epoch (bool): Whether EpochBasedRunner is used. Default: True.
        kwargs: Arguments for instantiating `Live`_.

    .. _dvclive:
        https://dvc.org/doc/dvclive

    .. _Live:
        https://dvc.org/doc/dvclive/api-reference/live#parameters
    """

    def __init__(self,
                 model_file=None,
                 interval=10,
                 ignore_last=True,
                 reset_flag=False,
                 by_epoch=True,
                 **kwargs):
        super().__init__(interval, ignore_last, reset_flag, by_epoch)
        self.model_file = model_file
        self.import_dvclive(**kwargs)

    def import_dvclive(self, **kwargs):
        """Import ``dvclive`` lazily and create the ``Live`` instance."""
        try:
            from dvclive import Live
        except ImportError:
            raise ImportError(
                'Please run "pip install dvclive" to install dvclive')
        else:
            self.dvclive = Live(**kwargs)

    @master_only
    def log(self, runner):
        """Send every loggable tag to dvclive at the current iteration."""
        tags = self.get_loggable_tags(runner)
        if not tags:
            return
        self.dvclive.set_step(self.get_iter(runner))
        for name in tags:
            self.dvclive.log(name, tags[name])

    @master_only
    def after_train_epoch(self, runner):
        """Log as the base class does, then optionally save a checkpoint."""
        super().after_train_epoch(runner)
        if self.model_file is None:
            return
        # Directory and file name of the checkpoint come from ``model_file``.
        target = Path(self.model_file)
        runner.save_checkpoint(
            target.parent,
            filename_tmpl=target.name,
            create_symlink=False,
        )


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/hooks/logger/mlflow.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
from mmcv.utils import TORCH_VERSION
from ...dist_utils import master_only
from ..hook import HOOKS
from .base import LoggerHook


@HOOKS.register_module()
class MlflowLoggerHook(LoggerHook):
    """Logger hook that sends metrics, and optionally the model, to MLflow.

    The `MLflow`_ package must be installed.

    Args:
        exp_name (str, optional): Name of the experiment to be used.
            Default None. If not None, set the active experiment.
            If experiment does not exist, an experiment with provided name
            will be created.
        tags (Dict[str], optional): Tags for the current run.
            Default None. If not None, set tags for the current run.
        log_model (bool, optional): Whether to log an MLflow artifact.
            Default True. If True, log runner.model as an MLflow artifact
            for the current run.
        interval (int): Logging interval (every k iterations). Default: 10.
        ignore_last (bool): Ignore the log of last iterations in each epoch
            if less than `interval`. Default: True.
        reset_flag (bool): Whether to clear the output buffer after logging.
            Default: False.
        by_epoch (bool): Whether EpochBasedRunner is used. Default: True.

    .. _MLflow:
        https://www.mlflow.org/docs/latest/index.html
    """

    def __init__(self,
                 exp_name=None,
                 tags=None,
                 log_model=True,
                 interval=10,
                 ignore_last=True,
                 reset_flag=False,
                 by_epoch=True):
        super().__init__(interval, ignore_last, reset_flag, by_epoch)
        # Import first so a missing dependency fails at construction time.
        self.import_mlflow()
        self.exp_name = exp_name
        self.tags = tags
        self.log_model = log_model

    def import_mlflow(self):
        """Import ``mlflow`` lazily and keep module handles on the hook."""
        try:
            import mlflow
            import mlflow.pytorch as mlflow_pytorch
        except ImportError:
            raise ImportError(
                'Please run "pip install mlflow" to install mlflow')
        else:
            self.mlflow = mlflow
            self.mlflow_pytorch = mlflow_pytorch

    @master_only
    def before_run(self, runner):
        """Select the experiment and attach run tags when configured."""
        super().before_run(runner)
        experiment, run_tags = self.exp_name, self.tags
        if experiment is not None:
            self.mlflow.set_experiment(experiment)
        if run_tags is not None:
            self.mlflow.set_tags(run_tags)

    @master_only
    def log(self, runner):
        """Log all loggable tags as MLflow metrics at the current step."""
        metrics = self.get_loggable_tags(runner)
        if not metrics:
            return
        self.mlflow.log_metrics(metrics, step=self.get_iter(runner))

    @master_only
    def after_run(self, runner):
        """Upload ``runner.model`` under 'models' if ``log_model`` is set."""
        if not self.log_model:
            return
        requirements = [f'torch=={TORCH_VERSION}']
        self.mlflow_pytorch.log_model(
            runner.model, 'models', pip_requirements=requirements)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/hooks/logger/neptune.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
from ...dist_utils import master_only
from ..hook import HOOKS
from .base import LoggerHook


@HOOKS.register_module()
class NeptuneLoggerHook(LoggerHook):
    """Class to log metrics to NeptuneAI.

    It requires `Neptune`_ to be installed.

    Args:
        init_kwargs (dict): a dict contains the initialization keys as below:

            - project (str): Name of a project in a form of
              namespace/project_name. If None, the value of NEPTUNE_PROJECT
              environment variable will be taken.
            - api_token (str): User's API token. If None, the value of
              NEPTUNE_API_TOKEN environment variable will be taken. Note: It is
              strongly recommended to use NEPTUNE_API_TOKEN environment
              variable rather than placing your API token in plain text in your
              source code.
            - name (str, optional, default is 'Untitled'): Editable name of the
              run. Name is displayed in the run's Details and in Runs table as
              a column.

            Check https://docs.neptune.ai/api-reference/neptune#init for more
            init arguments.
        interval (int): Logging interval (every k iterations). Default: 10.
        ignore_last (bool): Ignore the log of last iterations in each epoch
            if less than ``interval``. Default: True.
        reset_flag (bool): Whether to clear the output buffer after logging.
            Default: True.
        with_step (bool): If True, every value is logged with the step
            returned by ``self.get_iter``. Otherwise values are logged
            without a step and the iteration is recorded once per call in
            a separate ``global_step`` series. Default: True.
        by_epoch (bool): Whether EpochBasedRunner is used. Default: True.

    .. _Neptune:
        https://docs.neptune.ai
    """

    def __init__(self,
                 init_kwargs=None,
                 interval=10,
                 ignore_last=True,
                 reset_flag=True,
                 with_step=True,
                 by_epoch=True):

        super(NeptuneLoggerHook, self).__init__(interval, ignore_last,
                                                reset_flag, by_epoch)
        self.import_neptune()
        self.init_kwargs = init_kwargs
        self.with_step = with_step

    def import_neptune(self):
        """Import ``neptune.new`` lazily; the run is created in before_run."""
        try:
            import neptune.new as neptune
        except ImportError:
            raise ImportError(
                'Please run "pip install neptune-client" to install neptune')
        self.neptune = neptune
        self.run = None

    @master_only
    def before_run(self, runner):
        """Create the Neptune run, passing ``init_kwargs`` when provided."""
        if self.init_kwargs:
            self.run = self.neptune.init(**self.init_kwargs)
        else:
            self.run = self.neptune.init()

    @master_only
    def log(self, runner):
        """Log every loggable tag to its own Neptune series."""
        tags = self.get_loggable_tags(runner)
        if not tags:
            return

        step = self.get_iter(runner)
        if self.with_step:
            for tag_name, tag_value in tags.items():
                self.run[tag_name].log(tag_value, step=step)
        else:
            # The previous implementation inserted 'global_step' into
            # ``tags`` while iterating over it (RuntimeError: dictionary
            # changed size during iteration) and passed the whole dict to
            # every series. Log the individual values instead and record
            # the iteration once in a dedicated series.
            for tag_name, tag_value in tags.items():
                self.run[tag_name].log(tag_value)
            self.run['global_step'].log(step)

    @master_only
    def after_run(self, runner):
        """Stop the Neptune run."""
        self.run.stop()


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/hooks/logger/pavi.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import json
import os
import os.path as osp

import torch
import yaml

import mmcv
from ....parallel.utils import is_module_wrapper
from ...dist_utils import master_only
from ..hook import HOOKS
from .base import LoggerHook


@HOOKS.register_module()
class PaviLoggerHook(LoggerHook):
    """Class to visual model, log metrics (for internal use).

    Args:
        init_kwargs (dict): A dict contains the initialization keys.
        add_graph (bool): Whether to visual model. Default: False.
        add_last_ckpt (bool): Whether to save checkpoint after run.
            Default: False.
        interval (int): Logging interval (every k iterations). Default: 10.
        ignore_last (bool): Ignore the log of last iterations in each epoch
            if less than `interval`. Default: True.
        reset_flag (bool): Whether to clear the output buffer after logging.
            Default: False.
        by_epoch (bool): Whether EpochBasedRunner is used. Default: True.
        img_key (string): Get image data from Dataset. Default: 'img_info'.
    """

    def __init__(self,
                 init_kwargs=None,
                 add_graph=False,
                 add_last_ckpt=False,
                 interval=10,
                 ignore_last=True,
                 reset_flag=False,
                 by_epoch=True,
                 img_key='img_info'):
        super(PaviLoggerHook, self).__init__(interval, ignore_last, reset_flag,
                                             by_epoch)
        self.init_kwargs = init_kwargs
        self.add_graph = add_graph
        self.add_last_ckpt = add_last_ckpt
        self.img_key = img_key

    @master_only
    def before_run(self, runner):
        """Create the Pavi ``SummaryWriter`` for this run.

        The writer is named after the last component of ``runner.work_dir``
        and, when ``runner.meta`` carries a config (``config_dict`` or
        ``config_file``), receives it as YAML in ``session_text``.

        Raises:
            ImportError: If ``pavi`` is not installed.
        """
        super(PaviLoggerHook, self).before_run(runner)
        try:
            from pavi import SummaryWriter
        except ImportError:
            raise ImportError('Please run "pip install pavi" to install pavi.')

        self.run_name = runner.work_dir.split('/')[-1]

        if not self.init_kwargs:
            self.init_kwargs = dict()
        self.init_kwargs['name'] = self.run_name
        self.init_kwargs['model'] = runner._model_name
        if runner.meta is not None:
            if 'config_dict' in runner.meta:
                config_dict = runner.meta['config_dict']
                assert isinstance(
                    config_dict,
                    dict), ('meta["config_dict"] has to be of a dict, '
                            f'but got {type(config_dict)}')
            elif 'config_file' in runner.meta:
                config_file = runner.meta['config_file']
                config_dict = dict(mmcv.Config.fromfile(config_file))
            else:
                config_dict = None
            if config_dict is not None:
                # 'max_.*iter' is parsed in pavi sdk as the maximum iterations
                #  to properly set up the progress bar.
                config_dict = config_dict.copy()
                config_dict.setdefault('max_iter', runner.max_iters)
                # non-serializable values are first converted in
                # mmcv.dump to json
                config_dict = json.loads(
                    mmcv.dump(config_dict, file_format='json'))
                session_text = yaml.dump(config_dict)
                self.init_kwargs['session_text'] = session_text
        self.writer = SummaryWriter(**self.init_kwargs)

    def get_step(self, runner):
        """Get the total training step/epoch."""
        if self.get_mode(runner) == 'val' and self.by_epoch:
            return self.get_epoch(runner)
        else:
            return self.get_iter(runner)

    @master_only
    def log(self, runner):
        """Write all loggable tags as one scalar group named by the mode."""
        tags = self.get_loggable_tags(runner, add_mode=False)
        if tags:
            self.writer.add_scalars(
                self.get_mode(runner), tags, self.get_step(runner))

    @master_only
    def after_run(self, runner):
        """Optionally upload the latest checkpoint, then close the writer.

        Returns:
            The result of ``writer.add_snapshot_file`` when a checkpoint was
            uploaded, otherwise None.
        """
        snapshot_result = None
        try:
            if self.add_last_ckpt:
                ckpt_path = osp.join(runner.work_dir, 'latest.pth')
                if osp.islink(ckpt_path):
                    ckpt_path = osp.join(runner.work_dir,
                                         os.readlink(ckpt_path))

                if osp.isfile(ckpt_path):
                    # runner.epoch += 1 has been done before `after_run`.
                    iteration = runner.epoch if self.by_epoch else runner.iter
                    snapshot_result = self.writer.add_snapshot_file(
                        tag=self.run_name,
                        snapshot_file_path=ckpt_path,
                        iteration=iteration)
        finally:
            # flush the buffer and send a task ending signal to Pavi.
            # This used to be skipped whenever a checkpoint was uploaded
            # because of an early ``return``.
            self.writer.close()
        return snapshot_result

    @master_only
    def before_epoch(self, runner):
        """Add the model graph to Pavi once, before the first epoch."""
        if runner.epoch == 0 and self.add_graph:
            if is_module_wrapper(runner.model):
                _model = runner.model.module
            else:
                _model = runner.model
            device = next(_model.parameters()).device
            data = next(iter(runner.data_loader))
            # Trace the graph with the first sample stored under ``img_key``.
            image = data[self.img_key][0:1].to(device)
            with torch.no_grad():
                self.writer.add_graph(_model, image)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/hooks/logger/tensorboard.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp

from mmcv.utils import TORCH_VERSION, digit_version
from ...dist_utils import master_only
from ..hook import HOOKS
from .base import LoggerHook


@HOOKS.register_module()
class TensorboardLoggerHook(LoggerHook):
    """Logger hook that writes metrics to Tensorboard.

    Args:
        log_dir (string): Save directory location. Default: None. If default
            values are used, directory location is ``runner.work_dir``/tf_logs.
        interval (int): Logging interval (every k iterations). Default: 10.
        ignore_last (bool): Ignore the log of last iterations in each epoch
            if less than `interval`. Default: True.
        reset_flag (bool): Whether to clear the output buffer after logging.
            Default: False.
        by_epoch (bool): Whether EpochBasedRunner is used. Default: True.
    """

    def __init__(self,
                 log_dir=None,
                 interval=10,
                 ignore_last=True,
                 reset_flag=False,
                 by_epoch=True):
        super().__init__(interval, ignore_last, reset_flag, by_epoch)
        self.log_dir = log_dir

    @master_only
    def before_run(self, runner):
        """Pick a ``SummaryWriter`` implementation and open the writer.

        ``tensorboardX`` is used for 'parrots' or PyTorch older than 1.1,
        ``torch.utils.tensorboard`` otherwise.
        """
        super().before_run(runner)
        needs_tensorboardx = (
            TORCH_VERSION == 'parrots'
            or digit_version(TORCH_VERSION) < digit_version('1.1'))
        if needs_tensorboardx:
            try:
                from tensorboardX import SummaryWriter
            except ImportError:
                raise ImportError('Please install tensorboardX to use '
                                  'TensorboardLoggerHook.')
        else:
            try:
                from torch.utils.tensorboard import SummaryWriter
            except ImportError:
                raise ImportError(
                    'Please run "pip install future tensorboard" to install '
                    'the dependencies to use torch.utils.tensorboard '
                    '(applicable to PyTorch 1.1 or higher)')

        # Fall back to <work_dir>/tf_logs when no directory was configured.
        if self.log_dir is None:
            self.log_dir = osp.join(runner.work_dir, 'tf_logs')
        self.writer = SummaryWriter(self.log_dir)

    @master_only
    def log(self, runner):
        """Write each tag as text (``str`` values) or as a scalar."""
        entries = self.get_loggable_tags(runner, allow_text=True)
        for name, value in entries.items():
            if isinstance(value, str):
                write = self.writer.add_text
            else:
                write = self.writer.add_scalar
            write(name, value, self.get_iter(runner))

    @master_only
    def after_run(self, runner):
        """Close the Tensorboard writer."""
        self.writer.close()


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/hooks/logger/text.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import datetime
import os
import os.path as osp
from collections import OrderedDict

import torch
import torch.distributed as dist

import mmcv
from mmcv.fileio.file_client import FileClient
from mmcv.utils import is_tuple_of, scandir
from ..hook import HOOKS
from .base import LoggerHook
from mmcv.utils.logging import print_log


@HOOKS.register_module()
class TextLoggerHook(LoggerHook):
    """Logger hook in text.

    In this logger hook, the information will be printed on terminal and
    saved in json file.

    Args:
        by_epoch (bool, optional): Whether EpochBasedRunner is used.
            Default: True.
        interval (int, optional): Logging interval (every k iterations).
            Default: 10.
        ignore_last (bool, optional): Ignore the log of last iterations in each
            epoch if less than :attr:`interval`. Default: True.
        reset_flag (bool, optional): Whether to clear the output buffer after
            logging. Default: False.
        interval_exp_name (int, optional): Logging interval for experiment
            name. This feature is to help users conveniently get the experiment
            information from screen or log file. Default: 1000.
        out_dir (str, optional): Logs are saved in ``runner.work_dir`` default.
            If ``out_dir`` is specified, logs will be copied to a new directory
            which is the concatenation of ``out_dir`` and the last level
            directory of ``runner.work_dir``. Default: None.
            `New in version 1.3.16.`
        out_suffix (str or tuple[str], optional): Those filenames ending with
            ``out_suffix`` will be copied to ``out_dir``.
            Default: ('.log.json', '.log', '.py').
            `New in version 1.3.16.`
        keep_local (bool, optional): Whether to keep local log when
            :attr:`out_dir` is specified. If False, the local log will be
            removed. Default: True.
            `New in version 1.3.16.`
        file_client_args (dict, optional): Arguments to instantiate a
            FileClient. See :class:`mmcv.fileio.FileClient` for details.
            Default: None.
            `New in version 1.3.16.`

    Raises:
        ValueError: If ``file_client_args`` is given without ``out_dir``.
        TypeError: If ``out_dir`` is not None, a string or a tuple of strings.
    """

    def __init__(self,
                 by_epoch=True,
                 interval=10,
                 ignore_last=True,
                 reset_flag=False,
                 interval_exp_name=1000,
                 out_dir=None,
                 out_suffix=('.log.json', '.log', '.py'),
                 keep_local=True,
                 file_client_args=None):
        super(TextLoggerHook, self).__init__(interval, ignore_last, reset_flag,
                                             by_epoch)
        self.by_epoch = by_epoch
        self.time_sec_tot = 0
        self.interval_exp_name = interval_exp_name

        if out_dir is None and file_client_args is not None:
            raise ValueError(
                'file_client_args should be "None" when `out_dir` is not '
                'specified.')
        self.out_dir = out_dir

        if not (out_dir is None or isinstance(out_dir, str)
                or is_tuple_of(out_dir, str)):
            # f-string so the offending value actually shows up in the error.
            raise TypeError('out_dir should be "None" or string or tuple of '
                            f'string, but got {out_dir}')
        self.out_suffix = out_suffix

        self.keep_local = keep_local
        self.file_client_args = file_client_args
        if self.out_dir is not None:
            self.file_client = FileClient.infer_client(file_client_args,
                                                       self.out_dir)

    def before_run(self, runner):
        """Resolve the output directory and prepare the JSON log file."""
        super(TextLoggerHook, self).before_run(runner)

        if self.out_dir is not None:
            self.file_client = FileClient.infer_client(self.file_client_args,
                                                       self.out_dir)
            # The final `self.out_dir` is the concatenation of `self.out_dir`
            # and the last level directory of `runner.work_dir`
            basename = osp.basename(runner.work_dir.rstrip(osp.sep))
            self.out_dir = self.file_client.join_path(self.out_dir, basename)
            print_log(
                (f'Text logs will be saved to {self.out_dir} by '
                 f'{self.file_client.name} after the training process.'), logger=runner.logger)

        self.start_iter = runner.iter
        self.data_length = runner.data_length
        self.max_epochs = runner.max_epochs
        self.json_log_path = osp.join(runner.work_dir,
                                      f'{runner.timestamp}.log.json')
        if runner.meta is not None:
            self._dump_log(runner.meta, runner)

    def _get_max_memory(self, runner):
        """Return the peak allocated CUDA memory in MB.

        With more than one process, the maximum is reduced onto rank 0.
        """
        device = getattr(runner.model, 'output_device', None)
        mem = torch.cuda.max_memory_allocated(device=device)
        mem_mb = torch.tensor([mem / (1024 * 1024)],
                              dtype=torch.int,
                              device=device)
        if runner.world_size > 1:
            dist.reduce(mem_mb, 0, op=dist.ReduceOp.MAX)
        return mem_mb.item()

    def _log_info(self, log_dict, runner):
        """Format ``log_dict`` into one line and print it via ``print_log``."""
        # print exp name for users to distinguish experiments
        # at every ``interval_exp_name`` iterations and the end of each epoch
        if runner.meta is not None and 'exp_name' in runner.meta:
            if (self.every_n_iters(runner, self.interval_exp_name)) or (
                    self.by_epoch and self.end_of_epoch(runner)):
                exp_info = f'Exp name: {runner.meta["exp_name"]}'
                print_log(exp_info, logger=runner.logger)

        if log_dict['mode'] == 'train':
            if isinstance(log_dict['lr'], dict):
                lr_str = []
                for k, val in log_dict['lr'].items():
                    lr_str.append(f'lr_{k}: {val:.3e}')
                lr_str = ' '.join(lr_str)
            else:
                lr_str = f'lr: {log_dict["lr"]:.3e}'

            # by epoch: Epoch [4]/[100][100/1000]
            # by iter:  Iter [100/100000]
            if self.by_epoch:
                log_str = f'Epoch [{log_dict["epoch"]}]/[{self.max_epochs}]' \
                          f'[{log_dict["iter"]}/{self.data_length}]\t'
            else:
                log_str = f'Iter [{log_dict["iter"]}/{runner.max_iters}]\t'
            log_str += f'{lr_str}, '

            if 'time' in log_dict.keys():
                # Accumulate elapsed time to estimate the remaining time.
                self.time_sec_tot += (log_dict['time'] * self.interval)
                time_sec_avg = self.time_sec_tot / (
                    runner.iter - self.start_iter + 1)
                eta_sec = time_sec_avg * (runner.max_iters - runner.iter - 1)
                eta_str = str(datetime.timedelta(seconds=int(eta_sec)))
                log_str += f'eta: {eta_str}, '
                log_str += f'time: {log_dict["time"]:.3f}, ' \
                           f'data_time: {log_dict["data_time"]:.3f}, '
                # statistic memory
                if torch.cuda.is_available():
                    log_str += f'memory: {log_dict["memory"]}MB, '
        else:
            # val/test time
            # here 1000 is the length of the val dataloader
            # by epoch: Epoch[val] [4][1000]
            # by iter: Iter[val] [1000]
            if self.by_epoch:
                log_str = f'Epoch({log_dict["mode"]}) ' \
                    f'[{log_dict["epoch"]}][{log_dict["iter"]}]\t'
            else:
                log_str = f'Iter({log_dict["mode"]}) [{log_dict["iter"]}]\t'

        log_items = []
        for name, val in log_dict.items():
            # TODO: resolve this hack
            # these items have been in log_str
            if name in [
                    'mode', 'Epoch', 'iter', 'lr', 'time', 'data_time',
                    'memory', 'epoch'
            ]:
                continue
            if isinstance(val, float):
                val = f'{val:.5f}'
            log_items.append(f'{name}: {val}')
        log_str += ', '.join(log_items)
        print_log(log_str, logger=runner.logger)

    def _dump_log(self, log_dict, runner):
        """Append ``log_dict`` as one JSON line to the log file (rank 0)."""
        # dump log in json format
        json_log = OrderedDict()
        for k, v in log_dict.items():
            json_log[k] = self._round_float(v)
        # only append log at last line
        if runner.rank == 0:
            with open(self.json_log_path, 'a+') as f:
                mmcv.dump(json_log, f, file_format='json')
                f.write('\n')

    def _round_float(self, items):
        """Round floats (also inside nested lists) to 5 decimal places."""
        if isinstance(items, list):
            return [self._round_float(item) for item in items]
        elif isinstance(items, float):
            return round(items, 5)
        else:
            return items

    def log(self, runner):
        """Build the log record, print it and append it to the JSON log.

        Side effect: ``runner.metrics`` is replaced with the ``avg`` of every
        meter currently held in ``runner.log_buffer.meters``.

        Returns:
            dict: The record that was logged.
        """
        if 'eval_iter_num' in runner.log_buffer.meters:
            # this doesn't modify runner.iter and is regardless of by_epoch
            cur_iter = runner.log_buffer.meters.pop('eval_iter_num')
        else:
            cur_iter = self.get_iter(runner, inner_iter=True)

        log_dict = OrderedDict(
            mode=self.get_mode(runner),
            epoch=self.get_epoch(runner),
            iter=cur_iter)

        # only record lr of the first param group
        cur_lr = runner.current_lr()
        if isinstance(cur_lr, list):
            log_dict['lr'] = cur_lr[0]
        else:
            assert isinstance(cur_lr, dict)
            log_dict['lr'] = {}
            for k, lr_ in cur_lr.items():
                assert isinstance(lr_, list)
                log_dict['lr'].update({k: lr_[0]})

        if 'time' in runner.log_buffer.meters:
            # statistic memory
            if torch.cuda.is_available():
                log_dict['memory'] = self._get_max_memory(runner)

        runner.metrics = {k: meter.avg for k, meter in runner.log_buffer.meters.items()}
        log_dict = dict(log_dict, **runner.metrics)

        self._log_info(log_dict, runner)
        self._dump_log(log_dict, runner)
        return log_dict

    def after_run(self, runner):
        """Copy or upload matching log files to ``out_dir`` when it is set."""
        # copy or upload logs to self.out_dir
        if self.out_dir is not None:
            for filename in scandir(runner.work_dir, self.out_suffix, True):
                local_filepath = osp.join(runner.work_dir, filename)
                out_filepath = self.file_client.join_path(
                    self.out_dir, filename)
                with open(local_filepath, 'r') as f:
                    self.file_client.put_text(f.read(), out_filepath)

                print_log(
                    (f'The file {local_filepath} has been uploaded to '
                     f'{out_filepath}.'), logger=runner.logger)

                if not self.keep_local:
                    os.remove(local_filepath)
                    print_log(
                        (f'{local_filepath} was removed due to the '
                         '`self.keep_local=False`'), logger=runner.logger)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/hooks/logger/wandb.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp

from mmcv.utils import scandir
from ...dist_utils import master_only
from ..hook import HOOKS
from .base import LoggerHook


@HOOKS.register_module()
class WandbLoggerHook(LoggerHook):
    """Logger hook that forwards metrics to Weights & Biases.

    The `wandb`_ package must be installed; it is imported lazily when the
    hook is constructed.

    Args:
        init_kwargs (dict): Keyword arguments forwarded to ``wandb.init``.
            See https://docs.wandb.ai/ref/python/init for the accepted keys.
        interval (int): Logging interval (every k iterations).
            Default 10.
        ignore_last (bool): Ignore the log of last iterations in each epoch
            if less than `interval`.
            Default: True.
        reset_flag (bool): Whether to clear the output buffer after logging.
            Default: False.
        commit (bool): Passed to ``wandb.log``. When True the metrics dict is
            saved to the wandb server and the step is incremented; when False
            ``wandb.log`` only updates the current metrics dict and nothing
            is saved until it is called again with ``commit=True``.
            Default: True.
        by_epoch (bool): Whether EpochBasedRunner is used.
            Default: True.
        with_step (bool): If True, the value of ``self.get_iter(runner)`` is
            passed to ``wandb.log`` as ``step``. Otherwise it is stored in
            the logged dict under the key ``global_step``.
            Default: True.
        log_artifact (bool): If True, artifacts in {work_dir} will be uploaded
            to wandb after training ends.
            Default: True
            `New in version 1.4.3.`
        out_suffix (str or tuple[str], optional): Those filenames ending with
            ``out_suffix`` will be uploaded to wandb.
            Default: ('.log.json', '.log', '.py').
            `New in version 1.4.3.`

    .. _wandb:
        https://docs.wandb.ai
    """

    def __init__(self,
                 init_kwargs=None,
                 interval=10,
                 ignore_last=True,
                 reset_flag=False,
                 commit=True,
                 by_epoch=True,
                 with_step=True,
                 log_artifact=True,
                 out_suffix=('.log.json', '.log', '.py')):
        super().__init__(interval, ignore_last, reset_flag, by_epoch)
        # Fail early if wandb is not installed.
        self.import_wandb()
        self.init_kwargs = init_kwargs
        self.commit = commit
        self.with_step = with_step
        self.log_artifact = log_artifact
        self.out_suffix = out_suffix

    def import_wandb(self):
        """Import ``wandb`` and keep the module on ``self.wandb``.

        Raises:
            ImportError: If the ``wandb`` package cannot be imported.
        """
        try:
            import wandb
        except ImportError:
            raise ImportError(
                'Please run "pip install wandb" to install wandb')
        self.wandb = wandb

    @master_only
    def before_run(self, runner):
        """Run the parent hook's setup and start a wandb run."""
        super().before_run(runner)
        if self.wandb is None:
            self.import_wandb()
        # A missing or empty ``init_kwargs`` means "no extra arguments".
        self.wandb.init(**(self.init_kwargs or {}))

    @master_only
    def log(self, runner):
        """Send the current loggable tags to wandb, if there are any."""
        tags = self.get_loggable_tags(runner)
        if not tags:
            return

        current_step = self.get_iter(runner)
        if self.with_step:
            self.wandb.log(tags, step=current_step, commit=self.commit)
            return

        # Without an explicit step, record the iteration as a regular metric.
        tags['global_step'] = current_step
        self.wandb.log(tags, commit=self.commit)

    @master_only
    def after_run(self, runner):
        """Optionally upload work-dir files as an artifact, then join."""
        if self.log_artifact:
            artifact = self.wandb.Artifact(name='artifacts', type='model')
            for rel_name in scandir(runner.work_dir, self.out_suffix, True):
                artifact.add_file(osp.join(runner.work_dir, rel_name))
            self.wandb.log_artifact(artifact)
        self.wandb.join()


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/hooks/lr_updater.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import numbers
from math import cos, pi

import mmcv
from .hook import HOOKS, Hook


class LrUpdaterHook(Hook):
    """LR Scheduler in MMCV.

    Base class for learning-rate schedules. Subclasses implement
    :meth:`get_lr`; this class records the base LR of every param group,
    optionally applies a warmup phase, and writes the resulting values into
    the optimizer(s) held by the runner. ``runner.optimizer`` may be a single
    optimizer or a dict of optimizers; in the latter case all LR containers
    are dicts keyed like ``runner.optimizer``.

    Args:
        by_epoch (bool): LR changes epoch by epoch
        warmup (string): Type of warmup used. It can be None(use no warmup),
            'constant', 'linear' or 'exp'
        warmup_iters (int): The number of iterations or epochs that warmup
            lasts
        warmup_ratio (float): LR used at the beginning of warmup equals to
            warmup_ratio * initial_lr
        warmup_by_epoch (bool): When warmup_by_epoch == True, warmup_iters
            means the number of epochs that warmup lasts, otherwise means the
            number of iteration that warmup lasts

    Raises:
        ValueError: If ``warmup`` is neither None nor one of the supported
            warmup types.
    """

    def __init__(self,
                 by_epoch=True,
                 warmup=None,
                 warmup_iters=0,
                 warmup_ratio=0.1,
                 warmup_by_epoch=False):
        # validate the "warmup" argument and its companions
        if warmup is not None:
            if warmup not in ['constant', 'linear', 'exp']:
                raise ValueError(
                    f'"{warmup}" is not a supported type for warming up, valid'
                    ' types are "constant", "linear" and "exp"')
            assert warmup_iters > 0, \
                '"warmup_iters" must be a positive integer'
            assert 0 < warmup_ratio <= 1.0, \
                '"warmup_ratio" must be in range (0,1]'

        self.by_epoch = by_epoch
        self.warmup = warmup
        self.warmup_iters = warmup_iters
        self.warmup_ratio = warmup_ratio
        self.warmup_by_epoch = warmup_by_epoch

        if self.warmup_by_epoch:
            # The iteration count is unknown until the data loader is
            # available; it is filled in by ``before_train_epoch``.
            self.warmup_epochs = self.warmup_iters
            self.warmup_iters = None
        else:
            self.warmup_epochs = None

        self.base_lr = []  # initial lr for all param groups
        self.regular_lr = []  # expected lr if no warming up is performed

    def _set_lr(self, runner, lr_groups):
        """Write ``lr_groups`` into the param groups of the optimizer(s)."""
        if isinstance(runner.optimizer, dict):
            for k, optim in runner.optimizer.items():
                for param_group, lr in zip(optim.param_groups, lr_groups[k]):
                    param_group['lr'] = lr
        else:
            for param_group, lr in zip(runner.optimizer.param_groups,
                                       lr_groups):
                param_group['lr'] = lr

    def get_lr(self, runner, base_lr):
        """Return the scheduled LR for one param group; subclass hook."""
        raise NotImplementedError

    def get_regular_lr(self, runner):
        """Compute the scheduled (non-warmup) LR for every param group.

        Returns:
            list | dict: One LR per param group; a dict of such lists when
            ``runner.optimizer`` is a dict.
        """
        if isinstance(runner.optimizer, dict):
            lr_groups = {}
            for k in runner.optimizer.keys():
                _lr_group = [
                    self.get_lr(runner, _base_lr)
                    for _base_lr in self.base_lr[k]
                ]
                lr_groups.update({k: _lr_group})

            return lr_groups
        else:
            return [self.get_lr(runner, _base_lr) for _base_lr in self.base_lr]

    def get_warmup_lr(self, cur_iters):
        """Scale ``self.regular_lr`` according to the warmup type.

        Args:
            cur_iters (int): Current iteration used to measure warmup
                progress against ``self.warmup_iters``.

        Returns:
            list | dict: Warmup LRs with the same layout as
            ``self.regular_lr``.
        """

        def _get_warmup_lr(cur_iters, regular_lr):
            if self.warmup == 'constant':
                warmup_lr = [_lr * self.warmup_ratio for _lr in regular_lr]
            elif self.warmup == 'linear':
                # k shrinks linearly from (1 - warmup_ratio) to 0.
                k = (1 - cur_iters / self.warmup_iters) * (1 -
                                                           self.warmup_ratio)
                warmup_lr = [_lr * (1 - k) for _lr in regular_lr]
            elif self.warmup == 'exp':
                # k grows geometrically from warmup_ratio to 1.
                k = self.warmup_ratio**(1 - cur_iters / self.warmup_iters)
                warmup_lr = [_lr * k for _lr in regular_lr]
            return warmup_lr

        if isinstance(self.regular_lr, dict):
            lr_groups = {}
            for key, regular_lr in self.regular_lr.items():
                lr_groups[key] = _get_warmup_lr(cur_iters, regular_lr)
            return lr_groups
        else:
            return _get_warmup_lr(cur_iters, self.regular_lr)

    def before_run(self, runner):
        """Record the base LR of every param group in ``self.base_lr``."""
        # NOTE: when resuming from a checkpoint, if 'initial_lr' is not saved,
        # it will be set according to the optimizer params
        if isinstance(runner.optimizer, dict):
            self.base_lr = {}
            for k, optim in runner.optimizer.items():
                for group in optim.param_groups:
                    group.setdefault('initial_lr', group['lr'])
                _base_lr = [
                    group['initial_lr'] for group in optim.param_groups
                ]
                self.base_lr.update({k: _base_lr})
        else:
            for group in runner.optimizer.param_groups:
                group.setdefault('initial_lr', group['lr'])
            self.base_lr = [
                group['initial_lr'] for group in runner.optimizer.param_groups
            ]

    def before_train_epoch(self, runner):
        """Resolve epoch-based warmup length and apply the per-epoch LR."""
        if self.warmup_iters is None:
            # warmup was given in epochs; convert it to iterations once
            epoch_len = len(runner.data_loader)
            self.warmup_iters = self.warmup_epochs * epoch_len

        if not self.by_epoch:
            return

        self.regular_lr = self.get_regular_lr(runner)
        self._set_lr(runner, self.regular_lr)

    def before_train_iter(self, runner):
        """Apply the per-iteration LR and/or the warmup LR."""
        cur_iter = runner.iter
        if not self.by_epoch:
            self.regular_lr = self.get_regular_lr(runner)
            if self.warmup is None or cur_iter >= self.warmup_iters:
                self._set_lr(runner, self.regular_lr)
            else:
                warmup_lr = self.get_warmup_lr(cur_iter)
                self._set_lr(runner, warmup_lr)
        else:
            # The regular LR was already set in ``before_train_epoch``; only
            # the warmup phase needs per-iteration updates here.
            if self.warmup is None or cur_iter > self.warmup_iters:
                return
            elif cur_iter == self.warmup_iters:
                # first iteration after warmup: restore the regular LR
                self._set_lr(runner, self.regular_lr)
            else:
                warmup_lr = self.get_warmup_lr(cur_iter)
                self._set_lr(runner, warmup_lr)


@HOOKS.register_module()
class FixedLrUpdaterHook(LrUpdaterHook):
    """Schedule whose regular LR is always the base LR of the param group.

    All keyword arguments are forwarded to :class:`LrUpdaterHook`.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def get_lr(self, runner, base_lr):
        """Return ``base_lr`` unchanged."""
        return base_lr


@HOOKS.register_module()
class StepLrUpdaterHook(LrUpdaterHook):
    """Step LR scheduler with min_lr clipping.

    The LR of a param group is ``base_lr * gamma ** n`` where ``n`` is the
    number of decay steps already passed, optionally clipped from below.

    Args:
        step (int | list[int]): Step to decay the LR. If an int value is given,
            regard it as the decay interval. If a list is given, decay LR at
            these steps.
        gamma (float, optional): Decay LR ratio. Default: 0.1.
        min_lr (float, optional): Minimum LR value to keep. If LR after decay
            is lower than `min_lr`, it will be clipped to this value. If None
            is given, we don't perform lr clipping. Default: None.
        **kwargs: Forwarded to :class:`LrUpdaterHook`.

    Raises:
        TypeError: If ``step`` is neither a list nor an int.
    """

    def __init__(self, step, gamma=0.1, min_lr=None, **kwargs):
        if not isinstance(step, (list, int)):
            raise TypeError('"step" must be a list or integer')
        if isinstance(step, list):
            assert mmcv.is_list_of(step, int)
            assert all(milestone > 0 for milestone in step)
        else:
            assert step > 0

        self.step = step
        self.gamma = gamma
        self.min_lr = min_lr
        super().__init__(**kwargs)

    def get_lr(self, runner, base_lr):
        """Return the decayed (and possibly clipped) LR for ``base_lr``."""
        if self.by_epoch:
            progress = runner.epoch
        else:
            progress = runner.iter

        # Number of decays applied so far.
        if isinstance(self.step, int):
            num_decays = progress // self.step
        else:
            # Index of the first milestone not yet reached; all of them when
            # progress is past every milestone.
            num_decays = next(
                (idx for idx, milestone in enumerate(self.step)
                 if progress < milestone), len(self.step))

        decayed = base_lr * (self.gamma**num_decays)
        if self.min_lr is None:
            return decayed
        # clip to a minimum value
        return max(decayed, self.min_lr)


@HOOKS.register_module()
class ExpLrUpdaterHook(LrUpdaterHook):
    """Exponential decay: ``lr = base_lr * gamma ** progress``.

    Args:
        gamma (float): Multiplicative decay factor per epoch or iteration.
        **kwargs: Forwarded to :class:`LrUpdaterHook`.
    """

    def __init__(self, gamma, **kwargs):
        self.gamma = gamma
        super().__init__(**kwargs)

    def get_lr(self, runner, base_lr):
        """Return ``base_lr`` decayed by the current epoch or iteration."""
        if self.by_epoch:
            elapsed = runner.epoch
        else:
            elapsed = runner.iter
        return base_lr * self.gamma**elapsed


@HOOKS.register_module()
class PolyLrUpdaterHook(LrUpdaterHook):
    """Polynomial decay from ``base_lr`` towards ``min_lr``.

    ``lr = (base_lr - min_lr) * (1 - progress / max_progress) ** power
    + min_lr``.

    Args:
        power (float): Exponent of the polynomial. Default: 1.
        min_lr (float): LR reached when progress equals the maximum.
            Default: 0.
        **kwargs: Forwarded to :class:`LrUpdaterHook`.
    """

    def __init__(self, power=1., min_lr=0., **kwargs):
        self.power = power
        self.min_lr = min_lr
        super().__init__(**kwargs)

    def get_lr(self, runner, base_lr):
        """Return the polynomially decayed LR for ``base_lr``."""
        if self.by_epoch:
            done, total = runner.epoch, runner.max_epochs
        else:
            done, total = runner.iter, runner.max_iters
        remaining = (1 - done / total)**self.power
        return (base_lr - self.min_lr) * remaining + self.min_lr


@HOOKS.register_module()
class InvLrUpdaterHook(LrUpdaterHook):
    """Inverse decay: ``lr = base_lr * (1 + gamma * progress) ** -power``.

    Args:
        gamma (float): Scale applied to the progress counter.
        power (float): Exponent of the decay. Default: 1.
        **kwargs: Forwarded to :class:`LrUpdaterHook`.
    """

    def __init__(self, gamma, power=1., **kwargs):
        self.gamma = gamma
        self.power = power
        super().__init__(**kwargs)

    def get_lr(self, runner, base_lr):
        """Return the inverse-decayed LR for ``base_lr``."""
        if self.by_epoch:
            elapsed = runner.epoch
        else:
            elapsed = runner.iter
        return base_lr * (1 + self.gamma * elapsed)**(-self.power)


@HOOKS.register_module()
class CosineAnnealingLrUpdaterHook(LrUpdaterHook):
    """Cosine annealing from the base LR down to a target LR.

    Exactly one of ``min_lr`` and ``min_lr_ratio`` must be given.

    Args:
        min_lr (float, optional): Absolute target LR. Default: None.
        min_lr_ratio (float, optional): Target LR expressed as a fraction of
            the base LR. Default: None.
        **kwargs: Forwarded to :class:`LrUpdaterHook`.
    """

    def __init__(self, min_lr=None, min_lr_ratio=None, **kwargs):
        # exactly one of the two targets has to be specified
        assert (min_lr is None) ^ (min_lr_ratio is None)
        self.min_lr = min_lr
        self.min_lr_ratio = min_lr_ratio
        super().__init__(**kwargs)

    def get_lr(self, runner, base_lr):
        """Return the cosine-annealed LR for the current progress."""
        if self.by_epoch:
            done, total = runner.epoch, runner.max_epochs
        else:
            done, total = runner.iter, runner.max_iters

        if self.min_lr_ratio is None:
            floor_lr = self.min_lr
        else:
            floor_lr = base_lr * self.min_lr_ratio
        return annealing_cos(base_lr, floor_lr, done / total)


@HOOKS.register_module()
class FlatCosineAnnealingLrUpdaterHook(LrUpdaterHook):
    """Flat + Cosine lr schedule.

    The LR stays at the base LR until ``start_percent`` of training has
    elapsed and is cosine-annealed to the target LR afterwards.

    Modified from https://github.com/fastai/fastai/blob/master/fastai/callback/schedule.py#L128 # noqa: E501

    Args:
        start_percent (float): When to start annealing the learning rate
            after the percentage of the total training steps.
            The value should be in range [0, 1).
            Default: 0.75
        min_lr (float, optional): The minimum lr. Default: None.
        min_lr_ratio (float, optional): The ratio of minimum lr to the base lr.
            Either `min_lr` or `min_lr_ratio` should be specified.
            Default: None.
        **kwargs: Forwarded to :class:`LrUpdaterHook`.

    Raises:
        ValueError: If ``start_percent`` is below 0, above 1, or not a float.
    """

    def __init__(self,
                 start_percent=0.75,
                 min_lr=None,
                 min_lr_ratio=None,
                 **kwargs):
        # exactly one of the two targets has to be specified
        assert (min_lr is None) ^ (min_lr_ratio is None)
        if start_percent < 0 or start_percent > 1 or not isinstance(
                start_percent, float):
            raise ValueError(
                'expected float between 0 and 1 start_percent, but '
                f'got {start_percent}')
        self.start_percent = start_percent
        self.min_lr = min_lr
        self.min_lr_ratio = min_lr_ratio
        super().__init__(**kwargs)

    def get_lr(self, runner, base_lr):
        """Return ``base_lr`` during the flat part, else the annealed LR."""
        if self.by_epoch:
            total, current = runner.max_epochs, runner.epoch
        else:
            total, current = runner.max_iters, runner.iter

        # First epoch/iteration of the annealing part.
        anneal_start = round(total * self.start_percent)
        elapsed = current - anneal_start
        if elapsed < 0:
            # still in the flat part of the schedule
            return base_lr

        if self.min_lr_ratio is None:
            floor_lr = self.min_lr
        else:
            floor_lr = base_lr * self.min_lr_ratio
        span = total - anneal_start
        return annealing_cos(base_lr, floor_lr, elapsed / span)


@HOOKS.register_module()
class CosineRestartLrUpdaterHook(LrUpdaterHook):
    """Cosine annealing with restarts learning rate scheme.

    Training is divided into consecutive cycles. Inside a cycle the LR is
    cosine-annealed towards the target LR; the starting point of each cycle
    is scaled by that cycle's restart weight.

    Args:
        periods (list[int]): Periods for each cosine annealing cycle.
        restart_weights (list[float], optional): Restart weights at each
            restart iteration. Default: [1].
        min_lr (float, optional): The minimum lr. Default: None.
        min_lr_ratio (float, optional): The ratio of minimum lr to the base lr.
            Either `min_lr` or `min_lr_ratio` should be specified.
            Default: None.
        **kwargs: Forwarded to :class:`LrUpdaterHook`.
    """

    def __init__(self,
                 periods,
                 restart_weights=[1],
                 min_lr=None,
                 min_lr_ratio=None,
                 **kwargs):
        # exactly one of the two targets has to be specified
        assert (min_lr is None) ^ (min_lr_ratio is None)
        self.periods = periods
        self.min_lr = min_lr
        self.min_lr_ratio = min_lr_ratio
        self.restart_weights = restart_weights
        assert (len(self.periods) == len(self.restart_weights)
                ), 'periods and restart_weights should have the same length.'
        super().__init__(**kwargs)

        # Running totals of the periods, i.e. the end of every cycle.
        cycle_ends = []
        running_total = 0
        for period in self.periods:
            running_total = running_total + period
            cycle_ends.append(running_total)
        self.cumulative_periods = cycle_ends

    def get_lr(self, runner, base_lr):
        """Return the annealed LR inside the cycle containing the progress."""
        progress = runner.epoch if self.by_epoch else runner.iter

        if self.min_lr_ratio is None:
            floor_lr = self.min_lr
        else:
            floor_lr = base_lr * self.min_lr_ratio

        cycle = get_position_from_periods(progress, self.cumulative_periods)
        if cycle == 0:
            cycle_start = 0
        else:
            cycle_start = self.cumulative_periods[cycle - 1]

        # Fraction of the current cycle already completed, capped at 1.
        fraction = min((progress - cycle_start) / self.periods[cycle], 1)
        return annealing_cos(base_lr, floor_lr, fraction,
                             self.restart_weights[cycle])


def get_position_from_periods(iteration, cumulative_periods):
    """Locate the period that contains ``iteration``.

    Returns the index of the first entry of ``cumulative_periods`` that is
    strictly greater than ``iteration``. For example, with
    cumulative_periods = [100, 200, 300, 400]:
    iteration == 50 gives 0;
    iteration == 210 gives 2;
    iteration == 300 gives 3.

    Args:
        iteration (int): Current iteration.
        cumulative_periods (list[int]): Cumulative period list.

    Returns:
        int: The position of the right-closest number in the period list.

    Raises:
        ValueError: If ``iteration`` is not smaller than any entry of
            ``cumulative_periods``.
    """
    position = next(
        (idx for idx, boundary in enumerate(cumulative_periods)
         if iteration < boundary), None)
    if position is None:
        raise ValueError(f'Current iteration {iteration} exceeds '
                         f'cumulative_periods {cumulative_periods}')
    return position


@HOOKS.register_module()
class CyclicLrUpdaterHook(LrUpdaterHook):
    """Cyclic LR Scheduler.

    Implement the cyclical learning rate policy (CLR) described in
    https://arxiv.org/pdf/1506.01186.pdf

    Different from the original paper, we use cosine annealing rather than
    triangular policy inside a cycle. This improves the performance in the
    3D detection area.

    Every cycle consists of an "up" phase, in which the LR moves from the
    base LR to ``target_ratio[0] * base_lr``, followed by a "down" phase
    towards ``target_ratio[1] * base_lr``.

    Args:
        by_epoch (bool, optional): Whether to update LR by epoch. Only
            ``False`` is supported.
        target_ratio (tuple[float], optional): Relative ratio of the highest LR
            and the lowest LR to the initial LR.
        cyclic_times (int, optional): Number of cycles during training
        step_ratio_up (float, optional): The ratio of the increasing process of
            LR in the total cycle.
        anneal_strategy (str, optional): {'cos', 'linear'}
            Specifies the annealing strategy: 'cos' for cosine annealing,
            'linear' for linear annealing. Default: 'cos'.
        gamma (float, optional): Cycle decay ratio. Default: 1.
            It takes values in the range (0, 1]. The difference between the
            maximum learning rate and the minimum learning rate decreases
            periodically when it is less than 1. `New in version 1.4.4.`
        **kwargs: Forwarded to :class:`LrUpdaterHook`.

    Raises:
        ValueError: If ``target_ratio`` is neither a float nor a tuple, or if
            ``anneal_strategy`` is not 'cos' or 'linear'.
    """

    def __init__(self,
                 by_epoch=False,
                 target_ratio=(10, 1e-4),
                 cyclic_times=1,
                 step_ratio_up=0.4,
                 anneal_strategy='cos',
                 gamma=1,
                 **kwargs):
        # Normalise ``target_ratio`` to a (highest, lowest) pair; when only
        # the highest ratio is given the lowest is derived from it.
        if isinstance(target_ratio, float):
            target_ratio = (target_ratio, target_ratio / 1e5)
        elif isinstance(target_ratio, tuple):
            if len(target_ratio) == 1:
                target_ratio = (target_ratio[0], target_ratio[0] / 1e5)
        else:
            raise ValueError('target_ratio should be either float '
                             f'or tuple, got {type(target_ratio)}')

        assert len(target_ratio) == 2, \
            '"target_ratio" must be list or tuple of two floats'
        assert 0 <= step_ratio_up < 1.0, \
            '"step_ratio_up" must be in range [0,1)'
        assert 0 < gamma <= 1, \
            '"gamma" must be in range (0, 1]'

        self.target_ratio = target_ratio
        self.cyclic_times = cyclic_times
        self.step_ratio_up = step_ratio_up
        self.gamma = gamma
        self.max_iter_per_phase = None
        self.lr_phases = []  # filled in by ``before_run``

        # validate anneal_strategy and pick the interpolation function
        if anneal_strategy not in ['cos', 'linear']:
            raise ValueError('anneal_strategy must be one of "cos" or '
                             f'"linear", instead got {anneal_strategy}')
        if anneal_strategy == 'cos':
            self.anneal_func = annealing_cos
        else:
            self.anneal_func = annealing_linear

        assert not by_epoch, \
            'currently only support "by_epoch" = False'
        super().__init__(by_epoch, **kwargs)

    def before_run(self, runner):
        """Record base LRs and lay out the up/down phases of one cycle."""
        super().before_run(runner)
        cycle_len = runner.max_iters // self.cyclic_times
        self.max_iter_per_phase = cycle_len
        up_end = int(self.step_ratio_up * cycle_len)
        high_ratio, low_ratio = self.target_ratio
        # Each phase is [start_iter, end_iter, start_ratio, end_ratio].
        self.lr_phases.extend([
            [0, up_end, 1, high_ratio],
            [up_end, cycle_len, high_ratio, low_ratio],
        ])

    def get_lr(self, runner, base_lr):
        """Return the LR for the current position inside the current cycle."""
        cycle_idx, pos_in_cycle = divmod(runner.iter,
                                         self.max_iter_per_phase)
        # Per-cycle decay factor applied to the peak ratio.
        scale = self.gamma**cycle_idx

        for start_iter, end_iter, start_ratio, end_ratio in self.lr_phases:
            if not (start_iter <= pos_in_cycle < end_iter):
                continue
            # Apply cycle scaling to gradually reduce the difference
            # between max_lr and base lr. The scaled peak ratio is
            # (base_lr + scale * (max_lr - base_lr)) / base_lr.
            if start_iter == 0:
                # phase starting at iteration 0: the peak is the end point
                end_ratio = 1 - scale + end_ratio * scale
            else:
                # later phase: the peak is the start point
                start_ratio = 1 - scale + start_ratio * scale
            fraction = (pos_in_cycle - start_iter) / (end_iter - start_iter)
            return self.anneal_func(base_lr * start_ratio,
                                    base_lr * end_ratio, fraction)


@HOOKS.register_module()
class OneCycleLrUpdaterHook(LrUpdaterHook):
    """One Cycle LR Scheduler.

    The 1cycle learning rate policy changes the learning rate after every
    batch. The one cycle learning rate policy is described in
    https://arxiv.org/pdf/1708.07120.pdf

    Args:
        max_lr (float or list or dict): Upper learning rate boundaries in the
            cycle for each parameter group. A dict is looked up by optimizer
            name.
        total_steps (int, optional): The total number of steps in the cycle.
            Note that if a value is not provided here, it will be the max_iter
            of runner. Default: None.
        pct_start (float): The percentage of the cycle (in number of steps)
            spent increasing the learning rate.
            Default: 0.3
        anneal_strategy (str): {'cos', 'linear'}
            Specifies the annealing strategy: 'cos' for cosine annealing,
            'linear' for linear annealing.
            Default: 'cos'
        div_factor (float): Determines the initial learning rate via
            initial_lr = max_lr/div_factor
            Default: 25
        final_div_factor (float): Determines the minimum learning rate via
            min_lr = initial_lr/final_div_factor
            Default: 1e4
        three_phase (bool): If three_phase is True, use a third phase of the
            schedule to annihilate the learning rate according to
            final_div_factor instead of modifying the second phase (the first
            two phases will be symmetrical about the step indicated by
            pct_start).
            Default: False
        **kwargs: Forwarded to :class:`LrUpdaterHook`. ``by_epoch`` defaults
            to False and must not be True.

    Raises:
        ValueError: If ``max_lr``, ``total_steps``, ``pct_start`` or
            ``anneal_strategy`` has an unsupported type or value.
    """

    def __init__(self,
                 max_lr,
                 total_steps=None,
                 pct_start=0.3,
                 anneal_strategy='cos',
                 div_factor=25,
                 final_div_factor=1e4,
                 three_phase=False,
                 **kwargs):
        # validate by_epoch, currently only support by_epoch = False
        if 'by_epoch' not in kwargs:
            kwargs['by_epoch'] = False
        else:
            assert not kwargs['by_epoch'], \
                'currently only support "by_epoch" = False'
        if not isinstance(max_lr, (numbers.Number, list, dict)):
            raise ValueError('the type of max_lr must be one of number, '
                             f'list or dict, but got {type(max_lr)}')
        self._max_lr = max_lr
        if total_steps is not None:
            if not isinstance(total_steps, int):
                raise ValueError('the type of total_steps must be int, but '
                                 f'got {type(total_steps)}')
            # Only set when given; ``before_run`` checks for its presence.
            self.total_steps = total_steps
        # validate pct_start
        if pct_start < 0 or pct_start > 1 or not isinstance(pct_start, float):
            raise ValueError('expected float between 0 and 1 pct_start, but '
                             f'got {pct_start}')
        self.pct_start = pct_start
        # validate anneal_strategy
        if anneal_strategy not in ['cos', 'linear']:
            raise ValueError('anneal_strategy must be one of "cos" or '
                             f'"linear", instead got {anneal_strategy}')
        elif anneal_strategy == 'cos':
            self.anneal_func = annealing_cos
        elif anneal_strategy == 'linear':
            self.anneal_func = annealing_linear
        self.div_factor = div_factor
        self.final_div_factor = final_div_factor
        self.three_phase = three_phase
        self.lr_phases = []  # init lr_phases
        super(OneCycleLrUpdaterHook, self).__init__(**kwargs)

    def before_run(self, runner):
        """Derive base LRs from ``max_lr`` and lay out the schedule phases.

        Raises:
            ValueError: If the total number of steps is smaller than
                ``runner.max_iters``.
        """
        if hasattr(self, 'total_steps'):
            total_steps = self.total_steps
        else:
            total_steps = runner.max_iters
        if total_steps < runner.max_iters:
            raise ValueError(
                'The total steps must be greater than or equal to max '
                f'iterations {runner.max_iters} of runner, but total steps '
                f'is {total_steps}.')

        # base_lr is the initial LR of the cycle: max_lr / div_factor
        if isinstance(runner.optimizer, dict):
            self.base_lr = {}
            for k, optim in runner.optimizer.items():
                _max_lr = format_param(k, optim, self._max_lr)
                self.base_lr[k] = [lr / self.div_factor for lr in _max_lr]
                for group, lr in zip(optim.param_groups, self.base_lr[k]):
                    group.setdefault('initial_lr', lr)
        else:
            k = type(runner.optimizer).__name__
            _max_lr = format_param(k, runner.optimizer, self._max_lr)
            self.base_lr = [lr / self.div_factor for lr in _max_lr]
            for group, lr in zip(runner.optimizer.param_groups, self.base_lr):
                group.setdefault('initial_lr', lr)

        # Each phase is [end_iter, start_ratio, end_ratio]; the ratios are
        # multiplied with base_lr in ``get_lr``.
        if self.three_phase:
            self.lr_phases.append(
                [float(self.pct_start * total_steps) - 1, 1, self.div_factor])
            self.lr_phases.append([
                float(2 * self.pct_start * total_steps) - 2, self.div_factor, 1
            ])
            self.lr_phases.append(
                [total_steps - 1, 1, 1 / self.final_div_factor])
        else:
            self.lr_phases.append(
                [float(self.pct_start * total_steps) - 1, 1, self.div_factor])
            self.lr_phases.append(
                [total_steps - 1, self.div_factor, 1 / self.final_div_factor])

    def get_lr(self, runner, base_lr):
        """Return the LR of the phase that contains the current iteration."""
        curr_iter = runner.iter
        start_iter = 0
        for end_iter, start_lr, end_lr in self.lr_phases:
            if curr_iter <= end_iter:
                phase_len = end_iter - start_iter
                # The first phase has zero length when
                # ``pct_start * total_steps == 1``; treat it as finished
                # instead of dividing by zero.
                if phase_len:
                    pct = (curr_iter - start_iter) / phase_len
                else:
                    pct = 1.0
                lr = self.anneal_func(base_lr * start_lr, base_lr * end_lr,
                                      pct)
                break
            start_iter = end_iter
        return lr


def annealing_cos(start, end, factor, weight=1):
    """Compute a cosine-annealed learning rate.

    Cosine anneal from `weight * start + (1 - weight) * end` to `end` as
    percentage goes from 0.0 to 1.0.

    Args:
        start (float): The starting learning rate of the cosine annealing.
        end (float): The ending learning rate of the cosine annealing.
        factor (float): The coefficient of `pi` when calculating the current
            percentage. Range from 0.0 to 1.0.
        weight (float, optional): The combination factor of `start` and `end`
            when calculating the actual starting learning rate. Default to 1.

    Returns:
        float: The annealed learning rate.
    """
    # (cos(pi * factor) + 1) runs from 2 down to 0, so half of the weighted
    # gap is the amplitude of the curve.
    half_gap = 0.5 * weight * (start - end)
    return end + half_gap * (cos(pi * factor) + 1)


def annealing_linear(start, end, factor):
    """Compute a linearly annealed learning rate.

    Linear anneal from `start` to `end` as percentage goes from 0.0 to 1.0.

    Args:
        start (float): The starting learning rate of the linear annealing.
        end (float): The ending learning rate of the linear annealing.
        factor (float): The fraction of the way from `start` to `end`.
            Range from 0.0 to 1.0.

    Returns:
        float: The interpolated learning rate.
    """
    gap = end - start
    return start + gap * factor


def format_param(name, optim, param):
    """Expand ``param`` into one value per param group of ``optim``.

    Args:
        name (str): Key used to look up ``param`` when it is a mapping; also
            used in error messages.
        optim: Object exposing a ``param_groups`` sequence.
        param (number | list | tuple | dict): A single number is repeated for
            every param group; a list/tuple must already have one entry per
            param group and is returned as is; anything else is treated as a
            mapping and indexed by ``name``.

    Returns:
        The per-group values.

    Raises:
        ValueError: If a list/tuple ``param`` has the wrong length.
        KeyError: If ``name`` is missing from a mapping ``param``.
    """
    if isinstance(param, numbers.Number):
        return [param] * len(optim.param_groups)

    if not isinstance(param, (list, tuple)):
        # multi optimizers: one entry per optimizer name
        if name not in param:
            raise KeyError(f'{name} is not found in {param.keys()}')
        return param[name]

    # multi param groups: lengths have to line up
    if len(param) != len(optim.param_groups):
        raise ValueError(f'expected {len(optim.param_groups)} '
                         f'values for {name}, got {len(param)}')
    return param


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/hooks/memory.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import torch

from .hook import HOOKS, Hook


@HOOKS.register_module()
class EmptyCacheHook(Hook):
    """Hook that calls ``torch.cuda.empty_cache()`` at selected points.

    Args:
        before_epoch (bool): Call it in ``before_epoch``. Default: False.
        after_epoch (bool): Call it in ``after_epoch``. Default: True.
        after_iter (bool): Call it in ``after_iter``. Default: False.
    """

    def __init__(self, before_epoch=False, after_epoch=True, after_iter=False):
        self._before_epoch = before_epoch
        self._after_epoch = after_epoch
        self._after_iter = after_iter

    @staticmethod
    def _empty_cache_if(enabled):
        """Call ``torch.cuda.empty_cache()`` when ``enabled`` is truthy."""
        if enabled:
            torch.cuda.empty_cache()

    def before_epoch(self, runner):
        self._empty_cache_if(self._before_epoch)

    def after_epoch(self, runner):
        self._empty_cache_if(self._after_epoch)

    def after_iter(self, runner):
        self._empty_cache_if(self._after_iter)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/hooks/momentum_updater.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import mmcv
from .hook import HOOKS, Hook
from .lr_updater import annealing_cos, annealing_linear, format_param


class MomentumUpdaterHook(Hook):
    """Base hook that schedules optimizer momentum, with optional warmup.

    Subclasses implement :meth:`get_momentum`, which maps a base momentum to
    the momentum for the runner's current progress. The value is written to
    ``param_group['momentum']`` when that key exists, otherwise to the first
    element of ``param_group['betas']``.

    ``runner.optimizer`` may be a single optimizer or a dict of optimizers;
    in the dict case every momentum container handled here is a dict with
    the same keys.

    Args:
        by_epoch (bool): If True the regular momentum is recomputed in
            ``before_train_epoch``; otherwise in every ``before_train_iter``.
            Default: True.
        warmup (str, optional): Warmup type, one of ``'constant'``,
            ``'linear'`` or ``'exp'``. ``None`` disables warmup.
            Default: None.
        warmup_iters (int): Number of iterations the warmup lasts. Must be
            positive when ``warmup`` is set. Default: 0.
        warmup_ratio (float): Ratio in ``(0, 1]`` that the regular momentum
            is divided by during warmup (see :meth:`get_warmup_momentum`).
            Default: 0.9.
    """

    def __init__(self,
                 by_epoch=True,
                 warmup=None,
                 warmup_iters=0,
                 warmup_ratio=0.9):
        # validate the warmup-related arguments only when warmup is enabled
        if warmup is not None:
            if warmup not in ['constant', 'linear', 'exp']:
                raise ValueError(
                    f'"{warmup}" is not a supported type for warming up, valid'
                    ' types are "constant", "linear" and "exp"')
            assert warmup_iters > 0, \
                '"warmup_iters" must be a positive integer'
            assert 0 < warmup_ratio <= 1.0, \
                '"warmup_ratio" must be in range (0,1]'

        self.by_epoch = by_epoch
        self.warmup = warmup
        self.warmup_iters = warmup_iters
        self.warmup_ratio = warmup_ratio

        self.base_momentum = []  # initial momentum for all param groups
        self.regular_momentum = [
        ]  # expected momentum if no warming up is performed

    @staticmethod
    def _assign_momentum(optim, values):
        """Write one momentum value into each param group of ``optim``."""
        for param_group, mom in zip(optim.param_groups, values):
            if 'momentum' in param_group.keys():
                param_group['momentum'] = mom
            elif 'betas' in param_group.keys():
                # Only the first beta is scheduled; the second is kept.
                param_group['betas'] = (mom, param_group['betas'][1])

    @staticmethod
    def _read_initial_momentum(optim):
        """Record and return ``initial_momentum`` for each param group.

        ``initial_momentum`` is only set when absent, so a value already
        stored in the group (e.g. restored from a checkpoint) is kept.
        """
        for group in optim.param_groups:
            if 'momentum' in group.keys():
                group.setdefault('initial_momentum', group['momentum'])
            else:
                group.setdefault('initial_momentum', group['betas'][0])
        return [group['initial_momentum'] for group in optim.param_groups]

    def _set_momentum(self, runner, momentum_groups):
        """Apply ``momentum_groups`` to the runner's optimizer(s)."""
        if isinstance(runner.optimizer, dict):
            for k, optim in runner.optimizer.items():
                self._assign_momentum(optim, momentum_groups[k])
        else:
            self._assign_momentum(runner.optimizer, momentum_groups)

    def get_momentum(self, runner, base_momentum):
        """Return the scheduled momentum for one base value (abstract)."""
        raise NotImplementedError

    def get_regular_momentum(self, runner):
        """Compute the non-warmup momentum for every param group.

        Returns:
            list | dict: A list of momenta for a single optimizer, or a dict
            mapping each optimizer key to such a list.
        """
        if isinstance(runner.optimizer, dict):
            return {
                k: [
                    self.get_momentum(runner, _base_momentum)
                    for _base_momentum in self.base_momentum[k]
                ]
                for k in runner.optimizer.keys()
            }
        return [
            self.get_momentum(runner, _base_momentum)
            for _base_momentum in self.base_momentum
        ]

    def get_warmup_momentum(self, cur_iters):
        """Derive warmup momentum from ``self.regular_momentum``.

        The regular momentum is divided by a factor that depends on the
        warmup type: ``warmup_ratio`` for ``'constant'``; ``1 - k`` with
        ``k = (1 - cur_iters / warmup_iters) * (1 - warmup_ratio)`` for
        ``'linear'``; and ``warmup_ratio ** (1 - cur_iters / warmup_iters)``
        for ``'exp'``.
        """

        def _get_warmup_momentum(cur_iters, regular_momentum):
            if self.warmup == 'constant':
                warmup_momentum = [
                    _momentum / self.warmup_ratio
                    for _momentum in regular_momentum
                ]
            elif self.warmup == 'linear':
                k = (1 - cur_iters / self.warmup_iters) * (1 -
                                                           self.warmup_ratio)
                warmup_momentum = [
                    _momentum / (1 - k) for _momentum in regular_momentum
                ]
            elif self.warmup == 'exp':
                k = self.warmup_ratio**(1 - cur_iters / self.warmup_iters)
                warmup_momentum = [
                    _momentum / k for _momentum in regular_momentum
                ]
            return warmup_momentum

        if isinstance(self.regular_momentum, dict):
            return {
                key: _get_warmup_momentum(cur_iters, regular_momentum)
                for key, regular_momentum in self.regular_momentum.items()
            }
        return _get_warmup_momentum(cur_iters, self.regular_momentum)

    def before_run(self, runner):
        # NOTE: when resuming from a checkpoint,
        # if 'initial_momentum' is not saved,
        # it will be set according to the optimizer params
        if isinstance(runner.optimizer, dict):
            self.base_momentum = {
                k: self._read_initial_momentum(optim)
                for k, optim in runner.optimizer.items()
            }
        else:
            self.base_momentum = self._read_initial_momentum(
                runner.optimizer)

    def before_train_epoch(self, runner):
        if not self.by_epoch:
            return
        self.regular_momentum = self.get_regular_momentum(runner)
        self._set_momentum(runner, self.regular_momentum)

    def before_train_iter(self, runner):
        cur_iter = runner.iter
        if not self.by_epoch:
            # Iteration-based schedule: recompute every step, then either
            # apply it directly or apply its warmup variant.
            self.regular_momentum = self.get_regular_momentum(runner)
            if self.warmup is None or cur_iter >= self.warmup_iters:
                self._set_momentum(runner, self.regular_momentum)
            else:
                warmup_momentum = self.get_warmup_momentum(cur_iter)
                self._set_momentum(runner, warmup_momentum)
        else:
            # Epoch-based schedule: the regular value was set in
            # before_train_epoch, so only the warmup window needs handling.
            if self.warmup is None or cur_iter > self.warmup_iters:
                return
            elif cur_iter == self.warmup_iters:
                # Warmup just ended: restore the regular momentum once.
                self._set_momentum(runner, self.regular_momentum)
            else:
                warmup_momentum = self.get_warmup_momentum(cur_iter)
                self._set_momentum(runner, warmup_momentum)


@HOOKS.register_module()
class StepMomentumUpdaterHook(MomentumUpdaterHook):
    """Step-wise momentum decay with an optional lower bound.

    Args:
        step (int | list[int]): When an int, the momentum is decayed once
            every ``step`` units of progress. When a list, each entry is a
            milestone at which one more decay is applied.
        gamma (float, optional): Multiplicative decay factor applied per
            decay step. Default: 0.5.
        min_momentum (float, optional): Floor for the decayed momentum. When
            None, no momentum clipping is performed. Default: None.
    """

    def __init__(self, step, gamma=0.5, min_momentum=None, **kwargs):
        if not isinstance(step, (list, int)):
            raise TypeError('"step" must be a list or integer')
        if isinstance(step, list):
            assert mmcv.is_list_of(step, int)
            assert all(milestone > 0 for milestone in step)
        else:
            assert step > 0
        self.step = step
        self.gamma = gamma
        self.min_momentum = min_momentum
        super().__init__(**kwargs)

    def get_momentum(self, runner, base_momentum):
        # Progress is measured in epochs or iterations depending on mode.
        progress = runner.epoch if self.by_epoch else runner.iter

        # Number of decays applied so far.
        if isinstance(self.step, int):
            num_decays = progress // self.step
        else:
            # Index of the first milestone not yet reached; all of them if
            # every milestone has been passed.
            num_decays = next(
                (idx for idx, milestone in enumerate(self.step)
                 if progress < milestone), len(self.step))

        decayed = base_momentum * (self.gamma**num_decays)
        if self.min_momentum is None:
            return decayed
        # clip to a minimum value
        return max(decayed, self.min_momentum)


@HOOKS.register_module()
class CosineAnnealingMomentumUpdaterHook(MomentumUpdaterHook):
    """Anneal momentum from its base value to a target via ``annealing_cos``.

    Exactly one of ``min_momentum`` and ``min_momentum_ratio`` must be given.

    Args:
        min_momentum (float, optional): Absolute target momentum.
        min_momentum_ratio (float, optional): Target momentum expressed as a
            multiple of the base momentum.
    """

    def __init__(self, min_momentum=None, min_momentum_ratio=None, **kwargs):
        # Exactly one of the two targets may be specified.
        assert (min_momentum is None) != (min_momentum_ratio is None)
        self.min_momentum = min_momentum
        self.min_momentum_ratio = min_momentum_ratio
        super().__init__(**kwargs)

    def get_momentum(self, runner, base_momentum):
        if self.by_epoch:
            current, total = runner.epoch, runner.max_epochs
        else:
            current, total = runner.iter, runner.max_iters

        ratio = self.min_momentum_ratio
        if ratio is None:
            target = self.min_momentum
        else:
            target = base_momentum * ratio
        return annealing_cos(base_momentum, target, current / total)


@HOOKS.register_module()
class CyclicMomentumUpdaterHook(MomentumUpdaterHook):
    """Cyclic momentum Scheduler.

    Implements the cyclical momentum policy described in
    https://arxiv.org/pdf/1708.07120.pdf

    Each cycle has two phases. In the first, the momentum ratio is annealed
    from 1 to ``target_ratio[0]``; in the second, from ``target_ratio[0]``
    to ``target_ratio[1]``. Ratios are relative to the base momentum.

    Args:
        target_ratio (float | tuple[float]): Ratios relative to the initial
            momentum. A float ``r`` or a 1-tuple ``(r,)`` is expanded to
            ``(r, r / 1e5)``.
        cyclic_times (int): Number of cycles during training.
        step_ratio_up (float): Fraction of each cycle taken by the first
            phase. Must be in ``[0, 1)``.
        by_epoch (bool): Whether to update momentum by epoch. Only False is
            currently supported.
        anneal_strategy (str, optional): {'cos', 'linear'}
            'cos' selects cosine annealing, 'linear' linear annealing.
            Default: 'cos'.
        gamma (float, optional): Cycle decay ratio. Default: 1.
            In cycle ``c`` the ratio shared by the two phases is blended
            towards 1 with weight ``gamma ** c``.
    """

    def __init__(self,
                 by_epoch=False,
                 target_ratio=(0.85 / 0.95, 1),
                 cyclic_times=1,
                 step_ratio_up=0.4,
                 anneal_strategy='cos',
                 gamma=1,
                 **kwargs):
        if not isinstance(target_ratio, (float, tuple)):
            raise ValueError('target_ratio should be either float '
                             f'or tuple, got {type(target_ratio)}')
        if isinstance(target_ratio, float):
            target_ratio = (target_ratio, target_ratio / 1e5)
        elif len(target_ratio) == 1:
            only_ratio = target_ratio[0]
            target_ratio = (only_ratio, only_ratio / 1e5)

        assert len(target_ratio) == 2, \
            '"target_ratio" must be list or tuple of two floats'
        assert 0 <= step_ratio_up < 1.0, \
            '"step_ratio_up" must be in range [0,1)'

        self.target_ratio = target_ratio
        self.cyclic_times = cyclic_times
        self.step_ratio_up = step_ratio_up
        self.gamma = gamma
        self.momentum_phases = []  # filled in before_run

        if anneal_strategy not in ['cos', 'linear']:
            raise ValueError('anneal_strategy must be one of "cos" or '
                             f'"linear", instead got {anneal_strategy}')
        if anneal_strategy == 'cos':
            self.anneal_func = annealing_cos
        else:
            self.anneal_func = annealing_linear

        # currently only support by_epoch=False
        assert not by_epoch, \
            'currently only support "by_epoch" = False'
        super().__init__(by_epoch, **kwargs)

    def before_run(self, runner):
        super().before_run(runner)
        # Split every cycle into an "up" phase followed by a "down" phase.
        cycle_len = runner.max_iters // self.cyclic_times
        up_len = int(self.step_ratio_up * cycle_len)
        self.max_iter_per_phase = cycle_len

        # Each phase is [start_iter, end_iter, start_ratio, end_ratio].
        up_phase = [0, up_len, 1, self.target_ratio[0]]
        self.momentum_phases.append(up_phase)
        down_phase = [
            up_len, cycle_len, self.target_ratio[0], self.target_ratio[1]
        ]
        self.momentum_phases.append(down_phase)

    def get_momentum(self, runner, base_momentum):
        cycle_len = self.max_iter_per_phase
        iter_in_cycle = runner.iter % cycle_len
        cycle_index = runner.iter // cycle_len
        decay = self.gamma**cycle_index

        for begin, finish, ratio_from, ratio_to in self.momentum_phases:
            if not begin <= iter_in_cycle < finish:
                continue
            # Blend the ratio shared by both phases towards 1 by `decay`:
            # it is the end of a phase starting at iteration 0 and the
            # start of any other phase.
            if begin == 0:
                ratio_to = 1 - decay + ratio_to * decay
            else:
                ratio_from = 1 - decay + ratio_from * decay
            elapsed = iter_in_cycle - begin
            return self.anneal_func(base_momentum * ratio_from,
                                    base_momentum * ratio_to,
                                    elapsed / (finish - begin))
        # Falls through (returning None) when no phase covers the iteration.


@HOOKS.register_module()
class OneCycleMomentumUpdaterHook(MomentumUpdaterHook):
    """OneCycle momentum Scheduler.

    This momentum scheduler usually used together with the OneCycleLrUpdater
    to improve the performance.

    The schedule is a list of phases built in :meth:`before_run`; each phase
    anneals between the ``base_momentum`` and ``max_momentum`` values stored
    in every param group. Momentum is written by the inherited
    ``_set_momentum``.

    Args:
        base_momentum (float | list | dict): Lower momentum boundary for
            each parameter group. A dict is indexed by optimizer key (or by
            the optimizer class name for a single optimizer).
            Default: 0.85
        max_momentum (float | list | dict): Upper momentum boundary for each
            parameter group; also the momentum set on the optimizer at the
            start of the run.
            Default: 0.95
        pct_start (float): Fraction of ``runner.max_iters`` covered by the
            first phase, in which momentum is annealed from ``max_momentum``
            to ``base_momentum``. Must be a float in ``[0, 1]``.
            Default: 0.3
        anneal_strategy (str): {'cos', 'linear'}
            Specifies the annealing strategy: 'cos' for cosine annealing,
            'linear' for linear annealing.
            Default: 'cos'
        three_phase (bool): If True, use three phases: max -> base,
            base -> max (mirroring the first), then hold ``max_momentum``
            until the end. If False, use two phases: max -> base, then
            base -> max over the remaining iterations.
            Default: False
    """

    def __init__(self,
                 base_momentum=0.85,
                 max_momentum=0.95,
                 pct_start=0.3,
                 anneal_strategy='cos',
                 three_phase=False,
                 **kwargs):
        # validate by_epoch, currently only support by_epoch=False
        if 'by_epoch' not in kwargs:
            kwargs['by_epoch'] = False
        else:
            assert not kwargs['by_epoch'], \
                'currently only support "by_epoch" = False'
        if not isinstance(base_momentum, (float, list, dict)):
            raise ValueError('base_momentum must be a float, list or dict.')
        self._base_momentum = base_momentum
        if not isinstance(max_momentum, (float, list, dict)):
            raise ValueError('max_momentum must be a float, list or dict.')
        self._max_momentum = max_momentum
        # validate pct_start; the type is checked first so that a
        # non-numeric value yields ValueError rather than a comparison
        # TypeError
        if (not isinstance(pct_start, float) or pct_start < 0
                or pct_start > 1):
            raise ValueError('Expected float between 0 and 1 pct_start, but '
                             f'got {pct_start}')
        self.pct_start = pct_start
        # validate anneal_strategy
        if anneal_strategy not in ['cos', 'linear']:
            raise ValueError('anneal_strategy must be one of "cos" or '
                             f'"linear", instead got {anneal_strategy}')
        elif anneal_strategy == 'cos':
            self.anneal_func = annealing_cos
        elif anneal_strategy == 'linear':
            self.anneal_func = annealing_linear
        self.three_phase = three_phase
        self.momentum_phases = []  # init momentum_phases
        super(OneCycleMomentumUpdaterHook, self).__init__(**kwargs)

    def _init_optimizer_momentum(self, key, optim):
        """Store the momentum bounds in ``optim`` and start at the maximum.

        Args:
            key (str): Name used to look up dict-valued momentum settings.
            optim: Optimizer whose param groups are updated in place.

        Raises:
            ValueError: If the optimizer defaults contain neither
                ``momentum`` nor ``betas``.
        """
        if ('momentum' not in optim.defaults
                and 'betas' not in optim.defaults):
            raise ValueError('optimizer must support momentum with '
                             'option enabled')
        self.use_beta1 = 'betas' in optim.defaults
        base_values = format_param(key, optim, self._base_momentum)
        max_values = format_param(key, optim, self._max_momentum)
        for group, b_momentum, m_momentum in zip(optim.param_groups,
                                                 base_values, max_values):
            if self.use_beta1:
                _, beta2 = group['betas']
                group['betas'] = (m_momentum, beta2)
            else:
                group['momentum'] = m_momentum
            # get_momentum reads these two keys from the param group.
            group['base_momentum'] = b_momentum
            group['max_momentum'] = m_momentum

    def before_run(self, runner):
        if isinstance(runner.optimizer, dict):
            for k, optim in runner.optimizer.items():
                self._init_optimizer_momentum(k, optim)
        else:
            optim = runner.optimizer
            self._init_optimizer_momentum(type(optim).__name__, optim)

        # Each entry: (end_iter, start key, end key); the keys name entries
        # of the optimizer param groups.
        first_phase = (float(self.pct_start * runner.max_iters) - 1,
                       'max_momentum', 'base_momentum')
        if self.three_phase:
            schedule = [
                first_phase,
                (float(2 * self.pct_start * runner.max_iters) - 2,
                 'base_momentum', 'max_momentum'),
                (runner.max_iters - 1, 'max_momentum', 'max_momentum'),
            ]
        else:
            schedule = [
                first_phase,
                (runner.max_iters - 1, 'base_momentum', 'max_momentum'),
            ]
        # Rebuild rather than append so that calling before_run more than
        # once does not accumulate duplicate phases.
        self.momentum_phases = [{
            'end_iter': end_iter,
            'start_momentum': start_key,
            'end_momentum': end_key
        } for end_iter, start_key, end_key in schedule]

    def get_momentum(self, runner, param_group):
        """Return the momentum of ``param_group`` at ``runner.iter``.

        Raises:
            RuntimeError: If no phases exist (``before_run`` has not run).
        """
        curr_iter = runner.iter
        start_iter = 0
        last_index = len(self.momentum_phases) - 1
        for i, phase in enumerate(self.momentum_phases):
            end_iter = phase['end_iter']
            # The last phase also absorbs any iteration past its end.
            if curr_iter <= end_iter or i == last_index:
                span = end_iter - start_iter
                # A zero-length phase (e.g. pct_start * max_iters == 1)
                # would divide by zero; treat it as already completed.
                pct = (curr_iter - start_iter) / span if span else 1.0
                return self.anneal_func(
                    param_group[phase['start_momentum']],
                    param_group[phase['end_momentum']], pct)
            start_iter = end_iter
        raise RuntimeError('momentum_phases is empty; before_run must be '
                           'called before get_momentum')

    def get_regular_momentum(self, runner):
        if isinstance(runner.optimizer, dict):
            return {
                k: [
                    self.get_momentum(runner, param_group)
                    for param_group in optim.param_groups
                ]
                for k, optim in runner.optimizer.items()
            }
        return [
            self.get_momentum(runner, param_group)
            for param_group in runner.optimizer.param_groups
        ]


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/hooks/nni_hook.py
================================================
from .hook import HOOKS, Hook

@HOOKS.register_module()
class NNIHook(Hook):
    """Forward runner metrics to NNI when ``opt_cfg['mode']`` is ``'nni'``.

    Metrics are the entries whose name contains ``opt_cfg['metrics']``. In
    any other mode every method is a no-op.
    """

    @staticmethod
    def _select_metrics(stats, opt_cfg):
        # Keep only the entries whose name contains the configured metric.
        return {
            name: value
            for name, value in stats.items() if opt_cfg['metrics'] in name
        }

    def before_run(self, runner):
        if runner.opt_cfg['mode'] != "nni":
            return
        # Imported lazily so nni is only required in nni mode.
        import nni
        runner.logger = None
        self.nni = nni

    def after_train_epoch(self, runner):
        opt_cfg = runner.opt_cfg
        if opt_cfg['mode'] != 'nni':
            return
        # stats = runner.outputs['log_vars']
        stats = runner.metrics
        is_final = (len(runner.workflow) == 1
                    and runner.epoch == runner.max_epochs)
        if is_final:
            report = self.nni.report_final_result
            report(self._select_metrics(stats, opt_cfg))
            return
        print("report_intermediate_result")
        metrics = self._select_metrics(stats, opt_cfg)
        report = self.nni.report_intermediate_result
        report(metrics['loss'])

    def after_train_iter(self, runner):
        ...

    def before_val_iter(self, runner):
        ...

    def after_val_iter(self, runner):
        ...

    def after_val_epoch(self, runner):
        opt_cfg = runner.opt_cfg
        if opt_cfg['mode'] != 'nni':
            return
        stats = runner.outputs
        is_final = (len(runner.workflow) != 1
                    and runner.epoch == runner.max_epochs)
        if is_final:
            report = self.nni.report_final_result
            report(self._select_metrics(stats, opt_cfg)['loss'])
        else:
            report = self.nni.report_intermediate_result
            report(self._select_metrics(stats, opt_cfg))

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/hooks/optimizer.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import copy
import logging
from collections import defaultdict
from itertools import chain

from torch.nn.utils import clip_grad

from mmcv.utils import TORCH_VERSION, _BatchNorm, digit_version
from ..dist_utils import allreduce_grads
from ..fp16_utils import LossScaler, wrap_fp16_model
from .hook import HOOKS, Hook
from mmcv.runner.record import get_grad_norm
try:
    # If PyTorch version >= 1.6.0, torch.cuda.amp.GradScaler would be imported
    # and used; otherwise, auto fp16 will adopt mmcv's implementation.
    from torch.cuda.amp import GradScaler
except ImportError:
    pass


@HOOKS.register_module()
class OptimizerHook(Hook):
    """A hook contains custom operations for the optimizer.

    After every training iteration it zeroes gradients, back-propagates the
    loss, optionally clips gradients, logs the gradient norm and steps the
    optimizer. ``runner.optimizer`` may be a single optimizer (loss read
    from ``runner.outputs['loss']``) or a dict of optimizers (loss for key
    ``name`` read from ``runner.outputs[f'{name}_loss']``).

    Args:
        grad_clip (float, optional): Value passed as the second positional
            argument of ``clip_grad.clip_grad_norm_``. A falsy value
            disables clipping. Default: None. (not a config dict)
        detect_anomalous_params (bool): This option is only used for
            debugging which will slow down the training speed.
            Detect anomalous parameters that are not included in
            the computational graph with `loss` as the root.
            There are two cases

                - Parameters were not used during
                  forward pass.
                - Parameters were not used to produce
                  loss.
            Default: False.
    """

    def __init__(self, grad_clip=None, detect_anomalous_params=False):
        self.grad_clip = grad_clip
        self.detect_anomalous_params = detect_anomalous_params

    def clip_grads(self, params):
        """Clip gradients of ``params`` when enabled and return a norm.

        Returns:
            The return value of ``clip_grad_norm_`` when clipping is
            performed, otherwise the norm reported by ``get_grad_norm``.
        """
        params, grad_norm = get_grad_norm(params)
        if len(params) > 0 and self.grad_clip:
            return clip_grad.clip_grad_norm_(params, self.grad_clip)
        return grad_norm

    def after_train_iter(self, runner):
        if isinstance(runner.optimizer, dict):
            models = runner.model if isinstance(runner.model, dict) else {}
            for name, optim in runner.optimizer.items():
                optim.zero_grad()
                loss = runner.outputs[f'{name}_loss']
                if self.detect_anomalous_params:
                    self.detect_anomalous_parameters(
                        loss, runner, model=models.get(name))
                loss.backward()

                # Clip between backward() and step(), like the single
                # optimizer path below. Clipping before zero_grad() would
                # only touch stale gradients that are about to be dropped.
                if name in models:
                    params = models[name].parameters()
                else:
                    # No model registered under this optimizer's key: fall
                    # back to the parameters the optimizer itself updates.
                    params = chain.from_iterable(
                        group['params'] for group in optim.param_groups)
                grad_norm = self.clip_grads(params)
                # runner.outputs['num_samples'] was used for averaging;
                # MetricLogger reworked the log_buffer computations, so it
                # is no longer needed here.
                runner.log_buffer.update_dict(
                    {f'{name}_grad_norm': float(grad_norm)})
                optim.step()

        else:
            runner.optimizer.zero_grad()
            if self.detect_anomalous_params:
                self.detect_anomalous_parameters(runner.outputs['loss'], runner)
            runner.outputs['loss'].backward()
            # A model object without a `train` attribute is treated as a
            # wrapper holding the real module in `.model`.
            if not hasattr(runner.model, 'train'):
                grad_norm = self.clip_grads(runner.model.model.parameters())
            else:
                grad_norm = self.clip_grads(runner.model.parameters())

            # runner.outputs['num_samples'] was used for averaging;
            # MetricLogger reworked the log_buffer computations, so it is no
            # longer needed here.
            runner.log_buffer.update_dict({'grad_norm': float(grad_norm)})

            runner.optimizer.step()

    def detect_anomalous_parameters(self, loss, runner, model=None):
        """Log every trainable parameter unreachable from ``loss``.

        Args:
            loss: Tensor whose ``grad_fn`` graph is walked.
            runner: Runner providing ``logger`` and the default model.
            model (optional): Module whose ``named_parameters()`` are
                checked. Defaults to ``runner.model``.
        """
        logger = runner.logger
        if model is None:
            model = runner.model
        parameters_in_graph = set()
        visited = set()

        # Iterative walk with an explicit stack, so that deep graphs cannot
        # exceed Python's recursion limit.
        pending = [loss.grad_fn]
        while pending:
            grad_fn = pending.pop()
            if grad_fn is None or grad_fn in visited:
                continue
            visited.add(grad_fn)
            if hasattr(grad_fn, 'variable'):
                parameters_in_graph.add(grad_fn.variable)
            parents = grad_fn.next_functions
            if parents is not None:
                pending.extend(parent[0] for parent in parents)

        for n, p in model.named_parameters():
            if p not in parameters_in_graph and p.requires_grad:
                logger.log(
                    level=logging.ERROR,
                    msg=f'{n} with shape {p.size()} is not '
                    f'in the computational graph \n')


@HOOKS.register_module()
class GradientCumulativeOptimizerHook(OptimizerHook):
    """Optimizer Hook implements multi-iters gradient cumulating.

    Args:
        cumulative_iters (int, optional): Num of gradient cumulative iters.
            The optimizer will step every `cumulative_iters` iters.
            Defaults to 1.

    Examples:
        >>> # Use cumulative_iters to simulate a large batch size
        >>> # It is helpful when the hardware cannot handle a large batch size.
        >>> loader = DataLoader(data, batch_size=64)
        >>> optim_hook = GradientCumulativeOptimizerHook(cumulative_iters=4)
        >>> # almost equals to
        >>> loader = DataLoader(data, batch_size=256)
        >>> optim_hook = OptimizerHook()
    """

    def __init__(self, cumulative_iters=1, **kwargs):
        super(GradientCumulativeOptimizerHook, self).__init__(**kwargs)

        assert isinstance(cumulative_iters, int) and cumulative_iters > 0, \
            f'cumulative_iters only accepts positive int, but got ' \
            f'{type(cumulative_iters)} instead.'

        self.cumulative_iters = cumulative_iters
        # Absolute iteration index at which the last full accumulation
        # window ends; filled in lazily by ``_init``.
        self.divisible_iters = 0
        # Number of trailing iterations that do not fill a whole window.
        self.remainder_iters = 0
        self.initialized = False

    def has_batch_norm(self, module):
        """Return True if ``module`` or any descendant is a ``_BatchNorm``."""
        if isinstance(module, _BatchNorm):
            return True
        return any(self.has_batch_norm(child) for child in module.children())

    def _init(self, runner):
        """Emit one-off warnings and compute the accumulation boundaries."""
        if runner.iter % self.cumulative_iters != 0:
            runner.logger.warning(
                'Resume iter number is not divisible by cumulative_iters in '
                'GradientCumulativeOptimizerHook, which means the gradient of '
                'some iters is lost and the result may be influenced slightly.'
            )

        if self.has_batch_norm(runner.model) and self.cumulative_iters > 1:
            runner.logger.warning(
                'GradientCumulativeOptimizerHook may slightly decrease '
                'performance if the model has BatchNorm layers.')

        # ``after_train_iter`` compares these values against ``runner.iter``,
        # which does not start at 0 when training is resumed. The boundary is
        # therefore expressed in absolute iterations (derived from
        # ``max_iters``) rather than in iterations remaining; otherwise the
        # remainder factor would be picked too early and could be 0.
        self.divisible_iters = (
            runner.max_iters // self.cumulative_iters * self.cumulative_iters)
        self.remainder_iters = runner.max_iters - self.divisible_iters

        self.initialized = True

    def after_train_iter(self, runner):
        """Accumulate the scaled loss gradient and step at window ends."""
        if not self.initialized:
            self._init(runner)

        # Full windows average over ``cumulative_iters`` iterations; the
        # trailing partial window averages over the iterations it contains.
        if runner.iter < self.divisible_iters:
            loss_factor = self.cumulative_iters
        else:
            loss_factor = self.remainder_iters
        loss = runner.outputs['loss']
        loss = loss / loss_factor
        loss.backward()

        if (self.every_n_iters(runner, self.cumulative_iters)
                or self.is_last_iter(runner)):

            if self.grad_clip is not None:
                grad_norm = self.clip_grads(runner.model.parameters())
                if grad_norm is not None:
                    # Add grad norm to the logger
                    runner.log_buffer.update({'grad_norm': float(grad_norm)},
                                             runner.outputs['num_samples'])
            runner.optimizer.step()
            runner.optimizer.zero_grad()


if (TORCH_VERSION != 'parrots'
        and digit_version(TORCH_VERSION) >= digit_version('1.6.0')):

    @HOOKS.register_module()
    class Fp16OptimizerHook(OptimizerHook):
        """FP16 optimizer hook (using PyTorch's implementation).

        With PyTorch >= 1.6 the optimization procedure is delegated to
        ``GradScaler`` from torch.cuda.amp.

        Args:
            loss_scale (float | str | dict): Scale factor configuration.
                A float selects static loss scaling with that scale. The
                string 'dynamic' selects dynamic loss scaling. A dict is
                forwarded as keyword arguments to ``GradScaler``; see
                https://pytorch.org/docs/stable/amp.html#torch.cuda.amp.GradScaler
                for the accepted parameters. Defaults to 512.

        Examples:
            >>> loss_scale = dict(
            ...     init_scale=65536.0,
            ...     growth_factor=2.0,
            ...     backoff_factor=0.5,
            ...     growth_interval=2000
            ... )
            >>> optimizer_hook = Fp16OptimizerHook(loss_scale=loss_scale)
        """

        def __init__(self,
                     grad_clip=None,
                     coalesce=True,
                     bucket_size_mb=-1,
                     loss_scale=512.,
                     distributed=True):
            self.grad_clip = grad_clip
            self.coalesce = coalesce
            self.bucket_size_mb = bucket_size_mb
            self.distributed = distributed
            # Value handed to ``GradScaler.update``; only a static (float)
            # scale sets it, which pins the scale after every step.
            self._scale_update_param = None
            if loss_scale == 'dynamic':
                scaler = GradScaler()
            elif isinstance(loss_scale, float):
                self._scale_update_param = loss_scale
                scaler = GradScaler(init_scale=loss_scale)
            elif isinstance(loss_scale, dict):
                scaler = GradScaler(**loss_scale)
            else:
                raise ValueError('loss_scale must be of type float, dict, or '
                                 f'"dynamic", got {loss_scale}')
            self.loss_scaler = scaler

        def before_run(self, runner):
            """Wrap the model for fp16 and restore a saved scaler state."""
            wrap_fp16_model(runner.model)
            # A resumed run carries the scaler state in ``runner.meta``.
            if 'fp16' in runner.meta:
                if 'loss_scaler' in runner.meta['fp16']:
                    saved_state = runner.meta['fp16']['loss_scaler']
                    self.loss_scaler.load_state_dict(saved_state)

        def copy_grads_to_fp32(self, fp16_net, fp32_weights):
            """Copy gradients from fp16 model to fp32 weight copy."""
            pairs = zip(fp32_weights, fp16_net.parameters())
            for master_param, half_param in pairs:
                if half_param.grad is None:
                    continue
                if master_param.grad is None:
                    # Allocate a gradient buffer shaped like the parameter.
                    master_param.grad = master_param.data.new(
                        master_param.size())
                master_param.grad.copy_(half_param.grad)

        def copy_params_to_fp16(self, fp16_net, fp32_weights):
            """Copy updated params from fp32 weight copy to fp16 model."""
            pairs = zip(fp16_net.parameters(), fp32_weights)
            for half_param, master_param in pairs:
                half_param.data.copy_(master_param.data)

        def after_train_iter(self, runner):
            """Run one mixed-precision backward/optimizer step.

            For dynamic loss scaling, please refer to
            https://pytorch.org/docs/stable/amp.html#torch.cuda.amp.GradScaler.

            1. Scale the loss by a scale factor.
            2. Backward the loss to obtain the gradients.
            3. Unscale the optimizer's gradient tensors.
            4. Call optimizer.step() and update scale factor.
            5. Save loss_scaler state_dict for resume purpose.
            """
            scaler = self.loss_scaler

            # Drop gradients left over from the previous iteration.
            runner.model.zero_grad()
            runner.optimizer.zero_grad()

            scaled_loss = scaler.scale(runner.outputs['loss'])
            scaled_loss.backward()
            scaler.unscale_(runner.optimizer)

            if self.grad_clip is not None:
                norm = self.clip_grads(runner.model.parameters())
                if norm is not None:
                    # Expose the gradient norm to the logger.
                    runner.log_buffer.update({'grad_norm': float(norm)},
                                             runner.outputs['num_samples'])

            # Optimizer step through the scaler, then refresh the scale.
            scaler.step(runner.optimizer)
            scaler.update(self._scale_update_param)

            # Persist the scaler state so that training can be resumed.
            scaler_state = scaler.state_dict()
            runner.meta.setdefault('fp16', {})['loss_scaler'] = scaler_state

    @HOOKS.register_module()
    class GradientCumulativeFp16OptimizerHook(GradientCumulativeOptimizerHook,
                                              Fp16OptimizerHook):
        """Fp16 optimizer Hook (using PyTorch's implementation) implements
        multi-iters gradient cumulating.

        If you are using PyTorch >= 1.6, torch.cuda.amp is used as the backend,
        to take care of the optimization procedure.
        """

        def __init__(self, *args, **kwargs):
            super(GradientCumulativeFp16OptimizerHook,
                  self).__init__(*args, **kwargs)

        def after_train_iter(self, runner):
            """Accumulate scaled gradients and step at window ends.

            The loss is divided by the number of iterations in the current
            accumulation window before being scaled and back-propagated. The
            optimizer (through the loss scaler) only steps every
            ``cumulative_iters`` iterations or on the last iteration.
            """
            if not self.initialized:
                self._init(runner)

            # The window boundary is computed from ``max_iters`` so that it
            # is an absolute iteration index, comparable with ``runner.iter``
            # even when training was resumed. Only the trailing partial
            # window (if any) uses the remainder, which is then non-zero.
            remainder_iters = runner.max_iters % self.cumulative_iters
            if runner.iter < runner.max_iters - remainder_iters:
                loss_factor = self.cumulative_iters
            else:
                loss_factor = remainder_iters
            loss = runner.outputs['loss']
            loss = loss / loss_factor

            self.loss_scaler.scale(loss).backward()

            if (self.every_n_iters(runner, self.cumulative_iters)
                    or self.is_last_iter(runner)):

                # unscale the accumulated gradients held by the optimizer
                self.loss_scaler.unscale_(runner.optimizer)

                if self.grad_clip is not None:
                    grad_norm = self.clip_grads(runner.model.parameters())
                    if grad_norm is not None:
                        # Add grad norm to the logger
                        runner.log_buffer.update(
                            {'grad_norm': float(grad_norm)},
                            runner.outputs['num_samples'])

                # backward and update scaler
                self.loss_scaler.step(runner.optimizer)
                self.loss_scaler.update(self._scale_update_param)

                # save state_dict of loss_scaler
                runner.meta.setdefault(
                    'fp16', {})['loss_scaler'] = self.loss_scaler.state_dict()

                # clear grads
                runner.model.zero_grad()
                runner.optimizer.zero_grad()

else:

    @HOOKS.register_module()
    class Fp16OptimizerHook(OptimizerHook):
        """FP16 optimizer hook (mmcv's implementation).

        The steps of fp16 optimizer is as follows.
        1. Scale the loss value.
        2. BP in the fp16 model.
        3. Copy gradients from fp16 model to fp32 weights.
        4. Update fp32 weights.
        5. Copy updated parameters from fp32 weights to fp16 model.

        Refer to https://arxiv.org/abs/1710.03740 for more details.

        Args:
            loss_scale (float | str | dict): Scale factor configuration.
                A float selects static loss scaling with that scale. The
                string 'dynamic' selects dynamic loss scaling. A dict is
                forwarded as keyword arguments to ``LossScaler``.
                Defaults to 512.
        """

        def __init__(self,
                     grad_clip=None,
                     coalesce=True,
                     bucket_size_mb=-1,
                     loss_scale=512.,
                     distributed=True):
            self.grad_clip = grad_clip
            self.coalesce = coalesce
            self.bucket_size_mb = bucket_size_mb
            self.distributed = distributed
            if loss_scale == 'dynamic':
                scaler = LossScaler(mode='dynamic')
            elif isinstance(loss_scale, float):
                scaler = LossScaler(init_scale=loss_scale, mode='static')
            elif isinstance(loss_scale, dict):
                scaler = LossScaler(**loss_scale)
            else:
                raise ValueError('loss_scale must be of type float, dict, or '
                                 f'"dynamic", got {loss_scale}')
            self.loss_scaler = scaler

        def before_run(self, runner):
            """Preparing steps before Mixed Precision Training.

            1. Make a master copy of fp32 weights for optimization.
            2. Convert the main model from fp32 to fp16.
            """
            optimizer = runner.optimizer

            # Give the optimizer its own deep copy of the parameter groups;
            # these copies serve as the fp32 master weights.
            original_groups = optimizer.param_groups
            optimizer.param_groups = copy.deepcopy(optimizer.param_groups)

            # Map every original parameter to its copy, pairing them by
            # position across all groups.
            original_params = chain.from_iterable(
                group['params'] for group in original_groups)
            copied_params = chain.from_iterable(
                group['params'] for group in optimizer.param_groups)
            p_map = dict(zip(original_params, copied_params))

            # Re-key the existing optimizer state onto the copied parameters.
            rekeyed_state = defaultdict(dict)
            for old_param, param_state in optimizer.state.items():
                rekeyed_state[p_map[old_param]] = param_state
            optimizer.state = rekeyed_state

            # convert model to fp16
            wrap_fp16_model(runner.model)

            # A resumed run carries the scaler state in ``runner.meta``.
            if 'fp16' in runner.meta:
                if 'loss_scaler' in runner.meta['fp16']:
                    saved_state = runner.meta['fp16']['loss_scaler']
                    self.loss_scaler.load_state_dict(saved_state)

        def copy_grads_to_fp32(self, fp16_net, fp32_weights):
            """Copy gradients from fp16 model to fp32 weight copy."""
            pairs = zip(fp32_weights, fp16_net.parameters())
            for master_param, half_param in pairs:
                if half_param.grad is None:
                    continue
                if master_param.grad is None:
                    # Allocate a gradient buffer shaped like the parameter.
                    master_param.grad = master_param.data.new(
                        master_param.size())
                master_param.grad.copy_(half_param.grad)

        def copy_params_to_fp16(self, fp16_net, fp32_weights):
            """Copy updated params from fp32 weight copy to fp16 model."""
            pairs = zip(fp16_net.parameters(), fp32_weights)
            for half_param, master_param in pairs:
                half_param.data.copy_(master_param.data)

        def after_train_iter(self, runner):
            """Backward optimization steps for Mixed Precision Training.

            1. Scale the loss by a scale factor.
            2. Backward the loss to obtain the gradients (fp16).
            3. Copy gradients from the model to the fp32 weight copy.
            4. Scale the gradients back and update the fp32 weight copy.
            5. Copy back the params from fp32 weight copy to the fp16 model.
            6. Save loss_scaler state_dict for resume purpose.
            """
            scaler = self.loss_scaler

            # Drop gradients left over from the previous iteration.
            runner.model.zero_grad()
            runner.optimizer.zero_grad()

            # Back-propagate the scaled loss.
            (runner.outputs['loss'] * scaler.loss_scale).backward()

            # Gather the optimizer's parameters and copy the model gradients
            # onto them.
            fp32_weights = [
                param for group in runner.optimizer.param_groups
                for param in group['params']
            ]
            self.copy_grads_to_fp32(runner.model, fp32_weights)
            if self.distributed:
                allreduce_grads(fp32_weights, self.coalesce,
                                self.bucket_size_mb)

            has_overflow = scaler.has_overflow(fp32_weights)
            # The parameter update is skipped entirely on overflow.
            if not has_overflow:
                # Undo the loss scaling on the gradients.
                for weight in fp32_weights:
                    if weight.grad is not None:
                        weight.grad.div_(scaler.loss_scale)
                if self.grad_clip is not None:
                    norm = self.clip_grads(fp32_weights)
                    if norm is not None:
                        # Expose the gradient norm to the logger.
                        runner.log_buffer.update({'grad_norm': float(norm)},
                                                 runner.outputs['num_samples'])
                runner.optimizer.step()
                # Push the updated parameters back into the model.
                self.copy_params_to_fp16(runner.model, fp32_weights)

            scaler.update_scale(has_overflow)
            if has_overflow:
                runner.logger.warning('Check overflow, downscale loss scale '
                                      f'to {self.loss_scaler.cur_scale}')

            # Persist the scaler state so that training can be resumed.
            scaler_state = scaler.state_dict()
            runner.meta.setdefault('fp16', {})['loss_scaler'] = scaler_state

    @HOOKS.register_module()
    class GradientCumulativeFp16OptimizerHook(GradientCumulativeOptimizerHook,
                                              Fp16OptimizerHook):
        """Fp16 optimizer Hook (using mmcv implementation) implements multi-
        iters gradient cumulating."""

        def __init__(self, *args, **kwargs):
            super(GradientCumulativeFp16OptimizerHook,
                  self).__init__(*args, **kwargs)

        def after_train_iter(self, runner):
            """Accumulate scaled gradients and step at window ends.

            The loss is divided by the number of iterations in the current
            accumulation window, multiplied by the loss scale and
            back-propagated. Every ``cumulative_iters`` iterations (or on the
            last iteration) the accumulated gradients are copied to the
            optimizer's parameters, unscaled, optionally clipped and applied,
            unless an overflow was detected.
            """
            if not self.initialized:
                self._init(runner)

            # The window boundary is computed from ``max_iters`` so that it
            # is an absolute iteration index, comparable with ``runner.iter``
            # even when training was resumed. Only the trailing partial
            # window (if any) uses the remainder, which is then non-zero.
            remainder_iters = runner.max_iters % self.cumulative_iters
            if runner.iter < runner.max_iters - remainder_iters:
                loss_factor = self.cumulative_iters
            else:
                loss_factor = remainder_iters

            loss = runner.outputs['loss']
            loss = loss / loss_factor

            # scale the loss value
            scaled_loss = loss * self.loss_scaler.loss_scale
            scaled_loss.backward()

            if (self.every_n_iters(runner, self.cumulative_iters)
                    or self.is_last_iter(runner)):

                # copy fp16 grads in the model to fp32 params in the optimizer
                fp32_weights = []
                for param_group in runner.optimizer.param_groups:
                    fp32_weights += param_group['params']
                self.copy_grads_to_fp32(runner.model, fp32_weights)
                # allreduce grads
                if self.distributed:
                    allreduce_grads(fp32_weights, self.coalesce,
                                    self.bucket_size_mb)

                has_overflow = self.loss_scaler.has_overflow(fp32_weights)
                # if has overflow, skip this iteration
                if not has_overflow:
                    # scale the gradients back
                    for param in fp32_weights:
                        if param.grad is not None:
                            param.grad.div_(self.loss_scaler.loss_scale)
                    if self.grad_clip is not None:
                        grad_norm = self.clip_grads(fp32_weights)
                        if grad_norm is not None:
                            # Add grad norm to the logger
                            runner.log_buffer.update(
                                {'grad_norm': float(grad_norm)},
                                runner.outputs['num_samples'])
                    # update fp32 params
                    runner.optimizer.step()
                    # copy fp32 params to the fp16 model
                    self.copy_params_to_fp16(runner.model, fp32_weights)

                # Update the scale before reporting it, so that the warning
                # shows the scale after the overflow was handled (same order
                # as in Fp16OptimizerHook.after_train_iter).
                self.loss_scaler.update_scale(has_overflow)
                if has_overflow:
                    runner.logger.warning(
                        'Check overflow, downscale loss scale '
                        f'to {self.loss_scaler.cur_scale}')

                # save state_dict of loss_scaler
                runner.meta.setdefault(
                    'fp16', {})['loss_scaler'] = self.loss_scaler.state_dict()

                # clear grads
                runner.model.zero_grad()
                runner.optimizer.zero_grad()


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/hooks/profiler.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import warnings
from typing import Callable, List, Optional, Union

import torch

from ..dist_utils import master_only
from .hook import HOOKS, Hook


@HOOKS.register_module()
class ProfilerHook(Hook):
    """Profiler to analyze performance during training.

    PyTorch Profiler is a tool that allows the collection of the performance
    metrics during the training. More details on Profiler can be found at
    https://pytorch.org/docs/1.8.1/profiler.html#torch.profiler.profile

    Args:
        by_epoch (bool): Profile performance by epoch or by iteration.
            Default: True.
        profile_iters (int): Number of iterations for profiling.
            If ``by_epoch=True``, profile_iters indicates that they are the
            first profile_iters epochs at the beginning of the
            training, otherwise it indicates the first profile_iters
            iterations. Default: 1.
        activities (list[str]): List of activity groups (CPU, CUDA) to use in
            profiling. Default: ['cpu', 'cuda'].
        schedule (dict, optional): Config of generating the callable schedule.
            if schedule is None, profiler will not add step markers into the
            trace and table view. Default: None.
        on_trace_ready (callable, dict): Either a handler or a dict of generate
            handler. Default: None.
        record_shapes (bool): Save information about operator's input shapes.
            Default: False.
        profile_memory (bool): Track tensor memory allocation/deallocation.
            Default: False.
        with_stack (bool): Record source information (file and line number)
            for the ops. Default: False.
        with_flops (bool): Use formula to estimate the FLOPS of specific
            operators (matrix multiplication and 2D convolution).
            Default: False.
        json_trace_path (str, optional): Exports the collected trace in Chrome
            JSON format. Default: None.

    Example:
        >>> runner = ... # instantiate a Runner
        >>> # tensorboard trace
        >>> trace_config = dict(type='tb_trace', dir_name='work_dir')
        >>> profiler_config = dict(on_trace_ready=trace_config)
        >>> runner.register_profiler_hook(profiler_config)
        >>> runner.run(data_loaders=[trainloader], workflow=[('train', 1)])
    """

    def __init__(self,
                 by_epoch: bool = True,
                 profile_iters: int = 1,
                 activities: List[str] = ['cpu', 'cuda'],
                 schedule: Optional[dict] = None,
                 on_trace_ready: Optional[Union[Callable, dict]] = None,
                 record_shapes: bool = False,
                 profile_memory: bool = False,
                 with_stack: bool = False,
                 with_flops: bool = False,
                 json_trace_path: Optional[str] = None) -> None:
        try:
            from torch import profiler  # torch version >= 1.8.1
        except ImportError:
            raise ImportError('profiler is the new feature of torch1.8.1, '
                              f'but your version is {torch.__version__}')

        assert isinstance(by_epoch, bool), '``by_epoch`` should be a boolean.'
        self.by_epoch = by_epoch

        if profile_iters < 1:
            raise ValueError('profile_iters should be greater than 0, but got '
                             f'{profile_iters}')
        self.profile_iters = profile_iters

        if not isinstance(activities, list):
            raise ValueError(
                f'activities should be list, but got {type(activities)}')
        # Translate the case-insensitive activity names into profiler enums.
        self.activities = []
        for activity in activities:
            activity = activity.lower()
            if activity == 'cpu':
                self.activities.append(profiler.ProfilerActivity.CPU)
            elif activity == 'cuda':
                self.activities.append(profiler.ProfilerActivity.CUDA)
            else:
                raise ValueError(
                    f'activity should be "cpu" or "cuda", but got {activity}')

        if schedule is not None:
            self.schedule = profiler.schedule(**schedule)
        else:
            self.schedule = None

        self.on_trace_ready = on_trace_ready
        self.record_shapes = record_shapes
        self.profile_memory = profile_memory
        self.with_stack = with_stack
        self.with_flops = with_flops
        self.json_trace_path = json_trace_path

    @master_only
    def before_run(self, runner):
        """Validate the settings, build the trace handler, start profiling.

        Raises:
            ValueError: If ``profile_iters`` exceeds the configured number of
                epochs/iterations, or if ``on_trace_ready`` is neither a
                callable, a dict with a supported ``type``, nor None.
            ImportError: If a ``tb_trace`` handler is requested but
                ``torch_tb_profiler`` is not installed.
        """
        if self.by_epoch and runner.max_epochs < self.profile_iters:
            raise ValueError('self.profile_iters should not be greater than '
                             f'{runner.max_epochs}')

        if not self.by_epoch and runner.max_iters < self.profile_iters:
            raise ValueError('self.profile_iters should not be greater than '
                             f'{runner.max_iters}')

        if callable(self.on_trace_ready):  # handler
            _on_trace_ready = self.on_trace_ready
        elif isinstance(self.on_trace_ready, dict):  # config of handler
            # Work on a copy so the user's config keeps its ``type`` key.
            trace_cfg = self.on_trace_ready.copy()
            trace_type = trace_cfg.pop('type')  # log_trace handler
            if trace_type == 'log_trace':

                def _log_handler(prof):
                    print(prof.key_averages().table(**trace_cfg))

                _on_trace_ready = _log_handler
            elif trace_type == 'tb_trace':  # tensorboard_trace handler
                try:
                    import torch_tb_profiler  # noqa: F401
                except ImportError:
                    raise ImportError('please run "pip install '
                                      'torch-tb-profiler" to install '
                                      'torch_tb_profiler')
                _on_trace_ready = torch.profiler.tensorboard_trace_handler(
                    **trace_cfg)
            else:
                raise ValueError('trace_type should be "log_trace" or '
                                 f'"tb_trace", but got {trace_type}')
        elif self.on_trace_ready is None:
            _on_trace_ready = None  # type: ignore
        else:
            raise ValueError('on_trace_ready should be handler, dict or None, '
                             f'but got {type(self.on_trace_ready)}')

        # The epoch-count warning only applies to epoch-driven profiling.
        # Guarding on ``by_epoch`` also keeps iteration-driven setups from
        # comparing ``runner.max_epochs`` with an int when it is not set
        # (presumably None for iteration-configured runners - verify against
        # the runner in use).
        if self.by_epoch and runner.max_epochs > 1:
            warnings.warn(f'profiler will profile {runner.max_epochs} epochs '
                          'instead of 1 epoch. Since profiler will slow down '
                          'the training, it is recommended to train 1 epoch '
                          'with ProfilerHook and adjust your setting according'
                          ' to the profiler summary. During normal training '
                          '(epoch > 1), you may disable the ProfilerHook.')

        self.profiler = torch.profiler.profile(
            activities=self.activities,
            schedule=self.schedule,
            on_trace_ready=_on_trace_ready,
            record_shapes=self.record_shapes,
            profile_memory=self.profile_memory,
            with_stack=self.with_stack,
            with_flops=self.with_flops)

        # The profiler is entered manually and closed later by
        # ``after_train_epoch`` / ``after_train_iter``.
        self.profiler.__enter__()
        runner.logger.info('profiler is profiling...')

    @master_only
    def after_train_epoch(self, runner):
        """Stop profiling after the last profiled epoch (epoch mode only)."""
        if self.by_epoch and runner.epoch == self.profile_iters - 1:
            runner.logger.info('profiler may take a few minutes...')
            self.profiler.__exit__(None, None, None)
            if self.json_trace_path is not None:
                self.profiler.export_chrome_trace(self.json_trace_path)

    @master_only
    def after_train_iter(self, runner):
        """Advance the profiler; stop it after the last profiled iteration
        when profiling by iteration."""
        self.profiler.step()
        if not self.by_epoch and runner.iter == self.profile_iters - 1:
            runner.logger.info('profiler may take a few minutes...')
            self.profiler.__exit__(None, None, None)
            if self.json_trace_path is not None:
                self.profiler.export_chrome_trace(self.json_trace_path)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/hooks/sampler_seed.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
from .hook import HOOKS, Hook


@HOOKS.register_module()
class DistSamplerSeedHook(Hook):
    """Data-loading sampler for distributed training.

    When distributed training, it is only useful in conjunction with
    :obj:`EpochBasedRunner`, while :obj:`IterBasedRunner` achieves the same
    purpose with :obj:`IterLoader`.
    """

    def before_epoch(self, runner):
        """Forward the current epoch to the first sampler accepting it."""
        loader = runner.data_loader
        if hasattr(loader.sampler, 'set_epoch'):
            # Samplers without ``set_epoch`` are skipped by this check.
            loader.sampler.set_epoch(runner.epoch)
            return
        # Otherwise try the sampler held by the batch sampler.
        if hasattr(loader.batch_sampler.sampler, 'set_epoch'):
            loader.batch_sampler.sampler.set_epoch(runner.epoch)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/hooks/sync_buffer.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
from ..dist_utils import allreduce_params
from .hook import HOOKS, Hook


@HOOKS.register_module()
class SyncBuffersHook(Hook):
    """Synchronize model buffers such as running_mean and running_var in BN at
    the end of each epoch.

    Args:
        distributed (bool): Whether distributed training is used. It is
          effective only for distributed training. Defaults to True.
    """

    def __init__(self, distributed=True):
        self.distributed = distributed

    def after_epoch(self, runner):
        """All-reduce model buffers at the end of each epoch."""
        if not self.distributed:
            # Nothing to synchronize outside distributed training.
            return
        allreduce_params(runner.model.buffers())


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/iter_based_runner.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
import platform
import shutil
import time
import warnings

import torch
from torch.optim import Optimizer

import mmcv
from .base_runner import BaseRunner
from .builder import RUNNERS
from .checkpoint import save_checkpoint
from .hooks import IterTimerHook
from .utils import get_host_info


class IterLoader:
    """Endless iterator over a dataloader that counts completed passes.

    When the wrapped dataloader is exhausted, the epoch counter is
    incremented, the sampler is told the new epoch if it supports
    ``set_epoch``, and iteration restarts from the beginning.
    """

    def __init__(self, dataloader):
        self._dataloader = dataloader
        self.iter_loader = iter(self._dataloader)
        self._epoch = 0

    @property
    def epoch(self):
        """Number of completed passes over the dataloader."""
        return self._epoch

    def __next__(self):
        try:
            return next(self.iter_loader)
        except StopIteration:
            # The current pass is finished: start the next one.
            self._epoch += 1
            sampler = self._dataloader.sampler
            if hasattr(sampler, 'set_epoch'):
                sampler.set_epoch(self._epoch)
            time.sleep(2)  # Prevent possible deadlock during epoch transition
            self.iter_loader = iter(self._dataloader)
            return next(self.iter_loader)

    def __len__(self):
        return len(self._dataloader)


@RUNNERS.register_module()
class IterBasedRunner(BaseRunner):
    """Iteration-based Runner.

    This runner trains models iteration by iteration: each call to
    :meth:`train` or :meth:`val` consumes exactly one batch.
    """

    def train(self, data_loader, **kwargs):
        """Run a single training iteration.

        Args:
            data_loader: Batch iterator that also exposes an ``epoch``
                attribute (``run`` passes an :class:`IterLoader`).
            **kwargs: Forwarded to ``model.train_step``.

        Raises:
            TypeError: If ``model.train_step`` does not return a dict.
        """
        self.model.train()
        self.mode = 'train'
        self.data_loader = data_loader
        # Mirror the loader's epoch counter so hooks see the current epoch.
        self._epoch = data_loader.epoch
        data_batch = next(data_loader)
        self.call_hook('before_train_iter')
        outputs = self.model.train_step(data_batch, self.optimizer, **kwargs)
        if not isinstance(outputs, dict):
            raise TypeError('model.train_step() must return a dict')
        if 'log_vars' in outputs:
            # 'num_samples' is required whenever 'log_vars' is present.
            self.log_buffer.update(outputs['log_vars'], outputs['num_samples'])
        self.outputs = outputs
        self.call_hook('after_train_iter')
        self._inner_iter += 1
        self._iter += 1

    @torch.no_grad()
    def val(self, data_loader, **kwargs):
        """Run a single validation iteration with gradients disabled.

        Args:
            data_loader: Batch iterator (``run`` passes an
                :class:`IterLoader`).
            **kwargs: Forwarded to ``model.val_step``.

        Raises:
            TypeError: If ``model.val_step`` does not return a dict.
        """
        self.model.eval()
        self.mode = 'val'
        self.data_loader = data_loader
        data_batch = next(data_loader)
        self.call_hook('before_val_iter')
        outputs = self.model.val_step(data_batch, **kwargs)
        if not isinstance(outputs, dict):
            raise TypeError('model.val_step() must return a dict')
        if 'log_vars' in outputs:
            self.log_buffer.update(outputs['log_vars'], outputs['num_samples'])
        self.outputs = outputs
        self.call_hook('after_val_iter')
        # Only the per-phase counter advances; the global iter is train-only.
        self._inner_iter += 1

    def run(self, data_loaders, workflow, max_iters=None, **kwargs):
        """Start running.

        Args:
            data_loaders (list[:obj:`DataLoader`]): Dataloaders for training
                and validation.
            workflow (list[tuple]): A list of (phase, iters) to specify the
                running order and iterations. E.g, [('train', 10000),
                ('val', 1000)] means running 10000 iterations for training and
                1000 iterations for validation, iteratively.
            max_iters (int, optional): Deprecated; overrides the runner's
                ``_max_iters`` when given. Defaults to None.
            **kwargs: Forwarded to the per-iteration method of each phase.

        Raises:
            ValueError: If a workflow phase does not name a runner method.
        """
        assert isinstance(data_loaders, list)
        assert mmcv.is_list_of(workflow, tuple)
        assert len(data_loaders) == len(workflow)
        if max_iters is not None:
            warnings.warn(
                'setting max_iters in run is deprecated, '
                'please set max_iters in runner_config', DeprecationWarning)
            self._max_iters = max_iters
        assert self._max_iters is not None, (
            'max_iters must be specified during instantiation')

        work_dir = self.work_dir if self.work_dir is not None else 'NONE'
        self.logger.info('Start running, host: %s, work_dir: %s',
                         get_host_info(), work_dir)
        self.logger.info('Hooks will be executed in the following order:\n%s',
                         self.get_hook_info())
        self.logger.info('workflow: %s, max: %d iters', workflow,
                         self._max_iters)
        self.call_hook('before_run')

        # One endless, epoch-counting iterator per workflow phase.
        iter_loaders = [IterLoader(x) for x in data_loaders]

        # The epoch hooks fire exactly once around the whole run.
        self.call_hook('before_epoch')

        while self.iter < self._max_iters:
            for i, flow in enumerate(workflow):
                self._inner_iter = 0
                mode, iters = flow
                if not isinstance(mode, str) or not hasattr(self, mode):
                    raise ValueError(
                        'runner has no method named "{}" to run a workflow'.
                        format(mode))
                # Bound method of this runner, e.g. self.train or self.val.
                iter_runner = getattr(self, mode)
                for _ in range(iters):
                    # Stop training as soon as the iteration budget is used.
                    if mode == 'train' and self.iter >= self._max_iters:
                        break
                    iter_runner(iter_loaders[i], **kwargs)

        time.sleep(1)  # wait for some hooks like loggers to finish
        self.call_hook('after_epoch')
        self.call_hook('after_run')

    def resume(self,
               checkpoint,
               resume_optimizer=True,
               map_location='default'):
        """Resume model from checkpoint.

        Args:
            checkpoint (str): Checkpoint to resume from.
            resume_optimizer (bool, optional): Whether resume the optimizer(s)
                if the checkpoint file includes optimizer(s). Default to True.
            map_location (str, optional): Same as :func:`torch.load`.
                Default to 'default', which maps storages onto the current
                CUDA device when CUDA is available and onto the CPU otherwise.

        Raises:
            TypeError: If the optimizer is neither a dict nor a
                ``torch.optim.Optimizer`` while optimizer state is restored.
        """
        if map_location == 'default':
            if torch.cuda.is_available():
                device_id = torch.cuda.current_device()
                checkpoint = self.load_checkpoint(
                    checkpoint,
                    map_location=lambda storage, loc: storage.cuda(device_id))
            else:
                # Without a GPU, torch.cuda.current_device() would raise, so
                # load the checkpoint onto the CPU instead.
                checkpoint = self.load_checkpoint(
                    checkpoint, map_location='cpu')
        else:
            checkpoint = self.load_checkpoint(
                checkpoint, map_location=map_location)

        self._epoch = checkpoint['meta']['epoch']
        self._iter = checkpoint['meta']['iter']
        self._inner_iter = checkpoint['meta']['iter']
        if 'optimizer' in checkpoint and resume_optimizer:
            if isinstance(self.optimizer, Optimizer):
                self.optimizer.load_state_dict(checkpoint['optimizer'])
            elif isinstance(self.optimizer, dict):
                # One state dict per named optimizer.
                for k in self.optimizer.keys():
                    self.optimizer[k].load_state_dict(
                        checkpoint['optimizer'][k])
            else:
                raise TypeError(
                    'Optimizer should be dict or torch.optim.Optimizer '
                    f'but got {type(self.optimizer)}')

        self.logger.info(f'resumed from epoch: {self.epoch}, iter {self.iter}')

    def save_checkpoint(self,
                        out_dir,
                        filename_tmpl='iter_{}.pth',
                        meta=None,
                        save_optimizer=True,
                        create_symlink=True):
        """Save checkpoint to file.

        Args:
            out_dir (str): Directory to save checkpoint files.
            filename_tmpl (str, optional): Checkpoint file template.
                Defaults to 'iter_{}.pth'. It is formatted with
                ``self.iter + 1``.
            meta (dict, optional): Metadata to be saved in checkpoint.
                Defaults to None. A passed dict is updated in place.
            save_optimizer (bool, optional): Whether save optimizer.
                Defaults to True.
            create_symlink (bool, optional): Whether create symlink to the
                latest checkpoint file. Defaults to True.

        Raises:
            TypeError: If ``meta`` is neither a dict nor None.
        """
        if meta is None:
            meta = {}
        elif not isinstance(meta, dict):
            raise TypeError(
                f'meta should be a dict or None, but got {type(meta)}')
        if self.meta is not None:
            meta.update(self.meta)
            # Note: meta.update(self.meta) should be done before
            # meta.update(epoch=self.epoch + 1, iter=self.iter) otherwise
            # there will be problems with resumed checkpoints.
            # More details in https://github.com/open-mmlab/mmcv/pull/1108
        meta.update(epoch=self.epoch + 1, iter=self.iter)

        filename = filename_tmpl.format(self.iter + 1)
        filepath = osp.join(out_dir, filename)
        optimizer = self.optimizer if save_optimizer else None
        save_checkpoint(self.model, filepath, optimizer=optimizer, meta=meta)
        # in some environments, `os.symlink` is not supported, you may need to
        # set `create_symlink` to False
        if create_symlink:
            dst_file = osp.join(out_dir, 'latest.pth')
            if platform.system() != 'Windows':
                mmcv.symlink(filename, dst_file)
            else:
                # On Windows a plain copy stands in for the symlink.
                shutil.copy(filepath, dst_file)

    def register_training_hooks(self,
                                lr_config,
                                optimizer_config=None,
                                checkpoint_config=None,
                                log_config=None,
                                momentum_config=None,
                                custom_hooks_config=None):
        """Register default hooks for iter-based training.

        Checkpoint hook, optimizer stepper hook and logger hooks will be set to
        `by_epoch=False` by default.

        Default hooks include:

        +----------------------+-------------------------+
        | Hooks                | Priority                |
        +======================+=========================+
        | LrUpdaterHook        | VERY_HIGH (10)          |
        +----------------------+-------------------------+
        | MomentumUpdaterHook  | HIGH (30)               |
        +----------------------+-------------------------+
        | OptimizerStepperHook | ABOVE_NORMAL (40)       |
        +----------------------+-------------------------+
        | CheckpointSaverHook  | NORMAL (50)             |
        +----------------------+-------------------------+
        | IterTimerHook        | LOW (70)                |
        +----------------------+-------------------------+
        | LoggerHook(s)        | VERY_LOW (90)           |
        +----------------------+-------------------------+
        | CustomHook(s)        | defaults to NORMAL (50) |
        +----------------------+-------------------------+

        If custom hooks have same priority with default hooks, custom hooks
        will be triggered after default hooks.
        """
        # setdefault keeps any explicit ``by_epoch`` the caller configured.
        if checkpoint_config is not None:
            checkpoint_config.setdefault('by_epoch', False)
        if lr_config is not None:
            lr_config.setdefault('by_epoch', False)
        if log_config is not None:
            for info in log_config['hooks']:
                info.setdefault('by_epoch', False)
        super(IterBasedRunner, self).register_training_hooks(
            lr_config=lr_config,
            momentum_config=momentum_config,
            optimizer_config=optimizer_config,
            checkpoint_config=checkpoint_config,
            log_config=log_config,
            timer_config=IterTimerHook(),
            custom_hooks_config=custom_hooks_config)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/log_buffer.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
from collections import OrderedDict
import torch
import numpy as np


class LogBuffer:
    """Collects scalar log values and produces count-weighted averages.

    Attributes:
        val_history (OrderedDict): key -> list of recorded values.
        n_history (OrderedDict): key -> list of the ``count`` passed along
            with each recorded value.
        output (OrderedDict): key -> average computed by the most recent
            :meth:`average` call.
        ready (bool): True once :meth:`average` has filled ``output``.
    """

    def __init__(self):
        self.val_history = OrderedDict()
        self.n_history = OrderedDict()
        self.output = OrderedDict()
        self.ready = False

    def clear(self):
        """Drop all recorded history and any computed output."""
        self.val_history.clear()
        self.n_history.clear()
        self.clear_output()

    def clear_output(self):
        """Drop the computed averages and mark the buffer as not ready."""
        self.output.clear()
        self.ready = False

    def update(self, vars, count=1):
        """Record one value per key.

        Args:
            vars (dict): Mapping of name -> value. Tensors are reduced with
                ``torch.mean`` and converted to a Python number; NumPy
                scalars are converted with ``.item()``.
            count (int): Weight stored with every value of this call (used
                by :meth:`average`). Defaults to 1.

        Raises:
            AssertionError: If a value is not a float, int or str after
                conversion. Nothing is recorded for that key.
        """
        for k, v in vars.items():
            if isinstance(v, torch.Tensor):
                v = torch.mean(v).item()
            elif isinstance(v, np.generic):
                # e.g. np.float32 / np.int64, which are not Python numbers.
                v = v.item()
            # Validate before touching the history so a rejected key does not
            # leave an empty list behind (average() would divide 0 by 0).
            assert isinstance(v, (float, int, str)), f"{k} type: {type(v)}"
            self.val_history.setdefault(k, []).append(v)
            self.n_history.setdefault(k, []).append(count)

    def average(self, n=0):
        """Average latest n values or all values."""
        assert n >= 0
        for key in self.val_history:
            # With n == 0 the slice [-0:] selects the whole history.
            values = np.array(self.val_history[key][-n:])
            nums = np.array(self.n_history[key][-n:])
            avg = np.sum(values * nums) / np.sum(nums)
            self.output[key] = avg
        self.ready = True


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/misc.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import glob
import os.path as osp
import warnings


def find_latest_checkpoint(path, suffix='pth'):
    """Find the latest checkpoint from the working directory.

    ``latest.<suffix>`` is returned when it exists. Otherwise the file whose
    name ends in the largest ``_<number>.<suffix>`` wins. Files without such
    a numeric part (e.g. ``best.pth``) are ignored.

    Args:
        path(str): The path to find checkpoints.
        suffix(str): File extension.
            Defaults to pth.

    Returns:
        latest_path(str | None): File path of the latest checkpoint.
    References:
        .. [1] https://github.com/microsoft/SoftTeacher
                  /blob/main/ssod/utils/patch.py
    """
    if not osp.exists(path):
        warnings.warn('The path of checkpoints does not exist.')
        return None
    latest_file = osp.join(path, f'latest.{suffix}')
    if osp.exists(latest_file):
        return latest_file

    # Escape the directory so characters like '[' are matched literally.
    checkpoints = glob.glob(osp.join(glob.escape(path), f'*.{suffix}'))
    if len(checkpoints) == 0:
        warnings.warn('There are no checkpoints in the path.')
        return None
    latest = -1
    latest_path = None
    for checkpoint in checkpoints:
        # 'iter_100.pth' -> '100'
        counter = osp.basename(checkpoint).split('_')[-1].split('.')[0]
        try:
            count = int(counter)
        except ValueError:
            # Not numbered, so it cannot be ranked against the others.
            continue
        if count > latest:
            latest = count
            latest_path = checkpoint
    if latest_path is None:
        warnings.warn('There are no numbered checkpoints in the path.')
    return latest_path


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/optimizer/__init__.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
from .builder import (OPTIMIZER_BUILDERS, OPTIMIZERS, build_optimizer,
                      build_optimizer_constructor)
from .default_constructor import DefaultOptimizerConstructor

__all__ = [
    'OPTIMIZER_BUILDERS', 'OPTIMIZERS', 'DefaultOptimizerConstructor',
    'build_optimizer', 'build_optimizer_constructor'
]


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/optimizer/builder.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import copy
import inspect

import torch

from ...utils import Registry, build_from_cfg

# Registry of optimizer classes; register_torch_optimizers() below adds the
# torch.optim ones to it.
OPTIMIZERS = Registry('optimizer')
# Registry of optimizer constructor classes, looked up by
# build_optimizer_constructor().
OPTIMIZER_BUILDERS = Registry('optimizer builder')


def register_torch_optimizers():
    """Register every optimizer class exposed by ``torch.optim``.

    Dunder attributes are skipped. Each remaining attribute that is a class
    deriving from ``torch.optim.Optimizer`` is added to ``OPTIMIZERS``.

    Returns:
        list[str]: The ``torch.optim`` attribute names that were registered,
        in ``dir()`` order.
    """
    registered = []
    for attr_name in dir(torch.optim):
        if attr_name.startswith('__'):
            continue
        candidate = getattr(torch.optim, attr_name)
        if not inspect.isclass(candidate):
            continue
        if not issubclass(candidate, torch.optim.Optimizer):
            continue
        OPTIMIZERS.register_module()(candidate)
        registered.append(attr_name)
    return registered


# Names of the torch.optim optimizers registered at import time.
TORCH_OPTIMIZERS = register_torch_optimizers()


def build_optimizer_constructor(cfg):
    """Build an optimizer constructor from ``cfg`` using OPTIMIZER_BUILDERS.

    Args:
        cfg (dict): Config passed unchanged to ``build_from_cfg``.

    Returns:
        The object produced by ``build_from_cfg``.
    """
    constructor = build_from_cfg(cfg, OPTIMIZER_BUILDERS)
    return constructor


def build_optimizer(model, cfg):
    """Build an optimizer for ``model`` from an optimizer config.

    ``cfg`` is deep-copied first, so the caller's config is left untouched.
    The optional ``constructor`` key (default
    ``'DefaultOptimizerConstructor'``) selects the constructor type and the
    optional ``paramwise_cfg`` key is handed to it; whatever remains becomes
    the constructor's ``optimizer_cfg``.

    Args:
        model: Model passed to the optimizer constructor.
        cfg (dict): Optimizer config.

    Returns:
        The result of calling the built constructor on ``model``.
    """
    remaining_cfg = copy.deepcopy(cfg)
    constructor_name = remaining_cfg.pop('constructor',
                                         'DefaultOptimizerConstructor')
    per_param_cfg = remaining_cfg.pop('paramwise_cfg', None)
    constructor_cfg = {
        'type': constructor_name,
        'optimizer_cfg': remaining_cfg,
        'paramwise_cfg': per_param_cfg,
    }
    return build_optimizer_constructor(constructor_cfg)(model)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/optimizer/default_constructor.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import warnings

import torch
from torch.nn import GroupNorm, LayerNorm

from mmcv.utils import _BatchNorm, _InstanceNorm, build_from_cfg, is_list_of
from mmcv.utils.ext_loader import check_ops_exist
from .builder import OPTIMIZER_BUILDERS, OPTIMIZERS


@OPTIMIZER_BUILDERS.register_module()
class DefaultOptimizerConstructor:
    """Default constructor for optimizers.

    By default each parameter share the same optimizer settings, and we
    provide an argument ``paramwise_cfg`` to specify parameter-wise settings.
    It is a dict and may contain the following fields:

    - ``custom_keys`` (dict): Specified parameters-wise settings by keys. If
      one of the keys in ``custom_keys`` is a substring of the name of one
      parameter, then the setting of the parameter will be specified by
      ``custom_keys[key]`` and other setting like ``bias_lr_mult`` etc. will
      be ignored. It should be noted that the aforementioned ``key`` is the
      longest key that is a substring of the name of the parameter. If there
      are multiple matched keys with the same length, then the key with lower
      alphabet order will be chosen.
      ``custom_keys[key]`` should be a dict and may contain fields ``lr_mult``
      and ``decay_mult``. See Example 2 below.
    - ``bias_lr_mult`` (float): It will be multiplied to the learning
      rate for all bias parameters (except for those in normalization
      layers and offset layers of DCN).
    - ``bias_decay_mult`` (float): It will be multiplied to the weight
      decay for all bias parameters (except for those in
      normalization layers, depthwise conv layers, offset layers of DCN).
    - ``norm_decay_mult`` (float): It will be multiplied to the weight
      decay for all weight and bias parameters of normalization
      layers.
    - ``dwconv_decay_mult`` (float): It will be multiplied to the weight
      decay for all weight and bias parameters of depthwise conv
      layers.
    - ``dcn_offset_lr_mult`` (float): It will be multiplied to the learning
      rate for parameters of offset layer in the deformable convs
      of a model.
    - ``bypass_duplicate`` (bool): If true, the duplicate parameters
      would not be added into optimizer. Default: False.

    Note:

        1. If the option ``dcn_offset_lr_mult`` is used, the constructor will
        override the effect of ``bias_lr_mult`` in the bias of offset layer.
        So be careful when using both ``bias_lr_mult`` and
        ``dcn_offset_lr_mult``. If you wish to apply both of them to the offset
        layer in deformable convs, set ``dcn_offset_lr_mult`` to the original
        ``dcn_offset_lr_mult`` * ``bias_lr_mult``.

        2. If the option ``dcn_offset_lr_mult`` is used, the constructor will
        apply it to all the DCN layers in the model. So be careful when the
        model contains multiple DCN layers in places other than backbone.

    Args:
        model (:obj:`nn.Module`): The model with parameters to be optimized.
            It is passed when the constructor instance is called.
        optimizer_cfg (dict): The config dict of the optimizer.
            Positional fields are

                - `type`: class name of the optimizer.

            Optional fields are

                - any arguments of the corresponding optimizer type, e.g.,
                  lr, weight_decay, momentum, etc.
        paramwise_cfg (dict, optional): Parameter-wise options.

    Example 1:
        >>> model = torch.nn.modules.Conv1d(1, 1, 1)
        >>> optimizer_cfg = dict(type='SGD', lr=0.01, momentum=0.9,
        >>>                      weight_decay=0.0001)
        >>> paramwise_cfg = dict(norm_decay_mult=0.)
        >>> optim_builder = DefaultOptimizerConstructor(
        >>>     optimizer_cfg, paramwise_cfg)
        >>> optimizer = optim_builder(model)

    Example 2:
        >>> # assume model have attribute model.backbone and model.cls_head
        >>> optimizer_cfg = dict(type='SGD', lr=0.01, weight_decay=0.95)
        >>> paramwise_cfg = dict(custom_keys={
        >>>     '.backbone': dict(lr_mult=0.1, decay_mult=0.9)})
        >>> optim_builder = DefaultOptimizerConstructor(
        >>>     optimizer_cfg, paramwise_cfg)
        >>> optimizer = optim_builder(model)
        >>> # Then the `lr` and `weight_decay` for model.backbone is
        >>> # (0.01 * 0.1, 0.95 * 0.9). `lr` and `weight_decay` for
        >>> # model.cls_head is (0.01, 0.95).
    """

    def __init__(self, optimizer_cfg, paramwise_cfg=None):
        if not isinstance(optimizer_cfg, dict):
            # One concatenated message; a stray comma here would make the
            # exception carry a 2-tuple instead of a readable sentence.
            raise TypeError('optimizer_cfg should be a dict, '
                            f'but got {type(optimizer_cfg)}')
        self.optimizer_cfg = optimizer_cfg
        self.paramwise_cfg = {} if paramwise_cfg is None else paramwise_cfg
        self.base_lr = optimizer_cfg.get('lr', None)
        self.base_wd = optimizer_cfg.get('weight_decay', None)
        self._validate_cfg()

    def _validate_cfg(self):
        """Check ``paramwise_cfg`` against the base optimizer settings.

        Raises:
            TypeError: If ``paramwise_cfg`` or its ``custom_keys`` entry is
                not a dict.
            ValueError: If any decay multiplier is configured while the
                optimizer config has no ``weight_decay``.
        """
        if not isinstance(self.paramwise_cfg, dict):
            raise TypeError('paramwise_cfg should be None or a dict, '
                            f'but got {type(self.paramwise_cfg)}')

        if 'custom_keys' in self.paramwise_cfg:
            if not isinstance(self.paramwise_cfg['custom_keys'], dict):
                raise TypeError(
                    'If specified, custom_keys must be a dict, '
                    f'but got {type(self.paramwise_cfg["custom_keys"])}')
            if self.base_wd is None:
                for key in self.paramwise_cfg['custom_keys']:
                    if 'decay_mult' in self.paramwise_cfg['custom_keys'][key]:
                        raise ValueError('base_wd should not be None')

        # get base lr and weight decay
        # weight_decay must be explicitly specified if mult is specified
        if ('bias_decay_mult' in self.paramwise_cfg
                or 'norm_decay_mult' in self.paramwise_cfg
                or 'dwconv_decay_mult' in self.paramwise_cfg):
            if self.base_wd is None:
                raise ValueError('base_wd should not be None')

    def _is_in(self, param_group, param_group_list):
        """Return True if ``param_group`` shares a parameter with the list."""
        assert is_list_of(param_group_list, dict)
        param = set(param_group['params'])
        param_set = set()
        for group in param_group_list:
            param_set.update(set(group['params']))

        return not param.isdisjoint(param_set)

    def add_params(self, params, module, prefix='', is_dcn_module=None):
        """Add all parameters of module to the params list.

        The parameters of the given module will be added to the list of param
        groups, with specific rules defined by paramwise_cfg.

        Args:
            params (list[dict]): A list of param groups, it will be modified
                in place.
            module (nn.Module): The module to be added.
            prefix (str): The prefix of the module
            is_dcn_module (int|float|None): If the current module is a
                submodule of DCN, `is_dcn_module` will be passed to
                control conv_offset layer's learning rate. Defaults to None.
        """
        # get param-wise options
        custom_keys = self.paramwise_cfg.get('custom_keys', {})
        # first sort with alphabet order and then sort with reversed len of str
        sorted_keys = sorted(sorted(custom_keys.keys()), key=len, reverse=True)

        bias_lr_mult = self.paramwise_cfg.get('bias_lr_mult', 1.)
        bias_decay_mult = self.paramwise_cfg.get('bias_decay_mult', 1.)
        norm_decay_mult = self.paramwise_cfg.get('norm_decay_mult', 1.)
        dwconv_decay_mult = self.paramwise_cfg.get('dwconv_decay_mult', 1.)
        bypass_duplicate = self.paramwise_cfg.get('bypass_duplicate', False)
        dcn_offset_lr_mult = self.paramwise_cfg.get('dcn_offset_lr_mult', 1.)

        # special rules for norm layers and depth-wise conv layers
        is_norm = isinstance(module,
                             (_BatchNorm, _InstanceNorm, GroupNorm, LayerNorm))
        is_dwconv = (
            isinstance(module, torch.nn.Conv2d)
            and module.in_channels == module.groups)

        # Only this module's own parameters; children are handled by the
        # recursive calls at the end.
        for name, param in module.named_parameters(recurse=False):
            param_group = {'params': [param]}
            if not param.requires_grad:
                # Frozen parameters are added without any per-group options.
                params.append(param_group)
                continue
            if bypass_duplicate and self._is_in(param_group, params):
                warnings.warn(f'{prefix} is duplicate. It is skipped since '
                              f'bypass_duplicate={bypass_duplicate}')
                continue
            # if the parameter match one of the custom keys, ignore other rules
            is_custom = False
            for key in sorted_keys:
                if key in f'{prefix}.{name}':
                    is_custom = True
                    lr_mult = custom_keys[key].get('lr_mult', 1.)
                    param_group['lr'] = self.base_lr * lr_mult
                    if self.base_wd is not None:
                        decay_mult = custom_keys[key].get('decay_mult', 1.)
                        param_group['weight_decay'] = self.base_wd * decay_mult
                    # Keys are sorted longest first, so the first hit wins.
                    break

            if not is_custom:
                # bias_lr_mult affects all bias parameters
                # except for norm.bias dcn.conv_offset.bias
                if name == 'bias' and not (is_norm or is_dcn_module):
                    param_group['lr'] = self.base_lr * bias_lr_mult

                if (prefix.find('conv_offset') != -1 and is_dcn_module
                        and isinstance(module, torch.nn.Conv2d)):
                    # deal with both dcn_offset's bias & weight
                    param_group['lr'] = self.base_lr * dcn_offset_lr_mult

                # apply weight decay policies
                if self.base_wd is not None:
                    # norm decay
                    if is_norm:
                        param_group[
                            'weight_decay'] = self.base_wd * norm_decay_mult
                    # depth-wise conv
                    elif is_dwconv:
                        param_group[
                            'weight_decay'] = self.base_wd * dwconv_decay_mult
                    # bias lr and decay
                    elif name == 'bias' and not is_dcn_module:
                        # TODO: current bias_decay_mult will have affect on DCN
                        param_group[
                            'weight_decay'] = self.base_wd * bias_decay_mult
            params.append(param_group)

        # Tell the children whether their parent (this module) is a DCN layer.
        if check_ops_exist():
            from mmcv.ops import DeformConv2d, ModulatedDeformConv2d
            is_dcn_module = isinstance(module,
                                       (DeformConv2d, ModulatedDeformConv2d))
        else:
            is_dcn_module = False
        for child_name, child_mod in module.named_children():
            child_prefix = f'{prefix}.{child_name}' if prefix else child_name
            self.add_params(
                params,
                child_mod,
                prefix=child_prefix,
                is_dcn_module=is_dcn_module)

    def __call__(self, model):
        """Build the optimizer for ``model``.

        Args:
            model (:obj:`nn.Module`): The model to optimize. If it has a
                ``module`` attribute, that inner module is used instead.

        Returns:
            The optimizer built from ``optimizer_cfg`` through ``OPTIMIZERS``.
        """
        if hasattr(model, 'module'):
            model = model.module

        # Shallow copy so adding 'params' does not touch the stored config.
        optimizer_cfg = self.optimizer_cfg.copy()
        # if no paramwise option is specified, just use the global setting
        if not self.paramwise_cfg:
            optimizer_cfg['params'] = model.parameters()
            return build_from_cfg(optimizer_cfg, OPTIMIZERS)

        # set param-wise lr and weight decay recursively
        params = []
        self.add_params(params, model)
        optimizer_cfg['params'] = params

        return build_from_cfg(optimizer_cfg, OPTIMIZERS)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/priority.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
from enum import Enum


class Priority(Enum):
    """Hook priority levels.

    Each member maps a level name to an integer between 0 and 100; by the
    member names, a smaller value means a higher priority.

    +--------------+------------+
    | Level        | Value      |
    +==============+============+
    | HIGHEST      | 0          |
    +--------------+------------+
    | VERY_HIGH    | 10         |
    +--------------+------------+
    | HIGH         | 30         |
    +--------------+------------+
    | ABOVE_NORMAL | 40         |
    +--------------+------------+
    | NORMAL       | 50         |
    +--------------+------------+
    | BELOW_NORMAL | 60         |
    +--------------+------------+
    | LOW          | 70         |
    +--------------+------------+
    | VERY_LOW     | 90         |
    +--------------+------------+
    | LOWEST       | 100        |
    +--------------+------------+
    """

    HIGHEST = 0
    VERY_HIGH = 10
    HIGH = 30
    ABOVE_NORMAL = 40
    NORMAL = 50
    BELOW_NORMAL = 60
    LOW = 70
    VERY_LOW = 90
    LOWEST = 100


def get_priority(priority):
    """Convert a priority given in any supported form to its integer value.

    Args:
        priority (int or str or :obj:`Priority`): An integer in [0, 100], a
            ``Priority`` member, or a member name (case-insensitive).

    Returns:
        int: The priority value.

    Raises:
        ValueError: If an integer priority lies outside [0, 100].
        TypeError: If ``priority`` is none of the supported types.
    """
    if isinstance(priority, int):
        if 0 <= priority <= 100:
            return priority
        raise ValueError('priority must be between 0 and 100')
    if isinstance(priority, Priority):
        return priority.value
    if isinstance(priority, str):
        member = Priority[priority.upper()]
        return member.value
    raise TypeError('priority must be an integer or Priority enum value')


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/record.py
================================================
import os
import datetime
import torch
import psutil
from collections import defaultdict, deque
import time
# from UDL.AutoDL.logger import log_string
# from logging import info as log_string
# from .logger import get_root_logger
import numpy as np
import random
import torch.backends.cudnn as cudnn
import torch.distributed as dist
from functools import partial
from mmcv import print_log as log_string

def get_grad_norm(parameters, norm_type=2):
    """Compute the total gradient norm over a set of parameters.

    Args:
        parameters (Tensor or iterable of Tensor): Parameters to inspect.
            Entries that do not require grad or have no gradient are ignored.
        norm_type (float or int): Order of the norm. ``float('inf')`` gives
            the largest per-parameter infinity norm. Defaults to 2.

    Returns:
        tuple: ``(parameters, total_norm)``. ``parameters`` is the filtered
        list of tensors that contributed and ``total_norm`` is a Python
        float (0.0 when nothing contributed).
    """
    if isinstance(parameters, torch.Tensor):
        parameters = [parameters]
    parameters = [
        p for p in parameters if p.requires_grad and p.grad is not None
    ]
    norm_type = float(norm_type)
    # detach() instead of .data: same values, no autograd tracking.
    per_param = [p.grad.detach().norm(norm_type).item() for p in parameters]
    if norm_type == float('inf'):
        # sum(x ** inf) ** (1 / inf) degenerates to inf ** 0 == 1.0, so the
        # infinity norm has to be taken as a maximum instead.
        total_norm = max(per_param, default=0.0)
    else:
        total_norm = sum(n ** norm_type for n in per_param)
        total_norm = total_norm ** (1. / norm_type)
    return parameters, total_norm

def set_random_seed(seed):
    """Seed NumPy, ``random`` and torch (CPU and CUDA) with ``seed``.

    Also sets ``cudnn.deterministic`` to True.

    Args:
        seed (int): Seed handed to every generator.
    """
    seeders = (
        np.random.seed,
        random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for seed_fn in seeders:
        seed_fn(seed)
    cudnn.deterministic = True


def show_memory_info(hint):
    """Print the current process's ``uss`` memory figure, converted to MB.

    Args:
        hint: Label printed in front of the figure.
    """
    current_process = psutil.Process(os.getpid())
    full_info = current_process.memory_full_info()
    used_mb = full_info.uss / 1024. / 1024
    print('{} memory used: {} MB'.format(hint, used_mb))


# class OrderedAverageMeter(object):
#     def __init__(self):


class AverageMeter(object):
    """Computes and stores the average and current value.

    Args:
        name (str, optional): Label printed by ``str(meter)``. Defaults to
            None, in which case no label is printed.
        fmt (str): Format spec, including the leading colon, applied to the
            current value and the average in ``str(meter)``.
            Defaults to ":f".
    """

    def __init__(self, name=None, fmt=":f"):
        # Kept so that __str__ (used by ProgressMeter.display) can render a
        # readable entry instead of the default object repr.
        self.name = name
        self.fmt = fmt
        self.reset()

    def reset(self):
        """Reset the current value, running sum, count and average to zero."""
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as observed ``n`` times and refresh the average.

        Args:
            val: The new value.
            n (int): Weight of the value. Defaults to 1.
        """
        self.val = val
        self.sum += val * n
        self.count += n
        # A zero total count (e.g. update(val, n=0) on a fresh meter) must
        # not raise ZeroDivisionError.
        self.avg = self.sum / self.count if self.count else 0

    def __str__(self):
        fmtstr = '{val' + self.fmt + '} ({avg' + self.fmt + '})'
        text = fmtstr.format(val=self.val, avg=self.avg)
        if self.name is None:
            return text
        return '{} {}'.format(self.name, text)


class ProgressMeter(object):
    """Prints a ``[batch/total]`` header followed by one entry per meter.

    Args:
        num_batches (int): Total number of batches, shown after the slash.
        meters (iterable): Objects rendered with ``str()`` on each display.
        prefix (str): Text placed in front of the header. Defaults to "".
    """

    def __init__(self, num_batches, meters, prefix=""):
        self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
        self.meters = meters
        self.prefix = prefix

    def display(self, batch):
        """Print a tab-separated progress line for batch index ``batch``."""
        columns = [self.prefix + self.batch_fmtstr.format(batch)]
        columns.extend(str(m) for m in self.meters)
        print('\t'.join(columns))

    def _get_batch_fmtstr(self, num_batches):
        """Build the header template, padded to the width of the total."""
        width = len(str(num_batches // 1))
        field = '{:%dd}' % width
        return '[{}/{}]'.format(field, field.format(num_batches))


def accuracy(output, target, topk=(1,)):
    """Compute the precision@k (in percent) for each ``k`` in ``topk``.

    Args:
        output: Score tensor; the top-k search runs along dimension 1.
        target: Tensor of ground-truth indices; its first dimension is the
            batch size.
        topk: Iterable of ``k`` values to evaluate.

    Returns:
        list: One single-element tensor per ``k`` holding the percentage of
        samples whose target is among the ``k`` highest-scoring entries.
    """
    with torch.no_grad():
        largest_k = max(topk)
        num_samples = target.size(0)

        # Indices of the best ``largest_k`` scores, laid out as (k, batch).
        top_indices = output.topk(largest_k, 1, True, True)[1].t()
        hits = top_indices.eq(target.view(1, -1).expand_as(top_indices))

        return [
            hits[:k].reshape(-1).float().sum(0, keepdim=True).mul_(
                100.0 / num_samples)
            for k in topk
        ]


# class logger():
#     def __init__(self, obj, LOG_DIR, parser):
#         logname = 'log_train' + datetime.datetime.now().strftime('%Y_%m_%d-%H_%M_%S')+'.txt'
#         self.LOG_FOUT = open(os.path.join(LOG_DIR, logname), 'w')
#         self.LOG_FOUT.write(str(parser)+'\n')
#     def __call__(self, out_str):
#          self.LOG_FOUT.write(out_str+'\n')
#          self.LOG_FOUT.flush()
#          print(out_str)

def is_dist_avail_and_initialized():
    """Return True only when ``dist`` is both available and initialized."""
    return bool(dist.is_available() and dist.is_initialized())


class SmoothedValue(object):
    """Tracks a stream of values.

    The most recent ``window_size`` values are kept in ``deque`` (used by
    ``median``, ``std`` and ``max``), while ``val``, ``count``, ``total`` and
    ``avg`` describe the whole series seen since the last reset.

    Args:
        window_size: Maximum number of recent values kept in the window.
        fmt: Format string used by ``str()``; it may reference ``median``,
            ``avg``, ``max``, ``value`` and ``std``. When None a default is
            chosen according to ``eval``.
        eval: Selects the default format that also reports ``std``.
    """

    def __init__(self, window_size=20, fmt=None, eval=False):
        if fmt is None:
            fmt = ("{value:.7f} (avg:{avg:.7f}, std:{std:.7f})" if eval
                   else "{value:.7f} (avg:{avg:.7f})")
        self.reset(window_size)
        self.fmt = fmt

    def reset(self, window_size):
        """Drop all recorded values and start a new window of ``window_size``."""
        self.deque = deque(maxlen=window_size)
        self.val = self.avg = self.total = self.count = 0

    def update(self, value, n=1):
        """Record ``value`` with weight ``n``."""
        self.deque.append(value)
        self.val = value
        self.count += n
        self.total += value * n
        self.avg = self.total / self.count

    def synchronize_between_processes(self):
        """
        Warning: does not synchronize the deque!
        """
        if not is_dist_avail_and_initialized():
            return
        packed = torch.tensor([self.val, self.count, self.total],
                              dtype=torch.float64, device='cuda')
        dist.barrier()
        dist.all_reduce(packed)
        reduced_val, reduced_count, reduced_total = packed.tolist()
        self.val = reduced_val
        self.count = int(reduced_count)
        self.total = reduced_total
        self.avg = self.total / self.count

    @property
    def median(self):
        """Median of the values currently in the window."""
        window = torch.tensor(list(self.deque))
        return window.median().item()

    @property
    def std(self):
        """Standard deviation of the values currently in the window."""
        window = torch.tensor(list(self.deque))
        return window.std().item()

    @property
    def max(self):
        """Largest value currently in the window."""
        return max(self.deque)

    def __str__(self):
        # Every statistic is evaluated, whether or not ``fmt`` refers to it.
        stats = dict(
            median=self.median,
            avg=self.avg,
            max=self.max,
            value=self.val,
            std=self.std)
        return self.fmt.format(**stats)


class MetricLogger(object):
    """Collects named ``SmoothedValue`` meters and logs loop progress.

    Args:
        logger: Object forwarded to ``log_string`` as its ``logger`` argument.
        delimiter: Separator placed between the fields of a log line.
        dist_print: Progress is only logged when this equals 0 (on the
            CPU-only path the per-iteration line is logged regardless).
        window_size: Window size of meters created on demand.
        eval: Forwarded to meters created on demand.
    """

    def __init__(self, logger=None, delimiter="\t", dist_print=0, window_size=20, eval=False):
        # Unknown meter names are created on first access with shared settings.
        self.meters = defaultdict(partial(SmoothedValue, window_size=window_size, eval=eval))
        self.delimiter = delimiter
        self.dist_print = dist_print
        self.logger = logger
        self.ready = False

    def clear(self):
        """Alias of :meth:`clear_output`."""
        self.clear_output()

    def clear_output(self):
        """Remove every meter and reset the ``ready`` flag."""
        self.meters.clear()
        self.ready = False

    def update(self, n=1, **kwargs):
        """Record ``{name: value}`` pairs given as keyword arguments.

        Every name gets a meter exposing ``val``, ``avg``, ``max`` and
        ``deque``. See :meth:`update_dict` for the accepted value types.
        """
        self.update_dict(kwargs, n)

    def update_dict(self, kwargs: dict, n=1):
        """Record ``{name: value}`` pairs given as a dict.

        Tensors are reduced to their mean and converted to a Python number.

        Args:
            kwargs: Mapping from meter name to value.
            n: Weight passed to each meter's ``update``.
        """
        for k, v in kwargs.items():
            if isinstance(v, torch.Tensor):
                v = torch.mean(v).item()
            # The message used to be ``print(...)``, which evaluates to None
            # and left the AssertionError without any text.
            assert isinstance(v, (float, int, str)), "type: {}".format(type(v))
            self.meters[k].update(v, n)

    def __getattr__(self, attr):
        # Fetch ``meters`` through __dict__: evaluating ``self.meters`` here
        # would re-enter __getattr__ without end while the attribute is not
        # set yet (bare instances created by copy / pickle / __new__).
        meters = self.__dict__.get('meters')
        if meters is not None and attr in meters:
            return meters[attr]
        if attr in self.__dict__:
            return self.__dict__[attr]
        raise AttributeError("'{}' object has no attribute '{}'".format(
            type(self).__name__, attr))

    def __str__(self):
        return self.delimiter.join(
            "{}: {}".format(name, str(meter))
            for name, meter in self.meters.items())

    def synchronize_between_processes(self):
        """Call ``synchronize_between_processes`` on every meter."""
        for meter in self.meters.values():
            meter.synchronize_between_processes()

    def add_meter(self, name, meter):
        """Register ``meter`` under ``name``, replacing any existing one."""
        self.meters[name] = meter

    def log_every(self, iterable, print_freq, header=None):
        """Yield ``(item, index)`` pairs from ``iterable`` while logging progress.

        Args:
            iterable: Sized iterable to walk through (``len()`` is required).
            print_freq: A progress line is emitted every ``print_freq``
                iterations and on the last one.
            header: Optional text placed at the start of each line.

        Yields:
            tuple: ``(item, i)`` where ``i`` starts at 1.
        """
        i = 1
        if not header:
            header = ''
        num_iters = len(iterable)
        use_cuda = torch.cuda.is_available()
        start_time = time.time()
        end = time.time()
        iter_time = SmoothedValue(fmt='{avg:.4f}')
        data_time = SmoothedValue(fmt='{avg:.4f}')
        space_fmt = ':' + str(len(str(num_iters))) + 'd'
        fields = [
            header,
            '[{0' + space_fmt + '}/{1}]',
            'eta: {eta}',
            '{meters}',
            'time: {time}',
            'data: {data}'
        ]
        if use_cuda:
            fields.append('max mem: {memory:.0f}MB')
        log_msg = self.delimiter.join(fields)
        MB = 1024.0 * 1024.0
        for obj in iterable:
            # Time spent waiting for the next item.
            data_time.update(time.time() - end)
            yield obj, i
            # Waiting time plus the caller's work on the item.
            iter_time.update(time.time() - end)
            if i % print_freq == 0 or i == num_iters:
                eta_seconds = iter_time.avg * (num_iters - i)
                eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
                # With CUDA the line is gated by dist_print; without CUDA it
                # is always emitted (kept from the original behavior).
                if (not use_cuda) or self.dist_print == 0:
                    values = dict(
                        eta=eta_string,
                        meters=str(self),
                        time=str(iter_time), data=str(data_time))
                    if use_cuda:
                        values['memory'] = torch.cuda.max_memory_allocated() / MB
                    log_string(log_msg.format(i, num_iters, **values),
                               logger=self.logger)
            i += 1
            end = time.time()
        total_time = time.time() - start_time
        total_time_str = str(datetime.timedelta(seconds=int(total_time)))
        if self.dist_print == 0:
            # An empty iterable has no per-iteration time; report 0 instead
            # of dividing by zero.
            per_iter = total_time / num_iters if num_iters else 0.0
            log_string('{} Total time: {} ({:.4f} s / it)'.format(
                header, total_time_str, per_iter), logger=self.logger)

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/runner/utils.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import os
import random
import sys
import time
import warnings
from getpass import getuser
from socket import gethostname

import numpy as np
import torch

import mmcv


def get_host_info():
    """Get hostname and username.

    Return empty string if exception raised, e.g. ``getpass.getuser()`` will
    lead to error in docker container

    Returns:
        str: ``'<user>@<hostname>'``, or ``''`` when either lookup raised an
        ``Exception`` (a warning is emitted in that case).
    """
    try:
        return f'{getuser()}@{gethostname()}'
    except Exception as e:
        # Best effort: a missing user or host name must not abort the caller.
        # The result is returned here rather than from a ``finally`` clause,
        # because ``return`` inside ``finally`` also discards
        # KeyboardInterrupt and SystemExit.
        warnings.warn(f'Host or user not found: {str(e)}')
        return ''


def get_time_str():
    """Return the current local time formatted as ``YYYYmmdd_HHMMSS``."""
    # With no explicit time tuple, ``time.strftime`` formats ``localtime()``.
    return time.strftime('%Y%m%d_%H%M%S')


def obj_from_dict(info, parent=None, default_args=None):
    """Initialize an object from dict.

    The dict must contain the key "type", which indicates the object type, it
    can be either a string or type, such as "list" or ``list``. Remaining
    fields are treated as the arguments for constructing the object.

    Args:
        info (dict): Object types and arguments.
        parent (:class:`module`): Module which may containing expected object
            classes.
        default_args (dict, optional): Default arguments for initializing the
            object.

    Returns:
        any type: Object built from the dict.

    Raises:
        TypeError: If ``type`` is neither a string nor a type.
        KeyError: If ``type`` is a string, ``parent`` is None and the name is
            neither a builtin nor a key of ``sys.modules``.
    """
    assert isinstance(info, dict) and 'type' in info
    assert isinstance(default_args, dict) or default_args is None
    args = info.copy()
    obj_type = args.pop('type')
    if mmcv.is_str(obj_type):
        if parent is not None:
            obj_type = getattr(parent, obj_type)
        else:
            # Without a parent module, resolve builtin names such as "list"
            # first, as the docstring promises. The previous lookup went
            # straight to ``sys.modules``, which can only produce a module
            # object (not callable) or a KeyError.
            import builtins
            if hasattr(builtins, obj_type):
                obj_type = getattr(builtins, obj_type)
            else:
                obj_type = sys.modules[obj_type]
    elif not isinstance(obj_type, type):
        raise TypeError('type must be a str or valid type, but '
                        f'got {type(obj_type)}')
    if default_args is not None:
        # Explicit entries in ``info`` win over the defaults.
        for name, value in default_args.items():
            args.setdefault(name, value)
    return obj_type(**args)


def set_random_seed(seed, deterministic=False, use_rank_shift=False):
    """Set random seed.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and CUDA generators)
    and stores the seed in the ``PYTHONHASHSEED`` environment variable.

    Args:
        seed (int): Seed to be used.
        deterministic (bool): Whether to set the deterministic option for
            CUDNN backend, i.e., set `torch.backends.cudnn.deterministic`
            to True and `torch.backends.cudnn.benchmark` to False.
            Default: False.
        use_rank_shift (bool): Whether to add the rank reported by
            ``mmcv.runner.get_dist_info()`` to the seed so that different
            ranks use different seeds. Default: False.
    """
    if use_rank_shift:
        rank, _world_size = mmcv.runner.get_dist_info()
        seed = seed + rank

    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)

    if not deterministic:
        return
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/tensorrt/__init__.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
# flake8: noqa
from .init_plugins import is_tensorrt_plugin_loaded, load_tensorrt_plugin
from .preprocess import preprocess_onnx


def is_tensorrt_available():
    """Report whether the ``tensorrt`` package can be imported.

    Returns:
        bool: False when importing ``tensorrt`` raises
        ``ModuleNotFoundError``, True otherwise.
    """
    try:
        import tensorrt
    except ModuleNotFoundError:
        return False
    # The import was only a probe; drop the local name again.
    del tensorrt
    return True


# Public names of this package. The list must stay a flat list of strings:
# ``from <package> import *`` fails when an entry of ``__all__`` is not a
# string, so the groups below are added with ``extend`` (``append`` would
# nest each group as a single list-valued entry).
__all__ = []

if is_tensorrt_available():
    from .tensorrt_utils import (TRTWraper, TRTWrapper, load_trt_engine,
                                 onnx2trt, save_trt_engine)

    # load tensorrt plugin lib
    load_tensorrt_plugin()

    __all__.extend([
        'onnx2trt', 'save_trt_engine', 'load_trt_engine', 'TRTWraper',
        'TRTWrapper'
    ])

__all__.extend(['is_tensorrt_plugin_loaded', 'preprocess_onnx'])


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/tensorrt/init_plugins.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import ctypes
import glob
import os


def get_tensorrt_op_path():
    """Get TensorRT plugins library path.

    Searches the parent directory of this file's directory for a file
    matching ``_ext_trt.*.so``.

    Returns:
        str: The first match reported by ``glob``, or ``''`` when nothing
        matches.
    """
    package_dir = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
    matches = glob.glob(os.path.join(package_dir, '_ext_trt.*.so'))
    return matches[0] if len(matches) > 0 else ''


# Module-level flag; ``load_tensorrt_plugin`` sets it to True after loading
# the plugin library.
plugin_is_loaded = False


def is_tensorrt_plugin_loaded():
    """Check if TensorRT plugins library is loaded or not.

    Returns:
        bool: Current value of the module-level ``plugin_is_loaded`` flag.
    """
    # Reading a module global needs no ``global`` declaration.
    return plugin_is_loaded


def load_tensorrt_plugin():
    """Load the TensorRT plugins library once.

    Does nothing when the library was already loaded or when the path
    returned by ``get_tensorrt_op_path()`` does not exist; otherwise the
    library is opened with ``ctypes.CDLL`` and the module-level
    ``plugin_is_loaded`` flag is set.
    """
    global plugin_is_loaded
    lib_path = get_tensorrt_op_path()
    if plugin_is_loaded or not os.path.exists(lib_path):
        return
    ctypes.CDLL(lib_path)
    plugin_is_loaded = True


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/tensorrt/preprocess.py
================================================
import numpy as np
import onnx


def preprocess_onnx(onnx_model):
    """Modify onnx model to match with TensorRT plugins in mmcv.

    There are some conflict between onnx node definition and TensorRT limit.
    This function perform preprocess on the onnx model to solve the conflicts.
    For example, onnx `attribute` is loaded in TensorRT on host and onnx
    `input` is loaded on device. The shape inference is performed on host, so
    any `input` related to shape (such as `max_output_boxes_per_class` in
    NonMaxSuppression) should be transformed to `attribute` before conversion.

    Two rewrites are applied, both in place on ``onnx_model.graph``:

    * every ``NonMaxSuppression`` node is replaced by a new node that keeps
      only its first two inputs and carries the remaining inputs as
      attributes;
    * every ``InstanceNormalization`` node has its op type renamed to
      ``MMCVInstanceNormalization``.

    Arguments:
        onnx_model (onnx.ModelProto): Input onnx model.

    Returns:
        onnx.ModelProto: Modified onnx model.
    """
    graph = onnx_model.graph
    nodes = graph.node
    initializers = graph.initializer
    # Map every non-empty output name to the node that produces it.
    node_dict = {}
    for node in nodes:
        node_outputs = node.output
        for output in node_outputs:
            if len(output) > 0:
                node_dict[output] = node

    # Map initializer names to the initializers themselves.
    init_dict = {_.name: _ for _ in initializers}

    # Output names whose producing nodes are deleted at the end.
    nodes_name_to_remove = set()

    def is_node_without_output(name):
        # True when no node that is still kept lists ``name`` among its
        # inputs, i.e. nothing consumes that value any more.
        for node_name, node in node_dict.items():
            if node_name not in nodes_name_to_remove:
                if name in node.input:
                    return False
        return True

    def mark_nodes_to_remove(name):
        # Mark the producer of ``name`` and, recursively, the producers of
        # its inputs that are left without any consumer.
        # NOTE(review): ``node_dict`` only holds node outputs, so this lookup
        # raises KeyError for a name that is an initializer or a graph input
        # -- confirm that such names never reach this helper.
        node = node_dict[name]
        nodes_name_to_remove.add(name)
        for input_node_name in node.input:
            if is_node_without_output(input_node_name):
                mark_nodes_to_remove(input_node_name)

    def parse_data(name, typ, default_value=0):
        # Read the scalar called ``name`` either from a Constant node or from
        # an initializer. A value produced by any other op cannot be read
        # here: its producer is marked for removal and ``default_value`` is
        # returned instead.
        if name in node_dict:
            node = node_dict[name]
            if node.op_type == 'Constant':
                raw_data = node.attribute[0].t.raw_data
            else:
                mark_nodes_to_remove(name)
                return default_value
        elif name in init_dict:
            raw_data = init_dict[name].raw_data
        else:
            raise ValueError(f'{name} not found in node or initilizer.')
        return np.frombuffer(raw_data, typ).item()

    nrof_node = len(nodes)
    for idx in range(nrof_node):
        node = nodes[idx]
        node_attributes = node.attribute
        node_inputs = node.input
        node_outputs = node.output
        node_name = node.name
        # process NonMaxSuppression node
        if node.op_type == 'NonMaxSuppression':
            # Values used when the node does not provide the matching
            # attribute / input.
            center_point_box = 0
            max_output_boxes_per_class = 1000000
            iou_threshold = 0.3
            score_threshold = 0.0
            offset = 0
            for attribute in node_attributes:
                if attribute.name == 'center_point_box':
                    center_point_box = attribute.i
                elif attribute.name == 'offset':
                    offset = attribute.i

            # Inputs 2, 3 and 4 are folded into attributes; their producers
            # are marked for removal.
            # NOTE(review): when such an input is an initializer rather than
            # a node output, the ``mark_nodes_to_remove`` calls below look it
            # up in ``node_dict`` and would raise KeyError -- confirm.
            if len(node_inputs) >= 3:
                max_output_boxes_per_class = parse_data(
                    node_inputs[2], np.int64, max_output_boxes_per_class)
                mark_nodes_to_remove(node_inputs[2])

            if len(node_inputs) >= 4:
                iou_threshold = parse_data(node_inputs[3], np.float32,
                                           iou_threshold)
                mark_nodes_to_remove(node_inputs[3])

            if len(node_inputs) >= 5:
                # No explicit default is passed here, so the fallback is
                # ``parse_data``'s own default of 0.
                score_threshold = parse_data(node_inputs[4], np.float32)
                mark_nodes_to_remove(node_inputs[4])

            new_node = onnx.helper.make_node(
                'NonMaxSuppression',
                node_inputs[:2],
                node_outputs,
                name=node_name,
                center_point_box=center_point_box,
                max_output_boxes_per_class=max_output_boxes_per_class,
                iou_threshold=iou_threshold,
                score_threshold=score_threshold,
                offset=offset)

            # Point the output lookup at the replacement, then swap the node
            # at the same position so the total node count stays the same.
            for output in node_outputs:
                if output in node_dict:
                    node_dict[output] = new_node
            nodes.insert(idx, new_node)
            nodes.remove(node)
        elif node.op_type == 'InstanceNormalization':
            # directly change op name
            node.op_type = 'MMCVInstanceNormalization'

    # Finally drop every producer that was marked while folding inputs.
    # NOTE(review): a node with several marked outputs would be removed once
    # per output name -- confirm this cannot happen for the folded inputs.
    for node_name in nodes_name_to_remove:
        nodes.remove(node_dict[node_name])

    return onnx_model


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/tensorrt/tensorrt_utils.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import warnings

import onnx
import tensorrt as trt
import torch

from .preprocess import preprocess_onnx


def onnx2trt(onnx_model,
             opt_shape_dict,
             log_level=trt.Logger.ERROR,
             fp16_mode=False,
             max_workspace_size=0,
             device_id=0):
    """Convert onnx model to tensorrt engine.

    The model is first passed through ``preprocess_onnx``, then parsed by the
    TensorRT ONNX parser and built with one optimization profile.

    Arguments:
        onnx_model (str or onnx.ModelProto): the onnx model to convert from
        opt_shape_dict (dict): the min/opt/max shape of each input
        log_level (TensorRT log level): the log level of TensorRT
        fp16_mode (bool): enable fp16 mode
        max_workspace_size (int): set max workspace size of TensorRT engine.
            some tactic and layers need large workspace.
        device_id (int): choice the device to create engine.

    Returns:
        tensorrt.ICudaEngine: the TensorRT engine created from onnx_model

    Raises:
        RuntimeError: If the ONNX parser reports errors.

    Example:
        >>> engine = onnx2trt(
        >>>             "onnx_model.onnx",
        >>>             {'input': [[1, 3, 160, 160],
        >>>                        [1, 3, 320, 320],
        >>>                        [1, 3, 640, 640]]},
        >>>             log_level=trt.Logger.WARNING,
        >>>             fp16_mode=True,
        >>>             max_workspace_size=1 << 30,
        >>>             device_id=0)
    """
    target_device = torch.device('cuda:{}'.format(device_id))

    # Builder and network with an explicit batch dimension.
    trt_logger = trt.Logger(log_level)
    builder = trt.Builder(trt_logger)
    explicit_batch_flag = 1 << (int)(
        trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
    network = builder.create_network(explicit_batch_flag)

    # Parse the (preprocessed) ONNX model into the network.
    onnx_parser = trt.OnnxParser(network, trt_logger)
    if isinstance(onnx_model, str):
        onnx_model = onnx.load(onnx_model)
    onnx_model = preprocess_onnx(onnx_model)

    parsed_ok = onnx_parser.parse(onnx_model.SerializeToString())
    if not parsed_ok:
        error_msgs = ''.join(
            f'{onnx_parser.get_error(index)}\n'
            for index in range(onnx_parser.num_errors))
        raise RuntimeError(f'parse onnx failed:\n{error_msgs}')

    # Builder configuration.
    builder.max_workspace_size = max_workspace_size

    config = builder.create_builder_config()
    config.max_workspace_size = max_workspace_size
    profile = builder.create_optimization_profile()

    # Each entry supplies the [min, opt, max] shapes of one input.
    for input_name, shapes in opt_shape_dict.items():
        min_shape = tuple(shapes[0][:])
        opt_shape = tuple(shapes[1][:])
        max_shape = tuple(shapes[2][:])
        profile.set_shape(input_name, min_shape, opt_shape, max_shape)
    config.add_optimization_profile(profile)

    if fp16_mode:
        builder.fp16_mode = fp16_mode
        config.set_flag(trt.BuilderFlag.FP16)

    # Build the engine with the requested CUDA device selected.
    with torch.cuda.device(target_device):
        return builder.build_engine(network, config)


def save_trt_engine(engine, path):
    """Serialize TensorRT engine to disk.

    Arguments:
        engine (tensorrt.ICudaEngine): TensorRT engine to serialize
        path (str): disk path to write the engine
    """
    with open(path, mode='wb') as engine_file:
        # Serialize after the file is opened, so a failing ``serialize()``
        # still leaves the (empty) file behind as before.
        serialized = bytearray(engine.serialize())
        engine_file.write(serialized)


def load_trt_engine(path):
    """Deserialize TensorRT engine from disk.

    Arguments:
        path (str): disk path to read the engine

    Returns:
        tensorrt.ICudaEngine: the TensorRT engine loaded from disk
    """
    with trt.Logger() as trt_logger:
        with trt.Runtime(trt_logger) as runtime:
            with open(path, mode='rb') as engine_file:
                serialized = engine_file.read()
            return runtime.deserialize_cuda_engine(serialized)


def torch_dtype_from_trt(dtype):
    """Convert a TensorRT dtype to the matching PyTorch dtype.

    Arguments:
        dtype: One of ``trt.bool``, ``trt.int8``, ``trt.int32``,
            ``trt.float16`` or ``trt.float32``.

    Returns:
        torch.dtype: The PyTorch dtype paired with ``dtype``.

    Raises:
        TypeError: If ``dtype`` equals none of the values listed above.
    """
    # (TensorRT attribute name, torch dtype), compared in this order; the
    # TensorRT attribute is looked up only when its turn comes.
    pairs = (
        ('bool', torch.bool),
        ('int8', torch.int8),
        ('int32', torch.int32),
        ('float16', torch.float16),
        ('float32', torch.float32),
    )
    for trt_name, torch_dtype in pairs:
        if dtype == getattr(trt, trt_name):
            return torch_dtype
    raise TypeError('%s is not supported by torch' % dtype)


def torch_device_from_trt(device):
    """Convert a TensorRT tensor location to the matching PyTorch device.

    Arguments:
        device: ``trt.TensorLocation.DEVICE`` or ``trt.TensorLocation.HOST``.

    Returns:
        torch.device: ``cuda`` for ``DEVICE`` and ``cpu`` for ``HOST``.

    Raises:
        TypeError: If ``device`` is neither of the two locations.
    """
    if device == trt.TensorLocation.DEVICE:
        return torch.device('cuda')
    elif device == trt.TensorLocation.HOST:
        return torch.device('cpu')
    else:
        # The exception used to be *returned*, so callers received a
        # TypeError instance in place of a device; raise it instead.
        raise TypeError('%s is not supported by torch' % device)


class TRTWrapper(torch.nn.Module):
    """TensorRT engine Wrapper.

    Arguments:
        engine (tensorrt.ICudaEngine): TensorRT engine to wrap
        input_names (list[str]): names of each inputs
        output_names (list[str]): names of each outputs

    Note:
        If the engine is converted from onnx model. The input_names and
        output_names should be the same as onnx model.
    """

    def __init__(self, engine, input_names=None, output_names=None):
        super(TRTWrapper, self).__init__()
        self.engine = engine
        if isinstance(self.engine, str):
            # A string is treated as the path of a serialized engine.
            self.engine = load_trt_engine(engine)

        if not isinstance(self.engine, trt.ICudaEngine):
            raise TypeError('engine should be str or trt.ICudaEngine')

        self._register_state_dict_hook(TRTWrapper._on_state_dict)
        self.context = self.engine.create_execution_context()

        # get input and output names from engine
        if input_names is None or output_names is None:
            names = [_ for _ in self.engine]
            input_names = list(filter(self.engine.binding_is_input, names))
            # Keep the engine's own order; a set difference would give the
            # outputs in an order that can change between runs.
            known_inputs = set(input_names)
            output_names = [_ for _ in names if _ not in known_inputs]
        self.input_names = input_names
        self.output_names = output_names

    def _on_state_dict(self, state_dict, prefix, local_metadata):
        """State-dict hook: store the serialized engine and binding names."""
        state_dict[prefix + 'engine'] = bytearray(self.engine.serialize())
        state_dict[prefix + 'input_names'] = self.input_names
        state_dict[prefix + 'output_names'] = self.output_names

    def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict,
                              missing_keys, unexpected_keys, error_msgs):
        """Rebuild engine, context and binding names from ``state_dict``."""
        engine_bytes = state_dict[prefix + 'engine']

        with trt.Logger() as logger, trt.Runtime(logger) as runtime:
            self.engine = runtime.deserialize_cuda_engine(engine_bytes)
            self.context = self.engine.create_execution_context()

        self.input_names = state_dict[prefix + 'input_names']
        self.output_names = state_dict[prefix + 'output_names']

    def forward(self, inputs):
        """
        Arguments:
            inputs (dict): dict of input name-tensors pair

        Return:
            dict: dict of output name-tensors pair
        """
        assert self.input_names is not None
        assert self.output_names is not None
        bindings = [None] * (len(self.input_names) + len(self.output_names))

        # Only raw addresses go into ``bindings``. Tensors created by the
        # ``.int()`` / ``.contiguous()`` conversions are held in this list so
        # their storage cannot be released (and reused, e.g. by the output
        # allocation below) before the execution has been enqueued.
        live_inputs = []
        for input_name, input_tensor in inputs.items():
            idx = self.engine.get_binding_index(input_name)

            # Long tensors are narrowed to int32 before binding.
            if input_tensor.dtype == torch.long:
                input_tensor = input_tensor.int()
            input_tensor = input_tensor.contiguous()
            live_inputs.append(input_tensor)
            self.context.set_binding_shape(idx, tuple(input_tensor.shape))
            bindings[idx] = input_tensor.data_ptr()

        # create output tensors
        outputs = {}
        for output_name in self.output_names:
            idx = self.engine.get_binding_index(output_name)
            dtype = torch_dtype_from_trt(self.engine.get_binding_dtype(idx))
            shape = tuple(self.context.get_binding_shape(idx))

            device = torch_device_from_trt(self.engine.get_location(idx))
            output = torch.empty(size=shape, dtype=dtype, device=device)
            outputs[output_name] = output
            bindings[idx] = output.data_ptr()

        self.context.execute_async_v2(bindings,
                                      torch.cuda.current_stream().cuda_stream)

        return outputs


class TRTWraper(TRTWrapper):
    """Misspelled alias of ``TRTWrapper``; warns on construction."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        message = ('TRTWraper will be deprecated in'
                   ' future. Please use TRTWrapper instead')
        warnings.warn(message, DeprecationWarning)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/utils/__init__.py
================================================
# flake8: noqa
# Copyright (c) OpenMMLab. All rights reserved.
from .config import Config, ConfigDict, DictAction
from .misc import (check_prerequisites, concat_list, deprecated_api_warning,
                   has_method, import_modules_from_strings, is_list_of,
                   is_method_overridden, is_seq_of, is_str, is_tuple_of,
                   iter_cast, list_cast, requires_executable, requires_package,
                   slice_list, to_1tuple, to_2tuple, to_3tuple, to_4tuple,
                   to_ntuple, tuple_cast)
from .path import (check_file_exist, fopen, is_filepath, mkdir_or_exist,
                   scandir, symlink)
from .progressbar import (ProgressBar, track_iter_progress,
                          track_parallel_progress, track_progress)
from .testing import (assert_attrs_equal, assert_dict_contains_subset,
                      assert_dict_has_keys, assert_is_norm_layer,
                      assert_keys_equal, assert_params_all_zeros,
                      check_python_script)
from .timer import Timer, TimerError, check_time
from .version_utils import digit_version, get_git_hash

# The public API depends on whether ``torch`` can be imported: without it
# only the helpers imported above are exported; with it the torch-dependent
# submodules are imported as well and their names are added to ``__all__``.
try:
    import torch
except ImportError:
    # torch is missing: export only names imported unconditionally above.
    # ``assert_is_norm_layer`` and ``assert_params_all_zeros`` are imported
    # above but are not listed in this branch.
    __all__ = [
        'Config', 'ConfigDict', 'DictAction', 'is_str', 'iter_cast',
        'list_cast', 'tuple_cast', 'is_seq_of', 'is_list_of', 'is_tuple_of',
        'slice_list', 'concat_list', 'check_prerequisites', 'requires_package',
        'requires_executable', 'is_filepath', 'fopen', 'check_file_exist',
        'mkdir_or_exist', 'symlink', 'scandir', 'ProgressBar',
        'track_progress', 'track_iter_progress', 'track_parallel_progress',
        'Timer', 'TimerError', 'check_time', 'deprecated_api_warning',
        'digit_version', 'get_git_hash', 'import_modules_from_strings',
        'assert_dict_contains_subset', 'assert_attrs_equal',
        'assert_dict_has_keys', 'assert_keys_equal', 'check_python_script',
        'to_1tuple', 'to_2tuple', 'to_3tuple', 'to_4tuple', 'to_ntuple',
        'is_method_overridden', 'has_method'
    ]
else:
    # torch is importable: pull in the additional submodules.
    from .env import collect_env
    from .logging import get_logger, print_log
    from .parrots_jit import jit, skip_no_elena
    from .parrots_wrapper import (
        TORCH_VERSION, BuildExtension, CppExtension, CUDAExtension, DataLoader,
        PoolDataLoader, SyncBatchNorm, _AdaptiveAvgPoolNd, _AdaptiveMaxPoolNd,
        _AvgPoolNd, _BatchNorm, _ConvNd, _ConvTransposeMixin, _InstanceNorm,
        _MaxPoolNd, get_build_config, is_rocm_pytorch, _get_cuda_home)
    from .registry import Registry, build_from_cfg
    from .trace import is_jit_tracing
    from .hub import load_url
    # NOTE(review): the ``to_*tuple`` helpers are listed in the torch-free
    # branch above but not here -- confirm whether that is intentional.
    __all__ = [
        'Config', 'ConfigDict', 'DictAction', 'collect_env', 'get_logger',
        'print_log', 'is_str', 'iter_cast', 'list_cast', 'tuple_cast',
        'is_seq_of', 'is_list_of', 'is_tuple_of', 'slice_list', 'concat_list',
        'check_prerequisites', 'requires_package', 'requires_executable',
        'is_filepath', 'fopen', 'check_file_exist', 'mkdir_or_exist',
        'symlink', 'scandir', 'ProgressBar', 'track_progress',
        'track_iter_progress', 'track_parallel_progress', 'Registry',
        'build_from_cfg', 'Timer', 'TimerError', 'check_time', 'SyncBatchNorm',
        '_AdaptiveAvgPoolNd', '_AdaptiveMaxPoolNd', '_AvgPoolNd', '_BatchNorm',
        '_ConvNd', '_ConvTransposeMixin', '_InstanceNorm', '_MaxPoolNd',
        'get_build_config', 'BuildExtension', 'CppExtension', 'CUDAExtension',
        'DataLoader', 'PoolDataLoader', 'TORCH_VERSION',
        'deprecated_api_warning', 'digit_version', 'get_git_hash',
        'import_modules_from_strings', 'jit', 'skip_no_elena',
        'assert_dict_contains_subset', 'assert_attrs_equal',
        'assert_dict_has_keys', 'assert_keys_equal', 'assert_is_norm_layer',
        'assert_params_all_zeros', 'check_python_script',
        'is_method_overridden', 'is_jit_tracing', 'is_rocm_pytorch',
        '_get_cuda_home', 'load_url', 'has_method'
    ]


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/utils/config.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import ast
import copy
import os
import os.path as osp
import platform
import shutil
import sys
import tempfile
import uuid
import warnings
from argparse import Action, ArgumentParser
from collections import abc
from importlib import import_module

from addict import Dict
from yapf.yapflib.yapf_api import FormatCode

from .misc import import_modules_from_strings
from .path import check_file_exist

if platform.system() == 'Windows':
    import regex as re
else:
    import re

# Special key names and reserved field names used by the config machinery in
# this module.
# NOTE(review): how each one is handled is not visible next to these
# definitions -- confirm against the ``Config`` class before relying on them.
BASE_KEY = '_base_'
DELETE_KEY = '_delete_'
DEPRECATION_KEY = '_deprecation_'
RESERVED_KEYS = ['filename', 'text', 'pretty_text']


class ConfigDict(Dict):
    """``Dict`` subclass with strict lookups.

    A missing key raises ``KeyError`` and a failed attribute lookup raises
    ``AttributeError``.
    """

    def __missing__(self, name):
        raise KeyError(name)

    def __getattr__(self, name):
        try:
            return super().__getattr__(name)
        except KeyError:
            # Report a missing key as a missing attribute.
            error = AttributeError(f"'{self.__class__.__name__}' object has no "
                                   f"attribute '{name}'")
        except Exception as exc:
            error = exc
        # Raised after the handlers have finished, as in the original flow.
        raise error


def add_args(parser, cfg, prefix=''):
    """Add one command-line option per entry of ``cfg`` to ``parser``.

    Nested dicts are flattened with dotted names (``--outer.inner``). The
    option type follows the type of the config value: ``str`` -> plain
    option, ``bool`` -> ``store_true`` flag, ``int`` / ``float`` -> typed
    option, other iterables -> ``nargs='+'`` typed after the first element.

    Args:
        parser (ArgumentParser): Parser that receives the options.
        cfg (dict): Config mapping to expose.
        prefix (str): Text prepended to every option name.

    Returns:
        ArgumentParser: The same ``parser`` object.
    """
    for k, v in cfg.items():
        option = '--' + prefix + k
        if isinstance(v, str):
            parser.add_argument(option)
        elif isinstance(v, bool):
            # Must come before the ``int`` test: ``bool`` is a subclass of
            # ``int``, so the flag branch was unreachable in the old order.
            parser.add_argument(option, action='store_true')
        elif isinstance(v, int):
            parser.add_argument(option, type=int)
        elif isinstance(v, float):
            parser.add_argument(option, type=float)
        elif isinstance(v, dict):
            add_args(parser, v, prefix + k + '.')
        elif isinstance(v, abc.Iterable):
            # Take the element type from the first item without indexing, so
            # empty or non-indexable iterables (e.g. sets) do not crash.
            first = next(iter(v), None)
            if first is None:
                parser.add_argument(option, nargs='+')
            else:
                parser.add_argument(option, type=type(first), nargs='+')
        else:
            print(f'cannot parse key {prefix + k} of type {type(v)}')
    return parser


class Config:
    """A facility for config and config files.

    It supports common file formats as configs: python/json/yaml. The interface
    is the same as a dict object and also allows access config values as
    attributes.

    Example:
        >>> cfg = Config(dict(a=1, b=dict(b1=[0, 1])))
        >>> cfg.a
        1
        >>> cfg.b
        {'b1': [0, 1]}
        >>> cfg.b.b1
        [0, 1]
        >>> cfg = Config.fromfile('tests/data/config/a.py')
        >>> cfg.filename
        "/home/kchen/projects/mmcv/tests/data/config/a.py"
        >>> cfg.item4
        'test'
        >>> cfg
        "Config (path: /home/kchen/projects/mmcv/tests/data/config/a.py): "
        "{'item1': [1, 2], 'item2': {'a': 0}, 'item3': True, 'item4': 'test'}"
    """

    @staticmethod
    def _validate_py_syntax(filename):
        with open(filename, 'r', encoding='utf-8') as f:
            # Setting encoding explicitly to resolve coding issue on windows
            content = f.read()
        try:
            ast.parse(content)
        except SyntaxError as e:
            raise SyntaxError('There are syntax errors in config '
                              f'file {filename}: {e}')

    @staticmethod
    def _substitute_predefined_vars(filename, temp_config_name):
        file_dirname = osp.dirname(filename)
        file_basename = osp.basename(filename)
        file_basename_no_extension = osp.splitext(file_basename)[0]
        file_extname = osp.splitext(filename)[1]
        support_templates = dict(
            fileDirname=file_dirname,
            fileBasename=file_basename,
            fileBasenameNoExtension=file_basename_no_extension,
            fileExtname=file_extname)
        with open(filename, 'r', encoding='utf-8') as f:
            # Setting encoding explicitly to resolve coding issue on windows
            config_file = f.read()
        for key, value in support_templates.items():
            regexp = r'\{\{\s*' + str(key) + r'\s*\}\}'
            value = value.replace('\\', '/')
            config_file = re.sub(regexp, value, config_file)
        with open(temp_config_name, 'w', encoding='utf-8') as tmp_config_file:
            tmp_config_file.write(config_file)

    @staticmethod
    def _pre_substitute_base_vars(filename, temp_config_name):
        """Substitute base variable placehoders to string, so that parsing
        would work."""
        with open(filename, 'r', encoding='utf-8') as f:
            # Setting encoding explicitly to resolve coding issue on windows
            config_file = f.read()
        base_var_dict = {}
        regexp = r'\{\{\s*' + BASE_KEY + r'\.([\w\.]+)\s*\}\}'
        base_vars = set(re.findall(regexp, config_file))
        for base_var in base_vars:
            randstr = f'_{base_var}_{uuid.uuid4().hex.lower()[:6]}'
            base_var_dict[randstr] = base_var
            regexp = r'\{\{\s*' + BASE_KEY + r'\.' + base_var + r'\s*\}\}'
            config_file = re.sub(regexp, f'"{randstr}"', config_file)
        with open(temp_config_name, 'w', encoding='utf-8') as tmp_config_file:
            tmp_config_file.write(config_file)
        return base_var_dict

    @staticmethod
    def _substitute_base_vars(cfg, base_var_dict, base_cfg):
        """Substitute variable strings to their actual values."""
        cfg = copy.deepcopy(cfg)

        if isinstance(cfg, dict):
            for k, v in cfg.items():
                if isinstance(v, str) and v in base_var_dict:
                    new_v = base_cfg
                    for new_k in base_var_dict[v].split('.'):
                        new_v = new_v[new_k]
                    cfg[k] = new_v
                elif isinstance(v, (list, tuple, dict)):
                    cfg[k] = Config._substitute_base_vars(
                        v, base_var_dict, base_cfg)
        elif isinstance(cfg, tuple):
            cfg = tuple(
                Config._substitute_base_vars(c, base_var_dict, base_cfg)
                for c in cfg)
        elif isinstance(cfg, list):
            cfg = [
                Config._substitute_base_vars(c, base_var_dict, base_cfg)
                for c in cfg
            ]
        elif isinstance(cfg, str) and cfg in base_var_dict:
            new_v = base_cfg
            for new_k in base_var_dict[cfg].split('.'):
                new_v = new_v[new_k]
            cfg = new_v

        return cfg

    @staticmethod
    def _file2dict(filename, use_predefined_variables=True):
        filename = osp.abspath(osp.expanduser(filename))
        check_file_exist(filename)
        fileExtname = osp.splitext(filename)[1]
        if fileExtname not in ['.py', '.json', '.yaml', '.yml']:
            raise IOError('Only py/yml/yaml/json type are supported now!')

        with tempfile.TemporaryDirectory() as temp_config_dir:
            temp_config_file = tempfile.NamedTemporaryFile(
                dir=temp_config_dir, suffix=fileExtname)
            if platform.system() == 'Windows':
                temp_config_file.close()
            temp_config_name = osp.basename(temp_config_file.name)
            # Substitute predefined variables
            if use_predefined_variables:
                Config._substitute_predefined_vars(filename,
                                                   temp_config_file.name)
            else:
                shutil.copyfile(filename, temp_config_file.name)
            # Substitute base variables from placeholders to strings
            base_var_dict = Config._pre_substitute_base_vars(
                temp_config_file.name, temp_config_file.name)

            if filename.endswith('.py'):
                temp_module_name = osp.splitext(temp_config_name)[0]
                sys.path.insert(0, temp_config_dir)
                Config._validate_py_syntax(filename)
                mod = import_module(temp_module_name)
                sys.path.pop(0)
                cfg_dict = {
                    name: value
                    for name, value in mod.__dict__.items()
                    if not name.startswith('__')
                }
                # delete imported module
                del sys.modules[temp_module_name]
            elif filename.endswith(('.yml', '.yaml', '.json')):
                import mmcv
                cfg_dict = mmcv.load(temp_config_file.name)
            # close temp file
            temp_config_file.close()

        # check deprecation information
        if DEPRECATION_KEY in cfg_dict:
            deprecation_info = cfg_dict.pop(DEPRECATION_KEY)
            warning_msg = f'The config file {filename} will be deprecated ' \
                'in the future.'
            if 'expected' in deprecation_info:
                warning_msg += f' Please use {deprecation_info["expected"]} ' \
                    'instead.'
            if 'reference' in deprecation_info:
                warning_msg += ' More information can be found at ' \
                    f'{deprecation_info["reference"]}'
            warnings.warn(warning_msg, DeprecationWarning)

        cfg_text = filename + '\n'
        with open(filename, 'r', encoding='utf-8') as f:
            # Setting encoding explicitly to resolve coding issue on windows
            cfg_text += f.read()

        if BASE_KEY in cfg_dict:
            cfg_dir = osp.dirname(filename)
            base_filename = cfg_dict.pop(BASE_KEY)
            base_filename = base_filename if isinstance(
                base_filename, list) else [base_filename]

            cfg_dict_list = list()
            cfg_text_list = list()
            for f in base_filename:
                _cfg_dict, _cfg_text = Config._file2dict(osp.join(cfg_dir, f))
                cfg_dict_list.append(_cfg_dict)
                cfg_text_list.append(_cfg_text)

            base_cfg_dict = dict()
            for c in cfg_dict_list:
                duplicate_keys = base_cfg_dict.keys() & c.keys()
                if len(duplicate_keys) > 0:
                    raise KeyError('Duplicate key is not allowed among bases. '
                                   f'Duplicate keys: {duplicate_keys}')
                base_cfg_dict.update(c)

            # Substitute base variables from strings to their actual values
            cfg_dict = Config._substitute_base_vars(cfg_dict, base_var_dict,
                                                    base_cfg_dict)

            base_cfg_dict = Config._merge_a_into_b(cfg_dict, base_cfg_dict)
            cfg_dict = base_cfg_dict

            # merge cfg_text
            cfg_text_list.append(cfg_text)
            cfg_text = '\n'.join(cfg_text_list)

        return cfg_dict, cfg_text

    @staticmethod
    def _merge_a_into_b(a, b, allow_list_keys=False):
        """merge dict ``a`` into dict ``b`` (non-inplace).

        Values in ``a`` will overwrite ``b``. ``b`` is copied first to avoid
        in-place modifications.

        Args:
            a (dict): The source dict to be merged into ``b``.
            b (dict): The origin dict to be fetch keys from ``a``.
            allow_list_keys (bool): If True, int string keys (e.g. '0', '1')
              are allowed in source ``a`` and will replace the element of the
              corresponding index in b if b is a list. Default: False.

        Returns:
            dict: The modified dict of ``b`` using ``a``.

        Examples:
            # Normally merge a into b.
            >>> Config._merge_a_into_b(
            ...     dict(obj=dict(a=2)), dict(obj=dict(a=1)))
            {'obj': {'a': 2}}

            # Delete b first and merge a into b.
            >>> Config._merge_a_into_b(
            ...     dict(obj=dict(_delete_=True, a=2)), dict(obj=dict(a=1)))
            {'obj': {'a': 2}}

            # b is a list
            >>> Config._merge_a_into_b(
            ...     {'0': dict(a=2)}, [dict(a=1), dict(b=2)], True)
            [{'a': 2}, {'b': 2}]
        """
        b = b.copy()
        for k, v in a.items():
            if allow_list_keys and k.isdigit() and isinstance(b, list):
                k = int(k)
                if len(b) <= k:
                    raise KeyError(f'Index {k} exceeds the length of list {b}')
                b[k] = Config._merge_a_into_b(v, b[k], allow_list_keys)
            elif isinstance(v, dict):
                if k in b and not v.pop(DELETE_KEY, False):
                    allowed_types = (dict, list) if allow_list_keys else dict
                    if not isinstance(b[k], allowed_types):
                        raise TypeError(
                            f'{k}={v} in child config cannot inherit from '
                            f'base because {k} is a dict in the child config '
                            f'but is of type {type(b[k])} in base config. '
                            f'You may set `{DELETE_KEY}=True` to ignore the '
                            f'base config.')
                    b[k] = Config._merge_a_into_b(v, b[k], allow_list_keys)
                else:
                    b[k] = ConfigDict(v)
            else:
                b[k] = v
        return b

    @staticmethod
    def fromfile(filename,
                 use_predefined_variables=True,
                 import_custom_modules=True):
        cfg_dict, cfg_text = Config._file2dict(filename,
                                               use_predefined_variables)
        if import_custom_modules and cfg_dict.get('custom_imports', None):
            import_modules_from_strings(**cfg_dict['custom_imports'])
        return Config(cfg_dict, cfg_text=cfg_text, filename=filename)

    @staticmethod
    def fromstring(cfg_str, file_format):
        """Generate config from config str.

        Args:
            cfg_str (str): Config str.
            file_format (str): Config file format corresponding to the
               config str. Only py/yml/yaml/json type are supported now!

        Returns:
            :obj:`Config`: Config obj.
        """
        if file_format not in ['.py', '.json', '.yaml', '.yml']:
            raise IOError('Only py/yml/yaml/json type are supported now!')
        if file_format != '.py' and 'dict(' in cfg_str:
            # check if users specify a wrong suffix for python
            warnings.warn(
                'Please check "file_format", the file format may be .py')
        with tempfile.NamedTemporaryFile(
                'w', encoding='utf-8', suffix=file_format,
                delete=False) as temp_file:
            temp_file.write(cfg_str)
            # on windows, previous implementation cause error
            # see PR 1077 for details
        cfg = Config.fromfile(temp_file.name)
        os.remove(temp_file.name)
        return cfg

    @staticmethod
    def auto_argparser(description=None):
        """Generate argparser from config file automatically (experimental)"""
        partial_parser = ArgumentParser(description=description)
        partial_parser.add_argument('config', help='config file path')
        cfg_file = partial_parser.parse_known_args()[0].config
        cfg = Config.fromfile(cfg_file)
        parser = ArgumentParser(description=description)
        parser.add_argument('config', help='config file path')
        add_args(parser, cfg)
        return parser, cfg

    def __init__(self, cfg_dict=None, cfg_text=None, filename=None):
        if cfg_dict is None:
            cfg_dict = dict()
        elif not isinstance(cfg_dict, dict):
            raise TypeError('cfg_dict must be a dict, but '
                            f'got {type(cfg_dict)}')
        for key in cfg_dict:
            if key in RESERVED_KEYS:
                raise KeyError(f'{key} is reserved for config file')

        super(Config, self).__setattr__('_cfg_dict', ConfigDict(cfg_dict))
        super(Config, self).__setattr__('_filename', filename)
        if cfg_text:
            text = cfg_text
        elif filename:
            with open(filename, 'r') as f:
                text = f.read()
        else:
            text = ''
        super(Config, self).__setattr__('_text', text)

    @property
    def filename(self):
        """The ``filename`` value stored by ``__init__`` (``None`` when no
        filename was given)."""
        return self._filename

    @property
    def text(self):
        """The config text stored by ``__init__``: ``cfg_text`` if given,
        else the content of ``filename``, else an empty string."""
        return self._text

    @property
    def pretty_text(self):

        indent = 4

        def _indent(s_, num_spaces):
            s = s_.split('\n')
            if len(s) == 1:
                return s_
            first = s.pop(0)
            s = [(num_spaces * ' ') + line for line in s]
            s = '\n'.join(s)
            s = first + '\n' + s
            return s

        def _format_basic_types(k, v, use_mapping=False):
            if isinstance(v, str):
                v_str = f"'{v}'"
            else:
                v_str = str(v)

            if use_mapping:
                k_str = f"'{k}'" if isinstance(k, str) else str(k)
                attr_str = f'{k_str}: {v_str}'
            else:
                attr_str = f'{str(k)}={v_str}'
            attr_str = _indent(attr_str, indent)

            return attr_str

        def _format_list(k, v, use_mapping=False):
            # check if all items in the list are dict
            if all(isinstance(_, dict) for _ in v):
                v_str = '[\n'
                v_str += '\n'.join(
                    f'dict({_indent(_format_dict(v_), indent)}),'
                    for v_ in v).rstrip(',')
                if use_mapping:
                    k_str = f"'{k}'" if isinstance(k, str) else str(k)
                    attr_str = f'{k_str}: {v_str}'
                else:
                    attr_str = f'{str(k)}={v_str}'
                attr_str = _indent(attr_str, indent) + ']'
            else:
                attr_str = _format_basic_types(k, v, use_mapping)
            return attr_str

        def _contain_invalid_identifier(dict_str):
            contain_invalid_identifier = False
            for key_name in dict_str:
                contain_invalid_identifier |= \
                    (not str(key_name).isidentifier())
            return contain_invalid_identifier

        def _format_dict(input_dict, outest_level=False):
            r = ''
            s = []

            use_mapping = _contain_invalid_identifier(input_dict)
            if use_mapping:
                r += '{'
            for idx, (k, v) in enumerate(input_dict.items()):
                is_last = idx >= len(input_dict) - 1
                end = '' if outest_level or is_last else ','
                if isinstance(v, dict):
                    v_str = '\n' + _format_dict(v)
                    if use_mapping:
                        k_str = f"'{k}'" if isinstance(k, str) else str(k)
                        attr_str = f'{k_str}: dict({v_str}'
                    else:
                        attr_str = f'{str(k)}=dict({v_str}'
                    attr_str = _indent(attr_str, indent) + ')' + end
                elif isinstance(v, list):
                    attr_str = _format_list(k, v, use_mapping) + end
                else:
                    attr_str = _format_basic_types(k, v, use_mapping) + end

                s.append(attr_str)
            r += '\n'.join(s)
            if use_mapping:
                r += '}'
            return r

        cfg_dict = self._cfg_dict.to_dict()
        text = _format_dict(cfg_dict, outest_level=True)
        # copied from setup.cfg
        yapf_style = dict(
            based_on_style='pep8',
            blank_line_before_nested_class_or_def=True,
            split_before_expression_after_opening_paren=True)
        text, _ = FormatCode(text, style_config=yapf_style, verify=True)

        return text

    def __repr__(self):
        # Shows the source path followed by the repr of the wrapped dict.
        return f'Config (path: {self.filename}): {self._cfg_dict.__repr__()}'

    def __len__(self):
        # Number of top-level entries in the wrapped dict.
        return len(self._cfg_dict)

    def __getattr__(self, name):
        # Python calls this only when normal attribute lookup fails, so
        # unknown attributes are looked up on the wrapped dict.
        return getattr(self._cfg_dict, name)

    def __getitem__(self, name):
        # Item access is delegated to the wrapped dict.
        return self._cfg_dict.__getitem__(name)

    def __setattr__(self, name, value):
        if isinstance(value, dict):
            value = ConfigDict(value)
        self._cfg_dict.__setattr__(name, value)

    def __setitem__(self, name, value):
        if isinstance(value, dict):
            value = ConfigDict(value)
        self._cfg_dict.__setitem__(name, value)

    def __iter__(self):
        # Iterates over the wrapped dict.
        return iter(self._cfg_dict)

    def __getstate__(self):
        # State tuple consumed by ``__setstate__`` in the same order.
        return (self._cfg_dict, self._filename, self._text)

    def __setstate__(self, state):
        _cfg_dict, _filename, _text = state
        super(Config, self).__setattr__('_cfg_dict', _cfg_dict)
        super(Config, self).__setattr__('_filename', _filename)
        super(Config, self).__setattr__('_text', _text)

    def dump(self, file=None):
        cfg_dict = super(Config, self).__getattribute__('_cfg_dict').to_dict()
        if self.filename.endswith('.py'):
            if file is None:
                return self.pretty_text
            else:
                with open(file, 'w', encoding='utf-8') as f:
                    f.write(self.pretty_text)
        else:
            import mmcv
            if file is None:
                file_format = self.filename.split('.')[-1]
                return mmcv.dump(cfg_dict, file_format=file_format)
            else:
                mmcv.dump(cfg_dict, file)

    def merge_from_dict(self, options, allow_list_keys=True):
        """Merge list into cfg_dict.

        Merge the dict parsed by MultipleKVAction into this cfg.

        Examples:
            >>> options = {'model.backbone.depth': 50,
            ...            'model.backbone.with_cp':True}
            >>> cfg = Config(dict(model=dict(backbone=dict(type='ResNet'))))
            >>> cfg.merge_from_dict(options)
            >>> cfg_dict = super(Config, self).__getattribute__('_cfg_dict')
            >>> assert cfg_dict == dict(
            ...     model=dict(backbone=dict(depth=50, with_cp=True)))

            >>> # Merge list element
            >>> cfg = Config(dict(pipeline=[
            ...     dict(type='LoadImage'), dict(type='LoadAnnotations')]))
            >>> options = dict(pipeline={'0': dict(type='SelfLoadImage')})
            >>> cfg.merge_from_dict(options, allow_list_keys=True)
            >>> cfg_dict = super(Config, self).__getattribute__('_cfg_dict')
            >>> assert cfg_dict == dict(pipeline=[
            ...     dict(type='SelfLoadImage'), dict(type='LoadAnnotations')])

        Args:
            options (dict): dict of configs to merge from.
            allow_list_keys (bool): If True, int string keys (e.g. '0', '1')
              are allowed in ``options`` and will replace the element of the
              corresponding index in the config if the config is a list.
              Default: True.
        """
        option_cfg_dict = {}
        for full_key, v in options.items():
            d = option_cfg_dict
            key_list = full_key.split('.')
            for subkey in key_list[:-1]:
                d.setdefault(subkey, ConfigDict())
                d = d[subkey]
            subkey = key_list[-1]
            d[subkey] = v

        cfg_dict = super(Config, self).__getattribute__('_cfg_dict')
        super(Config, self).__setattr__(
            '_cfg_dict',
            Config._merge_a_into_b(
                option_cfg_dict, cfg_dict, allow_list_keys=allow_list_keys))


class DictAction(Action):
    """
    argparse action to split an argument into KEY=VALUE form
    on the first = and append to a dictionary. List options can
    be passed as comma separated values, i.e 'KEY=V1,V2,V3', or with explicit
    brackets, i.e. 'KEY=[V1,V2,V3]'. It also supports nested brackets to build
    list/tuple values. e.g. 'KEY=[(V1,V2),(V3,V4)]'
    """

    @staticmethod
    def _parse_int_float_bool(val):
        try:
            return int(val)
        except ValueError:
            pass
        try:
            return float(val)
        except ValueError:
            pass
        if val.lower() in ['true', 'false']:
            return True if val.lower() == 'true' else False
        return val

    @staticmethod
    def _parse_iterable(val):
        """Parse iterable values in the string.

        All elements inside '()' or '[]' are treated as iterable values.

        Args:
            val (str): Value string.

        Returns:
            list | tuple: The expanded list or tuple from the string.

        Examples:
            >>> DictAction._parse_iterable('1,2,3')
            [1, 2, 3]
            >>> DictAction._parse_iterable('[a, b, c]')
            ['a', 'b', 'c']
            >>> DictAction._parse_iterable('[(1, 2, 3), [a, b], c]')
            [(1, 2, 3), ['a', 'b'], 'c']
        """

        def find_next_comma(string):
            """Find the position of next comma in the string.

            If no ',' is found in the string, return the string length. All
            chars inside '()' and '[]' are treated as one element and thus ','
            inside these brackets are ignored.
            """
            assert (string.count('(') == string.count(')')) and (
                    string.count('[') == string.count(']')), \
                f'Imbalanced brackets exist in {string}'
            end = len(string)
            for idx, char in enumerate(string):
                pre = string[:idx]
                # The string before this ',' is balanced
                if ((char == ',') and (pre.count('(') == pre.count(')'))
                        and (pre.count('[') == pre.count(']'))):
                    end = idx
                    break
            return end

        # Strip ' and " characters and replace whitespace.
        val = val.strip('\'\"').replace(' ', '')
        is_tuple = False
        if val.startswith('(') and val.endswith(')'):
            is_tuple = True
            val = val[1:-1]
        elif val.startswith('[') and val.endswith(']'):
            val = val[1:-1]
        elif ',' not in val:
            # val is a single value
            return DictAction._parse_int_float_bool(val)

        values = []
        while len(val) > 0:
            comma_idx = find_next_comma(val)
            element = DictAction._parse_iterable(val[:comma_idx])
            values.append(element)
            val = val[comma_idx + 1:]
        if is_tuple:
            values = tuple(values)
        return values

    def __call__(self, parser, namespace, values, option_string=None):
        options = {}
        for kv in values:
            key, val = kv.split('=', maxsplit=1)
            options[key] = self._parse_iterable(val)
        setattr(namespace, self.dest, options)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/utils/env.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
"""This file holding some environment constant for sharing by other files."""

import os.path as osp
import subprocess
import sys
from collections import defaultdict

import cv2
import torch

import mmcv
from .parrots_wrapper import get_build_config


def collect_env():
    """Collect the information of the running environments.

    The compilers are queried by running them directly (no shell pipeline),
    so a missing ``gcc``/``nvcc`` is reported through the fallback values
    instead of an empty string, and no POSIX shell tools are required.

    Returns:
        dict: The environment information. The following fields are contained.

            - sys.platform: The variable of ``sys.platform``.
            - Python: Python version.
            - CUDA available: Bool, indicating if CUDA is available.
            - GPU devices: Device type of each GPU.
            - CUDA_HOME (optional): The env var ``CUDA_HOME``.
            - NVCC (optional): NVCC version.
            - GCC: GCC version, "n/a" if GCC is not installed.
            - PyTorch: PyTorch version.
            - PyTorch compiling details: The output of \
                ``torch.__config__.show()``.
            - TorchVision (optional): TorchVision version.
            - OpenCV: OpenCV version.
            - MMCV: MMCV version.
            - MMCV Compiler: The GCC version for compiling MMCV ops.
            - MMCV CUDA Compiler: The CUDA version for compiling MMCV ops.
    """
    env_info = {}
    env_info['sys.platform'] = sys.platform
    env_info['Python'] = sys.version.replace('\n', '')

    cuda_available = torch.cuda.is_available()
    env_info['CUDA available'] = cuda_available

    if cuda_available:
        # Group device indices by device name, e.g. 'GPU 0,1': '<name>'.
        devices = defaultdict(list)
        for k in range(torch.cuda.device_count()):
            devices[torch.cuda.get_device_name(k)].append(str(k))
        for name, device_ids in devices.items():
            env_info['GPU ' + ','.join(device_ids)] = name

        from mmcv.utils.parrots_wrapper import _get_cuda_home
        CUDA_HOME = _get_cuda_home()
        env_info['CUDA_HOME'] = CUDA_HOME

        if CUDA_HOME is not None and osp.isdir(CUDA_HOME):
            try:
                nvcc = osp.join(CUDA_HOME, 'bin/nvcc')
                nvcc = subprocess.check_output([nvcc, '-V'])
                # Keep the last output line, like the former ``tail -n1``.
                lines = nvcc.decode('utf-8').strip().splitlines()
                nvcc = lines[-1].strip() if lines else 'Not Available'
            except (subprocess.SubprocessError, OSError):
                # OSError covers a missing or non-executable nvcc binary.
                nvcc = 'Not Available'
            env_info['NVCC'] = nvcc

    try:
        gcc = subprocess.check_output(['gcc', '--version'])
        # Keep the first output line, like the former ``head -n1``.
        lines = gcc.decode('utf-8').strip().splitlines()
        env_info['GCC'] = lines[0].strip() if lines else 'n/a'
    except (subprocess.CalledProcessError, OSError):  # gcc is unavailable
        env_info['GCC'] = 'n/a'

    env_info['PyTorch'] = torch.__version__
    env_info['PyTorch compiling details'] = get_build_config()

    try:
        import torchvision
        env_info['TorchVision'] = torchvision.__version__
    except ModuleNotFoundError:
        pass

    env_info['OpenCV'] = cv2.__version__

    env_info['MMCV'] = mmcv.__version__

    try:
        from mmcv.ops import get_compiler_version, get_compiling_cuda_version
    except ModuleNotFoundError:
        env_info['MMCV Compiler'] = 'n/a'
        env_info['MMCV CUDA Compiler'] = 'n/a'
    else:
        env_info['MMCV Compiler'] = get_compiler_version()
        env_info['MMCV CUDA Compiler'] = get_compiling_cuda_version()

    return env_info


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/utils/ext_loader.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import importlib
import os
import pkgutil
import warnings
from collections import namedtuple

import torch

if torch.__version__ != 'parrots':

    def load_ext(name, funcs):
        """Import ``mmcv.<name>`` and check that it provides ``funcs``.

        Args:
            name (str): Module name relative to the ``mmcv`` package.
            funcs (list[str]): Attribute names the module must have.

        Returns:
            module: The imported extension module.
        """
        module = importlib.import_module('mmcv.' + name)
        for func_name in funcs:
            assert hasattr(module, func_name), \
                f'{func_name} miss in module {name}'
        return module
else:
    from parrots import extension
    from parrots.base import ParrotsException

    # Ops whose loaded object is exposed through ``.op``; every other op is
    # exposed through ``.op_`` (see ``load_ext`` below).
    has_return_value_ops = [
        'nms',
        'softnms',
        'nms_match',
        'nms_rotated',
        'top_pool_forward',
        'top_pool_backward',
        'bottom_pool_forward',
        'bottom_pool_backward',
        'left_pool_forward',
        'left_pool_backward',
        'right_pool_forward',
        'right_pool_backward',
        'fused_bias_leakyrelu',
        'upfirdn2d',
        'ms_deform_attn_forward',
        'pixel_group',
        'contour_expand',
    ]

    def get_fake_func(name, e):
        """Return a stand-in for op ``name`` that warns and raises ``e``."""

        def fake_func(*args, **kwargs):
            warnings.warn(f'{name} is not supported in parrots now')
            raise e

        return fake_func

    def load_ext(name, funcs):
        """Load every op in ``funcs`` and bundle them in a namedtuple.

        Ops that fail to load with ``ParrotsException`` are replaced by a
        function that raises that exception when called.
        """
        ExtModule = namedtuple('ExtModule', funcs)
        lib_root = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
        loaded = []
        for fun in funcs:
            try:
                ext_fun = extension.load(fun, name, lib_dir=lib_root)
            except ParrotsException as e:
                if 'No element registered' not in e.message:
                    warnings.warn(e.message)
                loaded.append(get_fake_func(fun, e))
                continue
            if fun in has_return_value_ops:
                loaded.append(ext_fun.op)
            else:
                loaded.append(ext_fun.op_)
        return ExtModule(*loaded)


def check_ops_exist():
    """Return whether the compiled extension ``mmcv._ext`` can be found.

    ``importlib.util.find_spec`` is used instead of the deprecated
    ``pkgutil.find_loader``. If the lookup itself fails (for example the
    parent package cannot be imported) the ops are reported as missing.

    Returns:
        bool: True if a loader for ``mmcv._ext`` is available.
    """
    import importlib.util

    try:
        spec = importlib.util.find_spec('mmcv._ext')
    except (ImportError, ValueError):
        return False
    return spec is not None and spec.loader is not None


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/utils/hub.py
================================================
# The 1.6 release of PyTorch switched torch.save to use a new zipfile-based
# file format. It will cause RuntimeError when a checkpoint was saved in
# torch >= 1.6.0 but loaded in torch < 1.7.0.
# More details at https://github.com/open-mmlab/mmpose/issues/904
from .parrots_wrapper import TORCH_VERSION
from .path import mkdir_or_exist
from .version_utils import digit_version

if TORCH_VERSION != 'parrots' and digit_version(TORCH_VERSION) < digit_version(
        '1.7.0'):
    # Modified from https://github.com/pytorch/pytorch/blob/master/torch/hub.py
    import os
    import torch
    import warnings
    from urllib.parse import urlparse
    import sys
    import zipfile
    from torch.hub import download_url_to_file, _get_torch_home, HASH_REGEX

    # Hub used to support automatic extraction from zip files that users
    # compressed manually. The legacy zip format expects the zip to contain
    # exactly one file produced by torch.save() < 1.6. This support should be
    # removed since zipfile is now the default format for torch.save().
    def _is_legacy_zip_format(filename):
        if zipfile.is_zipfile(filename):
            infolist = zipfile.ZipFile(filename).infolist()
            return len(infolist) == 1 and not infolist[0].is_dir()
        return False

    def _legacy_zip_load(filename, model_dir, map_location):
        warnings.warn(
            'Falling back to the old format < 1.6. This support will'
            ' be deprecated in favor of default zipfile format '
            'introduced in 1.6. Please redo torch.save() to save it '
            'in the new zipfile format.', DeprecationWarning)
        # Note: extractall() defaults to overwrite file if exists. No need to
        #       clean up beforehand. We deliberately don't handle tarfile here
        #       since our legacy serialization format was in tar.
        #       E.g. resnet18-5c106cde.pth which is widely used.
        with zipfile.ZipFile(filename) as f:
            members = f.infolist()
            if len(members) != 1:
                raise RuntimeError(
                    'Only one file(not dir) is allowed in the zipfile')
            f.extractall(model_dir)
            extraced_name = members[0].filename
            extracted_file = os.path.join(model_dir, extraced_name)
        return torch.load(extracted_file, map_location=map_location)

    def load_url(url,
                 model_dir=None,
                 map_location=None,
                 progress=True,
                 check_hash=False,
                 file_name=None):
        r"""Download (if needed) and deserialize a Torch object from a URL.

        The file is cached in ``model_dir``; when a file of the target name
        already exists there, no download takes place. A cached file in the
        legacy single-member zip format is extracted before loading.

        When ``model_dir`` is None it defaults to ``<torch_home>/checkpoints``
        where ``torch_home`` comes from ``torch.hub._get_torch_home``.

        Args:
            url (str): URL of the object to download.
            model_dir (str, optional): Directory in which to save the object.
            map_location (optional): A function or a dict specifying how to
                remap storage locations (see torch.load).
            progress (bool, optional): Whether to display a progress bar on
                stderr. Default: True.
            check_hash (bool, optional): If True, the file name should follow
                the naming convention ``filename-<sha256>.ext`` where
                ``<sha256>`` is the first eight or more digits of the SHA256
                hash of the file contents; that prefix is handed to the
                downloader. Default: False.
            file_name (str, optional): Name for the downloaded file. The file
                name taken from ``url`` is used if not set. Default: None.

        Returns:
            The object produced by ``torch.load`` (or by the legacy zip
            loader).

        Example:
            >>> url = ('https://s3.amazonaws.com/pytorch/models/resnet18-5c106'
            ...        'cde.pth')
            >>> state_dict = torch.hub.load_state_dict_from_url(url)
        """
        # Tell users of the old environment variable to migrate.
        if os.getenv('TORCH_MODEL_ZOO'):
            warnings.warn(
                'TORCH_MODEL_ZOO is deprecated, please use env '
                'TORCH_HOME instead', DeprecationWarning)

        if model_dir is None:
            model_dir = os.path.join(_get_torch_home(), 'checkpoints')
        mkdir_or_exist(model_dir)

        # The URL's basename is the default cache name; ``file_name`` wins.
        url_basename = os.path.basename(urlparse(url).path)
        filename = url_basename if file_name is None else file_name
        cached_file = os.path.join(model_dir, filename)

        if not os.path.exists(cached_file):
            sys.stderr.write('Downloading: "{}" to {}\n'.format(
                url, cached_file))
            hash_prefix = None
            if check_hash:
                matched = HASH_REGEX.search(filename)
                if matched:
                    hash_prefix = matched.group(1)
            download_url_to_file(
                url, cached_file, hash_prefix, progress=progress)

        if _is_legacy_zip_format(cached_file):
            return _legacy_zip_load(cached_file, model_dir, map_location)

        try:
            return torch.load(cached_file, map_location=map_location)
        except RuntimeError as error:
            # Old torch versions cannot read the newer zip-based checkpoints;
            # add a hint before propagating the original error.
            too_old = digit_version(TORCH_VERSION) < digit_version('1.5.0')
            if too_old:
                warnings.warn(
                    f'If the error is the same as "{cached_file} is a zip '
                    'archive (did you mean to use torch.jit.load()?)", you can'
                    ' upgrade your torch to 1.5.0 or higher (current torch '
                    f'version is {TORCH_VERSION}). The error was raised '
                    ' because the checkpoint was saved in torch>=1.6.0 but '
                    'loaded in torch<1.5.')
            raise error
else:
    from torch.utils.model_zoo import load_url  # noqa: F401


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/utils/logging.py
================================================
# # Copyright (c) OpenMMLab. All rights reserved.
# import logging
#
# import torch.distributed as dist
#
# logger_initialized = {}
#
#
# def get_logger(name, log_file=None, log_level=logging.INFO, file_mode='w'):
#     """Initialize and get a logger by name.
#
#     If the logger has not been initialized, this method will initialize the
#     logger by adding one or two handlers, otherwise the initialized logger will
#     be directly returned. During initialization, a StreamHandler will always be
#     added. If `log_file` is specified and the process rank is 0, a FileHandler
#     will also be added.
#
#     Args:
#         name (str): Logger name.
#         log_file (str | None): The log filename. If specified, a FileHandler
#             will be added to the logger.
#         log_level (int): The logger level. Note that only the process of
#             rank 0 is affected, and other processes will set the level to
#             "Error" thus be silent most of the time.
#         file_mode (str): The file mode used in opening log file.
#             Defaults to 'w'.
#
#     Returns:
#         logging.Logger: The expected logger.
#     """
#     logger = logging.getLogger(name)
#     if name in logger_initialized:
#         return logger
#     # handle hierarchical names
#     # e.g., logger "a" is initialized, then logger "a.b" will skip the
#     # initialization since it is a child of "a".
#     for logger_name in logger_initialized:
#         if name.startswith(logger_name):
#             return logger
#
#     # handle duplicate logs to the console
#     # Starting in 1.8.0, PyTorch DDP attaches a StreamHandler <stderr> (NOTSET)
#     # to the root logger. As logger.propagate is True by default, this root
#     # level handler causes logging messages from rank>0 processes to
#     # unexpectedly show up on the console, creating much unwanted clutter.
#     # To fix this issue, we set the root logger's StreamHandler, if any, to log
#     # at the ERROR level.
#     for handler in logger.root.handlers:
#         if type(handler) is logging.StreamHandler:
#             handler.setLevel(logging.ERROR)
#
#     stream_handler = logging.StreamHandler()
#     handlers = [stream_handler]
#
#     if dist.is_available() and dist.is_initialized():
#         rank = dist.get_rank()
#     else:
#         rank = 0
#
#     # only rank 0 will add a FileHandler
#     if rank == 0 and log_file is not None:
#         # Here, the default behaviour of the official logger is 'a'. Thus, we
#         # provide an interface to change the file mode to the default
#         # behaviour.
#         file_handler = logging.FileHandler(log_file, file_mode)
#         handlers.append(file_handler)
#
#     formatter = logging.Formatter(
#         '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
#     for handler in handlers:
#         handler.setFormatter(formatter)
#         handler.setLevel(log_level)
#         logger.addHandler(handler)
#
#     if rank == 0:
#         logger.setLevel(log_level)
#     else:
#         logger.setLevel(logging.ERROR)
#
#     logger_initialized[name] = True
#
#     return logger
#
#
# def print_log(msg, logger=None, level=logging.INFO):
#     """Print a log message.
#
#     Args:
#         msg (str): The message to be logged.
#         logger (logging.Logger | str | None): The logger to be used.
#             Some special loggers are:
#             - "silent": no message will be printed.
#             - other str: the logger obtained with `get_root_logger(logger)`.
#             - None: The `print()` method will be used to print log messages.
#         level (int): Logging level. Only available when `logger` is a Logger
#             object or "root".
#     """
#     if logger is None:
#         print(msg)
#     elif isinstance(logger, logging.Logger):
#         logger.log(level, msg)
#     elif logger == 'silent':
#         pass
#     elif isinstance(logger, str):
#         _logger = get_logger(logger)
#         _logger.log(level, msg)
#     else:
#         raise TypeError(
#             'logger should be either a logging.Logger object, str, '
#             f'"silent" or None, but got {type(logger)}')


# GPL License
# Copyright (C) 2021 , UESTC
# All Rights Reserved
#
# @Time    : 2022/1/24 11:03
# @Author  : Xiao Wu
# @reference:
# Copyright (c) OpenMMLab. All rights reserved.
import json
from collections import defaultdict
import logging
import os
import functools
import torch.distributed as dist
import colorlog
import time
from pathlib import Path

# Registry of logger names that ``setup_logger`` has already configured
# (name -> True). ``get_logger`` consults it before doing any set-up.
logger_initialized = {}

# Colour used for each logging level; passed as ``log_colors`` to
# ``colorlog.ColoredFormatter`` in ``setup_logger``.
log_colors_config = {
    'DEBUG': 'cyan',
    'INFO': 'white',
    'WARNING': 'yellow',
    'ERROR': 'red',
    'CRITICAL': 'red',
}


# def get_root_logger(name, log_file=None, log_level=logging.INFO):
#     return get_logger('mmcls', log_file, log_level)
def get_root_logger(name=None, cfg=None, cfg_name=None, log_level=logging.INFO):
    """Thin wrapper around ``get_logger``.

    Args:
        name (str | None): Logger name.
        cfg: Experiment configuration forwarded to ``get_logger``.
        cfg_name (str | None): Configuration file name forwarded to
            ``get_logger``.
        log_level (int): Logging level forwarded to ``get_logger``.

    Returns:
        Whatever ``get_logger`` returns for these arguments.
    """
    # ``get_logger``'s fourth positional parameter is ``phase``, so the level
    # must be passed by keyword; passing it positionally would put the
    # numeric level into the log file name.
    return get_logger(name, cfg, cfg_name, log_level=log_level)
# TODO: deprecated.
# Behaves the same as "get_root_logger" with name=None.
def create_logger(cfg=None, cfg_name=None, dist_print=0, log_level=logging.INFO):
    """Call ``get_logger`` for the unnamed (``name=None``) logger.

    Args:
        cfg: Experiment configuration forwarded to ``get_logger``.
        cfg_name (str | None): Configuration file name forwarded to
            ``get_logger``.
        dist_print: Unused; kept so existing callers keep working.
        log_level (int): Logging level forwarded to ``get_logger``.

    Returns:
        Whatever ``get_logger`` returns for these arguments.
    """
    # ``get_logger``'s fourth positional parameter is ``phase``, so the level
    # must be passed by keyword; passing it positionally would put the
    # numeric level into the log file name.
    return get_logger(None, cfg, cfg_name, log_level=log_level)

@functools.lru_cache()  # so that calling setup_logger multiple times won't add many handlers
def setup_logger(name, final_log_file, color=True):
    """Configure file logging plus a coloured console handler for ``name``.

    Steps, in order:

    1. ``logging.basicConfig`` is called with ``final_log_file`` as the
       file name (backslashes replaced by ``/``), format ``%(message)s`` and
       level INFO.
    2. Every handler on the root logger whose exact type is
       ``logging.StreamHandler`` is set to level ERROR.
    3. A ``colorlog.StreamHandler`` with a ``colorlog.ColoredFormatter`` at
       level INFO is added to the logger named ``name``.
    4. ``name`` is recorded in ``logger_initialized``.

    Because of ``functools.lru_cache``, a repeated call with the same
    arguments returns the cached logger without running the body again.

    Args:
        name (str | None): Name passed to ``logging.getLogger``.
        final_log_file (str | pathlib.Path): Path of the log file.
        color (bool): Not used by the body.

    Returns:
        logging.Logger: The configured logger.
    """
    # LOG_DIR = cfg.log_dir
    # LOG_FOUT = open(final_log_file, 'w')
    # head = '%(asctime)-15s %(message)s'

    logging.basicConfig(filename=str(final_log_file).replace('\\', '/'), format='%(message)s', level=logging.INFO)
    # logger = logging.getLogger()
    # logger.setLevel(logging.INFO)
    # console = logging.StreamHandler()
    # logging.getLogger('').addHandler(console)

    logger = logging.getLogger(name)
    # if name in logger_initialized:
    #     return logger

    # Demote plain stream handlers already attached to the root logger so
    # they only emit ERROR and above. The exact-type check leaves subclasses
    # (e.g. file handlers) untouched.
    for handler in logger.root.handlers:
        if type(handler) is logging.StreamHandler:
            handler.setLevel(logging.ERROR)

    # stream_handler = logging.StreamHandler()
    console = colorlog.StreamHandler()
    handlers = [console]

    # logger.setLevel(logging.INFO)
    # formatter = colorlog.ColoredFormatter(
    #     '%(log_color)s[%(asctime)s] [%(filename)s:%(lineno)d] [%(module)s:%(funcName)s] [%(levelname)s]- %(message)s',
    #     log_colors=log_colors_config)  # log output format

    # Rank of this process; 0 when torch.distributed is unavailable or not
    # initialized.
    if dist.is_available() and dist.is_initialized():
        rank = dist.get_rank()
    else:
        rank = 0

    if rank == 0:
        # console = colorlog.StreamHandler()
        # console.setLevel(logging.DEBUG)
        # NOTE(review): ``console`` is already in ``handlers``, so on rank 0
        # the list holds the same object twice. This looks redundant
        # (presumably a file handler was meant here) -- confirm intent.
        handlers.append(console)
        # if color:
        #     formatter = _ColorfulFormatter(
        #         colored("%(message)s", "green")
        #     )
        # else:
    formatter = colorlog.ColoredFormatter(
        '%(log_color)s- %(message)s',
        log_colors=log_colors_config)  # log output format

    # console.setFormatter(formatter)
    # logger.addHandler(console)
    for handler in handlers:
        handler.setFormatter(formatter)
        handler.setLevel(logging.INFO)  # log_level
        logger.addHandler(handler)

    # if rank == 0:
    #     logger.setLevel(logging.INFO)  # log_level
    # else:
    #     logger.setLevel(logging.ERROR)

    logger_initialized[name] = True

    return logger


def get_logger(name=None, cfg=None, cfg_name=None, phase='train', log_level=logging.INFO, file_mode='w'):  # log_file=None,
    """Prepare the experiment output directories and, optionally, a logger.

    The result depends on ``cfg`` and on whether ``name`` was already set up:

    * ``cfg is None``: lookup only. ``logging.getLogger(name)`` is returned
      and nothing is created (this is how ``print_log`` calls the function).
    * ``cfg`` given and ``name`` already in ``logger_initialized``: ``None``.
    * ``cfg`` given and ``name`` starts with the name of an initialized
      logger: ``logging.getLogger(name)`` if ``cfg.use_log`` else ``None``.
    * otherwise the directories are created and a 4-tuple is returned.

    Directory layout (``<stem>`` is the base name of ``cfg_name`` up to the
    first dot)::

        <cfg.out_dir>/<dataset>/<cfg.arch>/<stem>/              output dir
        <output dir>/model_<timestamp>/                         training
        <output dir>/<parent dir name of cfg.resume_from>/      cfg.eval

    Args:
        name (str | None): Logger name.
        cfg: Configuration object. The attributes read are ``out_dir``,
            ``dataset`` (a dict with a ``'train'`` and/or ``'val'`` entry),
            ``arch``, ``eval``, ``use_log`` and, depending on the branch,
            ``resume_from`` and ``log_dir``.
        cfg_name (str): Path or name of the configuration file.
        phase (str): Suffix used in the log file name. Defaults to 'train'.
        log_level (int): Currently unused.
        file_mode (str): Currently unused.

    Returns:
        logging.Logger | None | tuple: See the cases above. The tuple is
        ``(logger, output_dir, model_save_dir, tensorboard_log_dir)`` where
        ``logger`` is None unless ``cfg.use_log`` is set, the directories are
        strings, and ``tensorboard_log_dir`` is the string ``'None'`` when no
        tensorboard directory was created.
    """
    if cfg is None:
        # Without a configuration there is nothing to set up; previously only
        # the "already initialized" case worked and every other path crashed
        # on ``cfg.<attr>``.
        return logging.getLogger(name)

    if name in logger_initialized:
        return None
    # handle hierarchical names
    # e.g., logger "a" is initialized, then logger "a.b" will skip the
    # initialization since it is a child of "a".
    for logger_name in logger_initialized:
        # ``None`` (the default name) can appear on either side and has no
        # ``startswith`` / is not a valid prefix, so skip those pairs.
        if name is None or logger_name is None:
            continue
        if name.startswith(logger_name):
            if cfg.use_log:
                return logging.getLogger(name)
            else:
                return None

    logger = None
    tensorboard_log_dir = None
    root_output_dir = Path(cfg.out_dir)
    # set up logger in root_path
    if not root_output_dir.exists():
        print('=> creating {}'.format(root_output_dir))
        root_output_dir.mkdir(parents=True, exist_ok=True)

    dataset = cfg.dataset
    assert isinstance(dataset, dict), \
        f"{dataset}'s type is {type(dataset)}, not a dict. "
    # Prefer the training set name, fall back to the validation set name.
    dataset = dataset.get('train') if dataset.get('train', None) is not None else dataset.get('val')
    model = cfg.arch
    cfg_name = os.path.basename(cfg_name).split('.')[0]
    time_str = time.strftime('%Y-%m-%d-%H-%M-%S')

    # store all output except tb_log file
    final_output_dir = root_output_dir / dataset / model / cfg_name
    if cfg.eval:
        # Reuse the name of the directory holding the checkpoint.
        # NOTE(review): splitting on '/' does not handle Windows separators.
        model_save_tmp = os.path.dirname(cfg.resume_from).split('/')[-1]
    else:
        model_save_tmp = "model_{}".format(time_str)

    model_save_dir = final_output_dir / model_save_tmp
    print_log('=> creating {}'.format(final_output_dir))
    final_output_dir.mkdir(parents=True, exist_ok=True)
    model_save_dir.mkdir(parents=True, exist_ok=True)

    if cfg.use_log:
        cfg_name = '{}_{}'.format(cfg_name, time_str)
        # a logger to save results
        log_file = '{}_{}.log'.format(cfg_name, phase)
        if cfg.eval:
            final_log_file = model_save_dir / log_file
        else:
            final_log_file = final_output_dir / log_file
            # tensorboard_log
            tensorboard_log_dir = root_output_dir / Path(cfg.log_dir) / dataset / model / cfg_name
            print_log('=> creating tfb logs {}'.format(tensorboard_log_dir))
            tensorboard_log_dir.mkdir(parents=True, exist_ok=True)
        logger = setup_logger(name, final_log_file)

    return logger, str(final_output_dir), str(model_save_dir), str(
        tensorboard_log_dir)  # logger,

def print_log(msg, logger=None, level=logging.INFO):
    """Emit ``msg`` through ``print`` or through a logger.

    Args:
        msg (str): The message to emit.
        logger (logging.Logger | str | None): Where the message goes:

            - None: ``print(msg)`` is used.
            - a ``logging.Logger``: the message is logged on it.
            - "silent": the message is dropped.
            - any other str: the logger returned by ``get_logger(logger)``
              is used.
        level (int): Logging level; only used when the message goes to a
            logger.

    Raises:
        TypeError: If ``logger`` is none of the accepted kinds.
    """
    if logger is None:
        print(msg)
        return
    if isinstance(logger, logging.Logger):
        logger.log(level, msg)
        return
    if logger == 'silent':
        return
    if not isinstance(logger, str):
        raise TypeError(
            'logger should be either a logging.Logger object, str, '
            f'"silent" or None, but got {type(logger)}')
    # Any remaining string is treated as a logger name.
    named_logger = get_logger(logger)
    named_logger.log(level, msg)


def load_json_log(json_log):
    """Load a JSON-lines log file and group its records by epoch.

    Each non-blank line of the file is parsed as one JSON object. Records
    without an ``epoch`` field are ignored, and blank lines are skipped
    rather than being handed to the JSON parser.

    Args:
        json_log (str): The path of the json log file.

    Returns:
        dict[int, dict[str, list]]:
            Key is the epoch, value is a sub dict. The keys in each sub dict
            are different metrics, e.g. memory, bbox_mAP, and the value is a
            list of corresponding values in all iterations in this epoch.

            .. code-block:: python

                # An example output
                {
                    1: {'iter': [100, 200, 300], 'loss': [6.94, 6.73, 6.53]},
                    2: {'iter': [100, 200, 300], 'loss': [6.33, 6.20, 6.07]},
                    ...
                }
    """
    log_dict = dict()
    with open(json_log, 'r') as log_file:
        for line in log_file:
            line = line.strip()
            if not line:
                # e.g. an empty line left at the end of the file
                continue
            log = json.loads(line)
            # skip lines without `epoch` field
            if 'epoch' not in log:
                continue
            epoch = log.pop('epoch')
            if epoch not in log_dict:
                log_dict[epoch] = defaultdict(list)
            for k, v in log.items():
                log_dict[epoch][k].append(v)
    return log_dict


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/utils/misc.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import collections.abc
import functools
import itertools
import subprocess
import warnings
from collections import abc
from importlib import import_module
from inspect import getfullargspec
from itertools import repeat


# From PyTorch internals
def _ntuple(n):
    """Create a converter that expands a scalar into an ``n``-tuple.

    Args:
        n (int): Length of the tuple produced for non-iterable inputs.

    Returns:
        callable: ``parse(x)`` returning ``x`` unchanged when it is iterable,
        otherwise a tuple holding ``x`` repeated ``n`` times.
    """

    def parse(x):
        # Anything iterable (strings included) is passed through untouched.
        already_iterable = isinstance(x, collections.abc.Iterable)
        return x if already_iterable else tuple(repeat(x, n))

    return parse


# Ready-made converters for the common tuple sizes.
to_1tuple = _ntuple(1)
to_2tuple = _ntuple(2)
to_3tuple = _ntuple(3)
to_4tuple = _ntuple(4)
to_ntuple = _ntuple


def is_str(x):
    """Return whether the input is a ``str`` instance.

    Note: This function is deprecated since Python 2 is no longer supported;
    it is equivalent to ``isinstance(x, str)``.
    """
    return isinstance(x, str)


def import_modules_from_strings(imports, allow_failed_imports=False):
    """Import modules from the given list of strings.

    Args:
        imports (list | str | None): The given module names to be imported.
        allow_failed_imports (bool): If True, a module that fails to import
            yields None (with a ``UserWarning``). Otherwise an ImportError
            naming the module is raised. Default: False.

    Returns:
        list[module] | module | None: The imported modules. A single module
        is returned when ``imports`` is a string, and None when ``imports``
        is empty or None.

    Raises:
        TypeError: If ``imports`` is neither a list nor a str, or if one of
            its items is not a str.
        ImportError: If a module cannot be imported and
            ``allow_failed_imports`` is False.

    Examples:
        >>> osp, sys = import_modules_from_strings(
        ...     ['os.path', 'sys'])
        >>> import os.path as osp_
        >>> import sys as sys_
        >>> assert osp == osp_
        >>> assert sys == sys_
    """
    if not imports:
        return
    single_import = False
    if isinstance(imports, str):
        single_import = True
        imports = [imports]
    if not isinstance(imports, list):
        raise TypeError(
            f'custom_imports must be a list but got type {type(imports)}')
    imported = []
    for imp in imports:
        if not isinstance(imp, str):
            raise TypeError(
                f'{imp} is of type {type(imp)} and cannot be imported.')
        try:
            imported_tmp = import_module(imp)
        except ImportError as err:
            if allow_failed_imports:
                warnings.warn(f'{imp} failed to import and is ignored.',
                              UserWarning)
                imported_tmp = None
            else:
                # Name the offending module and keep the original error as
                # the cause instead of raising a message-less ImportError.
                raise ImportError(f'Failed to import {imp}') from err
        imported.append(imported_tmp)
    if single_import:
        imported = imported[0]
    return imported


def iter_cast(inputs, dst_type, return_type=None):
    """Convert every element of an iterable to ``dst_type``.

    Args:
        inputs (Iterable): The input object.
        dst_type (type): Destination type.
        return_type (type, optional): Container type applied to the converted
            elements. When omitted, a lazy iterator is returned.

    Returns:
        iterator or specified type: The converted object.

    Raises:
        TypeError: If ``inputs`` is not iterable or ``dst_type`` is not a
            type.
    """
    if not isinstance(inputs, abc.Iterable):
        raise TypeError('inputs must be an iterable object')
    if not isinstance(dst_type, type):
        raise TypeError('"dst_type" must be a valid type')

    converted = map(dst_type, inputs)
    return converted if return_type is None else return_type(converted)


def list_cast(inputs, dst_type):
    """Cast elements of an iterable object into a list of some type.

    Equivalent to ``iter_cast(inputs, dst_type, return_type=list)``.

    Args:
        inputs (Iterable): The input object.
        dst_type (type): Destination type of each element.

    Returns:
        list: The converted elements.
    """
    return iter_cast(inputs, dst_type, return_type=list)


def tuple_cast(inputs, dst_type):
    """Cast elements of an iterable object into a tuple of some type.

    Equivalent to ``iter_cast(inputs, dst_type, return_type=tuple)``.

    Args:
        inputs (Iterable): The input object.
        dst_type (type): Destination type of each element.

    Returns:
        tuple: The converted elements.
    """
    return iter_cast(inputs, dst_type, return_type=tuple)


def is_seq_of(seq, expected_type, seq_type=None):
    """Check that ``seq`` is a sequence whose items all have a given type.

    Args:
        seq (Sequence): The sequence to be checked.
        expected_type (type): Expected type of sequence items.
        seq_type (type, optional): Expected sequence type. When omitted, any
            ``collections.abc.Sequence`` is accepted.

    Returns:
        bool: Whether the sequence is valid.
    """
    if seq_type is not None:
        assert isinstance(seq_type, type)
    container_type = abc.Sequence if seq_type is None else seq_type
    if not isinstance(seq, container_type):
        return False
    return all(isinstance(item, expected_type) for item in seq)


def is_list_of(seq, expected_type):
    """Check whether it is a list of some type.

    Equivalent to ``is_seq_of(seq, expected_type, seq_type=list)``.

    Args:
        seq: The object to be checked.
        expected_type (type): Expected type of the items.

    Returns:
        bool: Whether ``seq`` is a list whose items all match.
    """
    return is_seq_of(seq, expected_type, seq_type=list)


def is_tuple_of(seq, expected_type):
    """Check whether it is a tuple of some type.

    Equivalent to ``is_seq_of(seq, expected_type, seq_type=tuple)``.

    Args:
        seq: The object to be checked.
        expected_type (type): Expected type of the items.

    Returns:
        bool: Whether ``seq`` is a tuple whose items all match.
    """
    return is_seq_of(seq, expected_type, seq_type=tuple)


def slice_list(in_list, lens):
    """Split a list into consecutive sub lists of the given lengths.

    Args:
        in_list (list): The list to be sliced.
        lens (int or list): Length of each output list. An int means equal
            chunks of that size and must divide ``len(in_list)``.

    Returns:
        list: A list of sliced list.

    Raises:
        TypeError: If ``lens`` is neither an int nor a list.
        ValueError: If the lengths do not add up to ``len(in_list)``.
    """
    total = len(in_list)
    if isinstance(lens, int):
        assert total % lens == 0
        lens = [lens] * int(total / lens)
    if not isinstance(lens, list):
        raise TypeError('"indices" must be an integer or a list of integers')
    if sum(lens) != total:
        raise ValueError('sum of lens and list length does not '
                         f'match: {sum(lens)} != {len(in_list)}')
    chunks = []
    start = 0
    for size in lens:
        stop = start + size
        chunks.append(in_list[start:stop])
        start = stop
    return chunks


def concat_list(in_list):
    """Flatten one level of nesting.

    Args:
        in_list (list): The list of list to be merged.

    Returns:
        list: The concatenated flat list.
    """
    flattened = itertools.chain.from_iterable(in_list)
    return list(flattened)


def check_prerequisites(
        prerequisites,
        checker,
        msg_tmpl='Prerequisites "{}" are required in method "{}" but not '
        'found, please install them first.'):  # yapf: disable
    """Build a decorator that verifies prerequisites before each call.

    Args:
        prerequisites (str or list[str]): Prerequisites to be checked.
        checker (callable): Called with one prerequisite; returns True if it
            is met, False otherwise.
        msg_tmpl (str): Message template with two placeholders: the missing
            prerequisites and the name of the decorated function.

    Returns:
        decorator: A decorator. The decorated function prints the formatted
        message and raises ``RuntimeError`` when at least one prerequisite is
        missing; otherwise it calls through to the original function.
    """

    def wrap(func):

        @functools.wraps(func)
        def wrapped_func(*args, **kwargs):
            if isinstance(prerequisites, str):
                requirements = [prerequisites]
            else:
                requirements = prerequisites
            missing = [item for item in requirements if not checker(item)]
            if not missing:
                return func(*args, **kwargs)
            print(msg_tmpl.format(', '.join(missing), func.__name__))
            raise RuntimeError('Prerequisites not meet.')

        return wrapped_func

    return wrap


def _check_py_package(package):
    try:
        import_module(package)
    except ImportError:
        return False
    else:
        return True


def _check_executable(cmd):
    if subprocess.call(f'which {cmd}', shell=True) != 0:
        return False
    else:
        return True


def requires_package(prerequisites):
    """A decorator to check if some python packages are installed.

    Built on ``check_prerequisites`` with ``_check_py_package`` as checker:
    calling the decorated function prints a message and raises
    ``RuntimeError`` if any listed package cannot be imported.

    Args:
        prerequisites (str or list[str]): Package name(s) to check.

    Example:
        >>> @requires_package('numpy')
        ... def func(arg1, args):
        ...     return numpy.zeros(1)
        >>> func(1, 2)
        array([0.])
        >>> @requires_package(['numpy', 'non_package'])
        ... def func(arg1, args):
        ...     return numpy.zeros(1)
        >>> func(1, 2)
        RuntimeError: Prerequisites not meet.
    """
    return check_prerequisites(prerequisites, checker=_check_py_package)


def requires_executable(prerequisites):
    """A decorator to check if some executable files are installed.

    Built on ``check_prerequisites`` with ``_check_executable`` as checker:
    calling the decorated function prints a message and raises
    ``RuntimeError`` if any listed executable is not found.

    Args:
        prerequisites (str or list[str]): Executable name(s) to check.

    Example:
        >>> @requires_executable('ffmpeg')
        ... def func(arg1, args):
        ...     print(1)
        >>> func(1, 2)
        1
    """
    return check_prerequisites(prerequisites, checker=_check_executable)


def deprecated_api_warning(name_dict, cls_name=None):
    """Decorator factory that warns about deprecated argument names.

    Deprecated keyword arguments are renamed to their replacement before the
    decorated function is called. For positional arguments only a warning is
    issued; the values are passed through unchanged.

    Args:
        name_dict (dict):
            key (str): Deprecated argument names.
            val (str): Expected argument names.
        cls_name (str, optional): Class name prefixed to the function name in
            the warning text.

    Returns:
        func: New function.
    """

    def api_warning_wrapper(old_func):

        @functools.wraps(old_func)
        def new_func(*args, **kwargs):
            # Inspect the decorated function's signature on every call.
            spec = getfullargspec(old_func)
            func_name = old_func.__name__
            if cls_name is not None:
                func_name = f'{cls_name}.{func_name}'

            if args:
                # Names of the parameters that were filled positionally.
                positional_names = spec.args[:len(args)]
                for src_arg_name, dst_arg_name in name_dict.items():
                    if src_arg_name not in positional_names:
                        continue
                    warnings.warn(
                        f'"{src_arg_name}" is deprecated in '
                        f'`{func_name}`, please use "{dst_arg_name}" '
                        'instead', DeprecationWarning)
                    position = positional_names.index(src_arg_name)
                    positional_names[position] = dst_arg_name

            if kwargs:
                for src_arg_name, dst_arg_name in name_dict.items():
                    if src_arg_name not in kwargs:
                        continue

                    # Old and new spelling together would be ambiguous.
                    assert dst_arg_name not in kwargs, (
                        f'The expected behavior is to replace '
                        f'the deprecated key `{src_arg_name}` to '
                        f'new key `{dst_arg_name}`, but got them '
                        f'in the arguments at the same time, which '
                        f'is confusing. `{src_arg_name} will be '
                        f'deprecated in the future, please '
                        f'use `{dst_arg_name}` instead.')

                    warnings.warn(
                        f'"{src_arg_name}" is deprecated in '
                        f'`{func_name}`, please use "{dst_arg_name}" '
                        'instead', DeprecationWarning)
                    kwargs[dst_arg_name] = kwargs.pop(src_arg_name)

            # Call through with the (possibly renamed) keyword arguments.
            return old_func(*args, **kwargs)

        return new_func

    return api_warning_wrapper


def is_method_overridden(method, base_class, derived_class):
    """Tell whether ``derived_class`` replaces a method of ``base_class``.

    Args:
        method (str): the method name to check.
        base_class (type): the class of the base class.
        derived_class (type | Any): the class or instance of the derived class.

    Returns:
        bool: True if the attribute looked up on the derived class differs
        from the one looked up on the base class.
    """
    assert isinstance(base_class, type), \
        "base_class doesn't accept instance, Please pass class instead."

    # Accept an instance as well: compare against its class.
    if isinstance(derived_class, type):
        derived_type = derived_class
    else:
        derived_type = derived_class.__class__

    base_impl = getattr(base_class, method)
    derived_impl = getattr(derived_type, method)
    return derived_impl != base_impl


def has_method(obj: object, method: str) -> bool:
    """Check whether the object has a callable attribute of the given name.

    Args:
        obj (object): The object to check.
        method (str): The method name to check.

    Returns:
        bool: True if the object has the method else False.
    """
    return hasattr(obj, method) and callable(getattr(obj, method))


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/utils/parrots_jit.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import os

from .parrots_wrapper import TORCH_VERSION

# Value of the ``PARROTS_JIT_OPTION`` environment variable (None when unset);
# the real parrots JIT is only imported when it equals 'ON'.
parrots_jit_option = os.getenv('PARROTS_JIT_OPTION')

if TORCH_VERSION == 'parrots' and parrots_jit_option == 'ON':
    from parrots.jit import pat as jit
else:

    def jit(func=None,
            check_input=None,
            full_shape=True,
            derivate=False,
            coderize=False,
            optimize=False):
        """No-op fallback used when the parrots JIT is not enabled.

        Supports both decorator spellings: ``@jit`` returns the function
        itself, while ``@jit(...)`` returns a decorator whose result simply
        forwards every call to the decorated function.

        Args:
            func (callable, optional): The function to decorate when used as
                a bare decorator.
            check_input, full_shape, derivate, coderize, optimize: Accepted
                for signature compatibility and ignored by this fallback.

        Returns:
            callable: ``func`` itself, or a decorator when ``func`` is None.
        """
        # Local import: this fallback only needs it for ``wraps``.
        import functools

        def wrapper(func):

            # Keep the decorated function's name, docstring and other
            # metadata on the pass-through wrapper.
            @functools.wraps(func)
            def wrapper_inner(*args, **kargs):
                return func(*args, **kargs)

            return wrapper_inner

        if func is None:
            return wrapper
        else:
            return func


if TORCH_VERSION == 'parrots':
    from parrots.utils.tester import skip_no_elena
else:

    def skip_no_elena(func):
        """Pass-through fallback for ``parrots.utils.tester.skip_no_elena``.

        Args:
            func (callable): The function to decorate.

        Returns:
            callable: A wrapper that forwards every call to ``func``.
        """
        # Local import: this fallback only needs it for ``wraps``.
        import functools

        # Keep the decorated function's name, docstring and other metadata
        # on the wrapper.
        @functools.wraps(func)
        def wrapper(*args, **kargs):
            return func(*args, **kargs)

        return wrapper


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/utils/parrots_wrapper.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
from functools import partial

import torch

# Version string reported by the imported ``torch`` module. The helpers below
# compare it with the literal 'parrots' to select the parrots code paths.
TORCH_VERSION = torch.__version__


def is_rocm_pytorch() -> bool:
    """Report whether the running PyTorch is a ROCm (HIP) build.

    Returns:
        bool: True only when the backend is not parrots, ``ROCM_HOME`` can be
        imported from ``torch.utils.cpp_extension``, and both
        ``torch.version.hip`` and ``ROCM_HOME`` are not None. False otherwise.
    """
    if TORCH_VERSION == 'parrots':
        return False
    try:
        from torch.utils.cpp_extension import ROCM_HOME
    except ImportError:
        # ROCM_HOME is not provided by this torch: not a ROCm build.
        return False
    return torch.version.hip is not None and ROCM_HOME is not None


def _get_cuda_home():
    """Return the toolkit home directory for the active backend.

    Returns:
        The ``CUDA_HOME`` exported by parrots when running on parrots, the
        ``ROCM_HOME`` of torch when :func:`is_rocm_pytorch` is true, and the
        ``CUDA_HOME`` of torch otherwise.
    """
    if TORCH_VERSION == 'parrots':
        from parrots.utils.build_extension import CUDA_HOME
        return CUDA_HOME
    if is_rocm_pytorch():
        # ROCm builds report their toolkit location through ROCM_HOME.
        from torch.utils.cpp_extension import ROCM_HOME
        return ROCM_HOME
    from torch.utils.cpp_extension import CUDA_HOME
    return CUDA_HOME


def get_build_config():
    """Return the build configuration of the active backend.

    Returns:
        The result of ``torch.__config__.show()`` for PyTorch, or of
        ``parrots.config.get_build_info()`` when running on parrots.
    """
    if TORCH_VERSION != 'parrots':
        return torch.__config__.show()
    from parrots.config import get_build_info
    return get_build_info()


def _get_conv():
    if TORCH_VERSION == 'parrots':
        from parrots.nn.modules.conv import _ConvNd, _ConvTransposeMixin
    else:
        from torch.nn.modules.conv import _ConvNd, _ConvTransposeMixin
    return _ConvNd, _ConvTransposeMixin


def _get_dataloader():
    if TORCH_VERSION == 'parrots':
        from torch.utils.data import DataLoader, PoolDataLoader
    else:
        from torch.utils.data import DataLoader
        PoolDataLoader = DataLoader
    return DataLoader, PoolDataLoader


def _get_extension():
    if TORCH_VERSION == 'parrots':
        from parrots.utils.build_extension import BuildExtension, Extension
        CppExtension = partial(Extension, cuda=False)
        CUDAExtension = partial(Extension, cuda=True)
    else:
        from torch.utils.cpp_extension import (BuildExtension, CppExtension,
                                               CUDAExtension)
    return BuildExtension, CppExtension, CUDAExtension


def _get_pool():
    if TORCH_VERSION == 'parrots':
        from parrots.nn.modules.pool import (_AdaptiveAvgPoolNd,
                                             _AdaptiveMaxPoolNd, _AvgPoolNd,
                                             _MaxPoolNd)
    else:
        from torch.nn.modules.pooling import (_AdaptiveAvgPoolNd,
                                              _AdaptiveMaxPoolNd, _AvgPoolNd,
                                              _MaxPoolNd)
    return _AdaptiveAvgPoolNd, _AdaptiveMaxPoolNd, _AvgPoolNd, _MaxPoolNd


def _get_norm():
    if TORCH_VERSION == 'parrots':
        from parrots.nn.modules.batchnorm import _BatchNorm, _InstanceNorm
        SyncBatchNorm_ = torch.nn.SyncBatchNorm2d
    else:
        from torch.nn.modules.instancenorm import _InstanceNorm
        from torch.nn.modules.batchnorm import _BatchNorm
        SyncBatchNorm_ = torch.nn.SyncBatchNorm
    return _BatchNorm, _InstanceNorm, SyncBatchNorm_


# Resolve the backend-specific classes once, at import time, and expose them
# as module-level names so other modules can import them from here.
_ConvNd, _ConvTransposeMixin = _get_conv()
DataLoader, PoolDataLoader = _get_dataloader()
BuildExtension, CppExtension, CUDAExtension = _get_extension()
_BatchNorm, _InstanceNorm, SyncBatchNorm_ = _get_norm()
_AdaptiveAvgPoolNd, _AdaptiveMaxPoolNd, _AvgPoolNd, _MaxPoolNd = _get_pool()


class SyncBatchNorm(SyncBatchNorm_):
    """Backend-resolved SyncBatchNorm with a parrots-specific input check."""

    def _check_input_dim(self, input):
        """Validate the dimensionality of ``input``.

        On parrots the input must have at least 2 dimensions; on other
        backends the check is delegated to the parent class.

        Raises:
            ValueError: On parrots, if ``input.dim()`` is smaller than 2.
        """
        if TORCH_VERSION != 'parrots':
            super()._check_input_dim(input)
            return
        if input.dim() < 2:
            raise ValueError(
                f'expected at least 2D input (got {input.dim()}D input)')


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/utils/path.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import os
import os.path as osp
from pathlib import Path

from .misc import is_str


def is_filepath(x):
    """Tell whether ``x`` can be used as a file path.

    Args:
        x: The object to check.

    Returns:
        The (truthy) result of ``is_str(x)`` when it holds, otherwise whether
        ``x`` is a :class:`pathlib.Path` instance.
    """
    string_like = is_str(x)
    return string_like or isinstance(x, Path)


def fopen(filepath, *args, **kwargs):
    """Open a file given either a string path or a :class:`pathlib.Path`.

    Args:
        filepath (str | Path): Location of the file.
        *args: Positional arguments forwarded to ``open`` / ``Path.open``.
        **kwargs: Keyword arguments forwarded to ``open`` / ``Path.open``.

    Returns:
        The file object returned by ``open`` or ``Path.open``.

    Raises:
        ValueError: If ``filepath`` is neither a string nor a ``Path``.
    """
    if is_str(filepath):
        return open(filepath, *args, **kwargs)
    if not isinstance(filepath, Path):
        raise ValueError('`filepath` should be a string or a Path')
    return filepath.open(*args, **kwargs)


def check_file_exist(filename, msg_tmpl='file "{}" does not exist'):
    """Raise if ``filename`` is not an existing regular file.

    Args:
        filename (str): Path to check with ``os.path.isfile``.
        msg_tmpl (str): Template of the error message; it is formatted with
            ``filename`` as its single positional argument.

    Raises:
        FileNotFoundError: If ``filename`` is not an existing file.
    """
    if osp.isfile(filename):
        return
    raise FileNotFoundError(msg_tmpl.format(filename))


def mkdir_or_exist(dir_name, mode=0o777):
    """Create ``dir_name`` (and its parents) unless it already exists.

    Args:
        dir_name (str): Directory to create; ``~`` is expanded. An empty
            string is a no-op.
        mode (int): Permission mode forwarded to ``os.makedirs``.
    """
    if dir_name != '':
        expanded = osp.expanduser(dir_name)
        os.makedirs(expanded, mode=mode, exist_ok=True)


def symlink(src, dst, overwrite=True, **kwargs):
    """Create a symbolic link ``dst`` pointing to ``src``.

    Args:
        src (str): Target the link points to.
        dst (str): Path of the link to create.
        overwrite (bool): If True and ``dst`` already exists (including as a
            broken link), it is removed first.
        **kwargs: Extra keyword arguments forwarded to ``os.symlink``.
    """
    dst_present = os.path.lexists(dst)
    if dst_present and overwrite:
        os.remove(dst)
    os.symlink(src, dst, **kwargs)


def scandir(dir_path, suffix=None, recursive=False, case_sensitive=True):
    """Lazily list the files of a directory, optionally filtered by suffix.

    Files whose name starts with ``.`` are never yielded. Argument validation
    happens immediately; the directory itself is only read while iterating.

    Args:
        dir_path (str | :obj:`Path`): Directory to scan.
        suffix (str | tuple(str), optional): Keep only files whose relative
            path ends with this suffix (or any of them). Default: None.
        recursive (bool, optional): Also descend into sub-directories.
            Default: False.
        case_sensitive (bool, optional): When False, the suffix comparison
            ignores case. Default: True.

    Returns:
        A generator yielding the matching file paths relative to
        ``dir_path``.

    Raises:
        TypeError: If ``dir_path`` is not a str/Path, or ``suffix`` is
            neither None, a str, nor a tuple.
    """
    if not isinstance(dir_path, (str, Path)):
        raise TypeError('"dir_path" must be a string or Path object')
    top_dir = str(dir_path)

    if suffix is not None:
        if not isinstance(suffix, (str, tuple)):
            raise TypeError('"suffix" must be a string or tuple of strings')
        if not case_sensitive:
            # Normalise the suffix once; paths are lowered when compared.
            if isinstance(suffix, str):
                suffix = suffix.lower()
            else:
                suffix = tuple(item.lower() for item in suffix)

    def _matches(rel_path):
        if suffix is None:
            return True
        candidate = rel_path if case_sensitive else rel_path.lower()
        return candidate.endswith(suffix)

    def _walk(current_dir):
        for entry in os.scandir(current_dir):
            visible_file = (not entry.name.startswith('.')
                            and entry.is_file())
            if visible_file:
                rel_path = osp.relpath(entry.path, top_dir)
                if _matches(rel_path):
                    yield rel_path
            elif recursive and os.path.isdir(entry.path):
                # descend into the sub-directory
                yield from _walk(entry.path)

    return _walk(top_dir)


def find_vcs_root(path, markers=('.git', )):
    """Walk upwards from ``path`` to the first directory holding a marker.

    Args:
        path (str): A directory or file to start from. For a file, the search
            starts in its containing directory.
        markers (list[str], optional): File or directory names that identify
            a root directory.

    Returns:
        The closest directory (starting with ``path`` itself) that contains
        one of the markers, or None when the filesystem root is reached
        without a match.
    """
    start = osp.dirname(path) if osp.isfile(path) else path
    current = osp.abspath(osp.expanduser(start))
    while True:
        for marker in markers:
            if osp.exists(osp.join(current, marker)):
                return current
        parent = osp.split(current)[0]
        if parent == current:
            # the filesystem root is its own parent: nothing left to check
            return None
        current = parent


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/utils/progressbar.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import sys
from collections.abc import Iterable
from multiprocessing import Pool
from shutil import get_terminal_size

from .timer import Timer


class ProgressBar:
    """Text progress bar written to a file-like object.

    Args:
        task_num (int): Total number of tasks. When it is not positive only a
            running counter is printed instead of a bar.
        bar_width (int): Maximum width of the bar in characters.
        start (bool): Whether to print the initial line and start the timer
            on construction.
        file: Writable object exposing ``write`` and ``flush``.
    """

    def __init__(self, task_num=0, bar_width=50, start=True, file=sys.stdout):
        self.completed = 0
        self.task_num = task_num
        self.bar_width = bar_width
        self.file = file
        if start:
            self.start()

    @property
    def terminal_width(self):
        """int: Current terminal width in columns."""
        columns, _ = get_terminal_size()
        return columns

    def start(self):
        """Print the initial progress line and start timing."""
        if self.task_num > 0:
            header = (f'[{" " * self.bar_width}] 0/{self.task_num}, '
                      'elapsed: 0s, ETA:')
        else:
            header = 'completed: 0, elapsed: 0s'
        self.file.write(header)
        self.file.flush()
        self.timer = Timer()

    def update(self, num_tasks=1):
        """Record ``num_tasks`` finished tasks and redraw the progress line.

        Args:
            num_tasks (int): Number of newly completed tasks; must be > 0.
        """
        assert num_tasks > 0
        self.completed += num_tasks
        elapsed = self.timer.since_start()
        fps = self.completed / elapsed if elapsed > 0 else float('inf')
        elapsed_s = int(elapsed + 0.5)
        if self.task_num > 0:
            percentage = self.completed / float(self.task_num)
            eta = int(elapsed * (1 - percentage) / percentage + 0.5)
            # ``{{}}`` leaves a ``{}`` placeholder that receives the bar.
            template = (f'\r[{{}}] {self.completed}/{self.task_num}, '
                        f'{fps:.1f} task/s, elapsed: {elapsed_s}s, '
                        f'ETA: {eta:5}s')

            # Shrink the bar so that the whole line fits the terminal.
            columns = self.terminal_width
            width = min(self.bar_width,
                        int(columns - len(template)) + 2,
                        int(columns * 0.6))
            width = max(2, width)
            filled = int(width * percentage)
            bar = '>' * filled + ' ' * (width - filled)
            self.file.write(template.format(bar))
        else:
            self.file.write(
                f'completed: {self.completed}, elapsed: {elapsed_s}s,'
                f' {fps:.1f} tasks/s')
        self.file.flush()


def track_progress(func, tasks, bar_width=50, file=sys.stdout, **kwargs):
    """Apply ``func`` to every task sequentially while showing progress.

    Args:
        func (callable): Called as ``func(task, **kwargs)`` for each task.
        tasks (list or tuple[Iterable, int]): Either a sized collection of
            tasks, or a ``(tasks, total num)`` pair.
        bar_width (int): Width of progress bar.
        file: Stream the progress bar is written to.
        **kwargs: Extra keyword arguments forwarded to ``func``.

    Returns:
        list: The results of ``func``, in task order.

    Raises:
        TypeError: If ``tasks`` is neither a tuple nor an iterable.
    """
    if isinstance(tasks, tuple):
        assert len(tasks) == 2
        assert isinstance(tasks[0], Iterable)
        assert isinstance(tasks[1], int)
        tasks, task_num = tasks
    elif not isinstance(tasks, Iterable):
        raise TypeError(
            '"tasks" must be an iterable object or a (iterator, int) tuple')
    else:
        task_num = len(tasks)

    progress = ProgressBar(task_num, bar_width, file=file)
    outputs = []
    for item in tasks:
        outputs.append(func(item, **kwargs))
        progress.update()
    # finish the progress line
    progress.file.write('\n')
    return outputs


def init_pool(process_num, initializer=None, initargs=None):
    """Create a :class:`multiprocessing.Pool`.

    Args:
        process_num (int): Number of worker processes.
        initializer (None or callable): Worker initializer passed to ``Pool``.
        initargs (None or tuple): Arguments for ``initializer``; only used
            when ``initializer`` is given.

    Returns:
        multiprocessing.Pool: The created pool.

    Raises:
        TypeError: If ``initializer`` is given and ``initargs`` is neither
            None nor a tuple.
    """
    if initializer is None:
        return Pool(process_num)
    if initargs is None:
        return Pool(process_num, initializer)
    if isinstance(initargs, tuple):
        return Pool(process_num, initializer, initargs)
    raise TypeError('"initargs" must be a tuple')


def track_parallel_progress(func,
                            tasks,
                            nproc,
                            initializer=None,
                            initargs=None,
                            bar_width=50,
                            chunksize=1,
                            skip_first=False,
                            keep_order=True,
                            file=sys.stdout):
    """Track the progress of parallel task execution with a progress bar.

    The built-in :mod:`multiprocessing` module is used for process pools and
    tasks are done with :func:`Pool.imap` or :func:`Pool.imap_unordered`.

    The worker pool is always released: it is closed and joined on success,
    and terminated and joined if anything raises while tasks are processed.

    Args:
        func (callable): The function to be applied to each task.
        tasks (list or tuple[Iterable, int]): A list of tasks or
            (tasks, total num).
        nproc (int): Process (worker) number.
        initializer (None or callable): Refer to :class:`multiprocessing.Pool`
            for details.
        initargs (None or tuple): Refer to :class:`multiprocessing.Pool` for
            details.
        bar_width (int): Width of progress bar.
        chunksize (int): Refer to :class:`multiprocessing.Pool` for details.
        skip_first (bool): Whether to skip the first sample for each worker
            when estimating fps, since the initialization step may take
            longer.
        keep_order (bool): If True, :func:`Pool.imap` is used, otherwise
            :func:`Pool.imap_unordered` is used.
        file: Stream the progress bar is written to.

    Returns:
        list: The task results.

    Raises:
        TypeError: If ``tasks`` is neither a tuple nor an iterable.
    """
    if isinstance(tasks, tuple):
        assert len(tasks) == 2
        assert isinstance(tasks[0], Iterable)
        assert isinstance(tasks[1], int)
        task_num = tasks[1]
        tasks = tasks[0]
    elif isinstance(tasks, Iterable):
        task_num = len(tasks)
    else:
        raise TypeError(
            '"tasks" must be an iterable object or a (iterator, int) tuple')
    pool = init_pool(nproc, initializer, initargs)
    try:
        start = not skip_first
        # The first ``nproc * chunksize`` results are excluded from the bar
        # when ``skip_first`` is set.
        task_num -= nproc * chunksize * int(skip_first)
        prog_bar = ProgressBar(task_num, bar_width, start, file=file)
        results = []
        if keep_order:
            gen = pool.imap(func, tasks, chunksize)
        else:
            gen = pool.imap_unordered(func, tasks, chunksize)
        for result in gen:
            results.append(result)
            if skip_first:
                if len(results) < nproc * chunksize:
                    continue
                elif len(results) == nproc * chunksize:
                    # warm-up finished: start the bar (and its timer) now
                    prog_bar.start()
                    continue
            prog_bar.update()
        prog_bar.file.write('\n')
        pool.close()
    except BaseException:
        # Do not leave worker processes behind when a task (or the progress
        # output) fails or the run is interrupted.
        pool.terminate()
        raise
    finally:
        pool.join()
    return results


def track_iter_progress(tasks, bar_width=50, file=sys.stdout):
    """Iterate over tasks while displaying a progress bar.

    This is a generator: each task is yielded unchanged and the bar advances
    once the consumer asks for the next one.

    Args:
        tasks (list or tuple[Iterable, int]): Either a sized collection of
            tasks, or a ``(tasks, total num)`` pair.
        bar_width (int): Width of progress bar.
        file: Stream the progress bar is written to.

    Yields:
        The tasks, one at a time, in their original order.

    Raises:
        TypeError: If ``tasks`` is neither a tuple nor an iterable.
    """
    if isinstance(tasks, tuple):
        assert len(tasks) == 2
        assert isinstance(tasks[0], Iterable)
        assert isinstance(tasks[1], int)
        tasks, task_num = tasks
    elif not isinstance(tasks, Iterable):
        raise TypeError(
            '"tasks" must be an iterable object or a (iterator, int) tuple')
    else:
        task_num = len(tasks)

    progress = ProgressBar(task_num, bar_width, file=file)
    for item in tasks:
        yield item
        progress.update()
    # finish the progress line
    progress.file.write('\n')


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/utils/registry.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import inspect
import warnings
from functools import partial

from .misc import is_seq_of


def build_from_cfg(cfg, registry, default_args=None):
    """Instantiate the class described by a config dict.

    Args:
        cfg (dict): Config dict. Its "type" entry (a registered name or a
            class) selects what to build; the remaining entries become
            keyword arguments of the constructor.
        registry (:obj:`Registry`): The registry used to resolve string
            types.
        default_args (dict, optional): Fallback values for keys that are
            missing from ``cfg``; may also provide "type".

    Returns:
        object: The constructed object.

    Raises:
        TypeError: If ``cfg`` is not a dict, ``registry`` is not a
            ``Registry``, ``default_args`` is neither a dict nor None, or the
            resolved type is neither a str nor a class.
        KeyError: If no "type" is given, or the type name is not registered.
    """
    if not isinstance(cfg, dict):
        raise TypeError(f'cfg must be a dict, but got {type(cfg)}')
    if 'type' not in cfg and (default_args is None
                              or 'type' not in default_args):
        raise KeyError(
            '`cfg` or `default_args` must contain the key "type", '
            f'but got {cfg}\n{default_args}')
    if not isinstance(registry, Registry):
        raise TypeError('registry must be an mmcv.Registry object, '
                        f'but got {type(registry)}')
    if default_args is not None and not isinstance(default_args, dict):
        raise TypeError('default_args must be a dict or None, '
                        f'but got {type(default_args)}')

    # Work on a copy so that the caller's config is left untouched.
    init_kwargs = cfg.copy()
    if default_args is not None:
        for key, fallback in default_args.items():
            init_kwargs.setdefault(key, fallback)

    requested = init_kwargs.pop('type')
    if isinstance(requested, str):
        target_cls = registry.get(requested)
        if target_cls is None:
            raise KeyError(
                f'{requested} is not in the {registry.name} registry')
    elif inspect.isclass(requested):
        target_cls = requested
    else:
        raise TypeError(
            f'type must be a str or valid type, but got {type(requested)}')

    try:
        return target_cls(**init_kwargs)
    except Exception as e:
        # Normal TypeError does not print class name.
        raise type(e)(f'{target_cls.__name__}: {e}')


class Registry:
    """A registry to map strings to classes.

    Registered object could be built from registry.

    Example:
        >>> MODELS = Registry('models')
        >>> @MODELS.register_module()
        >>> class ResNet:
        >>>     pass
        >>> resnet = MODELS.build(dict(type='ResNet'))

    Please refer to
    https://mmcv.readthedocs.io/en/latest/understand_mmcv/registry.html for
    advanced usage.

    Args:
        name (str): Registry name.
        build_func(func, optional): Build function to construct instance from
            Registry, func:`build_from_cfg` is used if neither ``parent`` or
            ``build_func`` is specified. If ``parent`` is specified and
            ``build_func`` is not given,  ``build_func`` will be inherited
            from ``parent``. Default: None.
        parent (Registry, optional): Parent registry. The class registered in
            children registry could be built from parent. Default: None.
        scope (str, optional): The scope of registry. It is the key to search
            for children registry. If not specified, scope will be the name of
            the package where class is defined, e.g. mmdet, mmcls, mmseg.
            Default: None.
    """

    def __init__(self, name, build_func=None, parent=None, scope=None):
        self._name = name
        self._module_dict = dict()
        self._children = dict()
        # NOTE: infer_scope() inspects the call stack and must be called
        # directly from __init__.
        self._scope = self.infer_scope() if scope is None else scope

        # self.build_func will be set with the following priority:
        # 1. build_func
        # 2. parent.build_func
        # 3. build_from_cfg
        if build_func is None:
            if parent is not None:
                self.build_func = parent.build_func
            else:
                self.build_func = build_from_cfg
        else:
            self.build_func = build_func
        if parent is not None:
            assert isinstance(parent, Registry)
            parent._add_children(self)
            self.parent = parent
        else:
            self.parent = None

    def __len__(self):
        return len(self._module_dict)

    def __contains__(self, key):
        return self.get(key) is not None

    def __repr__(self):
        format_str = self.__class__.__name__ + \
                     f'(name={self._name}, ' \
                     f'items={self._module_dict})'
        return format_str

    @staticmethod
    def infer_scope():
        """Infer the scope of registry.

        The name of the package where registry is defined will be returned.

        Example:
            >>> # in mmdet/models/backbone/resnet.py
            >>> MODELS = Registry('models')
            >>> @MODELS.register_module()
            >>> class ResNet:
            >>>     pass
            The scope of ``ResNet`` will be ``mmdet``.

        Returns:
            str: The inferred scope name.
        """
        # inspect.stack() trace where this function is called, the index-2
        # indicates the frame where `infer_scope()` is called
        filename = inspect.getmodule(inspect.stack()[2][0]).__name__
        split_filename = filename.split('.')
        return split_filename[0]

    @staticmethod
    def split_scope_key(key):
        """Split scope and key.

        The first scope will be split from key.

        Examples:
            >>> Registry.split_scope_key('mmdet.ResNet')
            'mmdet', 'ResNet'
            >>> Registry.split_scope_key('ResNet')
            None, 'ResNet'

        Return:
            tuple[str | None, str]: The former element is the first scope of
            the key, which can be ``None``. The latter is the remaining key.
        """
        split_index = key.find('.')
        if split_index != -1:
            return key[:split_index], key[split_index + 1:]
        else:
            return None, key

    @property
    def name(self):
        """str: Name of the registry."""
        return self._name

    @property
    def scope(self):
        """str: Scope of the registry."""
        return self._scope

    @property
    def module_dict(self):
        """dict: Mapping from registered name to class."""
        return self._module_dict

    @property
    def children(self):
        """dict: Mapping from child scope to child registry."""
        return self._children

    def get(self, key):
        """Get the registry record.

        Args:
            key (str): The class name in string format, optionally prefixed
                with a scope (``'scope.ClassName'``).

        Returns:
            class: The corresponding class, or None when the key (or its
            scope) cannot be resolved.
        """
        scope, real_key = self.split_scope_key(key)
        if scope is None or scope == self._scope:
            # get from self
            if real_key in self._module_dict:
                return self._module_dict[real_key]
            return None

        # get from self._children
        if scope in self._children:
            return self._children[scope].get(real_key)

        # The scope is unknown here. A root registry has nowhere else to
        # look, so the key is simply reported as missing instead of
        # dereferencing a non-existent parent.
        if self.parent is None:
            return None

        # goto root
        parent = self.parent
        while parent.parent is not None:
            parent = parent.parent
        return parent.get(key)

    def build(self, *args, **kwargs):
        """Build an object by calling ``build_func`` with this registry."""
        return self.build_func(*args, **kwargs, registry=self)

    def _add_children(self, registry):
        """Add children for a registry.

        The ``registry`` will be added as children based on its scope.
        The parent registry could build objects from children registry.

        Example:
            >>> models = Registry('models')
            >>> mmdet_models = Registry('models', parent=models)
            >>> @mmdet_models.register_module()
            >>> class ResNet:
            >>>     pass
            >>> resnet = models.build(dict(type='mmdet.ResNet'))
        """

        assert isinstance(registry, Registry)
        assert registry.scope is not None
        assert registry.scope not in self.children, \
            f'scope {registry.scope} exists in {self.name} registry'
        self.children[registry.scope] = registry

    def _register_module(self, module_class, module_name=None, force=False):
        """Store ``module_class`` under one or several names.

        Args:
            module_class (type): The class to register.
            module_name (str | Sequence[str] | None): Name(s) to register the
                class under. The class name is used when None.
            force (bool): Whether to override existing entries.

        Returns:
            The already registered class when a single explicit string name
            is re-registered without ``force``; None otherwise.

        Raises:
            TypeError: If ``module_class`` is not a class.
            KeyError: If a name derived from the class name, or one of a
                sequence of names, is already registered and ``force`` is
                False.
        """
        if not inspect.isclass(module_class):
            raise TypeError('module must be a class, '
                            f'but got {type(module_class)}')
        # Re-registering an explicit string name is silently ignored and the
        # existing entry is kept. The check is restricted to strings: testing
        # a list of names for dict membership raises ``TypeError`` because
        # lists are unhashable.
        if (not force and isinstance(module_name, str)
                and module_name in self._module_dict):
            return self._module_dict[module_name]
        if module_name is None:
            module_name = module_class.__name__
        if isinstance(module_name, str):
            module_name = [module_name]
        for name in module_name:
            if not force and name in self._module_dict:
                raise KeyError(f'{name} is already registered '
                               f'in {self.name}')

            self._module_dict[name] = module_class

    def deprecated_register_module(self, cls=None, force=False):
        """Old-style registration API, kept for backward compatibility.

        Emits a ``DeprecationWarning``. Returns a decorator when ``cls`` is
        None, otherwise registers ``cls`` and returns it.
        """
        warnings.warn(
            'The old API of register_module(module, force=False) '
            'is deprecated and will be removed, please use the new API '
            'register_module(name=None, force=False, module=None) instead.',
            DeprecationWarning)
        if cls is None:
            return partial(self.deprecated_register_module, force=force)
        self._register_module(cls, force=force)
        return cls

    def register_module(self, name=None, force=False, module=None):
        """Register a module.

        A record will be added to `self._module_dict`, whose key is the class
        name or the specified name, and value is the class itself.
        It can be used as a decorator or a normal function.

        Example:
            >>> backbones = Registry('backbone')
            >>> @backbones.register_module()
            >>> class ResNet:
            >>>     pass

            >>> backbones = Registry('backbone')
            >>> @backbones.register_module(name='mnet')
            >>> class MobileNet:
            >>>     pass

            >>> backbones = Registry('backbone')
            >>> class ResNet:
            >>>     pass
            >>> backbones.register_module(ResNet)

        Args:
            name (str | None): The module name to be registered. If not
                specified, the class name will be used.
            force (bool, optional): Whether to override an existing class with
                the same name. Default: False.
            module (type): Module class to be registered.

        Raises:
            TypeError: If ``force`` is not a bool, or ``name`` is neither
                None, a str, nor a sequence of str.
        """
        if not isinstance(force, bool):
            raise TypeError(f'force must be a boolean, but got {type(force)}')
        # NOTE: This is a workaround to be compatible with the old api,
        # while it may introduce unexpected bugs.
        if isinstance(name, type):
            return self.deprecated_register_module(name, force=force)

        # raise the error ahead of time
        if not (name is None or isinstance(name, str) or is_seq_of(name, str)):
            raise TypeError(
                'name must be either of None, an instance of str or a sequence'
                f'  of str, but got {type(name)}')

        # use it as a normal method: x.register_module(module=SomeClass)
        if module is not None:
            self._register_module(
                module_class=module, module_name=name, force=force)
            return module

        # use it as a decorator: @x.register_module()
        def _register(cls):
            self._register_module(
                module_class=cls, module_name=name, force=force)
            return cls

        return _register


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/utils/testing.py
================================================
# Copyright (c) Open-MMLab.
import sys
from collections.abc import Iterable
from runpy import run_path
from shlex import split
from typing import Any, Dict, List
from unittest.mock import patch


def check_python_script(cmd):
    """Execute a python script command in the current process as `__main__`.

    Unlike `os.system`, the script runs inside the current interpreter, so
    coverage tools can track it. Two command forms are supported:

    - ./tests/data/scripts/hello.py zz
    - python tests/data/scripts/hello.py zz

    While the script runs, ``sys.argv`` is temporarily replaced by the script
    path followed by its arguments.

    Args:
        cmd (str): The command line to execute.
    """
    argv = split(cmd)
    if argv[0] == 'python':
        # drop the interpreter name, keep "script arg1 arg2 ..."
        argv = argv[1:]
    script_path = argv[0]
    with patch.object(sys, 'argv', argv):
        run_path(script_path, run_name='__main__')


def _any(judge_result):
    """Since built-in ``any`` works only when the element of iterable is not
    iterable, implement the function."""
    if not isinstance(judge_result, Iterable):
        return judge_result

    try:
        for element in judge_result:
            if _any(element):
                return True
    except TypeError:
        # Maybe encounter the case: torch.tensor(True) | torch.tensor(False)
        if judge_result:
            return True
    return False


def assert_dict_contains_subset(dict_obj: Dict[Any, Any],
                                expected_subset: Dict[Any, Any]) -> bool:
    """Check that every item of ``expected_subset`` is present in ``dict_obj``.

    Args:
        dict_obj (Dict[Any, Any]): Dict object to be checked.
        expected_subset (Dict[Any, Any]): Key/value pairs expected to be
            contained in ``dict_obj``.

    Returns:
        bool: True when each expected key exists and its value does not
        differ from the expected one, False otherwise.
    """
    return all(
        key in dict_obj.keys() and not _any(dict_obj[key] != value)
        for key, value in expected_subset.items())


def assert_attrs_equal(obj: Any, expected_attrs: Dict[str, Any]) -> bool:
    """Check that ``obj`` carries all the expected attribute values.

    Args:
        obj (object): Object to be checked.
        expected_attrs (Dict[str, Any]): Mapping from attribute name to the
            expected value.

    Returns:
        bool: True when each attribute exists and does not differ from its
        expected value, False otherwise.
    """
    return all(
        hasattr(obj, attr) and not _any(getattr(obj, attr) != value)
        for attr, value in expected_attrs.items())


def assert_dict_has_keys(obj: Dict[str, Any],
                         expected_keys: List[str]) -> bool:
    """Check that ``obj`` contains every key in ``expected_keys``.

    Args:
        obj (Dict[str, Any]): Dict to be checked.
        expected_keys (List[str]): Keys that must be present in ``obj``.

    Returns:
        bool: Whether all expected keys are present.
    """
    wanted = set(expected_keys)
    present = set(obj.keys())
    return wanted <= present


def assert_keys_equal(result_keys: List[str], target_keys: List[str]) -> bool:
    """Check that two key collections hold the same set of keys.

    Order and duplicates are ignored.

    Args:
        result_keys (List[str]): Result keys to be checked.
        target_keys (List[str]): Target keys to compare against.

    Returns:
        bool: Whether both collections contain the same distinct keys.
    """
    found = set(result_keys)
    wanted = set(target_keys)
    return found == wanted


def assert_is_norm_layer(module) -> bool:
    """Check whether ``module`` is a normalization layer.

    Args:
        module (nn.Module): The module to be checked.

    Returns:
        bool: True when ``module`` is an instance of ``_BatchNorm``,
        ``_InstanceNorm``, ``GroupNorm`` or ``LayerNorm``.
    """
    # Imported lazily so this module can be imported without torch.
    from .parrots_wrapper import _BatchNorm, _InstanceNorm
    from torch.nn import GroupNorm, LayerNorm
    return isinstance(module,
                      (_BatchNorm, _InstanceNorm, GroupNorm, LayerNorm))


def assert_params_all_zeros(module) -> bool:
    """Check whether the weight (and bias, if any) of a module are all zero.

    Args:
        module (nn.Module): The module to be checked. It must have a
            ``weight`` attribute; ``bias`` is optional and may be None.

    Returns:
        bool: True when the weight is close to zero and the bias is either
        absent, None, or close to zero.
    """

    def _is_all_zero(tensor):
        return tensor.allclose(tensor.new_zeros(tensor.size()))

    weight_ok = _is_all_zero(module.weight.data)

    bias_ok = True
    if hasattr(module, 'bias') and module.bias is not None:
        bias_ok = _is_all_zero(module.bias.data)

    return weight_ok and bias_ok


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/utils/timer.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
from time import time


class TimerError(Exception):
    """Exception raised for invalid timer usage.

    Args:
        message (str): Description of the error, also stored in
            ``self.message``.
    """

    def __init__(self, message):
        super().__init__(message)
        self.message = message


class Timer:
    """A flexible Timer class.

    Examples:
        >>> import time
        >>> import mmcv
        >>> with mmcv.Timer():
        >>>     # simulate a code block that will run for 1s
        >>>     time.sleep(1)
        1.000
        >>> with mmcv.Timer(print_tmpl='it takes {:.1f} seconds'):
        >>>     # simulate a code block that will run for 1s
        >>>     time.sleep(1)
        it takes 1.0 seconds
        >>> timer = mmcv.Timer()
        >>> time.sleep(0.5)
        >>> print(timer.since_start())
        0.500
        >>> time.sleep(0.5)
        >>> print(timer.since_last_check())
        0.500
        >>> print(timer.since_start())
        1.000

    Args:
        start (bool): Whether to start timing on construction.
        print_tmpl (str, optional): Format template used when the timer is
            used as a context manager; ``'{:.3f}'`` when not given.
    """

    def __init__(self, start=True, print_tmpl=None):
        self.print_tmpl = print_tmpl or '{:.3f}'
        self._is_running = False
        if start:
            self.start()

    @property
    def is_running(self):
        """bool: indicate whether the timer is running"""
        return self._is_running

    def __enter__(self):
        self.start()
        return self

    def __exit__(self, type, value, traceback):
        # report the time spent since the last check, then stop the timer
        elapsed = self.since_last_check()
        print(self.print_tmpl.format(elapsed))
        self._is_running = False

    def start(self):
        """Start the timer.

        The start time is only recorded if the timer is not already running;
        the last-check time is refreshed in every case.
        """
        if not self._is_running:
            self._t_start = time()
            self._is_running = True
        self._t_last = time()

    def since_start(self):
        """Total time since the timer is started.

        Calling this also counts as a check for :func:`since_last_check`.

        Returns:
            float: Time in seconds.

        Raises:
            TimerError: If the timer is not running.
        """
        if not self._is_running:
            raise TimerError('timer is not running')
        now = time()
        self._t_last = now
        return now - self._t_start

    def since_last_check(self):
        """Time since the last checking.

        Either :func:`since_start` or :func:`since_last_check` is a checking
        operation.

        Returns:
            float: Time in seconds.

        Raises:
            TimerError: If the timer is not running.
        """
        if not self._is_running:
            raise TimerError('timer is not running')
        previous = self._t_last
        elapsed = time() - previous
        self._t_last = time()
        return elapsed


_g_timers = {}  # global timers


def check_time(timer_id):
    """Add check points in a single line.

    This method is suitable for running a task on a list of items. A timer
    is registered under ``timer_id`` when the method is called with that id
    for the first time; later calls report the time since the previous check
    of that timer.

    Examples:
        >>> import time
        >>> import mmcv
        >>> for i in range(1, 6):
        >>>     # simulate a code block
        >>>     time.sleep(i)
        >>>     mmcv.check_time('task1')
        2.000
        3.000
        4.000
        5.000

    Args:
        timer_id (str): Timer identifier.

    Returns:
        0 on the first call for ``timer_id``, otherwise the seconds elapsed
        since the last check of that timer.
    """
    timer = _g_timers.get(timer_id)
    if timer is None:
        _g_timers[timer_id] = Timer()
        return 0
    return timer.since_last_check()


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/utils/trace.py
================================================
import warnings

import torch

from mmcv.utils import digit_version


def is_jit_tracing() -> bool:
    """Report whether the code is currently being traced by ``torch.jit``.

    Returns:
        bool: The tracing state when the installed torch is not ``parrots``
        and is at least v1.6.0; otherwise ``False`` after emitting a
        ``UserWarning``.
    """
    supported = (
        torch.__version__ != 'parrots'
        and digit_version(torch.__version__) >= digit_version('1.6.0'))
    if not supported:
        warnings.warn(
            'torch.jit.is_tracing is only supported after v1.6.0. '
            'Therefore is_tracing returns False automatically. Please '
            'set on_trace manually if you are using trace.', UserWarning)
        return False

    on_trace = torch.jit.is_tracing()
    # In PyTorch 1.6, torch.jit.is_tracing has a bug.
    # Refers to https://github.com/pytorch/pytorch/issues/42448
    if isinstance(on_trace, bool):
        return on_trace
    return torch._C._is_tracing()


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/utils/version_utils.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import os
import subprocess
import warnings

from packaging.version import parse


def digit_version(version_str: str, length: int = 4):
    """Convert a version string into a tuple of integers.

    This method is usually used for comparing two versions. For pre-release
    versions: alpha < beta < rc.

    The result always has ``length + 2`` entries: the release numbers
    truncated or zero-padded to ``length``, followed by a two-element suffix
    that is ``(0, 0)`` for a plain release, ``(1, N)`` for post-release N and
    a negative stage code plus its number for pre-releases.

    Args:
        version_str (str): The version string.
        length (int): The maximum number of version levels. Default: 4.

    Returns:
        tuple[int]: The version info in digits (integers).
    """
    assert 'parrots' not in version_str
    version = parse(version_str)
    assert version.release, f'failed to parse version {version_str}'

    digits = list(version.release[:length])
    # Multiplying a list by a non-positive count yields [], so this only pads.
    digits += [0] * (length - len(digits))

    if version.is_prerelease:
        stage_codes = {'a': -3, 'b': -2, 'rc': -1}
        # version.pre can be None
        if not version.pre:
            suffix = [-4, 0]
        else:
            if version.pre[0] in stage_codes:
                code = stage_codes[version.pre[0]]
            else:
                warnings.warn(f'unknown prerelease version {version.pre[0]}, '
                              'version checking may go wrong')
                code = -4
            suffix = [code, version.pre[-1]]
    elif version.is_postrelease:
        suffix = [1, version.post]
    else:
        suffix = [0, 0]
    return tuple(digits + suffix)


def _minimal_ext_cmd(cmd):
    """Run ``cmd`` in a minimal, C-locale environment and return its stdout.

    Only ``SYSTEMROOT``, ``PATH`` and ``HOME`` are inherited from the current
    environment (when set); the locale variables are forced to ``'C'``.

    Args:
        cmd (list[str]): Command and arguments passed to
            :class:`subprocess.Popen`.

    Returns:
        bytes: Everything the command wrote to standard output.
    """
    # construct minimal environment
    env = {
        key: os.environ[key]
        for key in ('SYSTEMROOT', 'PATH', 'HOME') if key in os.environ
    }
    # LANGUAGE is used on win32
    env.update(LANGUAGE='C', LANG='C', LC_ALL='C')
    process = subprocess.Popen(cmd, stdout=subprocess.PIPE, env=env)
    stdout, _ = process.communicate()
    return stdout


def get_git_hash(fallback='unknown', digits=None):
    """Get the git hash of the current repo.

    Args:
        fallback (str, optional): The fallback string when git hash is
            unavailable. Defaults to 'unknown'.
        digits (int, optional): kept digits of the hash. Defaults to None,
            meaning all digits are kept.

    Returns:
        str: Git commit hash, or ``fallback`` when git cannot be executed or
        prints no hash.

    Raises:
        TypeError: If ``digits`` is neither None nor an integer.
    """

    if digits is not None and not isinstance(digits, int):
        raise TypeError('digits must be None or an integer')

    try:
        out = _minimal_ext_cmd(['git', 'rev-parse', 'HEAD'])
        sha = out.strip().decode('ascii')
    except OSError:
        # git executable is missing or could not be launched
        return fallback

    # Outside a repository git reports the error on stderr and leaves stdout
    # empty; treat that as "hash unavailable" instead of returning ''.
    if not sha:
        return fallback

    if digits is not None:
        sha = sha[:digits]
    return sha


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/version.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
__version__ = '1.4.4'


def parse_version_info(version_str: str, length: int = 4) -> tuple:
    """Parse a version string into a tuple.

    The result always has ``length + 2`` entries: the release numbers
    truncated or zero-padded to ``length``, followed by a two-element suffix
    describing the release kind.

    Args:
        version_str (str): The version string.
        length (int): The maximum number of version levels. Default: 4.

    Returns:
        tuple[int | str]: The version info, e.g., "1.3.0" is parsed into
            (1, 3, 0, 0, 0, 0), "2.0.0rc1" is parsed into
            (2, 0, 0, 0, 'rc', 1), "1.0.post2" into (1, 0, 0, 0, 'post', 2)
            and "1.0.dev3" into (1, 0, 0, 0, 'dev', 3) (when length is set
            to 4).
    """
    from packaging.version import parse
    version = parse(version_str)
    assert version.release, f'failed to parse version {version_str}'
    release = list(version.release)
    release = release[:length]
    if len(release) < length:
        release = release + [0] * (length - len(release))
    if version.is_prerelease:
        if version.pre is not None:
            release.extend(list(version.pre))
        else:
            # Dev-only releases are pre-releases whose ``pre`` is None;
            # calling list() on it would raise TypeError.
            release.extend(['dev', version.dev])
    elif version.is_postrelease:
        # ``post`` is a plain integer, not a (label, number) pair.
        release.extend(['post', version.post])
    else:
        release.extend([0, 0])
    return tuple(release)


# Integer tuple built from the first three dot-separated fields of
# ``__version__``; each of those fields must be parseable by ``int``.
version_info = tuple(int(x) for x in __version__.split('.')[:3])

# Public names of this module.
__all__ = ['__version__', 'version_info', 'parse_version_info']


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/video/__init__.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
from .io import Cache, VideoReader, frames2video
from .optflow import (dequantize_flow, flow_from_bytes, flow_warp, flowread,
                      flowwrite, quantize_flow, sparse_flow_from_bytes)
from .processing import concat_video, convert_video, cut_video, resize_video

__all__ = [
    'Cache', 'VideoReader', 'frames2video', 'convert_video', 'resize_video',
    'cut_video', 'concat_video', 'flowread', 'flowwrite', 'quantize_flow',
    'dequantize_flow', 'flow_warp', 'flow_from_bytes', 'sparse_flow_from_bytes'
]


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/video/io.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
from collections import OrderedDict

import cv2
from cv2 import (CAP_PROP_FOURCC, CAP_PROP_FPS, CAP_PROP_FRAME_COUNT,
                 CAP_PROP_FRAME_HEIGHT, CAP_PROP_FRAME_WIDTH,
                 CAP_PROP_POS_FRAMES, VideoWriter_fourcc)

from mmcv.utils import (check_file_exist, mkdir_or_exist, scandir,
                        track_progress)


class Cache:
    """A small first-in-first-out key/value cache with a fixed capacity.

    When the cache is full, inserting a new key evicts the oldest inserted
    entry. Re-inserting an existing key is a no-op: the stored value and the
    eviction order are left untouched.

    Note:
        The class defines neither ``__len__`` nor ``__bool__``, so instances
        are always truthy; ``VideoReader`` tests ``if self._cache:``. Use
        :attr:`size` for the number of stored items.

    Args:
        capacity (int): Maximum number of items; converted with ``int()``.

    Raises:
        ValueError: If the integer capacity is not positive.
    """

    def __init__(self, capacity):
        self._cache = OrderedDict()
        self._capacity = int(capacity)
        # Validate the truncated value: e.g. 0.5 would otherwise pass the
        # check but yield capacity 0 and make put() pop from an empty dict.
        if self._capacity <= 0:
            raise ValueError('capacity must be a positive integer')

    @property
    def capacity(self):
        """int: Maximum number of items the cache can hold."""
        return self._capacity

    @property
    def size(self):
        """int: Number of items currently stored."""
        return len(self._cache)

    def put(self, key, val):
        """Store ``val`` under ``key`` unless the key is already present.

        If the cache is full, the oldest entry is evicted first.
        """
        if key in self._cache:
            return
        if len(self._cache) >= self.capacity:
            # last=False pops the entry that was inserted first
            self._cache.popitem(last=False)
        self._cache[key] = val

    def get(self, key, default=None):
        """Return the value stored under ``key``, or ``default`` if absent."""
        val = self._cache[key] if key in self._cache else default
        return val


class VideoReader:
    """Video class with similar usage to a list object.

    This video wrapper class provides convenient apis to access frames.
    There exists an issue of OpenCV's VideoCapture class that jumping to a
    certain frame may be inaccurate. It is fixed in this class by checking
    the position after jumping each time.
    Cache is used when decoding videos. So if the same frame is visited for
    the second time, there is no need to decode again if it is stored in the
    cache.

    Args:
        filename (str): Path or ``http(s)://`` URL of the video.
        cache_capacity (int): Number of decoded frames kept in the cache.
            Must be positive. Default: 10.

    Examples:
        >>> import mmcv
        >>> v = mmcv.VideoReader('sample.mp4')
        >>> len(v)  # get the total frame number with `len()`
        120
        >>> for img in v:  # v is iterable
        >>>     mmcv.imshow(img)
        >>> v[5]  # get the 6th frame
    """

    def __init__(self, filename, cache_capacity=10):
        # Check whether the video path is a url
        if not filename.startswith(('https://', 'http://')):
            check_file_exist(filename, 'Video file not found: ' + filename)
        # Validate before opening the capture so that a bad capacity does
        # not leave an opened VideoCapture behind.
        assert cache_capacity > 0
        self._vcap = cv2.VideoCapture(filename)
        self._cache = Cache(cache_capacity)
        self._position = 0
        # get basic info
        self._width = int(self._vcap.get(CAP_PROP_FRAME_WIDTH))
        self._height = int(self._vcap.get(CAP_PROP_FRAME_HEIGHT))
        self._fps = self._vcap.get(CAP_PROP_FPS)
        self._frame_cnt = int(self._vcap.get(CAP_PROP_FRAME_COUNT))
        self._fourcc = self._vcap.get(CAP_PROP_FOURCC)

    @property
    def vcap(self):
        """:obj:`cv2.VideoCapture`: The raw VideoCapture object."""
        return self._vcap

    @property
    def opened(self):
        """bool: Indicate whether the video is opened."""
        return self._vcap.isOpened()

    @property
    def width(self):
        """int: Width of video frames."""
        return self._width

    @property
    def height(self):
        """int: Height of video frames."""
        return self._height

    @property
    def resolution(self):
        """tuple: Video resolution (width, height)."""
        return (self._width, self._height)

    @property
    def fps(self):
        """float: FPS of the video."""
        return self._fps

    @property
    def frame_cnt(self):
        """int: Total frames of the video."""
        return self._frame_cnt

    @property
    def fourcc(self):
        """"Four character code" of the video, stored exactly as returned by
        ``VideoCapture.get(CAP_PROP_FOURCC)`` (it is not decoded into a
        four-character string here)."""
        return self._fourcc

    @property
    def position(self):
        """int: Current cursor position, indicating frame decoded."""
        return self._position

    def _get_real_position(self):
        """Return the capture's own frame position, rounded to an int."""
        return int(round(self._vcap.get(CAP_PROP_POS_FRAMES)))

    def _set_real_position(self, frame_id):
        """Move both the capture and the cursor to ``frame_id``.

        After asking the capture to seek, the reported position is checked
        and any shortfall is made up by reading frames one by one.
        """
        self._vcap.set(CAP_PROP_POS_FRAMES, frame_id)
        pos = self._get_real_position()
        for _ in range(frame_id - pos):
            self._vcap.read()
        self._position = frame_id

    def read(self):
        """Read the next frame.

        If the next frame have been decoded before and in the cache, then
        return it directly, otherwise decode, cache and return it.

        Returns:
            ndarray or None: Return the frame if successful, otherwise None.
        """
        # pos = self._position
        if self._cache:
            img = self._cache.get(self._position)
            if img is not None:
                ret = True
            else:
                # Cached reads advance only the cursor, so re-sync the
                # capture before decoding.
                if self._position != self._get_real_position():
                    self._set_real_position(self._position)
                ret, img = self._vcap.read()
                if ret:
                    self._cache.put(self._position, img)
        else:
            ret, img = self._vcap.read()
        if ret:
            self._position += 1
        return img

    def get_frame(self, frame_id):
        """Get frame by index.

        Args:
            frame_id (int): Index of the expected frame, 0-based.

        Returns:
            ndarray or None: Return the frame if successful, otherwise None.

        Raises:
            IndexError: If ``frame_id`` is outside ``[0, frame_cnt - 1]``.
        """
        if frame_id < 0 or frame_id >= self._frame_cnt:
            raise IndexError(
                f'"frame_id" must be between 0 and {self._frame_cnt - 1}')
        if frame_id == self._position:
            return self.read()
        if self._cache:
            img = self._cache.get(frame_id)
            if img is not None:
                self._position = frame_id + 1
                return img
        self._set_real_position(frame_id)
        ret, img = self._vcap.read()
        if ret:
            if self._cache:
                self._cache.put(self._position, img)
            self._position += 1
        return img

    def current_frame(self):
        """Get the current frame (frame that is just visited).

        Returns:
            ndarray or None: If the video is fresh, return None, otherwise
            return the frame. The frame is looked up in the cache, so None
            is also returned if it is no longer cached.
        """
        if self._position == 0:
            return None
        return self._cache.get(self._position - 1)

    def cvt2frames(self,
                   frame_dir,
                   file_start=0,
                   filename_tmpl='{:06d}.jpg',
                   start=0,
                   max_num=0,
                   show_progress=True):
        """Convert a video to frame images.

        Args:
            frame_dir (str): Output directory to store all the frame images.
            file_start (int): Filenames will start from the specified number.
            filename_tmpl (str): Filename template with the index as the
                placeholder.
            start (int): The starting frame index.
            max_num (int): Maximum number of frames to be written; 0 means
                all frames from ``start`` to the end.
            show_progress (bool): Whether to show a progress bar.

        Raises:
            ValueError: If ``start`` is not less than the total frame number.
        """
        mkdir_or_exist(frame_dir)
        if max_num == 0:
            task_num = self.frame_cnt - start
        else:
            task_num = min(self.frame_cnt - start, max_num)
        if task_num <= 0:
            raise ValueError('start must be less than total frame number')
        # Always begin at ``start``. Seeking only when ``start > 0`` made a
        # reader that had already been advanced export frames from its
        # current cursor when ``start`` was 0.
        if start != self._position:
            self._set_real_position(start)

        def write_frame(file_idx):
            img = self.read()
            if img is None:
                return
            filename = osp.join(frame_dir, filename_tmpl.format(file_idx))
            cv2.imwrite(filename, img)

        if show_progress:
            track_progress(write_frame, range(file_start,
                                              file_start + task_num))
        else:
            for i in range(task_num):
                write_frame(file_start + i)

    def __len__(self):
        return self.frame_cnt

    def __getitem__(self, index):
        if isinstance(index, slice):
            return [
                self.get_frame(i)
                for i in range(*index.indices(self.frame_cnt))
            ]
        # support negative indexing
        if index < 0:
            index += self.frame_cnt
            if index < 0:
                raise IndexError('index out of range')
        return self.get_frame(index)

    def __iter__(self):
        # Iteration always restarts from the first frame.
        self._set_real_position(0)
        return self

    def __next__(self):
        img = self.read()
        if img is not None:
            return img
        else:
            raise StopIteration

    next = __next__

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self._vcap.release()


def frames2video(frame_dir,
                 video_file,
                 fps=30,
                 fourcc='XVID',
                 filename_tmpl='{:06d}.jpg',
                 start=0,
                 end=0,
                 show_progress=True):
    """Read the frame images from a directory and join them as a video.

    The output resolution is taken from the first frame
    (``filename_tmpl.format(start)``).

    Args:
        frame_dir (str): The directory containing video frames.
        video_file (str): Output filename.
        fps (float): FPS of the output video.
        fourcc (str): Fourcc of the output video, this should be compatible
            with the output file type.
        filename_tmpl (str): Filename template with the index as the variable.
        start (int): Starting frame index.
        end (int): Ending frame index (exclusive). If 0, it is set to the
            number of files in ``frame_dir`` that have the extension of
            ``filename_tmpl``.
        show_progress (bool): Whether to show a progress bar.
    """
    if end == 0:
        ext = filename_tmpl.split('.')[-1]
        end = len([name for name in scandir(frame_dir, ext)])
    first_file = osp.join(frame_dir, filename_tmpl.format(start))
    check_file_exist(first_file, 'The start frame not found: ' + first_file)
    img = cv2.imread(first_file)
    height, width = img.shape[:2]
    resolution = (width, height)
    vwriter = cv2.VideoWriter(video_file, VideoWriter_fourcc(*fourcc), fps,
                              resolution)

    def write_frame(file_idx):
        filename = osp.join(frame_dir, filename_tmpl.format(file_idx))
        img = cv2.imread(filename)
        vwriter.write(img)

    # Release the writer even if a frame fails to load or encode, so the
    # output file handle is not leaked.
    try:
        if show_progress:
            track_progress(write_frame, range(start, end))
        else:
            for i in range(start, end):
                write_frame(i)
    finally:
        vwriter.release()


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/video/optflow.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import warnings

import cv2
import numpy as np

from mmcv.arraymisc import dequantize, quantize
from mmcv.image import imread, imwrite
from mmcv.utils import is_str


def flowread(flow_or_path, quantize=False, concat_axis=0, *args, **kwargs):
    """Read an optical flow map.

    An ndarray input is validated and returned unchanged. A path is read
    either as a raw flow file starting with the ``PIEH`` header (followed by
    int32 width, int32 height and float32 data) or, when ``quantize`` is
    True, as a single image holding the quantized dx and dy parts.

    Args:
        flow_or_path (ndarray or str): A flow map or filepath.
        quantize (bool): whether to read quantized pair, if set to True,
            remaining args will be passed to :func:`dequantize_flow`.
        concat_axis (int): The axis that dx and dy are concatenated,
            can be either 0 or 1. Ignored if quantize is False.

    Returns:
        ndarray: Optical flow represented as a (h, w, 2) numpy array

    Raises:
        ValueError: If an ndarray input is not of shape (h, w, 2).
        TypeError: If the input is neither an ndarray nor a string.
        IOError: If the file is not a valid flow file.
    """
    if isinstance(flow_or_path, np.ndarray):
        is_hw2 = flow_or_path.ndim == 3 and flow_or_path.shape[-1] == 2
        if not is_hw2:
            raise ValueError(f'Invalid flow with shape {flow_or_path.shape}')
        return flow_or_path
    if not is_str(flow_or_path):
        raise TypeError(f'"flow_or_path" must be a filename or numpy array, '
                        f'not {type(flow_or_path)}')

    if quantize:
        assert concat_axis in [0, 1]
        cat_flow = imread(flow_or_path, flag='unchanged')
        if cat_flow.ndim != 2:
            raise IOError(
                f'{flow_or_path} is not a valid quantized flow file, '
                f'its dimension is {cat_flow.ndim}.')
        assert cat_flow.shape[concat_axis] % 2 == 0
        dx, dy = np.split(cat_flow, 2, axis=concat_axis)
        flow = dequantize_flow(dx, dy, *args, **kwargs)
    else:
        with open(flow_or_path, 'rb') as f:
            try:
                header = f.read(4).decode('utf-8')
            except Exception:
                raise IOError(f'Invalid flow file: {flow_or_path}')
            if header != 'PIEH':
                raise IOError(f'Invalid flow file: {flow_or_path}, '
                              'header does not contain PIEH')

            # width comes first in the file, then height
            w = np.fromfile(f, np.int32, 1).squeeze()
            h = np.fromfile(f, np.int32, 1).squeeze()
            flow = np.fromfile(f, np.float32, w * h * 2).reshape((h, w, 2))

    return flow.astype(np.float32)


def flowwrite(flow, filename, quantize=False, concat_axis=0, *args, **kwargs):
    """Write optical flow to file.

    If the flow is not quantized, it will be saved as a .flo file losslessly,
    otherwise a jpeg image which is lossy but of much smaller size. (dx and dy
    will be concatenated along ``concat_axis`` into a single image if
    quantize is True.)

    Args:
        flow (ndarray): (h, w, 2) array of optical flow.
        filename (str): Output filepath.
        quantize (bool): Whether to quantize the flow and save it as one
            image holding both parts. If set to True, remaining args will be
            passed to :func:`quantize_flow`.
        concat_axis (int): The axis that dx and dy are concatenated,
            can be either 0 or 1. Ignored if quantize is False.
    """
    if quantize:
        assert concat_axis in [0, 1]
        dx, dy = quantize_flow(flow, *args, **kwargs)
        stacked = np.concatenate((dx, dy), axis=concat_axis)
        imwrite(stacked, filename)
        return

    with open(filename, 'wb') as f:
        f.write('PIEH'.encode('utf-8'))
        # the header stores width first, then height
        size_header = np.array([flow.shape[1], flow.shape[0]], dtype=np.int32)
        size_header.tofile(f)
        flow.astype(np.float32).tofile(f)
        f.flush()


def quantize_flow(flow, max_val=0.02, norm=True):
    """Quantize flow to [0, 255].

    After this step, the size of flow will be much smaller, and can be
    dumped as jpeg images.

    Args:
        flow (ndarray): (h, w, 2) array of optical flow.
        max_val (float): Maximum value of flow, values beyond
                        [-max_val, max_val] will be truncated.
        norm (bool): Whether to divide flow values by image width/height.

    Returns:
        tuple[ndarray]: Quantized dx and dy.
    """
    height, width, _ = flow.shape
    components = (flow[..., 0], flow[..., 1])
    if norm:
        # division builds new arrays, so the input flow is left untouched
        components = (components[0] / width, components[1] / height)
    # use 255 levels instead of 256 to make sure 0 is 0 after dequantization.
    return tuple(
        quantize(part, -max_val, max_val, 255, np.uint8)
        for part in components)


def dequantize_flow(dx, dy, max_val=0.02, denorm=True):
    """Recover from quantized flow.

    Args:
        dx (ndarray): Quantized dx.
        dy (ndarray): Quantized dy.
        max_val (float): Maximum value used when quantizing.
        denorm (bool): Whether to multiply flow values with width/height.

    Returns:
        ndarray: Dequantized flow.
    """
    assert dx.shape == dy.shape
    assert dx.ndim == 2 or (dx.ndim == 3 and dx.shape[-1] == 1)

    flow_x = dequantize(dx, -max_val, max_val, 255)
    flow_y = dequantize(dy, -max_val, max_val, 255)

    if denorm:
        # Sizes are taken from the dequantized dx: axis 1 scales dx and
        # axis 0 scales dy. Scaling is done in place.
        flow_x *= flow_x.shape[1]
        flow_y *= flow_x.shape[0]
    return np.dstack((flow_x, flow_y))


def flow_warp(img, flow, filling_value=0, interpolate_mode='nearest'):
    """Use flow to warp img.

    For every output pixel, the source position is the pixel's own
    (row, column) plus the flow at that pixel (channel 1 is added to the row,
    channel 0 to the column). Pixels whose source position falls outside the
    sampled area keep ``filling_value``.

    Args:
        img (ndarray, float or uint8): Image to be warped.
        flow (ndarray, float): Optical Flow.
        filling_value (int): The missing pixels will be set with filling_value.
        interpolate_mode (str): bilinear -> Bilinear Interpolation;
                                nearest -> Nearest Neighbor.

    Returns:
        ndarray: Warped image with the same shape of img

    Raises:
        NotImplementedError: If ``interpolate_mode`` is neither ``'nearest'``
            nor ``'bilinear'``.
    """
    warnings.warn('This function is just for prototyping and cannot '
                  'guarantee the computational efficiency.')
    assert flow.ndim == 3, 'Flow must be in 3D arrays.'
    height, width = flow.shape[0], flow.shape[1]
    channels = img.shape[2]

    output = np.ones(
        (height, width, channels), dtype=img.dtype) * filling_value

    rows, cols = np.indices((height, width))
    dx = rows + flow[:, :, 1]
    dy = cols + flow[:, :, 0]
    sx = np.floor(dx).astype(int)
    sy = np.floor(dy).astype(int)
    # keep one row/column of margin so the "ceil" neighbour stays in bounds
    valid = (sx >= 0) & (sx < height - 1) & (sy >= 0) & (sy < width - 1)

    if interpolate_mode == 'nearest':
        src_rows = dx[valid].round().astype(int)
        src_cols = dy[valid].round().astype(int)
        output[valid, :] = img[src_rows, src_cols, :]
    elif interpolate_mode == 'bilinear':
        # dirty workaround for integer positions
        eps_ = 1e-6
        vx = (dx + eps_)[valid]
        vy = (dy + eps_)[valid]
        x_lo, x_hi = np.floor(vx), np.ceil(vx)
        y_lo, y_hi = np.floor(vy), np.ceil(vy)
        x_lo_i, x_hi_i = x_lo.astype(int), x_hi.astype(int)
        y_lo_i, y_hi_i = y_lo.astype(int), y_hi.astype(int)
        # each corner is weighted by its distance to the opposite corner
        left_top_ = img[x_lo_i, y_lo_i, :] * (
            x_hi - vx)[:, None] * (y_hi - vy)[:, None]
        left_down_ = img[x_hi_i, y_lo_i, :] * (
            vx - x_lo)[:, None] * (y_hi - vy)[:, None]
        right_top_ = img[x_lo_i, y_hi_i, :] * (
            x_hi - vx)[:, None] * (vy - y_lo)[:, None]
        right_down_ = img[x_hi_i, y_hi_i, :] * (
            vx - x_lo)[:, None] * (vy - y_lo)[:, None]
        output[valid, :] = left_top_ + left_down_ + right_top_ + right_down_
    else:
        raise NotImplementedError(
            'We only support interpolation modes of nearest and bilinear, '
            f'but got {interpolate_mode}.')
    return output.astype(img.dtype)


def flow_from_bytes(content):
    """Read dense optical flow from bytes.

    .. note::
        This load optical flow function works for FlyingChairs, FlyingThings3D,
        Sintel, FlyingChairsOcc datasets, but cannot load the data from
        ChairsSDHom.

    Args:
        content (bytes): Optical flow bytes got from files or other streams.

    Returns:
        ndarray: Loaded optical flow with the shape (H, W, 2).

    Raises:
        Exception: If the first four bytes are not ``PIEH``.
    """

    # Layout: 4-byte header, int32 width, int32 height, then float32 data.
    magic, payload = content[:4], content[4:]
    if magic.decode('utf-8') != 'PIEH':
        raise Exception('Flow file header does not contain PIEH')
    width = np.frombuffer(payload, np.int32, 1).squeeze()
    height = np.frombuffer(payload[4:], np.int32, 1).squeeze()
    values = np.frombuffer(payload[8:], np.float32, width * height * 2)
    return values.reshape((height, width, 2))


def sparse_flow_from_bytes(content):
    """Read the optical flow in KITTI datasets from bytes.

    This function is modified from RAFT load the `KITTI datasets
    <https://github.com/princeton-vl/RAFT/blob/224320502d66c356d88e6c712f38129e60661e80/core/utils/frame_utils.py#L102>`_.

    The bytes are decoded as an image, the channel order is reversed, the
    first two channels become the flow as ``(value - 2**15) / 64`` and the
    third channel is returned as the valid mask.

    Args:
        content (bytes): Optical flow bytes got from files or other streams.

    Returns:
        Tuple(ndarray, ndarray): Loaded optical flow with the shape (H, W, 2)
        and flow valid mask with the shape (H, W).
    """  # noqa

    encoded = np.frombuffer(content, np.uint8)
    decoded = cv2.imdecode(encoded, cv2.IMREAD_ANYDEPTH | cv2.IMREAD_COLOR)
    reversed_channels = decoded[:, :, ::-1].astype(np.float32)
    # flow shape (H, W, 2) valid shape (H, W)
    raw_flow = reversed_channels[:, :, :2]
    valid = reversed_channels[:, :, 2]
    flow = (raw_flow - 2**15) / 64.0
    return flow, valid


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/video/processing.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import os
import os.path as osp
import subprocess
import tempfile

from mmcv.utils import requires_executable


@requires_executable('ffmpeg')
def convert_video(in_file,
                  out_file,
                  print_cmd=False,
                  pre_options='',
                  **kwargs):
    """Convert a video with ffmpeg.

    This provides a general api to ffmpeg, the executed command is::

        `ffmpeg -y <pre_options> -i <in_file> <options> <out_file>`

    Options(kwargs) are mapped to ffmpeg commands with the following rules:

    - key=val: "-key val"
    - key=True: "-key"
    - key=False: ""
    - log_level=val: "-loglevel val" (val must be a known ffmpeg log level)

    The command is assembled as one string and run through the shell, so
    file names and option values are interpolated verbatim (no quoting is
    applied here).

    Args:
        in_file (str): Input video filename.
        out_file (str): Output video filename.
        pre_options (str): Options appears before "-i <in_file>".
        print_cmd (bool): Whether to print the final ffmpeg command.
    """
    log_levels = [
        'quiet', 'panic', 'fatal', 'error', 'warning', 'info', 'verbose',
        'debug', 'trace'
    ]
    flags = []
    for key, value in kwargs.items():
        if isinstance(value, bool):
            # boolean options are bare switches; False drops the option
            if value:
                flags.append(f'-{key}')
            continue
        if key == 'log_level':
            assert value in log_levels
            flags.append(f'-loglevel {value}')
            continue
        flags.append(f'-{key} {value}')
    joined_flags = ' '.join(flags)
    cmd = f'ffmpeg -y {pre_options} -i {in_file} {joined_flags} {out_file}'
    if print_cmd:
        print(cmd)
    subprocess.call(cmd, shell=True)

@requires_executable('ffmpeg')
def resize_video(in_file,
                 out_file,
                 size=None,
                 ratio=None,
                 keep_ar=False,
                 log_level='info',
                 print_cmd=False):
    """Resize a video.

    Exactly one of ``size`` and ``ratio`` must be given; the resize is done
    with an ffmpeg ``scale`` video filter passed to :func:`convert_video`.

    Args:
        in_file (str): Input video filename.
        out_file (str): Output video filename.
        size (tuple): Expected size (w, h), eg, (320, 240) or (320, -1).
        ratio (tuple or float): Expected resize ratio, (2, 0.5) means
            (w*2, h*0.5).
        keep_ar (bool): Whether to keep original aspect ratio.
        log_level (str): Logging level of ffmpeg.
        print_cmd (bool): Whether to print the final ffmpeg command.

    Raises:
        ValueError: If neither or both of ``size`` and ratio`` are given.
    """
    if size is None and ratio is None:
        raise ValueError('expected size or ratio must be specified')
    if size is not None and ratio is not None:
        raise ValueError('size and ratio cannot be specified at the same time')

    if size:
        if keep_ar:
            scale_filter = f'scale=w={size[0]}:h={size[1]}:' \
                           'force_original_aspect_ratio=decrease'
        else:
            scale_filter = f'scale={size[0]}:{size[1]}'
    else:
        # a single ratio applies to both width and height
        if not isinstance(ratio, tuple):
            ratio = (ratio, ratio)
        scale_filter = \
            f'scale="trunc(iw*{ratio[0]}):trunc(ih*{ratio[1]})"'

    options = {'log_level': log_level, 'vf': scale_filter}
    convert_video(in_file, out_file, print_cmd, **options)


@requires_executable('ffmpeg')
def cut_video(in_file,
              out_file,
              start=None,
              end=None,
              vcodec=None,
              acodec=None,
              log_level='info',
              print_cmd=False):
    """Cut a clip from a video.

    Args:
        in_file (str): Input video filename.
        out_file (str): Output video filename.
        start (None or float): Start time (in seconds).
        end (None or float): End time (in seconds).
        vcodec (None or str): Output video codec, None for unchanged.
        acodec (None or str): Output audio codec, None for unchanged.
        log_level (str): Logging level of ffmpeg.
        print_cmd (bool): Whether to print the final ffmpeg command.
    """
    options = {'log_level': log_level}
    # None means "copy the stream"; an explicit codec is forwarded to ffmpeg
    # (it used to be dropped, leaving ffmpeg to pick its default encoder).
    options['vcodec'] = 'copy' if vcodec is None else vcodec
    options['acodec'] = 'copy' if acodec is None else acodec
    if start:
        options['ss'] = start
    else:
        start = 0
    if end:
        # ffmpeg's -t takes a duration, not an end time
        options['t'] = end - start
    convert_video(in_file, out_file, print_cmd, **options)


@requires_executable('ffmpeg')
def concat_video(video_list,
                 out_file,
                 vcodec=None,
                 acodec=None,
                 log_level='info',
                 print_cmd=False):
    """Concatenate multiple videos into a single one.

    The absolute paths of the inputs are written to a temporary list file
    that is handed to ffmpeg's ``concat`` demuxer; the list file is removed
    afterwards, even if the conversion raises.

    Args:
        video_list (list): A list of video filenames
        out_file (str): Output video filename
        vcodec (None or str): Output video codec, None for unchanged
        acodec (None or str): Output audio codec, None for unchanged
        log_level (str): Logging level of ffmpeg.
        print_cmd (bool): Whether to print the final ffmpeg command.
    """
    tmp_filehandler, tmp_filename = tempfile.mkstemp(suffix='.txt', text=True)
    try:
        # Reuse the descriptor returned by mkstemp; leaving the ``with``
        # block closes it before ffmpeg reads the file.
        with os.fdopen(tmp_filehandler, 'w') as f:
            for filename in video_list:
                # Single-quote every path so spaces and backslashes are
                # taken literally by the concat demuxer; a quote inside the
                # path is written as '\'' (close, escaped quote, reopen).
                escaped = osp.abspath(filename).replace("'", "'\\''")
                f.write(f"file '{escaped}'\n")
        options = {'log_level': log_level}
        # None means "copy the stream"; an explicit codec is forwarded to
        # ffmpeg (it used to be dropped).
        options['vcodec'] = 'copy' if vcodec is None else vcodec
        options['acodec'] = 'copy' if acodec is None else acodec
        convert_video(
            tmp_filename,
            out_file,
            print_cmd,
            pre_options='-f concat -safe 0',
            **options)
    finally:
        os.remove(tmp_filename)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/visualization/__init__.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
from .color import Color, color_val
from .image import imshow, imshow_bboxes, imshow_det_bboxes
from .optflow import flow2rgb, flowshow, make_color_wheel

__all__ = [
    'Color', 'color_val', 'imshow', 'imshow_bboxes', 'imshow_det_bboxes',
    'flowshow', 'flow2rgb', 'make_color_wheel'
]


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/visualization/color.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
from enum import Enum

import numpy as np

from mmcv.utils import is_str


class Color(Enum):
    """An enum that defines common colors.

    Contains red, green, blue, cyan, yellow, magenta, white and black.

    Each value is a 3-tuple of channel intensities in the range 0-255,
    stored in BGR order (for example ``red`` is ``(0, 0, 255)``).
    """
    # Channel order of every tuple below is (blue, green, red).
    red = (0, 0, 255)
    green = (0, 255, 0)
    blue = (255, 0, 0)
    cyan = (255, 255, 0)
    yellow = (0, 255, 255)
    magenta = (255, 0, 255)
    white = (255, 255, 255)
    black = (0, 0, 0)


def color_val(color):
    """Convert various input to color tuples.

    Args:
        color (:obj:`Color`/str/tuple/int/ndarray): Color inputs. A string is
            looked up by name in :obj:`Color`; a tuple must hold 3 values in
            [0, 255]; an int is used for all three channels; an ndarray must
            be 1-D with exactly 3 values in [0, 255].

    Returns:
        tuple[int]: A tuple of 3 integers indicating BGR channels.

    Raises:
        KeyError: If ``color`` is a string that is not a member of
            :obj:`Color`.
        AssertionError: If a tuple/int/ndarray input has the wrong length or
            a channel outside [0, 255].
        TypeError: If ``color`` is of an unsupported type.
    """
    if is_str(color):
        return Color[color].value
    if isinstance(color, Color):
        return color.value
    if isinstance(color, tuple):
        assert len(color) == 3
        for channel in color:
            assert 0 <= channel <= 255
        return color
    if isinstance(color, int):
        assert 0 <= color <= 255
        return color, color, color
    if isinstance(color, np.ndarray):
        assert color.ndim == 1 and color.size == 3
        assert np.all((color >= 0) & (color <= 255))
        # Cast through uint8 (truncating fractional values as before), then
        # convert to plain Python ints so the result really is tuple[int]
        # rather than a tuple of numpy scalars.
        return tuple(int(channel) for channel in color.astype(np.uint8))
    raise TypeError(f'Invalid type for color: {type(color)}')


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/visualization/image.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import cv2
import numpy as np

from mmcv.image import imread, imwrite
from .color import color_val


def imshow(img, win_name='', wait_time=0):
    """Display an image in an OpenCV window.

    Args:
        img (str or ndarray): The image to be displayed.
        win_name (str): The window name.
        wait_time (int): Value of waitKey param. ``0`` blocks until a key is
            pressed or the window is closed.
    """
    cv2.imshow(win_name, imread(img))

    if wait_time != 0:
        cv2.waitKey(wait_time)
        return

    # Poll instead of blocking forever so that closing the window by hand
    # also ends the wait.
    while True:
        key = cv2.waitKey(1)
        window_gone = cv2.getWindowProperty(win_name,
                                            cv2.WND_PROP_VISIBLE) < 1
        if window_gone or key != -1:
            return


def imshow_bboxes(img,
                  bboxes,
                  colors='green',
                  top_k=-1,
                  thickness=1,
                  show=True,
                  win_name='',
                  wait_time=0,
                  out_file=None):
    """Draw groups of bounding boxes on an image.

    Args:
        img (str or ndarray): The image to be displayed.
        bboxes (list or ndarray): A list of ndarray of shape (k, 4). A single
            ndarray is treated as a one-element list.
        colors (list[str or tuple or Color]): A list of colors, one per box
            group. A single color is reused for every group.
        top_k (int): Plot the first k bboxes only if set positive.
        thickness (int): Thickness of lines.
        show (bool): Whether to show the image.
        win_name (str): The window name.
        wait_time (int): Value of waitKey param.
        out_file (str, optional): The filename to write the image.

    Returns:
        ndarray: The image with bboxes drawn on it.
    """
    canvas = np.ascontiguousarray(imread(img))

    if isinstance(bboxes, np.ndarray):
        bboxes = [bboxes]
    if not isinstance(colors, list):
        colors = [colors] * len(bboxes)
    colors = [color_val(c) for c in colors]
    assert len(bboxes) == len(colors)

    for group, bgr in zip(bboxes, colors):
        group = group.astype(np.int32)
        num_boxes = group.shape[0]
        limit = num_boxes if top_k <= 0 else min(top_k, num_boxes)
        for row in range(limit):
            top_left = (group[row, 0], group[row, 1])
            bottom_right = (group[row, 2], group[row, 3])
            cv2.rectangle(
                canvas, top_left, bottom_right, bgr, thickness=thickness)

    if show:
        imshow(canvas, win_name, wait_time)
    if out_file is not None:
        imwrite(canvas, out_file)
    return canvas


def imshow_det_bboxes(img,
                      bboxes,
                      labels,
                      class_names=None,
                      score_thr=0,
                      bbox_color='green',
                      text_color='green',
                      thickness=1,
                      font_scale=0.5,
                      show=True,
                      win_name='',
                      wait_time=0,
                      out_file=None):
    """Draw detection boxes with their class labels (and scores) on an image.

    Args:
        img (str or ndarray): The image to be displayed.
        bboxes (ndarray): Bounding boxes (with scores), shaped (n, 4) or
            (n, 5).
        labels (ndarray): Labels of bboxes.
        class_names (list[str]): Names of each classes. When omitted, the
            caption is ``cls <label>``.
        score_thr (float): Minimum score of bboxes to be shown. A positive
            threshold requires the (n, 5) layout.
        bbox_color (str or tuple or :obj:`Color`): Color of bbox lines.
        text_color (str or tuple or :obj:`Color`): Color of texts.
        thickness (int): Thickness of lines.
        font_scale (float): Font scales of texts.
        show (bool): Whether to show the image.
        win_name (str): The window name.
        wait_time (int): Value of waitKey param.
        out_file (str or None): The filename to write the image.

    Returns:
        ndarray: The image with bboxes drawn on it.
    """
    assert bboxes.ndim == 2
    assert labels.ndim == 1
    assert bboxes.shape[0] == labels.shape[0]
    assert bboxes.shape[1] == 4 or bboxes.shape[1] == 5
    canvas = np.ascontiguousarray(imread(img))

    if score_thr > 0:
        assert bboxes.shape[1] == 5
        # The last column holds the score; keep rows strictly above the
        # threshold.
        keep = bboxes[:, -1] > score_thr
        bboxes, labels = bboxes[keep, :], labels[keep]

    box_bgr = color_val(bbox_color)
    text_bgr = color_val(text_color)

    for box, cls_id in zip(bboxes, labels):
        corners = box.astype(np.int32)
        cv2.rectangle(
            canvas, (corners[0], corners[1]), (corners[2], corners[3]),
            box_bgr,
            thickness=thickness)

        if class_names is not None:
            caption = class_names[cls_id]
        else:
            caption = f'cls {cls_id}'
        if len(box) > 4:
            caption += f'|{box[-1]:.02f}'

        # Place the caption just above the top-left corner of the box.
        cv2.putText(canvas, caption, (corners[0], corners[1] - 2),
                    cv2.FONT_HERSHEY_COMPLEX, font_scale, text_bgr)

    if show:
        imshow(canvas, win_name, wait_time)
    if out_file is not None:
        imwrite(canvas, out_file)
    return canvas


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/mmcv/visualization/optflow.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
from __future__ import division

import numpy as np

from mmcv.image import rgb2bgr
from mmcv.video import flowread
from .image import imshow


def flowshow(flow, win_name='', wait_time=0):
    """Visualize an optical flow field in a window.

    The flow is loaded with ``flowread``, colorized with ``flow2rgb`` and
    converted from RGB to BGR before being shown.

    Args:
        flow (ndarray or str): The optical flow to be displayed.
        win_name (str): The window name.
        wait_time (int): Value of waitKey param.
    """
    bgr_view = rgb2bgr(flow2rgb(flowread(flow)))
    imshow(bgr_view, win_name, wait_time)


def flow2rgb(flow, color_wheel=None, unknown_thr=1e6):
    """Convert flow map to RGB image.

    The flow direction selects a (linearly interpolated) color from the color
    wheel and the flow magnitude, normalized by the largest magnitude in the
    map, controls how far the color is from white.

    Args:
        flow (ndarray): Array of optical flow, shaped (h, w, 2).
        color_wheel (ndarray or None): Color wheel used to map flow field to
            RGB colorspace, shaped (num_bins, 3). Default color wheel will be
            used if not specified.
        unknown_thr (float): Values whose absolute value is above this
            threshold (and NaNs) will be marked as unknown and rendered as
            zeros.

    Returns:
        ndarray: RGB image that can be visualized.
    """
    assert flow.ndim == 3 and flow.shape[-1] == 2
    if color_wheel is None:
        color_wheel = make_color_wheel()
    assert color_wheel.ndim == 2 and color_wheel.shape[1] == 3
    wheel_size = color_wheel.shape[0]

    # Work on copies so the caller's array is left untouched by the in-place
    # operations below.
    u = flow[:, :, 0].copy()
    v = flow[:, :, 1].copy()

    invalid = (
        np.isnan(u) | np.isnan(v) | (np.abs(u) > unknown_thr) |
        (np.abs(v) > unknown_thr))
    u[invalid] = 0
    v[invalid] = 0

    magnitude = np.sqrt(u**2 + v**2)
    if np.any(magnitude > np.finfo(float).eps):
        # Scale so that the largest displacement has unit length.
        peak = np.max(magnitude)
        u /= peak
        v /= peak

    magnitude = np.sqrt(u**2 + v**2)
    direction = np.arctan2(-v, -u) / np.pi  # in [-1, 1]

    # Fractional position on the wheel and its two neighbouring bins.
    position = (direction + 1) / 2 * (wheel_size - 1)
    lower = np.floor(position).astype(int)
    upper = (lower + 1) % wheel_size
    frac = (position - lower.astype(np.float32))[..., None]
    rgb = (1 - frac) * color_wheel[lower, :] + frac * color_wheel[upper, :]

    inside = magnitude <= 1
    # Blend towards white as the magnitude shrinks.
    rgb[inside] = 1 - magnitude[inside, None] * (1 - rgb[inside])
    rgb[~inside] *= 0.75

    rgb[invalid, :] = 0

    return rgb


def make_color_wheel(bins=None):
    """Build a color wheel.

    Args:
        bins(list or tuple, optional): Specify the number of bins for each
            color range, corresponding to six ranges: red -> yellow,
            yellow -> green, green -> cyan, cyan -> blue, blue -> magenta,
            magenta -> red. [15, 6, 4, 11, 13, 6] is used for default
            (see Middlebury).

    Returns:
        ndarray: Color wheel of shape (total_bins, 3), dtype float32.
    """
    if bins is None:
        bins = [15, 6, 4, 11, 13, 6]
    assert len(bins) == 6

    n_ry, n_yg, n_gc, n_cb, n_bm, n_mr = tuple(bins)

    def ramp(count):
        # Linear ramp 0, 1/count, ..., (count - 1)/count.
        return np.arange(count) / count

    # (R, G, B) channel profiles for each of the six transitions; scalars are
    # broadcast over the whole transition.
    transitions = [
        (1, ramp(n_ry), 0),
        (1 - ramp(n_yg), 1, 0),
        (0, 1, ramp(n_gc)),
        (0, 1 - ramp(n_cb), 1),
        (ramp(n_bm), 0, 1),
        (1, 0, 1 - ramp(n_mr)),
    ]

    total = sum((n_ry, n_yg, n_gc, n_cb, n_bm, n_mr))
    wheel = np.zeros((3, total), dtype=np.float32)

    start = 0
    for width, channels in zip(bins, transitions):
        stop = start + width
        for row, values in enumerate(channels):
            wheel[row, start:stop] = values
        start = stop

    return wheel.T


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/readme.md
================================================
test


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/setup.cfg
================================================
[bdist_wheel]
universal=1

[aliases]
test=pytest

[yapf]
based_on_style = pep8
blank_line_before_nested_class_or_def = true
split_before_expression_after_opening_paren = true

[isort]
line_length = 79
multi_line_output = 0
known_standard_library = pkg_resources,setuptools,logging,os,warnings,abc
known_first_party = mmcv
known_third_party = addict,cv2,numpy,onnx,onnxruntime,packaging,pytest,pytorch_sphinx_theme,scipy,sphinx,tensorrt,torch,torchvision,yaml,yapf
no_lines_before = STDLIB,LOCALFOLDER
default_section = THIRDPARTY

[codespell]
ignore-words-list = inout,hist


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/setup.py
================================================
import glob
import os
import platform
import re
from pkg_resources import DistributionNotFound, get_distribution
from setuptools import find_packages, setup

# Detect which extension build backend is available. EXT_TYPE stays '' when
# torch cannot be imported, otherwise it becomes 'parrots' or 'pytorch'.
# cmd_class is later passed to setup() as ``cmdclass``.
EXT_TYPE = ''
try:
    import torch
    # A torch module reporting the version string 'parrots' selects the
    # parrots build tooling instead of torch's own cpp_extension.
    if torch.__version__ == 'parrots':
        from parrots.utils.build_extension import BuildExtension
        EXT_TYPE = 'parrots'
    else:
        from torch.utils.cpp_extension import BuildExtension
        EXT_TYPE = 'pytorch'
    cmd_class = {'build_ext': BuildExtension}
except ModuleNotFoundError:
    # No build backend: install without a custom build_ext command.
    cmd_class = {}
    print('Skip building ext ops due to the absence of torch.')


def choose_requirement(primary, secondary):
    """Pick between two alternative requirement specifiers.

    Args:
        primary (str): Preferred requirement, e.g. ``'pkg>=3'``.
        secondary (str): Fallback requirement.

    Returns:
        str: ``primary`` if any version of its distribution is already
        installed, otherwise ``secondary``.
    """
    # Strip the version constraint: keep everything before the first
    # comparison character.
    dist_name = re.split(r'[!<>=]', primary)[0]
    try:
        get_distribution(dist_name)
    except DistributionNotFound:
        return secondary
    else:
        return str(primary)


def get_version():
    """Read the package version from ``mmcv/version.py``.

    The version file is executed in a dedicated namespace rather than
    imported, so the package itself does not have to be importable at
    build time.

    Returns:
        str: The value of ``__version__`` defined in the version file.

    Raises:
        KeyError: If the version file does not define ``__version__``.
    """
    version_file = 'mmcv/version.py'
    # Execute into an explicit dict. Relying on exec() writing into the
    # function's locals and reading it back through locals() is not portable:
    # under the PEP 667 semantics (Python 3.13+) each locals() call in a
    # function returns an independent snapshot, so '__version__' is missing.
    namespace = {}
    with open(version_file, 'r', encoding='utf-8') as f:
        exec(compile(f.read(), version_file, 'exec'), namespace)
    return namespace['__version__']


def parse_requirements(fname='requirements/runtime.txt', with_version=True):
    """Collect the dependency specifiers listed in a requirements file.

    Blank lines and lines starting with ``#`` are ignored, ``-r <file>`` lines
    are followed recursively and ``-e`` lines contribute the name given after
    ``#egg=``. Version constraints using ``>=``, ``==`` or ``>`` are split off
    and only re-attached when ``with_version`` is true.

    Args:
        fname (str): path to requirements file
        with_version (bool, default=True): if True include version specs

    Returns:
        List[str]: list of requirements items; empty if ``fname`` does not
        exist.

    CommandLine:
        python -c "import setup; print(setup.parse_requirements())"
    """
    import sys
    from os.path import exists
    require_fpath = fname

    # Capturing group so that re.split keeps the operator itself.
    version_ops = '(' + '|'.join(['>=', '==', '>']) + ')'

    def iter_line_entries(line):
        """Yield the parsed entries described by one requirements line."""
        if line.startswith('-r '):
            # Allow specifying requirements in other files
            yield from iter_file_entries(line.split(' ')[1])
            return

        entry = {'line': line}
        if line.startswith('-e '):
            entry['package'] = line.split('#egg=')[1]
        else:
            # Separate the package name from its version constraint
            pieces = [
                piece.strip()
                for piece in re.split(version_ops, line, maxsplit=1)
            ]
            entry['package'] = pieces[0]
            if len(pieces) > 1:
                op, rest = pieces[1:]
                if ';' in rest:
                    # Handle platform specific dependencies
                    # http://setuptools.readthedocs.io/en/latest/setuptools.html#declaring-platform-specific-dependencies
                    version, marker = map(str.strip, rest.split(';'))
                    entry['platform_deps'] = marker
                else:
                    version = rest  # NOQA
                entry['version'] = (op, version)
        yield entry

    def iter_file_entries(fpath):
        """Yield parsed entries for every meaningful line of ``fpath``."""
        with open(fpath, 'r') as f:
            for raw in f.readlines():
                stripped = raw.strip()
                if not stripped or stripped.startswith('#'):
                    continue
                yield from iter_line_entries(stripped)

    def iter_requirement_strings():
        """Yield the final requirement string of every parsed entry."""
        if not exists(require_fpath):
            return
        for entry in iter_file_entries(require_fpath):
            tokens = [entry['package']]
            if with_version and 'version' in entry:
                tokens.extend(entry['version'])
            if not sys.version.startswith('3.4'):
                # apparently package_deps are broken in 3.4
                marker = entry.get('platform_deps')
                if marker is not None:
                    tokens.append(';' + marker)
            yield ''.join(tokens)

    return list(iter_requirement_strings())


# Base runtime requirements, read from the default requirements file
# (an empty list if that file does not exist).
install_requires = parse_requirements()

try:
    # OpenCV installed via conda.
    import cv2  # NOQA: F401
    # Only the major component is checked below; ``minor`` and ``rest`` are
    # unpacked but not used.
    major, minor, *rest = cv2.__version__.split('.')
    if int(major) < 3:
        raise RuntimeError(
            f'OpenCV >=3 is required but {cv2.__version__} is installed')
except ImportError:
    # cv2 is not importable, so an OpenCV package is added to the
    # requirements: for each (primary, secondary) pair the primary is used
    # when some version of it is already installed, otherwise the secondary.
    CHOOSE_INSTALL_REQUIRES = [('opencv-python-headless>=3',
                                'opencv-python>=3')]
    for main, secondary in CHOOSE_INSTALL_REQUIRES:
        install_requires.append(choose_requirement(main, secondary))


def get_extensions():
    """Assemble the list of native extension modules to build.

    Which extensions are produced is controlled by environment variables
    and by the module-level ``EXT_TYPE``:

    - ``MMCV_WITH_TRT`` != '0': adds ``mmcv._ext_trt`` (TensorRT plugins),
      using ``TENSORRT_DIR`` to locate headers and libraries.
    - ``MMCV_WITH_OPS`` == '0' (the default): returns right after the
      optional TensorRT extension, i.e. no ``mmcv._ext`` and no ONNX Runtime
      extension are built.
    - Otherwise ``mmcv._ext`` is built with the parrots or the pytorch
      tooling, depending on ``EXT_TYPE``.
    - ``MMCV_WITH_ORT`` != '0' (pytorch only): adds ``mmcv._ext_ort``, using
      ``ONNXRUNTIME_DIR`` to locate headers and libraries.

    ``FORCE_CUDA`` == '1' enables the CUDA code paths even when no GPU is
    detected, and ``MMCV_CUDA_ARGS`` is forwarded as an extra nvcc argument.
    As a side effect, the pytorch branch sets a default for the ``MAX_JOBS``
    environment variable and, for ROCm builds, runs hipify on the op sources.

    Returns:
        list: Extension objects suitable for ``setup(ext_modules=...)``.
    """
    extensions = []

    if os.getenv('MMCV_WITH_TRT', '0') != '0':
        ext_name = 'mmcv._ext_trt'
        from torch.utils.cpp_extension import include_paths, library_paths
        library_dirs = []
        libraries = []
        include_dirs = []
        tensorrt_path = os.getenv('TENSORRT_DIR', '0')
        # Raises IndexError if no ``targets/*/lib`` directory exists under
        # TENSORRT_DIR.
        tensorrt_lib_path = glob.glob(
            os.path.join(tensorrt_path, 'targets', '*', 'lib'))[0]
        library_dirs += [tensorrt_lib_path]
        libraries += ['nvinfer', 'nvparsers', 'nvinfer_plugin']
        libraries += ['cudart']
        define_macros = []
        extra_compile_args = {'cxx': []}

        include_path = os.path.abspath('./mmcv/ops/csrc/common/cuda')
        include_trt_path = os.path.abspath('./mmcv/ops/csrc/tensorrt')
        include_dirs.append(include_path)
        include_dirs.append(include_trt_path)
        include_dirs.append(os.path.join(tensorrt_path, 'include'))
        include_dirs += include_paths(cuda=True)

        op_files = glob.glob('./mmcv/ops/csrc/tensorrt/plugins/*')
        define_macros += [('MMCV_WITH_CUDA', None)]
        define_macros += [('MMCV_WITH_TRT', None)]
        cuda_args = os.getenv('MMCV_CUDA_ARGS')
        extra_compile_args['nvcc'] = [cuda_args] if cuda_args else []
        # prevent cub/thrust conflict with other python library
        # More context See issues #1454
        extra_compile_args['nvcc'] += ['-Xcompiler=-fno-gnu-unique']
        library_dirs += library_paths(cuda=True)

        from setuptools import Extension
        ext_ops = Extension(
            name=ext_name,
            sources=op_files,
            include_dirs=include_dirs,
            define_macros=define_macros,
            extra_compile_args=extra_compile_args,
            language='c++',
            library_dirs=library_dirs,
            libraries=libraries)
        extensions.append(ext_ops)

    # Without MMCV_WITH_OPS nothing else is compiled.
    if os.getenv('MMCV_WITH_OPS', '0') == '0':
        return extensions

    if EXT_TYPE == 'parrots':
        ext_name = 'mmcv._ext'
        from parrots.utils.build_extension import Extension
        # new parrots op impl do not use MMCV_USE_PARROTS
        # define_macros = [('MMCV_USE_PARROTS', None)]
        define_macros = []
        include_dirs = []
        op_files = glob.glob('./mmcv/ops/csrc/pytorch/cuda/*.cu') +\
            glob.glob('./mmcv/ops/csrc/pytorch/cpu/*.cpp') +\
            glob.glob('./mmcv/ops/csrc/parrots/*.cpp')
        include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common'))
        include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common/cuda'))
        cuda_args = os.getenv('MMCV_CUDA_ARGS')
        extra_compile_args = {
            'nvcc': [cuda_args, '-std=c++14'] if cuda_args else ['-std=c++14'],
            'cxx': ['-std=c++14'],
        }
        if torch.cuda.is_available() or os.getenv('FORCE_CUDA', '0') == '1':
            define_macros += [('MMCV_WITH_CUDA', None)]
            extra_compile_args['nvcc'] += [
                '-D__CUDA_NO_HALF_OPERATORS__',
                '-D__CUDA_NO_HALF_CONVERSIONS__',
                '-D__CUDA_NO_HALF2_OPERATORS__',
            ]
        ext_ops = Extension(
            name=ext_name,
            sources=op_files,
            include_dirs=include_dirs,
            define_macros=define_macros,
            extra_compile_args=extra_compile_args,
            cuda=True,
            pytorch=True)
        extensions.append(ext_ops)
    elif EXT_TYPE == 'pytorch':
        ext_name = 'mmcv._ext'
        from torch.utils.cpp_extension import CppExtension, CUDAExtension

        # prevent ninja from using too many resources
        # NOTE(review): max(4, ...) yields at least 4 jobs and grows with
        # the CPU count, which does not cap resource usage - confirm
        # whether a cap was intended.
        try:
            import psutil
            num_cpu = len(psutil.Process().cpu_affinity())
            cpu_use = max(4, num_cpu - 1)
        except (ModuleNotFoundError, AttributeError):
            cpu_use = 4

        # setdefault: a MAX_JOBS value already set by the user wins.
        os.environ.setdefault('MAX_JOBS', str(cpu_use))
        define_macros = []

        # Before PyTorch1.8.0, when compiling CUDA code, `cxx` is a
        # required key passed to PyTorch. Even if there is no flag passed
        # to cxx, users also need to pass an empty list to PyTorch.
        # Since PyTorch1.8.0, it has a default value so users do not need
        # to pass an empty list anymore.
        # More details at https://github.com/pytorch/pytorch/pull/45956
        extra_compile_args = {'cxx': []}

        # Since the PR (https://github.com/open-mmlab/mmcv/pull/1463) uses
        # c++14 features, the argument ['std=c++14'] must be added here.
        # However, in the windows environment, some standard libraries
        # will depend on c++17 or higher. In fact, for the windows
        # environment, the compiler will choose the appropriate compiler
        # to compile those cpp files, so there is no need to add the
        # argument
        if platform.system() != 'Windows':
            extra_compile_args['cxx'] = ['-std=c++14']

        include_dirs = []

        # ROCm build: torch reports a HIP version and ROCM_HOME is known.
        is_rocm_pytorch = False
        try:
            from torch.utils.cpp_extension import ROCM_HOME
            is_rocm_pytorch = True if ((torch.version.hip is not None) and
                                       (ROCM_HOME is not None)) else False
        except ImportError:
            pass

        project_dir = 'mmcv/ops/csrc/'
        if is_rocm_pytorch:
            from torch.utils.hipify import hipify_python

            # Output directory equals the project directory, so the hipified
            # sources are written next to the originals.
            hipify_python.hipify(
                project_directory=project_dir,
                output_directory=project_dir,
                includes='mmcv/ops/csrc/*',
                show_detailed=True,
                is_pytorch_extension=True,
            )
            define_macros += [('MMCV_WITH_CUDA', None)]
            define_macros += [('HIP_DIFF', None)]
            cuda_args = os.getenv('MMCV_CUDA_ARGS')
            extra_compile_args['nvcc'] = [cuda_args] if cuda_args else []
            op_files = glob.glob('./mmcv/ops/csrc/pytorch/hip/*') \
                + glob.glob('./mmcv/ops/csrc/pytorch/cpu/hip/*')
            extension = CUDAExtension
            include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common/hip'))
        elif torch.cuda.is_available() or os.getenv('FORCE_CUDA', '0') == '1':
            define_macros += [('MMCV_WITH_CUDA', None)]
            cuda_args = os.getenv('MMCV_CUDA_ARGS')
            extra_compile_args['nvcc'] = [cuda_args] if cuda_args else []
            op_files = glob.glob('./mmcv/ops/csrc/pytorch/*.cpp') + \
                glob.glob('./mmcv/ops/csrc/pytorch/cpu/*.cpp') + \
                glob.glob('./mmcv/ops/csrc/pytorch/cuda/*.cu') + \
                glob.glob('./mmcv/ops/csrc/pytorch/cuda/*.cpp')
            extension = CUDAExtension
            include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common'))
            include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common/cuda'))
        else:
            print(f'Compiling {ext_name} without CUDA')
            op_files = glob.glob('./mmcv/ops/csrc/pytorch/*.cpp') + \
                glob.glob('./mmcv/ops/csrc/pytorch/cpu/*.cpp')
            extension = CppExtension
            include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common'))

        # Since the PR (https://github.com/open-mmlab/mmcv/pull/1463) uses
        # c++14 features, the argument ['std=c++14'] must be added here.
        # However, in the windows environment, some standard libraries
        # will depend on c++17 or higher. In fact, for the windows
        # environment, the compiler will choose the appropriate compiler
        # to compile those cpp files, so there is no need to add the
        # argument
        if 'nvcc' in extra_compile_args and platform.system() != 'Windows':
            extra_compile_args['nvcc'] += ['-std=c++14']

        ext_ops = extension(
            name=ext_name,
            sources=op_files,
            include_dirs=include_dirs,
            define_macros=define_macros,
            extra_compile_args=extra_compile_args)
        extensions.append(ext_ops)

    if EXT_TYPE == 'pytorch' and os.getenv('MMCV_WITH_ORT', '0') != '0':
        ext_name = 'mmcv._ext_ort'
        from torch.utils.cpp_extension import library_paths, include_paths
        import onnxruntime
        library_dirs = []
        libraries = []
        include_dirs = []
        ort_path = os.getenv('ONNXRUNTIME_DIR', '0')
        library_dirs += [os.path.join(ort_path, 'lib')]
        libraries.append('onnxruntime')
        define_macros = []
        extra_compile_args = {'cxx': []}

        include_path = os.path.abspath('./mmcv/ops/csrc/onnxruntime')
        include_dirs.append(include_path)
        include_dirs.append(os.path.join(ort_path, 'include'))

        op_files = glob.glob('./mmcv/ops/csrc/onnxruntime/cpu/*')
        # GPU ops are added when the installed onnxruntime reports a GPU
        # device or CUDA is forced.
        if onnxruntime.get_device() == 'GPU' or os.getenv('FORCE_CUDA',
                                                          '0') == '1':
            define_macros += [('MMCV_WITH_CUDA', None)]
            cuda_args = os.getenv('MMCV_CUDA_ARGS')
            extra_compile_args['nvcc'] = [cuda_args] if cuda_args else []
            op_files += glob.glob('./mmcv/ops/csrc/onnxruntime/gpu/*')
            include_dirs += include_paths(cuda=True)
            library_dirs += library_paths(cuda=True)
        else:
            include_dirs += include_paths(cuda=False)
            library_dirs += library_paths(cuda=False)

        from setuptools import Extension
        ext_ops = Extension(
            name=ext_name,
            sources=op_files,
            include_dirs=include_dirs,
            define_macros=define_macros,
            extra_compile_args=extra_compile_args,
            language='c++',
            library_dirs=library_dirs,
            libraries=libraries)
        extensions.append(ext_ops)

    return extensions


# Package definition. The distribution is named 'mmcv' unless MMCV_WITH_OPS
# is set to something other than '0', in which case it is 'mmcv-full'.
setup(
    name='mmcv' if os.getenv('MMCV_WITH_OPS', '0') == '0' else 'mmcv-full',
    version=get_version(),
    description='OpenMMLab Computer Vision Foundation',
    keywords='computer vision',
    packages=find_packages(),
    include_package_data=True,
    classifiers=[
        'Development Status :: 4 - Beta',
        'License :: OSI Approved :: Apache Software License',
        'Operating System :: OS Independent',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.6',
        'Programming Language :: Python :: 3.7',
        'Programming Language :: Python :: 3.8',
        'Programming Language :: Python :: 3.9',
        'Topic :: Utilities',
    ],
    url='https://github.com/open-mmlab/mmcv',
    author='MMCV Contributors',
    author_email='openmmlab@gmail.com',
    # Runtime requirements plus, when cv2 was not importable, an OpenCV wheel.
    install_requires=install_requires,
    # Optional dependency groups, each read from its own requirements file.
    extras_require={
        'all': parse_requirements('requirements.txt'),
        'tests': parse_requirements('requirements/test.txt'),
        'build': parse_requirements('requirements/build.txt'),
        'optional': parse_requirements('requirements/optional.txt'),
    },
    ext_modules=get_extensions(),
    # Empty when torch is missing, otherwise provides the build_ext command.
    cmdclass=cmd_class,
    zip_safe=False)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_arraymisc.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
from __future__ import division

import numpy as np
import pytest

import mmcv


def test_quantize():
    """Check quantize output shape, dtype, per-element values and errors."""
    arr = np.random.randn(10, 10)
    levels = 20

    qarr = mmcv.quantize(arr, -1, 1, levels)
    assert qarr.shape == arr.shape
    assert qarr.dtype == np.dtype('int64')
    # Reference: clip to [-1, 1], map onto 20 equal bins, cap at the last bin.
    for index, value in np.ndenumerate(arr):
        clipped = max(min(value, 1), -1)
        expected = min(levels - 1, int(np.floor(10 * (1 + clipped))))
        assert qarr[index] == expected

    qarr = mmcv.quantize(arr, -1, 1, 20, dtype=np.uint8)
    assert qarr.shape == arr.shape
    assert qarr.dtype == np.dtype('uint8')

    # Invalid level counts and an inverted value range are rejected.
    with pytest.raises(ValueError):
        mmcv.quantize(arr, -1, 1, levels=0)
    with pytest.raises(ValueError):
        mmcv.quantize(arr, -1, 1, levels=10.0)
    with pytest.raises(ValueError):
        mmcv.quantize(arr, 2, 1, levels)


def test_dequantize():
    """Check dequantize output shape, dtype, per-element values and errors."""
    levels = 20
    qarr = np.random.randint(levels, size=(10, 10))

    arr = mmcv.dequantize(qarr, -1, 1, levels)
    assert arr.shape == qarr.shape
    assert arr.dtype == np.dtype('float64')
    # Each level maps to the centre of its bin within [-1, 1].
    for index, level in np.ndenumerate(qarr):
        assert arr[index] == (level + 0.5) / 10 - 1

    arr = mmcv.dequantize(qarr, -1, 1, levels, dtype=np.float32)
    assert arr.shape == qarr.shape
    assert arr.dtype == np.dtype('float32')

    # Invalid level counts and an inverted value range are rejected.
    with pytest.raises(ValueError):
        mmcv.dequantize(arr, -1, 1, levels=0)
    with pytest.raises(ValueError):
        mmcv.dequantize(arr, -1, 1, levels=10.0)
    with pytest.raises(ValueError):
        mmcv.dequantize(arr, 2, 1, levels)


def test_joint():
    """Round-trip quantize -> dequantize and check the reconstruction error."""
    arr = np.random.randn(100, 100)
    levels = 1000
    recover = mmcv.dequantize(
        mmcv.quantize(arr, -1, 1, levels), -1, 1, levels)

    # Out-of-range inputs land in the centre of the first / last bin.
    too_low = arr < -1
    too_high = arr > 1
    in_range = (arr >= -1) & (arr <= 1)
    assert np.abs(recover[too_low] + 0.999).max() < 1e-6
    assert np.abs(recover[too_high] - 0.999).max() < 1e-6
    assert np.abs((recover - arr)[in_range]).max() <= 1e-3

    # With an odd number of levels, values close to zero all fall into the
    # middle bin, whose centre is exactly 0.
    arr = np.clip(np.random.randn(100) / 1000, -0.01, 0.01)
    levels = 99
    recover = mmcv.dequantize(
        mmcv.quantize(arr, -1, 1, levels), -1, 1, levels)
    assert np.all(recover == 0)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_cnn/test_build_layers.py
================================================
import numpy as np
import pytest
import torch
import torch.nn as nn

from mmcv.cnn.bricks import (ACTIVATION_LAYERS, CONV_LAYERS, NORM_LAYERS,
                             PADDING_LAYERS, PLUGIN_LAYERS,
                             build_activation_layer, build_conv_layer,
                             build_norm_layer, build_padding_layer,
                             build_plugin_layer, build_upsample_layer, is_norm)
from mmcv.cnn.bricks.norm import infer_abbr as infer_norm_abbr
from mmcv.cnn.bricks.plugin import infer_abbr as infer_plugin_abbr
from mmcv.cnn.bricks.upsample import PixelShufflePack
from mmcv.utils.parrots_wrapper import _BatchNorm


def test_build_conv_layer():
    """Check cfg validation and the layers produced by build_conv_layer."""
    # cfg must be a dict
    with pytest.raises(TypeError):
        build_conv_layer('Conv2d')

    # `type` must be in cfg
    with pytest.raises(KeyError):
        build_conv_layer(dict(kernel_size=3))

    # unsupported conv type
    with pytest.raises(KeyError):
        build_conv_layer(dict(type='FancyConv'))

    kwargs = dict(
        in_channels=4, out_channels=8, kernel_size=3, groups=2, dilation=2)

    def check_channels(layer):
        assert layer.in_channels == kwargs['in_channels']
        assert layer.out_channels == kwargs['out_channels']

    def check_geometry(layer):
        kernel, dilation = kwargs['kernel_size'], kwargs['dilation']
        assert layer.kernel_size == (kernel, kernel)
        assert layer.groups == kwargs['groups']
        assert layer.dilation == (dilation, dilation)

    # A missing cfg falls back to a plain 2D convolution; 'Conv' and 'deconv'
    # map to Conv2d and ConvTranspose2d respectively.
    expectations = (
        (None, nn.Conv2d),
        (dict(type='Conv'), nn.Conv2d),
        (dict(type='deconv'), nn.ConvTranspose2d),
    )
    for cfg, expected_cls in expectations:
        layer = build_conv_layer(cfg, **kwargs)
        assert isinstance(layer, expected_cls)
        check_channels(layer)
        check_geometry(layer)

    # Every registered conv type can be built by name.
    for type_name, module in CONV_LAYERS.module_dict.items():
        layer = build_conv_layer(dict(type=type_name), **kwargs)
        assert isinstance(layer, module)
        check_channels(layer)


def test_infer_norm_abbr():
    """Exercise ``infer_norm_abbr`` with a non-class, an explicit ``_abbr_``
    attribute and several plain classes identified by name."""
    # class_type must be a class
    with pytest.raises(TypeError):
        infer_norm_abbr(0)

    # a class that carries ``_abbr_`` is expected to yield that value
    explicit_cls = type('MyNorm', (), {'_abbr_': 'mn'})
    assert infer_norm_abbr(explicit_cls) == 'mn'

    # plain classes without ``_abbr_``: expected abbreviation per class name
    name_to_abbr = (
        ('FancyBatchNorm', 'bn'),
        ('FancyInstanceNorm', 'in'),
        ('FancyLayerNorm', 'ln'),
        ('FancyGroupNorm', 'gn'),
        ('FancyNorm', 'norm_layer'),
    )
    for cls_name, abbr in name_to_abbr:
        plain_cls = type(cls_name, (), {})
        assert infer_norm_abbr(plain_cls) == abbr


def test_build_norm_layer():
    """Cover the rejected inputs of ``build_norm_layer`` and then build every
    norm type found in ``NORM_LAYERS`` (except ``MMSyncBN``)."""
    # cfg must be a dict
    with pytest.raises(TypeError):
        build_norm_layer('BN', 3)

    # `type` must be in cfg
    with pytest.raises(KeyError):
        build_norm_layer(dict(), 3)

    # unsupported norm type
    with pytest.raises(KeyError):
        build_norm_layer(dict(type='FancyNorm'), 3)

    # postfix must be int or str
    with pytest.raises(AssertionError):
        build_norm_layer(dict(type='BN'), 3, postfix=[1, 2])

    # `num_groups` must be in cfg when using 'GN'
    with pytest.raises(AssertionError):
        build_norm_layer(dict(type='GN'), 3)

    # expected name prefix for each registered norm type
    expected_abbr = {
        **dict.fromkeys(('BN', 'BN1d', 'BN2d', 'BN3d', 'SyncBN'), 'bn'),
        'GN': 'gn',
        'LN': 'ln',
        **dict.fromkeys(('IN', 'IN1d', 'IN2d', 'IN3d'), 'in'),
    }
    for type_name, module in NORM_LAYERS.module_dict.items():
        if type_name == 'MMSyncBN':  # skip MMSyncBN
            continue
        is_group_norm = type_name == 'GN'
        for postfix in ('_test', 1):
            cfg = {'type': type_name}
            if is_group_norm:
                # GN cannot be built without `num_groups` (see above)
                cfg['num_groups'] = 2
            name, layer = build_norm_layer(cfg, 3, postfix=postfix)
            assert name == expected_abbr[type_name] + str(postfix)
            assert isinstance(layer, module)
            if is_group_norm:
                assert layer.num_channels == 3
                assert layer.num_groups == cfg['num_groups']
            elif type_name != 'LN':
                # LN is not checked for `num_features`
                assert layer.num_features == 3


def test_build_activation_layer():
    """Check ``build_activation_layer`` error handling, every registered
    activation type, and the output range of the clamp activation."""
    # cfg must be a dict
    with pytest.raises(TypeError):
        build_activation_layer('ReLU')

    # `type` must be in cfg
    with pytest.raises(KeyError):
        build_activation_layer(dict())

    # unsupported activation type
    with pytest.raises(KeyError):
        build_activation_layer(dict(type='FancyReLU'))

    # every entry of the registry must build into its registered class
    for type_name, module in ACTIVATION_LAYERS.module_dict.items():
        built = build_activation_layer({'type': type_name})
        assert isinstance(built, module)

    # sanity check for Clamp: (cfg, expected lower bound, expected upper bound)
    clamp_cases = (
        (dict(type='Clamp'), -1, 1),
        (dict(type='Clip', min=0), 0, 1),
        (dict(type='Clamp', max=0), -1, 0),
    )
    # large magnitudes so that most samples fall outside every tested range
    x = torch.randn(10) * 1000
    for clamp_cfg, lower, upper in clamp_cases:
        y = build_activation_layer(clamp_cfg)(x)
        above = (y >= lower).numpy()
        below = (y <= upper).numpy()
        assert np.logical_and(above, below).all()


def test_build_padding_layer():
    """Check ``build_padding_layer`` error handling, every registered padding
    type, and the output shape of reflect padding."""
    # cfg must be a dict
    with pytest.raises(TypeError):
        build_padding_layer('reflect')

    # `type` must be in cfg
    with pytest.raises(KeyError):
        build_padding_layer(dict())

    # unsupported padding type
    with pytest.raises(KeyError):
        build_padding_layer(dict(type='FancyPad'))

    # every entry of the registry must build into its registered class
    for type_name, module in PADDING_LAYERS.module_dict.items():
        built = build_padding_layer({'type': type_name}, 2)
        assert isinstance(built, module)

    # padding of 2 grows a 5x5 map to 9x9
    sample = torch.randn(1, 2, 5, 5)
    reflect_pad = build_padding_layer(dict(type='reflect'), 2)
    padded = reflect_pad(sample)
    assert padded.shape == (1, 2, 9, 9)


def test_upsample_layer():
    """Check ``build_upsample_layer`` for interpolation, deconv and
    pixel-shuffle configurations, plus its error handling."""
    # cfg must be a dict
    with pytest.raises(TypeError):
        build_upsample_layer('bilinear')

    # `type` must be in cfg
    with pytest.raises(KeyError):
        build_upsample_layer(dict())

    # unsupported upsample type
    with pytest.raises(KeyError):
        build_upsample_layer(dict(type='FancyUpsample'))

    # interpolation modes map to nn.Upsample with the same mode
    for mode in ('nearest', 'bilinear'):
        upsampler = build_upsample_layer({'type': mode})
        assert isinstance(upsampler, nn.Upsample)
        assert upsampler.mode == mode

    # deconv with all arguments carried inside cfg
    deconv_cfg = dict(
        type='deconv', in_channels=3, out_channels=3, kernel_size=3, stride=2)
    assert isinstance(build_upsample_layer(deconv_cfg), nn.ConvTranspose2d)

    kwargs = dict(in_channels=3, out_channels=3, kernel_size=3, stride=2)

    def _assert_deconv(deconv):
        # shared checks for a deconv layer built from `kwargs`
        assert isinstance(deconv, nn.ConvTranspose2d)
        assert deconv.in_channels == kwargs['in_channels']
        assert deconv.out_channels == kwargs['out_channels']
        assert deconv.kernel_size == (kwargs['kernel_size'],
                                      kwargs['kernel_size'])
        assert deconv.stride == (kwargs['stride'], kwargs['stride'])

    cfg = dict(type='deconv')
    # deconv arguments supplied as keywords ...
    _assert_deconv(build_upsample_layer(cfg, **kwargs))
    # ... and the same values supplied positionally
    _assert_deconv(build_upsample_layer(cfg, 3, 3, 3, 2))

    # pixel shuffle
    shuffle_cfg = dict(
        type='pixel_shuffle',
        in_channels=3,
        out_channels=3,
        scale_factor=2,
        upsample_kernel=3)
    shuffle = build_upsample_layer(shuffle_cfg)

    assert isinstance(shuffle, PixelShufflePack)
    assert shuffle.scale_factor == 2
    assert shuffle.upsample_kernel == 3


def test_pixel_shuffle_pack():
    """``PixelShufflePack`` with scale factor 2 doubles both spatial sizes."""
    source = torch.rand(2, 3, 10, 10)
    packer = PixelShufflePack(3, 3, scale_factor=2, upsample_kernel=3)
    # upsample_kernel=3 shows up as a 3x3 kernel on the internal conv
    assert packer.upsample_conv.kernel_size == (3, 3)
    upsampled = packer(source)
    assert upsampled.shape == (2, 3, 20, 20)


def test_is_norm():
    """Check ``is_norm`` on torch norm layers, a subclass, a non-norm layer and
    invalid ``exclude`` values."""
    # (norm class, constructor arguments); GroupNorm alone needs two arguments
    single_arg_norms = (
        nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d, nn.InstanceNorm1d,
        nn.InstanceNorm2d, nn.InstanceNorm3d, nn.LayerNorm)
    norm_cases = [(norm_type, (3, )) for norm_type in single_arg_norms]
    norm_cases.append((nn.GroupNorm, (3, 6)))

    for norm_type, ctor_args in norm_cases:
        layer = norm_type(*ctor_args)
        assert is_norm(layer)
        # excluding the layer's own type turns the answer around
        assert not is_norm(layer, exclude=(norm_type, ))

    class MyNorm(nn.BatchNorm2d):
        """Subclass used to check that ``exclude`` applies to base classes."""

    custom = MyNorm(3)
    assert is_norm(custom)
    # `exclude` is accepted both as a bare class and as a tuple of classes
    assert not is_norm(custom, exclude=_BatchNorm)
    assert not is_norm(custom, exclude=(_BatchNorm, ))

    assert not is_norm(nn.Conv2d(3, 8, 1))

    # strings are rejected, whether bare or wrapped in a tuple
    batch_norm = nn.BatchNorm1d(3)
    for bad_exclude in ('BN', ('BN', )):
        with pytest.raises(TypeError):
            is_norm(batch_norm, exclude=bad_exclude)


def test_infer_plugin_abbr():
    """Exercise ``infer_plugin_abbr`` with a non-class, an explicit ``_abbr_``
    attribute and a plain class."""
    # class_type must be a class
    with pytest.raises(TypeError):
        infer_plugin_abbr(0)

    # a class that carries ``_abbr_`` is expected to yield that value
    explicit_cls = type('MyPlugin', (), {'_abbr_': 'mp'})
    assert infer_plugin_abbr(explicit_cls) == 'mp'

    # without ``_abbr_`` the expected result is the snake-cased class name
    plain_cls = type('FancyPlugin', (), {})
    assert infer_plugin_abbr(plain_cls) == 'fancy_plugin'


def test_build_plugin_layer():
    """Check ``build_plugin_layer`` error handling and the name/type it returns
    for four plugin types."""
    # cfg must be a dict
    with pytest.raises(TypeError):
        build_plugin_layer('Plugin')

    # `type` must be in cfg
    with pytest.raises(KeyError):
        build_plugin_layer(dict())

    # unsupported plugin type
    with pytest.raises(KeyError):
        build_plugin_layer(dict(type='FancyPlugin'))

    # postfix must be int or str
    with pytest.raises(AssertionError):
        build_plugin_layer(dict(type='ConvModule'), postfix=[1, 2])

    # (plugin type, expected name prefix, constructor keyword arguments)
    plugin_cases = (
        ('ContextBlock', 'context_block', dict(in_channels=16,
                                               ratio=1. / 4)),
        ('GeneralizedAttention', 'gen_attention_block', dict(in_channels=16)),
        ('NonLocal2d', 'nonlocal_block', dict(in_channels=16)),
        ('ConvModule', 'conv_block',
         dict(in_channels=16, out_channels=4, kernel_size=3)),
    )
    for plugin_type, prefix, ctor_kwargs in plugin_cases:
        for postfix in ('', '_test', 1):
            name, layer = build_plugin_layer(
                dict(type=plugin_type), postfix=postfix, **ctor_kwargs)
            # the returned name is the prefix followed by the postfix
            assert name == prefix + str(postfix)
            assert isinstance(layer, PLUGIN_LAYERS.module_dict[plugin_type])


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_cnn/test_context_block.py
================================================
import pytest
import torch

from mmcv.cnn.bricks import ContextBlock


def test_context_block():
    """Validate ``ContextBlock`` argument checks, created sub-modules and
    output shapes for each pooling and fusion setting."""
    invalid_kwargs = (
        # pooling_type should be in ['att', 'avg']
        dict(pooling_type='unsupport_type'),
        # fusion_types should be of type list or tuple
        dict(fusion_types='unsupport_type'),
        # fusion_types should be in ['channel_add', 'channel_mul']
        dict(fusion_types=('unsupport_type', )),
    )
    for bad_kwargs in invalid_kwargs:
        with pytest.raises(AssertionError):
            ContextBlock(16, 1. / 4, **bad_kwargs)

    # test pooling_type='att'
    feats = torch.randn(2, 16, 20, 20)
    block = ContextBlock(16, 1. / 4, pooling_type='att')
    result = block(feats)
    assert block.conv_mask.in_channels == 16
    assert block.conv_mask.out_channels == 1
    assert result.shape == feats.shape

    # test pooling_type='avg'
    feats = torch.randn(2, 16, 20, 20)
    block = ContextBlock(16, 1. / 4, pooling_type='avg')
    result = block(feats)
    assert hasattr(block, 'avg_pool')
    assert result.shape == feats.shape

    # (fusion_types, channel_add_conv expected, channel_mul_conv expected)
    fusion_cases = (
        (('channel_add', ), True, False),
        (('channel_mul', ), False, True),
        (('channel_add', 'channel_mul'), True, True),
    )
    for fusion_types, has_add, has_mul in fusion_cases:
        feats = torch.randn(2, 16, 20, 20)
        block = ContextBlock(16, 1. / 4, fusion_types=fusion_types)
        result = block(feats)
        # a fusion branch that was not requested is left as None
        assert (block.channel_add_conv is not None) is has_add
        assert (block.channel_mul_conv is not None) is has_mul
        assert result.shape == feats.shape


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_cnn/test_conv2d_adaptive_padding.py
================================================
import torch

from mmcv.cnn.bricks import Conv2dAdaptivePadding


def test_conv2d_samepadding():
    """Check the output size of ``Conv2dAdaptivePadding`` for even and odd
    input sizes with stride 1 and stride 2."""
    # (input side, stride, expected output side)
    size_cases = (
        (28, 1, 28),
        (13, 1, 13),
        (28, 2, 14),
        (13, 2, 7),
    )
    for in_side, stride, out_side in size_cases:
        inputs = torch.rand((1, 3, in_side, in_side))
        conv = Conv2dAdaptivePadding(3, 3, kernel_size=3, stride=stride)
        output = conv(inputs)
        assert output.shape == torch.Size([1, 3, out_side, out_side])


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_cnn/test_conv_module.py
================================================
import warnings
from unittest.mock import patch

import pytest
import torch
import torch.nn as nn

from mmcv.cnn.bricks import CONV_LAYERS, ConvModule, HSigmoid, HSwish


@CONV_LAYERS.register_module()
class ExampleConv(nn.Module):
    """Minimal conv layer registered in ``CONV_LAYERS`` for these tests.

    The constructor arguments are stored as plain attributes. The only
    computation is a single ``nn.Conv2d`` whose weight is zero-filled by
    ``init_weights`` at construction time.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 bias=True,
                 norm_cfg=None):
        super().__init__()
        # fixed values, not derived from any argument
        self.transposed = False
        self.output_padding = (0, 0, 0)

        # constructor arguments are kept verbatim as attributes
        self.in_channels, self.out_channels = in_channels, out_channels
        self.kernel_size = kernel_size
        self.stride, self.padding = stride, padding
        self.dilation, self.groups = dilation, groups
        self.bias = bias
        self.norm_cfg = norm_cfg

        # only channels and kernel size are forwarded to the wrapped conv
        self.conv0 = nn.Conv2d(in_channels, out_channels, kernel_size)
        self.init_weights()

    def forward(self, x):
        """Apply the wrapped convolution to ``x``."""
        return self.conv0(x)

    def init_weights(self):
        """Set every element of the wrapped conv's weight to zero."""
        nn.init.constant_(self.conv0.weight, 0)


def test_conv_module():
    """Check ``ConvModule`` argument validation, its conv/norm/act
    combinations, and several optional features and activations."""
    # conv_cfg must be a dict or None
    with pytest.raises(AssertionError):
        ConvModule(3, 8, 2, conv_cfg='conv')

    # norm_cfg must be a dict or None
    with pytest.raises(AssertionError):
        ConvModule(3, 8, 2, norm_cfg='norm')

    # softmax is not supported
    with pytest.raises(KeyError):
        ConvModule(3, 8, 2, act_cfg=dict(type='softmax'))

    # conv + norm + act
    module = ConvModule(3, 8, 2, norm_cfg=dict(type='BN'))
    assert module.with_activation
    assert hasattr(module, 'activate')
    assert module.with_norm
    assert hasattr(module, 'norm')
    x = torch.rand(1, 3, 256, 256)
    assert module(x).shape == (1, 8, 255, 255)

    # conv + act
    module = ConvModule(3, 8, 2)
    assert module.with_activation
    assert hasattr(module, 'activate')
    assert not module.with_norm
    assert module.norm is None
    x = torch.rand(1, 3, 256, 256)
    assert module(x).shape == (1, 8, 255, 255)

    # conv
    module = ConvModule(3, 8, 2, act_cfg=None)
    assert not module.with_norm
    assert module.norm is None
    assert not module.with_activation
    assert not hasattr(module, 'activate')
    x = torch.rand(1, 3, 256, 256)
    assert module(x).shape == (1, 8, 255, 255)

    # conv with its own `init_weights` method: the zero weights must survive
    custom = ConvModule(
        3, 8, 2, conv_cfg=dict(type='ExampleConv'), act_cfg=None)
    assert torch.equal(custom.conv.conv0.weight, torch.zeros(8, 3, 2, 2))

    # the remaining cases reuse the last `x` with 3x3 kernels and padding=1,
    # so the spatial size stays at 256x256
    same_shape = (1, 8, 256, 256)

    # with_spectral_norm=True
    module = ConvModule(3, 8, 3, padding=1, with_spectral_norm=True)
    assert hasattr(module.conv, 'weight_orig')
    assert module(x).shape == same_shape

    # padding_mode='reflect'
    module = ConvModule(3, 8, 3, padding=1, padding_mode='reflect')
    assert isinstance(module.padding_layer, nn.ReflectionPad2d)
    assert module(x).shape == same_shape

    # non-existing padding mode
    with pytest.raises(KeyError):
        ConvModule(3, 8, 3, padding=1, padding_mode='non_exists')

    # (activation type in act_cfg, class expected on `activate`)
    activation_cases = (
        ('LeakyReLU', nn.LeakyReLU),
        ('Tanh', nn.Tanh),
        ('Sigmoid', nn.Sigmoid),
        ('PReLU', nn.PReLU),
        ('HSwish', HSwish),
        ('HSigmoid', HSigmoid),
    )
    for act_type, act_class in activation_cases:
        module = ConvModule(3, 8, 3, padding=1, act_cfg=dict(type=act_type))
        assert isinstance(module.activate, act_class)
        assert module(x).shape == same_shape


def test_bias():
    """Check how ``ConvModule`` resolves the conv bias and when it warns about
    a bias placed before a norm layer."""
    # bias: auto, without norm
    assert ConvModule(3, 8, 2).conv.bias is not None

    # bias: auto, with norm
    assert ConvModule(3, 8, 2, norm_cfg=dict(type='BN')).conv.bias is None

    # bias: False, without norm
    assert ConvModule(3, 8, 2, bias=False).conv.bias is None

    # bias: True, with batch norm and with instance norm -> one warning each
    expected_msg = 'Unnecessary conv bias before batch/instance norm'
    for norm_type in ('BN', 'IN'):
        with pytest.warns(UserWarning) as caught:
            ConvModule(3, 8, 2, bias=True, norm_cfg=dict(type=norm_type))
        assert len(caught) == 1
        assert caught[0].message.args[0] == expected_msg

    # bias: True, with other norm. The explicit warning below is the only one
    # expected inside the block, so ConvModule itself must stay silent.
    with pytest.warns(UserWarning) as caught:
        group_norm_cfg = dict(type='GN', num_groups=1)
        ConvModule(3, 8, 2, bias=True, norm_cfg=group_norm_cfg)
        warnings.warn('No warnings')
    assert len(caught) == 1
    assert caught[0].message.args[0] == 'No warnings'


def conv_forward(self, x):
    """Stand-in forward that appends ``'_conv'`` to ``x``."""
    tag = '_conv'
    return x + tag


def bn_forward(self, x):
    """Stand-in forward that appends ``'_bn'`` to ``x``."""
    tag = '_bn'
    return x + tag


def relu_forward(self, x):
    """Stand-in forward that appends ``'_relu'`` to ``x``."""
    tag = '_relu'
    return x + tag


@patch('torch.nn.ReLU.forward', relu_forward)
@patch('torch.nn.BatchNorm2d.forward', bn_forward)
@patch('torch.nn.Conv2d.forward', conv_forward)
def test_order():
    """Check the ``order`` argument of ``ConvModule``.

    The forward methods of ``Conv2d``, ``BatchNorm2d`` and ``ReLU`` are
    patched to append a tag to their input, so feeding a string through the
    module spells out the order in which the layers ran.
    """
    invalid_orders = (
        # order must be a tuple
        ['conv', 'norm', 'act'],
        # length of order must be 3
        ('conv', 'norm'),
        # order must be an order of 'conv', 'norm', 'act'
        ('conv', 'norm', 'norm'),
        ('conv', 'norm', 'something'),
    )
    for bad_order in invalid_orders:
        with pytest.raises(AssertionError):
            ConvModule(3, 8, 2, order=bad_order)

    # ('conv', 'norm', 'act')
    module = ConvModule(3, 8, 2, norm_cfg=dict(type='BN'))
    assert module('input') == 'input_conv_bn_relu'

    # ('norm', 'conv', 'act')
    module = ConvModule(
        3, 8, 2, norm_cfg=dict(type='BN'), order=('norm', 'conv', 'act'))
    assert module('input') == 'input_bn_conv_relu'

    # ('conv', 'norm', 'act'), activate=False
    module = ConvModule(3, 8, 2, norm_cfg=dict(type='BN'))
    assert module('input', activate=False) == 'input_conv_bn'

    # ('conv', 'norm', 'act'), norm=False
    module = ConvModule(3, 8, 2, norm_cfg=dict(type='BN'))
    assert module('input', norm=False) == 'input_conv_relu'


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_cnn/test_depthwise_seperable_conv_module.py
================================================
import pytest
import torch
import torch.nn as nn

from mmcv.cnn.bricks import DepthwiseSeparableConvModule


def test_depthwise_separable_conv():
    """Check ``DepthwiseSeparableConvModule``: argument validation, the norm
    and activation settings of its two convs, and output shapes."""
    # passing `groups` explicitly is expected to trip an assertion
    with pytest.raises(AssertionError):
        DepthwiseSeparableConvModule(4, 8, 2, groups=2)

    def _act_name(conv_module):
        # class name of the activation attached to a sub-module
        return type(conv_module.activate).__name__

    # 2x2 kernels without padding shrink 256 to 255
    shrunk_shape = (1, 8, 255, 255)
    # 3x3 kernels with padding=1 keep 256
    same_shape = (1, 8, 256, 256)

    # test default config
    module = DepthwiseSeparableConvModule(3, 8, 2)
    assert module.depthwise_conv.conv.groups == 3
    assert module.pointwise_conv.conv.kernel_size == (1, 1)
    assert not module.depthwise_conv.with_norm
    assert not module.pointwise_conv.with_norm
    assert _act_name(module.depthwise_conv) == 'ReLU'
    assert _act_name(module.pointwise_conv) == 'ReLU'
    x = torch.rand(1, 3, 256, 256)
    assert module(x).shape == shrunk_shape

    # test dw_norm_cfg
    module = DepthwiseSeparableConvModule(
        3, 8, 2, dw_norm_cfg=dict(type='BN'))
    assert module.depthwise_conv.norm_name == 'bn'
    assert not module.pointwise_conv.with_norm
    x = torch.rand(1, 3, 256, 256)
    assert module(x).shape == shrunk_shape

    # test pw_norm_cfg
    module = DepthwiseSeparableConvModule(
        3, 8, 2, pw_norm_cfg=dict(type='BN'))
    assert not module.depthwise_conv.with_norm
    assert module.pointwise_conv.norm_name == 'bn'
    x = torch.rand(1, 3, 256, 256)
    assert module(x).shape == shrunk_shape

    # test norm_cfg
    module = DepthwiseSeparableConvModule(3, 8, 2, norm_cfg=dict(type='BN'))
    assert module.depthwise_conv.norm_name == 'bn'
    assert module.pointwise_conv.norm_name == 'bn'
    x = torch.rand(1, 3, 256, 256)
    assert module(x).shape == shrunk_shape

    # add test for ['norm', 'conv', 'act']
    module = DepthwiseSeparableConvModule(
        3, 8, 2, order=('norm', 'conv', 'act'))
    x = torch.rand(1, 3, 256, 256)
    assert module(x).shape == shrunk_shape

    # the remaining cases reuse the last `x`

    # with_spectral_norm=True reaches both convs
    module = DepthwiseSeparableConvModule(
        3, 8, 3, padding=1, with_spectral_norm=True)
    assert hasattr(module.depthwise_conv.conv, 'weight_orig')
    assert hasattr(module.pointwise_conv.conv, 'weight_orig')
    assert module(x).shape == same_shape

    # padding_mode='reflect'
    module = DepthwiseSeparableConvModule(
        3, 8, 3, padding=1, padding_mode='reflect')
    assert isinstance(module.depthwise_conv.padding_layer,
                      nn.ReflectionPad2d)
    assert module(x).shape == same_shape

    # (constructor keyword, expected depthwise act, expected pointwise act)
    act_cases = (
        ('dw_act_cfg', 'LeakyReLU', 'ReLU'),
        ('pw_act_cfg', 'ReLU', 'LeakyReLU'),
        ('act_cfg', 'LeakyReLU', 'LeakyReLU'),
    )
    for keyword, dw_expected, pw_expected in act_cases:
        module = DepthwiseSeparableConvModule(
            3, 8, 3, padding=1, **{keyword: dict(type='LeakyReLU')})
        assert _act_name(module.depthwise_conv) == dw_expected
        assert _act_name(module.pointwise_conv) == pw_expected
        assert module(x).shape == same_shape


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_cnn/test_flops_counter.py
================================================
import pytest
import torch
import torch.nn as nn

from mmcv.cnn import get_model_complexity_info
from mmcv.cnn.utils.flops_counter import flops_to_string, params_to_string

try:
    from StringIO import StringIO
except ImportError:
    from io import StringIO

# yapf: disable
# Ground-truth table consumed by `test_flops_counter`. Each entry holds:
#   'model'  - the module instance that is measured,
#   'input'  - the tuple handed to `get_model_complexity_info` as input size,
#   'flops'  - the FLOPs value the counter must report for that pair,
#   'params' - the parameter count the counter must report.
# Both numbers are compared with `==`, so they must match exactly.
gt_results = [
    {'model': nn.Conv1d(3, 8, 3), 'input': (3, 16), 'flops': 1120.0, 'params': 80.0},  # noqa: E501
    {'model': nn.Conv2d(3, 8, 3), 'input': (3, 16, 16), 'flops': 43904.0, 'params': 224.0},  # noqa: E501
    {'model': nn.Conv3d(3, 8, 3), 'input': (3, 3, 16, 16), 'flops': 128576.0, 'params': 656.0},  # noqa: E501
    {'model': nn.ReLU(), 'input': (3, 16, 16), 'flops': 768.0, 'params': 0},  # noqa: E501
    {'model': nn.PReLU(), 'input': (3, 16, 16), 'flops': 768.0, 'params': 1},  # noqa: E501
    {'model': nn.ELU(), 'input': (3, 16, 16), 'flops': 768.0, 'params': 0},  # noqa: E501
    {'model': nn.LeakyReLU(), 'input': (3, 16, 16), 'flops': 768.0, 'params': 0},  # noqa: E501
    {'model': nn.ReLU6(), 'input': (3, 16, 16), 'flops': 768.0, 'params': 0},  # noqa: E501
    {'model': nn.MaxPool1d(2), 'input': (3, 16), 'flops': 48.0, 'params': 0},  # noqa: E501
    {'model': nn.MaxPool2d(2), 'input': (3, 16, 16), 'flops': 768.0, 'params': 0},  # noqa: E501
    {'model': nn.MaxPool3d(2), 'input': (3, 3, 16, 16), 'flops': 2304.0, 'params': 0},  # noqa: E501
    {'model': nn.AvgPool1d(2), 'input': (3, 16), 'flops': 48.0, 'params': 0},  # noqa: E501
    {'model': nn.AvgPool2d(2), 'input': (3, 16, 16), 'flops': 768.0, 'params': 0},  # noqa: E501
    {'model': nn.AvgPool3d(2), 'input': (3, 3, 16, 16), 'flops': 2304.0, 'params': 0},  # noqa: E501
    {'model': nn.AdaptiveMaxPool1d(2), 'input': (3, 16), 'flops': 48.0, 'params': 0},  # noqa: E501
    {'model': nn.AdaptiveMaxPool2d(2), 'input': (3, 16, 16), 'flops': 768.0, 'params': 0},  # noqa: E501
    {'model': nn.AdaptiveMaxPool3d(2), 'input': (3, 3, 16, 16), 'flops': 2304.0, 'params': 0},  # noqa: E501
    {'model': nn.AdaptiveAvgPool1d(2), 'input': (3, 16), 'flops': 48.0, 'params': 0},  # noqa: E501
    {'model': nn.AdaptiveAvgPool2d(2), 'input': (3, 16, 16), 'flops': 768.0, 'params': 0},  # noqa: E501
    {'model': nn.AdaptiveAvgPool3d(2), 'input': (3, 3, 16, 16), 'flops': 2304.0, 'params': 0},  # noqa: E501
    {'model': nn.BatchNorm1d(3), 'input': (3, 16), 'flops': 96.0, 'params': 6.0},  # noqa: E501
    {'model': nn.BatchNorm2d(3), 'input': (3, 16, 16), 'flops': 1536.0, 'params': 6.0},  # noqa: E501
    {'model': nn.BatchNorm3d(3), 'input': (3, 3, 16, 16), 'flops': 4608.0, 'params': 6.0},  # noqa: E501
    {'model': nn.GroupNorm(2, 6), 'input': (6, 16, 16), 'flops': 3072.0, 'params': 12.0},  # noqa: E501
    {'model': nn.InstanceNorm1d(3, affine=True), 'input': (3, 16), 'flops': 96.0, 'params': 6.0},  # noqa: E501
    {'model': nn.InstanceNorm2d(3, affine=True), 'input': (3, 16, 16), 'flops': 1536.0, 'params': 6.0},  # noqa: E501
    {'model': nn.InstanceNorm3d(3, affine=True), 'input': (3, 3, 16, 16), 'flops': 4608.0, 'params': 6.0},  # noqa: E501
    {'model': nn.LayerNorm((3, 16, 16)), 'input': (3, 16, 16), 'flops': 1536.0, 'params': 1536.0},  # noqa: E501
    {'model': nn.LayerNorm((3, 16, 16), elementwise_affine=False), 'input': (3, 16, 16), 'flops': 768.0, 'params': 0},  # noqa: E501
    {'model': nn.Linear(1024, 2), 'input': (1024, ), 'flops': 2048.0, 'params': 2050.0},  # noqa: E501
    {'model': nn.ConvTranspose2d(3, 8, 3), 'input': (3, 16, 16), 'flops': 57888, 'params': 224.0},  # noqa: E501
    {'model': nn.Upsample((32, 32)), 'input': (3, 16, 16), 'flops': 3072.0, 'params': 0}  # noqa: E501
]
# yapf: enable


class ExampleModel(nn.Module):
    """Single-conv model whose ``forward`` takes an input *size*, not a tensor.

    It is used to test the ``input_constructor`` hook: ``forward`` receives
    the size tuple through the ``imgs`` keyword and builds its own random
    tensor from it.
    """

    def __init__(self):
        super().__init__()
        self.conv2d = nn.Conv2d(3, 8, 3)

    def forward(self, imgs):
        """Draw a random tensor of shape ``(1, *imgs)`` and convolve it."""
        batch = torch.randn(1, *imgs)
        return self.conv2d(batch)


def input_constructor(x):
    """Wrap ``x`` in a dict under the ``'imgs'`` key."""
    return {'imgs': x}


def test_flops_counter():
    """Check ``get_model_complexity_info`` against the ground-truth table and
    for its input validation, input constructor and string outputs."""
    # input_res should be a tuple
    with pytest.raises(AssertionError):
        get_model_complexity_info(nn.Conv2d(3, 8, 3), [1, 3, 16, 16])

    # len(input_res) >= 2
    with pytest.raises(AssertionError):
        get_model_complexity_info(nn.Conv2d(3, 8, 3), tuple())

    # test common layers
    for entry in gt_results:
        flops, params = get_model_complexity_info(
            entry['model'],
            entry['input'],
            as_strings=False,
            print_per_layer_stat=False)
        assert flops == entry['flops']
        assert params == entry['params']

    # test input constructor
    flops, params = get_model_complexity_info(
        ExampleModel(), (3, 16, 16),
        as_strings=False,
        print_per_layer_stat=False,
        input_constructor=input_constructor)
    assert flops == 43904.0
    assert params == 224.0

    # test output string (as_strings is left at its default here)
    flops, params = get_model_complexity_info(
        nn.Conv3d(3, 8, 3), (3, 3, 512, 512), print_per_layer_stat=False)
    assert flops == '0.17 GFLOPs'
    assert params == str(656)

    # test print per layer status: the report is written to `ost`
    report = StringIO()
    get_model_complexity_info(nn.Conv1d(3, 8, 3), (3, 16), ost=report)
    assert report.getvalue() == \
        'Conv1d(0.0 M, 100.000% Params, 0.0 GFLOPs, 100.000% FLOPs, 3, 8, kernel_size=(3,), stride=(1,))\n'  # noqa: E501

    # test when model is not a common instance
    stacked = nn.Sequential(
        nn.Conv2d(3, 8, 3), nn.Flatten(), nn.Linear(1568, 2))
    flops, params = get_model_complexity_info(
        stacked, (3, 16, 16), as_strings=False, print_per_layer_stat=True)
    assert flops == 47040.0
    assert params == 3362


def test_flops_to_string():
    """Check ``flops_to_string`` with the default unit, explicit units, a
    custom precision and ``None`` as the unit."""
    giga = 6.54321 * 10.**9
    assert flops_to_string(giga) == '6.54 GFLOPs'

    # explicit unit -> expected text
    explicit_units = (
        ('MFLOPs', '6543.21 MFLOPs'),
        ('KFLOPs', '6543210.0 KFLOPs'),
        ('FLOPs', '6543210000.0 FLOPs'),
    )
    for unit, text in explicit_units:
        assert flops_to_string(giga, unit) == text
    assert flops_to_string(giga, precision=4) == '6.5432 GFLOPs'

    # unit passed as None: the expected unit varies with the magnitude
    auto_unit_cases = (
        (6.54321 * 10.**9, '6.54 GFLOPs'),
        (3.21 * 10.**7, '32.1 MFLOPs'),
        (5.4 * 10.**3, '5.4 KFLOPs'),
        (987, '987 FLOPs'),
    )
    for value, text in auto_unit_cases:
        assert flops_to_string(value, None) == text


def test_params_to_string():
    """Check ``params_to_string`` with the default unit, explicit units and a
    custom precision."""
    # default unit: the expected suffix varies with the magnitude
    default_unit_cases = (
        (3.21 * 10.**7, '32.1 M'),
        (4.56 * 10.**5, '456.0 k'),
        (7.89 * 10.**2, '789.0'),
    )
    for count, text in default_unit_cases:
        assert params_to_string(count) == text

    # explicit unit (including the empty string) -> expected text
    count = 6.54321 * 10.**7
    explicit_units = (
        ('M', '65.43 M'),
        ('K', '65432.1 K'),
        ('', '65432100.0'),
    )
    for unit, text in explicit_units:
        assert params_to_string(count, unit) == text
    assert params_to_string(count, precision=4) == '65.4321 M'


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_cnn/test_fuse_conv_bn.py
================================================
import torch
import torch.nn as nn

from mmcv.cnn import ConvModule, fuse_conv_bn


def test_fuse_conv_bn():
    """Outputs of a BN/Conv stack and of its fused version must be equal.

    NOTE(review): if ``fuse_conv_bn`` modifies its argument in place, both
    forward passes below run on the same object - confirm against the
    implementation whether this comparison is meaningful.
    """
    inputs = torch.rand((1, 3, 5, 5))
    # a leading BN (no conv before it) followed by two conv+BN modules
    layers = [
        nn.BatchNorm2d(3),
        ConvModule(3, 5, 3, norm_cfg=dict(type='BN')),
        ConvModule(5, 5, 3, norm_cfg=dict(type='BN')),
    ]
    modules = nn.Sequential(*layers)
    fused_modules = fuse_conv_bn(modules)

    reference_out = modules(inputs)
    fused_out = fused_modules(inputs)
    assert torch.equal(reference_out, fused_out)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_cnn/test_generalized_attention.py
================================================
import torch

from mmcv.cnn.bricks import GeneralizedAttention


def test_context_block():
    """Exercise ``GeneralizedAttention`` across attention types and strides.

    Every case checks that the block returns a tensor with the input's shape;
    the per-type cases additionally check which sub-modules or attributes the
    block created for that ``attention_type``.
    """

    # test attention_type='1000'
    imgs = torch.randn(2, 16, 20, 20)
    gen_attention_block = GeneralizedAttention(16, attention_type='1000')
    assert gen_attention_block.query_conv.in_channels == 16
    assert gen_attention_block.key_conv.in_channels == 16
    # also cover the value projection rather than checking key_conv twice
    assert gen_attention_block.value_conv.in_channels == 16
    out = gen_attention_block(imgs)
    assert out.shape == imgs.shape

    # test attention_type='0100'
    imgs = torch.randn(2, 16, 20, 20)
    gen_attention_block = GeneralizedAttention(16, attention_type='0100')
    assert gen_attention_block.query_conv.in_channels == 16
    assert gen_attention_block.appr_geom_fc_x.in_features == 8
    assert gen_attention_block.appr_geom_fc_y.in_features == 8
    out = gen_attention_block(imgs)
    assert out.shape == imgs.shape

    # test attention_type='0010'
    imgs = torch.randn(2, 16, 20, 20)
    gen_attention_block = GeneralizedAttention(16, attention_type='0010')
    assert gen_attention_block.key_conv.in_channels == 16
    assert hasattr(gen_attention_block, 'appr_bias')
    out = gen_attention_block(imgs)
    assert out.shape == imgs.shape

    # test attention_type='0001'
    imgs = torch.randn(2, 16, 20, 20)
    gen_attention_block = GeneralizedAttention(16, attention_type='0001')
    assert gen_attention_block.appr_geom_fc_x.in_features == 8
    assert gen_attention_block.appr_geom_fc_y.in_features == 8
    assert hasattr(gen_attention_block, 'geom_bias')
    out = gen_attention_block(imgs)
    assert out.shape == imgs.shape

    # test spatial_range >= 0
    imgs = torch.randn(2, 256, 20, 20)
    gen_attention_block = GeneralizedAttention(256, spatial_range=10)
    assert hasattr(gen_attention_block, 'local_constraint_map')
    out = gen_attention_block(imgs)
    assert out.shape == imgs.shape

    # test q_stride > 1
    imgs = torch.randn(2, 16, 20, 20)
    gen_attention_block = GeneralizedAttention(16, q_stride=2)
    assert gen_attention_block.q_downsample is not None
    out = gen_attention_block(imgs)
    assert out.shape == imgs.shape

    # test kv_stride > 1
    imgs = torch.randn(2, 16, 20, 20)
    gen_attention_block = GeneralizedAttention(16, kv_stride=2)
    assert gen_attention_block.kv_downsample is not None
    out = gen_attention_block(imgs)
    assert out.shape == imgs.shape

    # test fp16 with attention_type='1111' (only when a GPU is available)
    if torch.cuda.is_available():
        imgs = torch.randn(2, 16, 20, 20).cuda().to(torch.half)
        gen_attention_block = GeneralizedAttention(
            16,
            spatial_range=-1,
            num_heads=8,
            attention_type='1111',
            kv_stride=2)
        gen_attention_block.cuda().type(torch.half)
        out = gen_attention_block(imgs)
        assert out.shape == imgs.shape


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_cnn/test_hsigmoid.py
================================================
import pytest
import torch

from mmcv.cnn.bricks import HSigmoid


def test_hsigmoid():
    """Check ``HSigmoid`` argument validation and its output values.

    Outputs are compared exactly against a hand-written reference that
    shifts, scales and then clips the input to ``[0, 1]``.
    """

    def _reference(x, bias, divisor):
        # element-wise min/max against constant tensors does the clipping
        lower = torch.zeros(x.shape)
        upper = torch.ones(x.shape)
        return torch.min(torch.max((x + bias) / divisor, lower), upper)

    # a zero divisor must be rejected
    with pytest.raises(AssertionError):
        HSigmoid(divisor=0)

    input_shape = torch.Size([1, 3, 64, 64])
    # (constructor args, bias and divisor used by the reference):
    # no-argument construction first, then designated parameters
    cases = (
        ((), 3, 6),
        ((1, 2, 0, 1), 1, 2),
    )
    for ctor_args, bias, divisor in cases:
        act = HSigmoid(*ctor_args)
        sample = torch.randn(input_shape)
        output = act(sample)
        expected_output = _reference(sample, bias, divisor)
        # shape first, then exact values
        assert output.shape == expected_output.shape
        assert torch.equal(output, expected_output)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_cnn/test_hswish.py
================================================
import torch
from torch.nn.functional import relu6

from mmcv.cnn.bricks import HSwish


def test_hswish():
    """Check the ``inplace`` flag handling and the output of ``HSwish``."""
    # the wrapped activation must reflect the requested inplace flag
    assert HSwish(inplace=True).act.inplace
    act = HSwish()
    assert not act.act.inplace

    # compare the non-inplace module with x * relu6(x + 3) / 6
    sample = torch.randn(1, 3, 64, 64)
    reference = sample * relu6(sample + 3) / 6
    result = act(sample)
    assert result.shape == reference.shape
    assert torch.equal(result, reference)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_cnn/test_model_registry.py
================================================
import torch.nn as nn

import mmcv
from mmcv.cnn import MODELS, build_model_from_cfg


def test_build_model_from_cfg():
    """Build modules from dict and list configs and check ``build_func``.

    Two dummy backbones are registered in a local registry; single configs
    must yield the matching class, a list of configs must yield an
    ``nn.Sequential``. The last part checks how a child registry picks its
    ``build_func``.
    """
    BACKBONES = mmcv.Registry('backbone', build_func=build_model_from_cfg)

    class _PassThrough(nn.Module):
        """Identity module that only stores its constructor arguments."""

        def __init__(self, depth, stages=4):
            super().__init__()
            self.depth = depth
            self.stages = stages

        def forward(self, x):
            return x

    @BACKBONES.register_module()
    class ResNet(_PassThrough):
        pass

    @BACKBONES.register_module()
    class ResNeXt(_PassThrough):
        pass

    # single dict configs: (cfg, expected class, expected ``stages``)
    single_cases = (
        (dict(type='ResNet', depth=50), ResNet, 4),
        (dict(type='ResNeXt', depth=50, stages=3), ResNeXt, 3),
    )
    for cfg, expected_cls, expected_stages in single_cases:
        model = BACKBONES.build(cfg)
        assert isinstance(model, expected_cls)
        assert model.depth == 50 and model.stages == expected_stages

    # a list of configs is built into a sequential container
    cfg = [
        dict(type='ResNet', depth=50),
        dict(type='ResNeXt', depth=50, stages=3)
    ]
    model = BACKBONES.build(cfg)
    assert isinstance(model, nn.Sequential)
    for index, (expected_cls, expected_stages) in enumerate(
            ((ResNet, 4), (ResNeXt, 3))):
        layer = model[index]
        assert isinstance(layer, expected_cls)
        assert layer.depth == 50 and layer.stages == expected_stages

    # test inherit `build_func` from parent
    inherited = mmcv.Registry('models', parent=MODELS, scope='new')
    assert inherited.build_func is build_model_from_cfg

    # test specify `build_func`
    def pseudo_build(cfg):
        return cfg

    overridden = mmcv.Registry(
        'models', parent=MODELS, build_func=pseudo_build)
    assert overridden.build_func is pseudo_build


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_cnn/test_non_local.py
================================================
import pytest
import torch
import torch.nn as nn

from mmcv.cnn import NonLocal1d, NonLocal2d, NonLocal3d
from mmcv.cnn.bricks.non_local import _NonLocalNd


def test_nonlocal():
    """Constructor checks for the ``_NonLocalNd`` base class."""
    # an unknown mode must be rejected with ValueError
    with pytest.raises(ValueError):
        _NonLocalNd(3, mode='unsupport_mode')

    # construction must succeed with the default zero initialization and
    # with ``zeros_init=False``, each without and with a BN norm_cfg
    for init_kwargs in (dict(), dict(zeros_init=False)):
        _NonLocalNd(3, **init_kwargs)
        _NonLocalNd(3, norm_cfg=dict(type='BN'), **init_kwargs)


def test_nonlocal3d():
    """Run ``NonLocal3d`` in each mode, without and with sub-sampling.

    Every case checks that the output shape equals the input shape; the
    sub-sampling cases also check the pooling layers that were created.
    """

    def _use_parrots_gpu():
        # NonLocal is only implemented on gpu in parrots
        return torch.__version__ == 'parrots' and torch.cuda.is_available()

    # NonLocal3d with 'embedded_gaussian' mode (constructed with defaults)
    imgs = torch.randn(2, 3, 10, 20, 20)
    block = NonLocal3d(3)
    if _use_parrots_gpu():
        imgs = imgs.cuda()
        block.cuda()
    assert block(imgs).shape == imgs.shape

    # explicitly selected modes, all fed with the same input tensor
    for mode in ('dot_product', 'concatenation', 'gaussian'):
        block = NonLocal3d(3, mode=mode)
        if mode == 'gaussian':
            # this mode is expected to come without a `phi` attribute
            assert not hasattr(block, 'phi')
        assert block.mode == mode
        if _use_parrots_gpu():
            block.cuda()
        assert block(imgs).shape == imgs.shape

    # NonLocal3d with 'gaussian' mode and sub_sample
    block = NonLocal3d(3, mode='gaussian', sub_sample=True)
    assert isinstance(block.g, nn.Sequential) and len(block.g) == 2
    assert isinstance(block.g[1], nn.MaxPool3d)
    assert block.g[1].kernel_size == (1, 2, 2)
    assert isinstance(block.phi, nn.MaxPool3d)
    if _use_parrots_gpu():
        block.cuda()
    assert block(imgs).shape == imgs.shape

    # NonLocal3d with 'dot_product' mode and sub_sample
    block = NonLocal3d(3, mode='dot_product', sub_sample=True)
    for branch in (block.g, block.phi):
        assert isinstance(branch, nn.Sequential) and len(branch) == 2
        assert isinstance(branch[1], nn.MaxPool3d)
        assert branch[1].kernel_size == (1, 2, 2)
    if _use_parrots_gpu():
        block.cuda()
    assert block(imgs).shape == imgs.shape


def test_nonlocal2d():
    """Run ``NonLocal2d`` in each mode, without and with sub-sampling.

    Every case checks that the output shape equals the input shape; the
    sub-sampling cases also check the pooling layers that were created.
    """

    def _use_parrots_gpu():
        return torch.__version__ == 'parrots' and torch.cuda.is_available()

    # default construction ('embedded_gaussian' mode) followed by the
    # explicitly selected modes; each case draws a fresh input
    mode_kwargs = (
        dict(),
        dict(mode='dot_product'),
        dict(mode='concatenation'),
        dict(mode='gaussian'),
    )
    for kwargs in mode_kwargs:
        imgs = torch.randn(2, 3, 20, 20)
        block = NonLocal2d(3, **kwargs)
        if kwargs.get('mode') == 'gaussian':
            # this mode is expected to come without a `phi` attribute
            assert not hasattr(block, 'phi')
        if _use_parrots_gpu():
            imgs = imgs.cuda()
            block.cuda()
        assert block(imgs).shape == imgs.shape

    # the sub_sample cases below reuse the last input tensor

    # NonLocal2d with 'gaussian' mode and sub_sample
    block = NonLocal2d(3, mode='gaussian', sub_sample=True)
    assert isinstance(block.g, nn.Sequential) and len(block.g) == 2
    assert isinstance(block.g[1], nn.MaxPool2d)
    assert block.g[1].kernel_size == (2, 2)
    assert isinstance(block.phi, nn.MaxPool2d)
    if _use_parrots_gpu():
        block.cuda()
    assert block(imgs).shape == imgs.shape

    # NonLocal2d with 'dot_product' mode and sub_sample
    block = NonLocal2d(3, mode='dot_product', sub_sample=True)
    for branch in (block.g, block.phi):
        assert isinstance(branch, nn.Sequential) and len(branch) == 2
        assert isinstance(branch[1], nn.MaxPool2d)
        assert branch[1].kernel_size == (2, 2)
    if _use_parrots_gpu():
        block.cuda()
    assert block(imgs).shape == imgs.shape


def test_nonlocal1d():
    """Run ``NonLocal1d`` in each mode, without and with sub-sampling.

    Every case checks that the output shape equals the input shape; the
    sub-sampling cases also check the pooling layers that were created.
    """

    def _use_parrots_gpu():
        return torch.__version__ == 'parrots' and torch.cuda.is_available()

    # default construction ('embedded_gaussian' mode) followed by the
    # explicitly selected modes; each case draws a fresh input
    mode_kwargs = (
        dict(),
        dict(mode='dot_product'),
        dict(mode='concatenation'),
        dict(mode='gaussian'),
    )
    for kwargs in mode_kwargs:
        imgs = torch.randn(2, 3, 20)
        block = NonLocal1d(3, **kwargs)
        if kwargs.get('mode') == 'gaussian':
            # this mode is expected to come without a `phi` attribute
            assert not hasattr(block, 'phi')
        if _use_parrots_gpu():
            imgs = imgs.cuda()
            block.cuda()
        assert block(imgs).shape == imgs.shape

    # the sub_sample cases below reuse the last input tensor

    # NonLocal1d with 'gaussian' mode and sub_sample
    block = NonLocal1d(3, mode='gaussian', sub_sample=True)
    assert isinstance(block.g, nn.Sequential) and len(block.g) == 2
    assert isinstance(block.g[1], nn.MaxPool1d)
    assert block.g[1].kernel_size == 2
    assert isinstance(block.phi, nn.MaxPool1d)
    if _use_parrots_gpu():
        block.cuda()
    assert block(imgs).shape == imgs.shape

    # NonLocal1d with 'dot_product' mode and sub_sample
    block = NonLocal1d(3, mode='dot_product', sub_sample=True)
    for branch in (block.g, block.phi):
        assert isinstance(branch, nn.Sequential) and len(branch) == 2
        assert isinstance(branch[1], nn.MaxPool1d)
        assert branch[1].kernel_size == 2
    if _use_parrots_gpu():
        block.cuda()
    assert block(imgs).shape == imgs.shape


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_cnn/test_revert_syncbn.py
================================================
import os
import platform

import numpy as np
import pytest
import torch
import torch.distributed as dist

from mmcv.cnn.bricks import ConvModule
from mmcv.cnn.utils import revert_sync_batchnorm

if platform.system() == 'Windows':
    import regex as re
else:
    import re


def test_revert_syncbn():
    """A SyncBN ``ConvModule`` must fail on CPU until it is reverted."""
    sync_conv = ConvModule(3, 8, 2, norm_cfg=dict(type='SyncBN'))
    sample = torch.randn(1, 3, 10, 10)

    # Expect a ValueError prompting that SyncBN is not supported on CPU
    with pytest.raises(ValueError):
        sync_conv(sample)

    # after reverting, the same input goes through on CPU
    reverted_conv = revert_sync_batchnorm(sync_conv)
    assert reverted_conv(sample).shape == (1, 8, 9, 9)


def test_revert_mmsyncbn():
    """Compare MMSyncBN output with the reverted BN output on GPU and CPU.

    Returns early (after printing a hint) unless ``SLURM_NTASKS`` is set to
    at least 2. Otherwise the process group is configured from the SLURM
    environment variables before running the comparison.
    """
    num_tasks = os.environ.get('SLURM_NTASKS')
    if num_tasks is None or int(num_tasks) < 2:
        print('Must run on slurm with more than 1 process!\n'
              'srun -p test --gres=gpu:2 -n2')
        return

    rank = int(os.environ['SLURM_PROCID'])
    world_size = int(num_tasks)
    local_rank = int(os.environ['SLURM_LOCALID'])
    node_list = str(os.environ['SLURM_NODELIST'])

    # The 2nd to 5th numeric groups of the node list are joined with dots.
    # NOTE(review): presumably the node name embeds an IPv4 address after a
    # leading number - confirm for the target cluster.
    node_parts = re.findall('[0-9]+', node_list)
    os.environ['MASTER_ADDR'] = '.'.join(
        node_parts[index] for index in range(1, 5))
    os.environ['MASTER_PORT'] = '12341'
    os.environ['WORLD_SIZE'] = str(world_size)
    os.environ['RANK'] = str(rank)

    dist.init_process_group('nccl')
    torch.cuda.set_device(local_rank)

    # every rank works on the tensor broadcast from rank 0
    sample = torch.randn(1, 3, 10, 10).cuda()
    dist.broadcast(sample, src=0)

    conv = ConvModule(3, 8, 2, norm_cfg=dict(type='MMSyncBN')).cuda()
    conv.eval()
    y_mmsyncbn = conv(sample).detach().cpu().numpy()

    # reverted module on GPU must match the MMSyncBN output
    conv = revert_sync_batchnorm(conv)
    y_bn = conv(sample).detach().cpu().numpy()
    assert np.all(np.isclose(y_bn, y_mmsyncbn, rtol=1e-3))

    # reverted module on CPU must match its own GPU output
    conv = conv.to('cpu')
    sample = sample.to('cpu')
    y_bn_cpu = conv(sample).detach().numpy()
    assert np.all(np.isclose(y_bn, y_bn_cpu, rtol=1e-3))


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_cnn/test_scale.py
================================================
import torch

from mmcv.cnn.bricks import Scale


def test_scale():
    """Check the stored factor, its dtype and the output shape of ``Scale``."""
    # (constructor args, expected stored factor): default, then a given scale
    for ctor_args, expected_factor in (((), 1.), ((10.,), 10.)):
        scale = Scale(*ctor_args)
        assert scale.scale.data == expected_factor
        assert scale.scale.dtype == torch.float

        sample = torch.rand(1, 3, 64, 64)
        assert scale(sample).shape == (1, 3, 64, 64)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_cnn/test_swish.py
================================================
import torch
import torch.nn.functional as F

from mmcv.cnn.bricks import Swish


def test_swish():
    """``Swish`` output must equal ``x * sigmoid(x)`` exactly."""
    act = Swish()
    sample = torch.randn(1, 3, 64, 64)
    reference = sample * F.sigmoid(sample)
    result = act(sample)
    # shape first, then exact values
    assert result.shape == reference.shape
    assert torch.equal(result, reference)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_cnn/test_transformer.py
================================================
import copy

import pytest
import torch

from mmcv.cnn.bricks.drop import DropPath
from mmcv.cnn.bricks.transformer import (FFN, AdaptivePadding,
                                         BaseTransformerLayer,
                                         MultiheadAttention, PatchEmbed,
                                         PatchMerging,
                                         TransformerLayerSequence)
from mmcv.runner import ModuleList


def test_adaptive_padding():
    """Check the padded spatial size produced by ``AdaptivePadding``.

    All cases run for both supported padding modes; the final check makes
    sure any other ``padding`` value is rejected.
    """

    def _make(kernel_size, stride, dilation, padding):
        return AdaptivePadding(
            kernel_size=kernel_size,
            stride=stride,
            dilation=dilation,
            padding=padding)

    def _hw(tensor):
        # spatial (height, width) of an NCHW tensor
        return tensor.shape[2], tensor.shape[3]

    for padding in ('same', 'corner'):
        # int arguments: both inputs are padded to multiples of 16
        pad_16 = _make(16, 16, 1, padding)
        for in_hw in ((15, 17), (16, 17)):
            assert _hw(pad_16(torch.rand(1, 1, *in_hw))) == (16, 32)

        # tuple arguments with dilation (1, 1):
        # (kernel_size, stride, input size, expected padded size)
        unit_dilation_cases = (
            ((2, 2), (2, 2), (11, 13), (12, 14)),  # pad to divisible by 2
            ((2, 2), (10, 10), (10, 13), (10, 13)),  # no padding
            ((11, 11), (10, 10), (11, 13), (21, 21)),  # all padding
        )
        for kernel_size, stride, in_hw, expected_hw in unit_dilation_cases:
            adap_pad = _make(kernel_size, stride, (1, 1), padding)
            assert _hw(adap_pad(torch.rand(1, 1, *in_hw))) == expected_hw

        # a (4, 5) kernel with dilation (2, 2) must pad exactly like a
        # (7, 9) kernel with dilation (1, 1) on the same input
        sample = torch.rand(1, 1, 11, 13)
        dilation_out = _make((4, 5), (3, 4), (2, 2), padding)(sample)
        assert _hw(dilation_out) == (16, 21)
        kernel79_out = _make((7, 9), (3, 4), (1, 1), padding)(sample)
        assert _hw(kernel79_out) == (16, 21)
        assert kernel79_out.shape == dilation_out.shape

    # assert only support "same" "corner"
    with pytest.raises(AssertionError):
        _make((7, 9), (3, 4), (1, 1), 1)


def test_patch_embed():
    """Check ``PatchEmbed`` output shapes, reported sizes and init size.

    Covers integer padding (plain, with dilation, with norm and a given
    ``input_size``) and the adaptive ``'same'``/``'corner'`` padding modes.
    Every case also checks that the number of output tokens equals the
    product of the reported output size.
    """
    B = 2
    H = 3
    W = 4
    C = 3
    embed_dims = 10
    kernel_size = 3
    stride = 1
    dummy_input = torch.rand(B, C, H, W)
    patch_embed_1 = PatchEmbed(
        in_channels=C,
        embed_dims=embed_dims,
        kernel_size=kernel_size,
        stride=stride,
        padding=0,
        dilation=1,
        norm_cfg=None)

    x1, shape = patch_embed_1(dummy_input)
    # test out shape
    assert x1.shape == (2, 2, 10)
    # test outsize is correct
    assert shape == (1, 2)
    # test L = out_h * out_w
    assert shape[0] * shape[1] == x1.shape[1]

    B = 2
    H = 10
    W = 10
    C = 3
    embed_dims = 10
    kernel_size = 5
    stride = 2
    dummy_input = torch.rand(B, C, H, W)
    # test dilation
    patch_embed_2 = PatchEmbed(
        in_channels=C,
        embed_dims=embed_dims,
        kernel_size=kernel_size,
        stride=stride,
        padding=0,
        dilation=2,
        norm_cfg=None,
    )

    x2, shape = patch_embed_2(dummy_input)
    # test out shape
    assert x2.shape == (2, 1, 10)
    # test outsize is correct
    assert shape == (1, 1)
    # test L = out_h * out_w
    assert shape[0] * shape[1] == x2.shape[1]

    stride = 2
    input_size = (10, 10)

    dummy_input = torch.rand(B, C, H, W)
    # test stride and norm
    patch_embed_3 = PatchEmbed(
        in_channels=C,
        embed_dims=embed_dims,
        kernel_size=kernel_size,
        stride=stride,
        padding=0,
        dilation=2,
        norm_cfg=dict(type='LN'),
        input_size=input_size)

    x3, shape = patch_embed_3(dummy_input)
    # test out shape
    assert x3.shape == (2, 1, 10)
    # test outsize is correct
    assert shape == (1, 1)
    # test L = out_h * out_w
    assert shape[0] * shape[1] == x3.shape[1]

    # test `init_out_size` against the convolution output-size formula:
    # dilation * (kernel_size - 1) = 2 * 4, stride = 2, padding = 0.
    # Each axis is checked against its own input extent.
    assert patch_embed_3.init_out_size[0] == (input_size[0] - 2 * 4 -
                                              1) // 2 + 1
    assert patch_embed_3.init_out_size[1] == (input_size[1] - 2 * 4 -
                                              1) // 2 + 1

    # non-square input whose size matches the declared `input_size`
    H = 11
    W = 12
    input_size = (H, W)
    dummy_input = torch.rand(B, C, H, W)
    patch_embed_4 = PatchEmbed(
        in_channels=C,
        embed_dims=embed_dims,
        kernel_size=kernel_size,
        stride=stride,
        padding=0,
        dilation=2,
        norm_cfg=dict(type='LN'),
        input_size=input_size)

    _, shape = patch_embed_4(dummy_input)
    # when input_size equal to real input
    # the out_size should be equal to `init_out_size`
    assert shape == patch_embed_4.init_out_size

    # test adap padding
    # (input_size, kernel_size, stride, expected out_size)
    adaptive_cases = (
        # stride is 1
        ((5, 5), (5, 5), (1, 1), (5, 5)),
        # kernel_size == stride, input fits exactly
        ((5, 5), (5, 5), (5, 5), (1, 1)),
        # kernel_size == stride, height needs padding
        ((6, 5), (5, 5), (5, 5), (2, 1)),
        # different kernel_size and stride per axis
        ((6, 5), (6, 2), (6, 2), (1, 3)),
    )
    for padding in ('same', 'corner'):
        in_c = 2
        embed_dims = 3
        B = 2

        for input_size, kernel_size, stride, expected_size in adaptive_cases:
            x = torch.rand(B, in_c, *input_size)
            patch_embed = PatchEmbed(
                in_channels=in_c,
                embed_dims=embed_dims,
                kernel_size=kernel_size,
                stride=stride,
                padding=padding,
                dilation=1,
                bias=False)

            x_out, out_size = patch_embed(x)
            num_tokens = expected_size[0] * expected_size[1]
            assert x_out.size() == (B, num_tokens, 3)
            assert out_size == expected_size
            assert x_out.size(1) == out_size[0] * out_size[1]


def test_patch_merging():
    """Check ``PatchMerging`` output shapes and reported output sizes.

    Integer padding is tested first, then the adaptive ``'same'`` and
    ``'corner'`` modes. Every case also checks that the number of output
    tokens equals the product of the reported output size.
    """

    # Test the model with int padding on a 10x10 token grid:
    # (in_c, out_c, kernel_size, stride, padding, dilation,
    #  expected x_out size, expected out_size)
    int_padding_cases = (
        (3, 4, 3, 3, 1, 1, (1, 16, 4), (4, 4)),
        (4, 5, 6, 3, 2, 2, (1, 4, 5), (2, 2)),
    )
    for (in_c, out_c, kernel_size, stride, padding, dilation,
         expected_x_size, expected_out_size) in int_padding_cases:
        patch_merge = PatchMerging(
            in_channels=in_c,
            out_channels=out_c,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            bias=False)
        tokens = torch.rand(1, 100, in_c)
        x_out, out_size = patch_merge(tokens, (10, 10))
        assert x_out.size() == expected_x_size
        assert out_size == expected_out_size
        # assert out size is consistent with real output
        assert x_out.size(1) == out_size[0] * out_size[1]

    # Test with adaptive padding:
    # (input_size, kernel_size, stride, expected token count,
    #  expected out_size)
    adaptive_cases = (
        # stride is 1
        ((5, 5), (5, 5), (1, 1), 25, (5, 5)),
        # kernel_size == stride, input fits exactly
        ((5, 5), (5, 5), (5, 5), 1, (1, 1)),
        # kernel_size == stride, height needs padding
        ((6, 5), (5, 5), (5, 5), 2, (2, 1)),
        # different kernel_size and stride per axis
        ((6, 5), (6, 2), (6, 2), 3, (1, 3)),
    )
    in_c, out_c, batch = 2, 3, 2
    for padding in ('same', 'corner'):
        for (input_size, kernel_size, stride, expected_tokens,
             expected_out_size) in adaptive_cases:
            seq_len = input_size[0] * input_size[1]
            tokens = torch.rand(batch, seq_len, in_c)
            patch_merge = PatchMerging(
                in_channels=in_c,
                out_channels=out_c,
                kernel_size=kernel_size,
                stride=stride,
                padding=padding,
                dilation=1,
                bias=False)

            x_out, out_size = patch_merge(tokens, input_size)
            assert x_out.size() == (batch, expected_tokens, 3)
            assert out_size == expected_out_size
            assert x_out.size(1) == out_size[0] * out_size[1]


def test_multiheadattention():
    """Check that MultiheadAttention agrees across its two input layouts.

    Two attention modules that differ only in ``batch_first`` are given the
    same parameters; their outputs (compared through ``sum()``) must match
    for self-attention, for attention with an explicit key, and when an
    explicit ``identity`` (or its deprecated alias ``residual``) is passed.
    """
    # Smoke test: construction with a plain ``Dropout`` dropout layer.
    MultiheadAttention(
        embed_dims=5,
        num_heads=5,
        attn_drop=0,
        proj_drop=0,
        dropout_layer=dict(type='Dropout', drop_prob=0.),
        batch_first=True)
    batch_dim = 2
    embed_dim = 5
    num_query = 100
    attn_batch_first = MultiheadAttention(
        embed_dims=5,
        num_heads=5,
        attn_drop=0,
        proj_drop=0,
        dropout_layer=dict(type='DropPath', drop_prob=0.),
        batch_first=True)

    attn_query_first = MultiheadAttention(
        embed_dims=5,
        num_heads=5,
        attn_drop=0,
        proj_drop=0,
        dropout_layer=dict(type='DropPath', drop_prob=0.),
        batch_first=False)

    # Make both modules use the same parameter values so that their outputs
    # are directly comparable.
    param_dict = dict(attn_query_first.named_parameters())
    for n, v in attn_batch_first.named_parameters():
        param_dict[n].data = v.data

    input_batch_first = torch.rand(batch_dim, num_query, embed_dim)
    input_query_first = input_batch_first.transpose(0, 1)

    assert torch.allclose(
        attn_query_first(input_query_first).sum(),
        attn_batch_first(input_batch_first).sum())

    key_batch_first = torch.rand(batch_dim, num_query, embed_dim)
    key_query_first = key_batch_first.transpose(0, 1)

    assert torch.allclose(
        attn_query_first(input_query_first, key_query_first).sum(),
        attn_batch_first(input_batch_first, key_batch_first).sum())

    identity = torch.ones_like(input_query_first)

    # check deprecated arguments can be used normally

    assert torch.allclose(
        attn_query_first(
            input_query_first, key_query_first, residual=identity).sum(),
        attn_batch_first(input_batch_first, key_batch_first).sum() +
        identity.sum() - input_batch_first.sum())

    assert torch.allclose(
        attn_query_first(
            input_query_first, key_query_first, identity=identity).sum(),
        attn_batch_first(input_batch_first, key_batch_first).sum() +
        identity.sum() - input_batch_first.sum())

    # The original ended with a dangling expression (trailing comma) that
    # asserted nothing; check the output layout instead.
    out_query_first = attn_query_first(
        input_query_first, key_query_first, identity=identity)
    assert out_query_first.shape == input_query_first.shape


def test_ffn():
    """Check FFN construction constraints and its identity handling.

    Passing an explicit ``identity`` (or the deprecated alias ``residual``)
    must replace the input in the skip connection, so the summed output
    shifts by ``residual.sum() - input_tensor.sum()``.
    """
    with pytest.raises(AssertionError):
        # num_fcs should be no less than 2
        FFN(num_fcs=1)
    # Deprecated constructor argument names must still be accepted.
    FFN(dropout=0, add_residual=True)
    ffn = FFN(dropout=0, add_identity=True)

    input_tensor = torch.rand(2, 20, 256)
    input_tensor_nbc = input_tensor.transpose(0, 1)
    assert torch.allclose(ffn(input_tensor).sum(), ffn(input_tensor_nbc).sum())
    residual = torch.rand_like(input_tensor)
    # These two comparisons were previously computed but never asserted.
    assert torch.allclose(
        ffn(input_tensor, residual=residual).sum(),
        ffn(input_tensor).sum() + residual.sum() - input_tensor.sum())

    assert torch.allclose(
        ffn(input_tensor, identity=residual).sum(),
        ffn(input_tensor).sum() + residual.sum() - input_tensor.sum())


@pytest.mark.skipif(not torch.cuda.is_available(), reason='Cuda not available')
def test_basetransformerlayer_cuda():
    """Deep-copied BaseTransformerLayers must still run on CUDA.

    A template layer is deep-copied twice, the copies are moved to the GPU,
    and a tensor is passed through them one after the other; the shape must
    be preserved after every layer.
    """
    template = BaseTransformerLayer(
        operation_order=('self_attn', 'ffn'),
        batch_first=True,
        attn_cfgs=dict(
            type='MultiheadAttention',
            embed_dims=256,
            num_heads=8,
        ),
    )
    copies = [copy.deepcopy(template) for _ in range(2)]
    baselayers = ModuleList(copies)
    baselayers.to('cuda')

    expected_shape = torch.Size([2, 10, 256])
    feats = torch.rand(2, 10, 256).cuda()
    for layer in baselayers:
        feats = layer(feats)
        assert feats.shape == expected_shape


@pytest.mark.parametrize('embed_dims', [False, 256])
def test_basetransformerlayer(embed_dims):
    """Build BaseTransformerLayer through its deprecated keyword arguments.

    Args:
        embed_dims: When truthy, it is added to the FFN config as
            ``embed_dims``; otherwise the key is left out.
    """
    # ``attn_cfgs`` is deliberately a 1-tuple holding the config dict.
    attn_cfgs = (dict(
        type='MultiheadAttention', embed_dims=256, num_heads=8), )

    ffn_cfgs = dict(type='FFN')
    if embed_dims:
        ffn_cfgs['embed_dims'] = embed_dims
    ffn_cfgs.update(
        feedforward_channels=1024,
        num_fcs=2,
        ffn_drop=0.,
        act_cfg=dict(type='ReLU', inplace=True),
    )

    order = ('self_attn', 'norm', 'ffn', 'norm')
    channels = 2048
    drop = 0.1

    # test deprecated_args
    layer = BaseTransformerLayer(
        attn_cfgs=attn_cfgs,
        ffn_cfgs=ffn_cfgs,
        feedforward_channels=channels,
        ffn_dropout=drop,
        operation_order=order)
    assert layer.batch_first is False
    assert layer.ffns[0].feedforward_channels == channels

    # Same again without an explicit FFN config and with batch_first=True,
    # followed by a forward pass.
    attn_cfgs = (dict(
        type='MultiheadAttention', num_heads=8, embed_dims=256), )
    layer = BaseTransformerLayer(
        attn_cfgs=attn_cfgs,
        feedforward_channels=channels,
        ffn_dropout=drop,
        operation_order=order,
        batch_first=True)
    assert layer.attentions[0].batch_first
    layer(torch.rand(2, 10, 256))


def test_transformerlayersequence():
    """Check TransformerLayerSequence construction from dict and list configs.

    A single dict config is expected to yield ``num_layers`` layers; a list
    config whose length differs from ``num_layers`` must raise
    ``AssertionError``.
    """
    order = ('self_attn', 'norm', 'cross_attn', 'norm', 'ffn', 'norm')

    layer_cfg = dict(
        type='BaseTransformerLayer',
        attn_cfgs=[
            dict(
                type='MultiheadAttention',
                embed_dims=256,
                num_heads=8,
                dropout=0.1),
            dict(type='MultiheadAttention', embed_dims=256, num_heads=4)
        ],
        feedforward_channels=1024,
        ffn_dropout=0.1,
        operation_order=order)
    squeue = TransformerLayerSequence(
        num_layers=6, transformerlayers=layer_cfg)
    assert len(squeue.layers) == 6
    assert squeue.pre_norm is False

    single_item_list = [
        dict(
            type='BaseTransformerLayer',
            attn_cfgs=[
                dict(
                    type='MultiheadAttention',
                    embed_dims=256,
                    num_heads=8,
                    dropout=0.1),
                dict(type='MultiheadAttention', embed_dims=256)
            ],
            feedforward_channels=1024,
            ffn_dropout=0.1,
            operation_order=order)
    ]
    with pytest.raises(AssertionError):
        # if transformerlayers is a list, len(transformerlayers)
        # should be equal to num_layers
        TransformerLayerSequence(
            num_layers=6, transformerlayers=single_item_list)


def test_drop_path():
    """Check when DropPath hands back the very same tensor object."""
    # Zero drop probability: the input object itself is returned.
    passthrough = DropPath(drop_prob=0)
    sample = torch.rand(2, 3, 4, 5)
    assert passthrough(sample) is sample

    # Non-zero probability: identity only while not in training mode.
    layer = DropPath(drop_prob=0.1)
    layer.training = False
    sample = torch.rand(2, 3, 4, 5)
    assert layer(sample) is sample
    layer.training = True
    assert layer(sample) is not sample


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_cnn/test_weight_init.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import random
from tempfile import TemporaryDirectory

import numpy as np
import pytest
import torch
from scipy import stats
from torch import nn

from mmcv.cnn import (Caffe2XavierInit, ConstantInit, KaimingInit, NormalInit,
                      PretrainedInit, TruncNormalInit, UniformInit, XavierInit,
                      bias_init_with_prob, caffe2_xavier_init, constant_init,
                      initialize, kaiming_init, normal_init, trunc_normal_init,
                      uniform_init, xavier_init)


def test_constant_init():
    """Check ``constant_init`` on convolutions with and without a bias."""
    conv_module = nn.Conv2d(3, 16, 3)
    constant_init(conv_module, 0.1)
    assert conv_module.weight.allclose(
        torch.full_like(conv_module.weight, 0.1))
    assert conv_module.bias.allclose(torch.zeros_like(conv_module.bias))

    conv_module_no_bias = nn.Conv2d(3, 16, 3, bias=False)
    constant_init(conv_module_no_bias, 0.1)
    # Verify the bias-free module itself; the original re-checked
    # ``conv_module`` here, so this initialisation was never validated.
    assert conv_module_no_bias.weight.allclose(
        torch.full_like(conv_module_no_bias.weight, 0.1))
    assert conv_module_no_bias.bias is None


def test_xavier_init():
    """Exercise ``xavier_init`` on convolutions with and without a bias."""
    conv = nn.Conv2d(3, 16, 3)
    xavier_init(conv, bias=0.1)
    expected_bias = torch.full_like(conv.bias, 0.1)
    assert conv.bias.allclose(expected_bias)

    xavier_init(conv, distribution='uniform')
    # TODO: sanity check of weight distribution, e.g. mean, std
    # An unsupported distribution name must trip an assertion.
    with pytest.raises(AssertionError):
        xavier_init(conv, distribution='student-t')

    bias_free_conv = nn.Conv2d(3, 16, 3, bias=False)
    xavier_init(bias_free_conv)


def test_normal_init():
    """Exercise ``normal_init`` on convolutions with and without a bias."""
    conv = nn.Conv2d(3, 16, 3)
    normal_init(conv, bias=0.1)
    # TODO: sanity check of weight distribution, e.g. mean, std
    expected_bias = torch.full_like(conv.bias, 0.1)
    assert conv.bias.allclose(expected_bias)

    bias_free_conv = nn.Conv2d(3, 16, 3, bias=False)
    normal_init(bias_free_conv)
    # TODO: sanity check distribution, e.g. mean, std


def test_trunc_normal_init():
    """Check ``trunc_normal_init`` against scipy's truncated normal.

    Mean, std and truncation bounds are drawn at random; the initialised
    weights are then compared to the matching truncated normal with a
    Kolmogorov-Smirnov test.
    """

    def _uniform_between(low, high):
        # Random float in the interval spanned by ``low`` and ``high``.
        return (high - low) * random.random() + low

    def _follows_trunc_normal(tensor, mean, std, a, b):
        # scipy's trunc norm is suited for data drawn from N(0, 1),
        # so the samples and the bounds are standardised first.
        standardized = ((tensor.view(-1) - mean) / std).tolist()
        lower = (a - mean) / std
        upper = (b - mean) / std
        ks_result = stats.kstest(
            standardized, 'truncnorm', args=(lower, upper))
        return ks_result[1] > 0.0001

    conv = nn.Conv2d(3, 16, 3)
    mean = _uniform_between(-3, 3)
    std = _uniform_between(.01, 1)
    a = _uniform_between(mean - 2 * std, mean)
    b = _uniform_between(mean, mean + 2 * std)
    trunc_normal_init(conv, mean, std, a, b, bias=0.1)
    assert _follows_trunc_normal(conv.weight, mean, std, a, b)
    assert conv.bias.allclose(torch.full_like(conv.bias, 0.1))

    bias_free_conv = nn.Conv2d(3, 16, 3, bias=False)
    trunc_normal_init(bias_free_conv)
    # TODO: sanity check distribution, e.g. mean, std


def test_uniform_init():
    """Exercise ``uniform_init`` on convolutions with and without a bias."""
    conv = nn.Conv2d(3, 16, 3)
    uniform_init(conv, bias=0.1)
    # TODO: sanity check of weight distribution, e.g. mean, std
    expected_bias = torch.full_like(conv.bias, 0.1)
    assert conv.bias.allclose(expected_bias)

    bias_free_conv = nn.Conv2d(3, 16, 3, bias=False)
    uniform_init(bias_free_conv)


def test_kaiming_init():
    """Exercise ``kaiming_init`` on convolutions with and without a bias."""
    conv = nn.Conv2d(3, 16, 3)
    kaiming_init(conv, bias=0.1)
    # TODO: sanity check of weight distribution, e.g. mean, std
    expected_bias = torch.full_like(conv.bias, 0.1)
    assert conv.bias.allclose(expected_bias)

    kaiming_init(conv, distribution='uniform')
    # An unsupported distribution name must trip an assertion.
    with pytest.raises(AssertionError):
        kaiming_init(conv, distribution='student-t')

    bias_free_conv = nn.Conv2d(3, 16, 3, bias=False)
    kaiming_init(bias_free_conv)


def test_caffe_xavier_init():
    """Smoke test: ``caffe2_xavier_init`` runs on a Conv2d without error."""
    conv = nn.Conv2d(3, 16, 3)
    caffe2_xavier_init(conv)


def test_bias_init_with_prob():
    """Check ``bias_init_with_prob`` against its closed-form value."""
    prior_prob = 0.1
    conv = nn.Conv2d(3, 16, 3)
    normal_init(conv, bias=bias_init_with_prob(prior_prob))
    # TODO: sanity check of weight distribution, e.g. mean, std
    # Expected bias is -log((1 - p) / p) for prior probability p.
    expected = float(-np.log((1 - prior_prob) / prior_prob))
    assert conv.bias.allclose(torch.full_like(conv.bias, expected))


def test_constaninit():
    """test ConstantInit class."""

    def _const(param, value):
        # Tensor with ``param``'s shape, filled with ``value``.
        return torch.full(param.shape, value)

    model = nn.Sequential(nn.Conv2d(3, 1, 3), nn.ReLU(), nn.Linear(1, 2))
    conv, linear = model[0], model[2]

    # Only the Conv2d layer is targeted; the Linear layer must be untouched.
    ConstantInit(val=1, bias=2, layer='Conv2d')(model)
    assert torch.equal(conv.weight, _const(conv.weight, 1.))
    assert torch.equal(conv.bias, _const(conv.bias, 2.))

    assert not torch.equal(linear.weight, _const(linear.weight, 1.))
    assert not torch.equal(linear.bias, _const(linear.bias, 2.))

    # Now target the Linear layer, deriving its bias from ``bias_prob``.
    ConstantInit(val=3, bias_prob=0.01, layer='Linear')(model)
    res = bias_init_with_prob(0.01)

    assert torch.equal(conv.weight, _const(conv.weight, 1.))
    assert torch.equal(linear.weight, _const(linear.weight, 3.))
    assert torch.equal(conv.bias, _const(conv.bias, 2.))
    assert torch.equal(linear.bias, _const(linear.bias, res))

    # test layer key with base class name
    model = nn.Sequential(nn.Conv2d(3, 1, 3), nn.ReLU(), nn.Conv1d(1, 2, 1))
    ConstantInit(val=4., bias=5., layer='_ConvNd')(model)
    assert torch.all(model[0].weight == 4.)
    assert torch.all(model[2].weight == 4.)
    assert torch.all(model[0].bias == 5.)
    assert torch.all(model[2].bias == 5.)

    # test bias input type
    with pytest.raises(TypeError):
        ConstantInit(val=1, bias='1')
    # test bias_prob type
    with pytest.raises(TypeError):
        ConstantInit(val=1, bias_prob='1')
    # test layer input type
    with pytest.raises(TypeError):
        ConstantInit(val=1, layer=1)


def test_xavierinit():
    """test XavierInit class."""
    model = nn.Sequential(nn.Conv2d(3, 1, 3), nn.ReLU(), nn.Linear(1, 2))
    func = XavierInit(bias=0.1, layer='Conv2d')
    func(model)
    # Compare each bias with a reference built from that same bias; the
    # original swapped the two and only passed thanks to broadcasting.
    assert model[0].bias.allclose(torch.full_like(model[0].bias, 0.1))
    assert not model[2].bias.allclose(torch.full_like(model[2].bias, 0.1))

    constant_func = ConstantInit(val=0, bias=0, layer=['Conv2d', 'Linear'])
    func = XavierInit(gain=100, bias_prob=0.01, layer=['Conv2d', 'Linear'])
    # Zero everything first so that the effect of XavierInit is observable.
    model.apply(constant_func)
    assert torch.equal(model[0].weight, torch.full(model[0].weight.shape, 0.))
    assert torch.equal(model[2].weight, torch.full(model[2].weight.shape, 0.))
    assert torch.equal(model[0].bias, torch.full(model[0].bias.shape, 0.))
    assert torch.equal(model[2].bias, torch.full(model[2].bias.shape, 0.))

    res = bias_init_with_prob(0.01)
    func(model)
    assert not torch.equal(model[0].weight,
                           torch.full(model[0].weight.shape, 0.))
    assert not torch.equal(model[2].weight,
                           torch.full(model[2].weight.shape, 0.))
    assert torch.equal(model[0].bias, torch.full(model[0].bias.shape, res))
    assert torch.equal(model[2].bias, torch.full(model[2].bias.shape, res))

    # test layer key with base class name
    model = nn.Sequential(nn.Conv2d(3, 1, 3), nn.ReLU(), nn.Conv1d(1, 2, 1))
    func = ConstantInit(val=4., bias=5., layer='_ConvNd')
    func(model)
    assert torch.all(model[0].weight == 4.)
    assert torch.all(model[2].weight == 4.)
    assert torch.all(model[0].bias == 5.)
    assert torch.all(model[2].bias == 5.)

    func = XavierInit(gain=100, bias_prob=0.01, layer='_ConvNd')
    func(model)
    assert not torch.all(model[0].weight == 4.)
    assert not torch.all(model[2].weight == 4.)
    assert torch.all(model[0].bias == res)
    assert torch.all(model[2].bias == res)

    # test bias input type
    with pytest.raises(TypeError):
        func = XavierInit(bias='0.1', layer='Conv2d')
    # test layer input type
    with pytest.raises(TypeError):
        func = XavierInit(bias=0.1, layer=1)


def test_normalinit():
    """test Normalinit class."""
    model = nn.Sequential(nn.Conv2d(3, 1, 3), nn.ReLU(), nn.Linear(1, 2))
    first, last = model[0], model[2]

    # A tiny std keeps the sampled weights close to the requested mean.
    NormalInit(
        mean=100, std=1e-5, bias=200, layer=['Conv2d', 'Linear'])(model)
    hundred = torch.tensor(100.)
    two_hundred = torch.tensor(200.)
    assert first.weight.allclose(hundred)
    assert last.weight.allclose(hundred)
    assert first.bias.allclose(two_hundred)
    assert last.bias.allclose(two_hundred)

    initializer = NormalInit(
        mean=300, std=1e-5, bias_prob=0.01, layer=['Conv2d', 'Linear'])
    res = bias_init_with_prob(0.01)
    initializer(model)
    assert first.weight.allclose(torch.tensor(300.))
    assert last.weight.allclose(torch.tensor(300.))
    assert first.bias.allclose(torch.tensor(res))
    assert last.bias.allclose(torch.tensor(res))

    # test layer key with base class name
    model = nn.Sequential(nn.Conv2d(3, 1, 3), nn.ReLU(), nn.Conv1d(1, 2, 1))

    NormalInit(mean=300, std=1e-5, bias_prob=0.01, layer='_ConvNd')(model)
    assert model[0].weight.allclose(torch.tensor(300.))
    assert model[2].weight.allclose(torch.tensor(300.))
    assert torch.all(model[0].bias == res)
    assert torch.all(model[2].bias == res)


def test_truncnormalinit():
    """test TruncNormalInit class."""
    model = nn.Sequential(nn.Conv2d(3, 1, 3), nn.ReLU(), nn.Linear(1, 2))
    first, last = model[0], model[2]

    # A tiny std keeps the sampled weights close to the requested mean.
    TruncNormalInit(
        mean=100, std=1e-5, bias=200, a=0, b=200,
        layer=['Conv2d', 'Linear'])(model)
    hundred = torch.tensor(100.)
    two_hundred = torch.tensor(200.)
    assert first.weight.allclose(hundred)
    assert last.weight.allclose(hundred)
    assert first.bias.allclose(two_hundred)
    assert last.bias.allclose(two_hundred)

    initializer = TruncNormalInit(
        mean=300,
        std=1e-5,
        a=100,
        b=400,
        bias_prob=0.01,
        layer=['Conv2d', 'Linear'])
    res = bias_init_with_prob(0.01)
    initializer(model)
    assert first.weight.allclose(torch.tensor(300.))
    assert last.weight.allclose(torch.tensor(300.))
    assert first.bias.allclose(torch.tensor(res))
    assert last.bias.allclose(torch.tensor(res))

    # test layer key with base class name
    model = nn.Sequential(nn.Conv2d(3, 1, 3), nn.ReLU(), nn.Conv1d(1, 2, 1))

    TruncNormalInit(
        mean=300, std=1e-5, a=100, b=400, bias_prob=0.01,
        layer='_ConvNd')(model)
    assert model[0].weight.allclose(torch.tensor(300.))
    assert model[2].weight.allclose(torch.tensor(300.))
    assert torch.all(model[0].bias == res)
    assert torch.all(model[2].bias == res)


def test_uniforminit():
    """test UniformInit class."""

    def _const(param, value):
        # Tensor with ``param``'s shape, filled with ``value``.
        return torch.full(param.shape, value)

    model = nn.Sequential(nn.Conv2d(3, 1, 3), nn.ReLU(), nn.Linear(1, 2))
    first, last = model[0], model[2]

    # With a == b the uniform range collapses to one value, so the
    # resulting weights can be compared exactly.
    UniformInit(a=1, b=1, bias=2, layer=['Conv2d', 'Linear'])(model)
    assert torch.equal(first.weight, _const(first.weight, 1.))
    assert torch.equal(last.weight, _const(last.weight, 1.))
    assert torch.equal(first.bias, _const(first.bias, 2.))
    assert torch.equal(last.bias, _const(last.bias, 2.))

    UniformInit(a=100, b=100, layer=['Conv2d', 'Linear'], bias=10)(model)
    assert torch.equal(first.weight, _const(first.weight, 100.))
    assert torch.equal(last.weight, _const(last.weight, 100.))
    assert torch.equal(first.bias, _const(first.bias, 10.))
    assert torch.equal(last.bias, _const(last.bias, 10.))

    # test layer key with base class name
    model = nn.Sequential(nn.Conv2d(3, 1, 3), nn.ReLU(), nn.Conv1d(1, 2, 1))

    initializer = UniformInit(a=100, b=100, bias_prob=0.01, layer='_ConvNd')
    res = bias_init_with_prob(0.01)
    initializer(model)
    assert torch.all(model[0].weight == 100.)
    assert torch.all(model[2].weight == 100.)
    assert torch.all(model[0].bias == res)
    assert torch.all(model[2].bias == res)


def test_kaiminginit():
    """test KaimingInit class."""

    def _const(param, value):
        # Tensor with ``param``'s shape, filled with ``value``.
        return torch.full(param.shape, value)

    def _assert_all_zero(net):
        # Weights and biases of layers 0 and 2 must all be exactly zero.
        assert torch.equal(net[0].weight, _const(net[0].weight, 0.))
        assert torch.equal(net[2].weight, _const(net[2].weight, 0.))
        assert torch.equal(net[0].bias, _const(net[0].bias, 0.))
        assert torch.equal(net[2].bias, _const(net[2].bias, 0.))

    def _assert_reinitialised(net):
        # Weights moved away from zero, biases set to exactly 10.
        assert not torch.equal(net[0].weight, _const(net[0].weight, 0.))
        assert not torch.equal(net[2].weight, _const(net[2].weight, 0.))
        assert torch.equal(net[0].bias, _const(net[0].bias, 10.))
        assert torch.equal(net[2].bias, _const(net[2].bias, 10.))

    model = nn.Sequential(nn.Conv2d(3, 1, 3), nn.ReLU(), nn.Linear(1, 2))
    # Only the Conv2d layer is targeted here.
    KaimingInit(bias=0.1, layer='Conv2d')(model)
    assert torch.equal(model[0].bias, _const(model[0].bias, 0.1))
    assert not torch.equal(model[2].bias, _const(model[2].bias, 0.1))

    kaiming = KaimingInit(a=100, bias=10, layer=['Conv2d', 'Linear'])
    zero_out = ConstantInit(val=0, bias=0, layer=['Conv2d', 'Linear'])
    model.apply(zero_out)
    _assert_all_zero(model)

    kaiming(model)
    _assert_reinitialised(model)

    # test layer key with base class name
    model = nn.Sequential(nn.Conv2d(3, 1, 3), nn.ReLU(), nn.Conv1d(1, 2, 1))
    KaimingInit(bias=0.1, layer='_ConvNd')(model)
    assert torch.all(model[0].bias == 0.1)
    assert torch.all(model[2].bias == 0.1)

    kaiming = KaimingInit(a=100, bias=10, layer='_ConvNd')
    zero_out = ConstantInit(val=0, bias=0, layer='_ConvNd')
    model.apply(zero_out)
    _assert_all_zero(model)

    kaiming(model)
    _assert_reinitialised(model)


def test_caffe2xavierinit():
    """test Caffe2XavierInit."""
    model = nn.Sequential(nn.Conv2d(3, 1, 3), nn.ReLU(), nn.Linear(1, 2))
    conv, linear = model[0], model[2]

    # Only the Conv2d layer is targeted; the Linear bias must not be 0.1.
    Caffe2XavierInit(bias=0.1, layer='Conv2d')(model)
    assert torch.equal(conv.bias, torch.full(conv.bias.shape, 0.1))
    assert not torch.equal(linear.bias, torch.full(linear.bias.shape, 0.1))


class FooModule(nn.Module):
    """Minimal fixture module: one linear layer and two 2D convolutions."""

    def __init__(self):
        super().__init__()
        # Sub-module names are referenced by the tests (e.g. as override
        # targets), so they must stay exactly as they are.
        self.linear = nn.Linear(in_features=1, out_features=2)
        self.conv2d = nn.Conv2d(in_channels=3, out_channels=1, kernel_size=3)
        self.conv2d_2 = nn.Conv2d(
            in_channels=3, out_channels=2, kernel_size=3)


def test_pretrainedinit():
    """test PretrainedInit class.

    A reference model is filled with constants and saved; PretrainedInit
    must then restore those values into a fresh model, both for the whole
    state dict and for the ``'linear.'`` sub-module selected by ``prefix``.
    """
    import os

    modelA = FooModule()
    constant_func = ConstantInit(val=1, bias=2, layer=['Conv2d', 'Linear'])
    modelA.apply(constant_func)
    modelB = FooModule()
    modelC = nn.Linear(1, 2)
    with TemporaryDirectory() as tmp_dir:
        # Write the checkpoint inside the temporary directory. The original
        # saved it under a relative name, which put the file in the current
        # working directory and left it behind after the test.
        checkpoint = os.path.join(tmp_dir, 'modelA.pth')
        torch.save(modelA.state_dict(), checkpoint)
        funcB = PretrainedInit(checkpoint=checkpoint)
        funcC = PretrainedInit(checkpoint=checkpoint, prefix='linear.')

        funcB(modelB)
        assert torch.equal(modelB.linear.weight,
                           torch.full(modelB.linear.weight.shape, 1.))
        assert torch.equal(modelB.linear.bias,
                           torch.full(modelB.linear.bias.shape, 2.))
        assert torch.equal(modelB.conv2d.weight,
                           torch.full(modelB.conv2d.weight.shape, 1.))
        assert torch.equal(modelB.conv2d.bias,
                           torch.full(modelB.conv2d.bias.shape, 2.))
        assert torch.equal(modelB.conv2d_2.weight,
                           torch.full(modelB.conv2d_2.weight.shape, 1.))
        assert torch.equal(modelB.conv2d_2.bias,
                           torch.full(modelB.conv2d_2.bias.shape, 2.))

        funcC(modelC)
        assert torch.equal(modelC.weight, torch.full(modelC.weight.shape, 1.))
        assert torch.equal(modelC.bias, torch.full(modelC.bias.shape, 2.))


def test_initialize():
    """Check ``initialize`` with dict/list configs, overrides and bad input.

    Every successful call is followed by an equality check on ``init_cfg``
    to make sure ``initialize`` did not modify the config it was given.
    """
    import os

    model = nn.Sequential(nn.Conv2d(3, 1, 3), nn.ReLU(), nn.Linear(1, 2))
    foonet = FooModule()

    # test layer key
    init_cfg = dict(type='Constant', layer=['Conv2d', 'Linear'], val=1, bias=2)
    initialize(model, init_cfg)
    assert torch.equal(model[0].weight, torch.full(model[0].weight.shape, 1.))
    assert torch.equal(model[2].weight, torch.full(model[2].weight.shape, 1.))
    assert torch.equal(model[0].bias, torch.full(model[0].bias.shape, 2.))
    assert torch.equal(model[2].bias, torch.full(model[2].bias.shape, 2.))
    assert init_cfg == dict(
        type='Constant', layer=['Conv2d', 'Linear'], val=1, bias=2)

    # test init_cfg with list type
    init_cfg = [
        dict(type='Constant', layer='Conv2d', val=1, bias=2),
        dict(type='Constant', layer='Linear', val=3, bias=4)
    ]
    initialize(model, init_cfg)
    assert torch.equal(model[0].weight, torch.full(model[0].weight.shape, 1.))
    assert torch.equal(model[2].weight, torch.full(model[2].weight.shape, 3.))
    assert torch.equal(model[0].bias, torch.full(model[0].bias.shape, 2.))
    assert torch.equal(model[2].bias, torch.full(model[2].bias.shape, 4.))
    assert init_cfg == [
        dict(type='Constant', layer='Conv2d', val=1, bias=2),
        dict(type='Constant', layer='Linear', val=3, bias=4)
    ]

    # test layer key and override key
    init_cfg = dict(
        type='Constant',
        val=1,
        bias=2,
        layer=['Conv2d', 'Linear'],
        override=dict(type='Constant', name='conv2d_2', val=3, bias=4))
    initialize(foonet, init_cfg)
    assert torch.equal(foonet.linear.weight,
                       torch.full(foonet.linear.weight.shape, 1.))
    assert torch.equal(foonet.linear.bias,
                       torch.full(foonet.linear.bias.shape, 2.))
    assert torch.equal(foonet.conv2d.weight,
                       torch.full(foonet.conv2d.weight.shape, 1.))
    assert torch.equal(foonet.conv2d.bias,
                       torch.full(foonet.conv2d.bias.shape, 2.))
    assert torch.equal(foonet.conv2d_2.weight,
                       torch.full(foonet.conv2d_2.weight.shape, 3.))
    assert torch.equal(foonet.conv2d_2.bias,
                       torch.full(foonet.conv2d_2.bias.shape, 4.))
    assert init_cfg == dict(
        type='Constant',
        val=1,
        bias=2,
        layer=['Conv2d', 'Linear'],
        override=dict(type='Constant', name='conv2d_2', val=3, bias=4))

    # test override key
    init_cfg = dict(
        type='Constant', val=5, bias=6, override=dict(name='conv2d_2'))
    initialize(foonet, init_cfg)
    assert not torch.equal(foonet.linear.weight,
                           torch.full(foonet.linear.weight.shape, 5.))
    assert not torch.equal(foonet.linear.bias,
                           torch.full(foonet.linear.bias.shape, 6.))
    assert not torch.equal(foonet.conv2d.weight,
                           torch.full(foonet.conv2d.weight.shape, 5.))
    assert not torch.equal(foonet.conv2d.bias,
                           torch.full(foonet.conv2d.bias.shape, 6.))
    assert torch.equal(foonet.conv2d_2.weight,
                       torch.full(foonet.conv2d_2.weight.shape, 5.))
    assert torch.equal(foonet.conv2d_2.bias,
                       torch.full(foonet.conv2d_2.bias.shape, 6.))
    assert init_cfg == dict(
        type='Constant', val=5, bias=6, override=dict(name='conv2d_2'))

    # test Pretrained init combined with an override
    modelA = FooModule()
    constant_func = ConstantInit(val=1, bias=2, layer=['Conv2d', 'Linear'])
    modelA.apply(constant_func)
    with TemporaryDirectory() as tmp_dir:
        # Keep the checkpoint inside the temporary directory. The original
        # saved it under a relative name, which put the file in the current
        # working directory and left it behind after the test.
        checkpoint = os.path.join(tmp_dir, 'modelA.pth')
        init_cfg = dict(
            type='Pretrained',
            checkpoint=checkpoint,
            override=dict(type='Constant', name='conv2d_2', val=3, bias=4))
        torch.save(modelA.state_dict(), checkpoint)
        initialize(foonet, init_cfg)
        assert torch.equal(foonet.linear.weight,
                           torch.full(foonet.linear.weight.shape, 1.))
        assert torch.equal(foonet.linear.bias,
                           torch.full(foonet.linear.bias.shape, 2.))
        assert torch.equal(foonet.conv2d.weight,
                           torch.full(foonet.conv2d.weight.shape, 1.))
        assert torch.equal(foonet.conv2d.bias,
                           torch.full(foonet.conv2d.bias.shape, 2.))
        assert torch.equal(foonet.conv2d_2.weight,
                           torch.full(foonet.conv2d_2.weight.shape, 3.))
        assert torch.equal(foonet.conv2d_2.bias,
                           torch.full(foonet.conv2d_2.bias.shape, 4.))
    assert init_cfg == dict(
        type='Pretrained',
        checkpoint=checkpoint,
        override=dict(type='Constant', name='conv2d_2', val=3, bias=4))

    # test init_cfg type
    with pytest.raises(TypeError):
        init_cfg = 'init_cfg'
        initialize(foonet, init_cfg)

    # test override value type
    with pytest.raises(TypeError):
        init_cfg = dict(
            type='Constant',
            val=1,
            bias=2,
            layer=['Conv2d', 'Linear'],
            override='conv')
        initialize(foonet, init_cfg)

    # test override name
    with pytest.raises(RuntimeError):
        init_cfg = dict(
            type='Constant',
            val=1,
            bias=2,
            layer=['Conv2d', 'Linear'],
            override=dict(type='Constant', name='conv2d_3', val=3, bias=4))
        initialize(foonet, init_cfg)

    # test list override name
    with pytest.raises(RuntimeError):
        init_cfg = dict(
            type='Constant',
            val=1,
            bias=2,
            layer=['Conv2d', 'Linear'],
            override=[
                dict(type='Constant', name='conv2d', val=3, bias=4),
                dict(type='Constant', name='conv2d_3', val=5, bias=6)
            ])
        initialize(foonet, init_cfg)

    # test override with args except type key
    with pytest.raises(ValueError):
        init_cfg = dict(
            type='Constant',
            val=1,
            bias=2,
            override=dict(name='conv2d_2', val=3, bias=4))
        initialize(foonet, init_cfg)

    # test override without name
    with pytest.raises(ValueError):
        init_cfg = dict(
            type='Constant',
            val=1,
            bias=2,
            override=dict(type='Constant', val=3, bias=4))
        initialize(foonet, init_cfg)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_cnn/test_wrappers.py
================================================
from unittest.mock import patch

import pytest
import torch
import torch.nn as nn

from mmcv.cnn.bricks import (Conv2d, Conv3d, ConvTranspose2d, ConvTranspose3d,
                             Linear, MaxPool2d, MaxPool3d)

# Version string that the tests below patch into ``torch.__version__``:
# 'parrots' is kept as-is, anything else is replaced by '1.1'.
# NOTE(review): presumably '1.1' forces the wrappers onto their legacy
# empty-input code path - confirm against mmcv.cnn.bricks.wrappers.
torch_version = 'parrots' if torch.__version__ == 'parrots' else '1.1'


@patch('torch.__version__', torch_version)
@pytest.mark.parametrize(
    'in_w,in_h,in_channel,out_channel,kernel_size,stride,padding,dilation',
    [(10, 10, 1, 1, 3, 1, 0, 1), (20, 20, 3, 3, 5, 2, 1, 2)])
def test_conv2d(in_w, in_h, in_channel, out_channel, kernel_size, stride,
                padding, dilation):
    """Compare the Conv2d wrapper with ``nn.Conv2d``, incl. empty batches.

    CommandLine:
        xdoctest -m tests/test_wrappers.py test_conv2d
    """
    conv_args = (in_channel, out_channel, kernel_size)
    conv_kwargs = dict(stride=stride, padding=padding, dilation=dilation)

    # train mode
    # wrapper op fed with a batch of size 0
    empty_batch = torch.randn(0, in_channel, in_h, in_w)
    torch.manual_seed(0)
    wrapper = Conv2d(*conv_args, **conv_kwargs)
    wrapper_out = wrapper(empty_batch)

    # torch op fed with a batch of size 3 as shape reference; the seed is
    # reset first so that both modules are built from the same RNG state
    normal_batch = torch.randn(3, in_channel, in_h, in_w).requires_grad_(True)
    torch.manual_seed(0)
    ref = nn.Conv2d(*conv_args, **conv_kwargs)
    ref_out = ref(normal_batch)

    assert wrapper_out.shape[0] == 0
    assert wrapper_out.shape[1:] == ref_out.shape[1:]

    # backward through the empty output must still produce weight gradients
    wrapper_out.sum().backward()
    weight = wrapper.weight
    assert weight.grad is not None
    assert weight.grad.shape == weight.shape

    assert torch.equal(wrapper(normal_batch), ref_out)

    # eval mode
    empty_batch = torch.randn(0, in_channel, in_h, in_w)
    wrapper = Conv2d(*conv_args, **conv_kwargs)
    wrapper.eval()
    wrapper(empty_batch)


@patch('torch.__version__', torch_version)
@pytest.mark.parametrize(
    'in_w,in_h,in_t,in_channel,out_channel,kernel_size,stride,padding,dilation',  # noqa: E501
    [(10, 10, 10, 1, 1, 3, 1, 0, 1), (20, 20, 20, 3, 3, 5, 2, 1, 2)])
def test_conv3d(in_w, in_h, in_t, in_channel, out_channel, kernel_size, stride,
                padding, dilation):
    """Compare the ``Conv3d`` wrapper against ``nn.Conv3d``.

    Checks the output shape for a zero-batch input, the presence of a weight
    gradient after backward, and exact agreement with ``nn.Conv3d`` on a
    non-empty batch.

    CommandLine:
        xdoctest -m tests/test_cnn/test_wrappers.py test_conv3d
    """
    ctor_args = (in_channel, out_channel, kernel_size)
    ctor_kwargs = dict(stride=stride, padding=padding, dilation=dilation)

    # --- train mode ---
    # wrapper fed with a batch of size zero
    empty_batch = torch.randn(0, in_channel, in_t, in_h, in_w)
    torch.manual_seed(0)
    wrapped = Conv3d(*ctor_args, **ctor_kwargs)
    empty_out = wrapped(empty_batch)

    # plain torch op on a batch of three serves as the shape reference;
    # re-seeding gives it the same initial weights as the wrapper
    full_batch = torch.randn(3, in_channel, in_t, in_h, in_w)
    full_batch = full_batch.requires_grad_(True)
    torch.manual_seed(0)
    reference = nn.Conv3d(*ctor_args, **ctor_kwargs)
    expected = reference(full_batch)

    assert empty_out.shape[0] == 0
    assert empty_out.shape[1:] == expected.shape[1:]

    # backward through the empty output must still create a weight gradient
    empty_out.sum().backward()
    weight = wrapped.weight
    assert weight.grad is not None
    assert weight.grad.shape == weight.shape

    assert torch.equal(wrapped(full_batch), expected)

    # --- eval mode ---
    # only checks that the forward pass on an empty batch does not raise
    empty_batch = torch.randn(0, in_channel, in_t, in_h, in_w)
    wrapped = Conv3d(*ctor_args, **ctor_kwargs)
    wrapped.eval()
    wrapped(empty_batch)


@patch('torch.__version__', torch_version)
@pytest.mark.parametrize(
    'in_w,in_h,in_channel,out_channel,kernel_size,stride,padding,dilation',
    [(10, 10, 1, 1, 3, 1, 0, 1), (20, 20, 3, 3, 5, 2, 1, 2)])
def test_conv_transposed_2d(in_w, in_h, in_channel, out_channel, kernel_size,
                            stride, padding, dilation):
    """Compare the ``ConvTranspose2d`` wrapper against ``nn.ConvTranspose2d``.

    Checks the output shape for a zero-batch input, the presence of a weight
    gradient after backward, and exact agreement with the torch op on a
    non-empty batch.
    """
    # wrapper fed with a batch of size zero
    empty_batch = torch.randn(0, in_channel, in_h, in_w, requires_grad=True)

    # output padding must be smaller than either stride or dilation;
    # under parrots it is forced to 0
    if torch.__version__ == 'parrots':
        out_pad = 0
    else:
        out_pad = min(stride, dilation) - 1

    ctor_args = (in_channel, out_channel, kernel_size)
    ctor_kwargs = dict(
        stride=stride,
        padding=padding,
        dilation=dilation,
        output_padding=out_pad)

    torch.manual_seed(0)
    wrapped = ConvTranspose2d(*ctor_args, **ctor_kwargs)
    empty_out = wrapped(empty_batch)

    # plain torch op on a batch of three serves as the shape reference;
    # re-seeding gives it the same initial weights as the wrapper
    full_batch = torch.randn(3, in_channel, in_h, in_w)
    torch.manual_seed(0)
    reference = nn.ConvTranspose2d(*ctor_args, **ctor_kwargs)
    expected = reference(full_batch)

    assert empty_out.shape[0] == 0
    assert empty_out.shape[1:] == expected.shape[1:]

    # backward through the empty output must still create a weight gradient
    empty_out.sum().backward()
    weight = wrapped.weight
    assert weight.grad is not None
    assert weight.grad.shape == weight.shape

    assert torch.equal(wrapped(full_batch), expected)

    # --- eval mode ---
    # only checks that the forward pass on an empty batch does not raise
    empty_batch = torch.randn(0, in_channel, in_h, in_w)
    wrapped = ConvTranspose2d(*ctor_args, **ctor_kwargs)
    wrapped.eval()
    wrapped(empty_batch)


@patch('torch.__version__', torch_version)
@pytest.mark.parametrize(
    'in_w,in_h,in_t,in_channel,out_channel,kernel_size,stride,padding,dilation',  # noqa: E501
    [(10, 10, 10, 1, 1, 3, 1, 0, 1), (20, 20, 20, 3, 3, 5, 2, 1, 2)])
def test_conv_transposed_3d(in_w, in_h, in_t, in_channel, out_channel,
                            kernel_size, stride, padding, dilation):
    """Compare the ``ConvTranspose3d`` wrapper against ``nn.ConvTranspose3d``.

    Checks the output shape for a zero-batch input, the presence of a weight
    gradient after backward, and exact agreement with the torch op on a
    non-empty batch.
    """
    # wrapper fed with a batch of size zero
    empty_batch = torch.randn(
        0, in_channel, in_t, in_h, in_w, requires_grad=True)

    # output padding must be smaller than either stride or dilation
    out_pad = min(stride, dilation) - 1

    ctor_args = (in_channel, out_channel, kernel_size)
    ctor_kwargs = dict(
        stride=stride,
        padding=padding,
        dilation=dilation,
        output_padding=out_pad)

    torch.manual_seed(0)
    wrapped = ConvTranspose3d(*ctor_args, **ctor_kwargs)
    empty_out = wrapped(empty_batch)

    # plain torch op on a batch of three serves as the shape reference;
    # re-seeding gives it the same initial weights as the wrapper
    full_batch = torch.randn(3, in_channel, in_t, in_h, in_w)
    torch.manual_seed(0)
    reference = nn.ConvTranspose3d(*ctor_args, **ctor_kwargs)
    expected = reference(full_batch)

    assert empty_out.shape[0] == 0
    assert empty_out.shape[1:] == expected.shape[1:]

    # backward through the empty output must still create a weight gradient
    empty_out.sum().backward()
    weight = wrapped.weight
    assert weight.grad is not None
    assert weight.grad.shape == weight.shape

    assert torch.equal(wrapped(full_batch), expected)

    # --- eval mode ---
    # only checks that the forward pass on an empty batch does not raise
    empty_batch = torch.randn(0, in_channel, in_t, in_h, in_w)
    wrapped = ConvTranspose3d(*ctor_args, **ctor_kwargs)
    wrapped.eval()
    wrapped(empty_batch)


@patch('torch.__version__', torch_version)
@pytest.mark.parametrize(
    'in_w,in_h,in_channel,out_channel,kernel_size,stride,padding,dilation',
    [(10, 10, 1, 1, 3, 1, 0, 1), (20, 20, 3, 3, 5, 2, 1, 2)])
def test_max_pool_2d(in_w, in_h, in_channel, out_channel, kernel_size, stride,
                     padding, dilation):
    """Compare the ``MaxPool2d`` wrapper against ``nn.MaxPool2d``.

    Checks the output shape for a zero-batch input and exact agreement with
    the torch op on a non-empty batch. ``out_channel`` is part of the
    parametrization but is not used by this test.
    """
    pool_kwargs = dict(stride=stride, padding=padding, dilation=dilation)

    # wrapper fed with a batch of size zero
    empty_batch = torch.randn(0, in_channel, in_h, in_w, requires_grad=True)
    wrapped = MaxPool2d(kernel_size, **pool_kwargs)
    empty_out = wrapped(empty_batch)

    # plain torch op on a batch of three serves as the shape reference
    full_batch = torch.randn(3, in_channel, in_h, in_w)
    reference = nn.MaxPool2d(kernel_size, **pool_kwargs)
    expected = reference(full_batch)

    assert empty_out.shape[0] == 0
    assert empty_out.shape[1:] == expected.shape[1:]

    assert torch.equal(wrapped(full_batch), expected)


@patch('torch.__version__', torch_version)
@pytest.mark.parametrize(
    'in_w,in_h,in_t,in_channel,out_channel,kernel_size,stride,padding,dilation',  # noqa: E501
    [(10, 10, 10, 1, 1, 3, 1, 0, 1), (20, 20, 20, 3, 3, 5, 2, 1, 2)])
@pytest.mark.skipif(
    torch.__version__ == 'parrots' and not torch.cuda.is_available(),
    reason='parrots requires CUDA support')
def test_max_pool_3d(in_w, in_h, in_t, in_channel, out_channel, kernel_size,
                     stride, padding, dilation):
    """Compare the ``MaxPool3d`` wrapper against ``nn.MaxPool3d``.

    Checks the output shape for a zero-batch input and exact agreement with
    the torch op on a non-empty batch. Under parrots the inputs are moved to
    CUDA first. ``out_channel`` is part of the parametrization but is not
    used by this test.
    """
    use_cuda = torch.__version__ == 'parrots'
    pool_kwargs = dict(stride=stride, padding=padding, dilation=dilation)

    # wrapper fed with a batch of size zero
    empty_batch = torch.randn(
        0, in_channel, in_t, in_h, in_w, requires_grad=True)
    wrapped = MaxPool3d(kernel_size, **pool_kwargs)
    if use_cuda:
        empty_batch = empty_batch.cuda()
    empty_out = wrapped(empty_batch)

    # plain torch op on a batch of three serves as the shape reference
    full_batch = torch.randn(3, in_channel, in_t, in_h, in_w)
    reference = nn.MaxPool3d(kernel_size, **pool_kwargs)
    if use_cuda:
        full_batch = full_batch.cuda()
    expected = reference(full_batch)

    assert empty_out.shape[0] == 0
    assert empty_out.shape[1:] == expected.shape[1:]

    assert torch.equal(wrapped(full_batch), expected)


@patch('torch.__version__', torch_version)
@pytest.mark.parametrize('in_w,in_h,in_feature,out_feature', [(10, 10, 1, 1),
                                                              (20, 20, 3, 3)])
def test_linear(in_w, in_h, in_feature, out_feature):
    """Compare the ``Linear`` wrapper against ``nn.Linear``.

    Checks the output shape for a zero-batch input, the presence of a weight
    gradient after backward, and exact agreement with ``nn.Linear`` on a
    non-empty batch. ``in_w`` and ``in_h`` are part of the parametrization
    but are not used by this test.
    """
    layer_dims = (in_feature, out_feature)

    # wrapper fed with a batch of size zero
    empty_batch = torch.randn(0, in_feature, requires_grad=True)
    torch.manual_seed(0)
    wrapped = Linear(*layer_dims)
    empty_out = wrapped(empty_batch)

    # plain torch op on a batch of three serves as the shape reference;
    # re-seeding gives it the same initial weights as the wrapper
    full_batch = torch.randn(3, in_feature)
    torch.manual_seed(0)
    reference = nn.Linear(*layer_dims)
    expected = reference(full_batch)

    assert empty_out.shape[0] == 0
    assert empty_out.shape[1:] == expected.shape[1:]

    # backward through the empty output must still create a weight gradient
    empty_out.sum().backward()
    weight = wrapped.weight
    assert weight.grad is not None
    assert weight.grad.shape == weight.shape

    assert torch.equal(wrapped(full_batch), expected)

    # --- eval mode ---
    # only checks that the forward pass on an empty batch does not raise
    empty_batch = torch.randn(0, in_feature)
    wrapped = Linear(*layer_dims)
    wrapped.eval()
    wrapped(empty_batch)


@patch('mmcv.cnn.bricks.wrappers.TORCH_VERSION', (1, 10))
def test_nn_op_forward_called():
    """Check that each wrapper forwards its input to the torch.nn op.

    ``TORCH_VERSION`` in ``mmcv.cnn.bricks.wrappers`` is patched to
    ``(1, 10)``. For every wrapper the ``forward`` of the matching
    ``torch.nn`` class is replaced by a mock, and the test asserts that the
    mock receives the very tensor passed to the wrapper, both for an empty
    (zero-batch) input and for a non-empty (batch of one) input.
    """
    # (torch.nn class name, mmcv wrapper class, constructor args,
    #  per-sample input shape). The wrapper classes are referenced directly
    # instead of being looked up with ``eval`` on their names.
    cases = [
        ('Conv2d', Conv2d, (3, 2, 1), (3, 10, 10)),
        ('ConvTranspose2d', ConvTranspose2d, (3, 2, 1), (3, 10, 10)),
        ('MaxPool2d', MaxPool2d, (3, 2, 1), (3, 10, 10)),
        ('Conv3d', Conv3d, (3, 2, 1), (3, 10, 10, 10)),
        ('ConvTranspose3d', ConvTranspose3d, (3, 2, 1), (3, 10, 10, 10)),
        ('MaxPool3d', MaxPool3d, (3, 2, 1), (3, 10, 10, 10)),
        ('Linear', Linear, (3, 3), (3, )),
    ]

    for nn_name, wrapper_cls, init_args, sample_shape in cases:
        with patch(f'torch.nn.{nn_name}.forward') as nn_module_forward:
            # batch size 0 is the empty input, batch size 1 the non-empty one
            for batch_size in (0, 1):
                x = torch.randn(batch_size, *sample_shape)
                wrapper = wrapper_cls(*init_args)
                wrapper(x)
                nn_module_forward.assert_called_with(x)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_fileclient.py
================================================
import os
import os.path as osp
import sys
import tempfile
from contextlib import contextmanager
from copy import deepcopy
from pathlib import Path
from unittest.mock import MagicMock, patch

import pytest

import mmcv
from mmcv import BaseStorageBackend, FileClient
from mmcv.utils import has_method

# Register stand-ins for the optional storage SDKs so that importing them
# succeeds without the real packages; every name gets its own MagicMock.
sys.modules.update({
    module_name: MagicMock()
    for module_name in ('ceph', 'petrel_client', 'petrel_client.client', 'mc')
})


@contextmanager
def build_temporary_directory():
    """Build a temporary directory containing many files to test
    ``FileClient.list_dir_or_file``.

    The directory is removed again when the context exits. Its layout is::

        .
        |-- dir1
        |   |-- text3.txt
        |-- dir2
        |   |-- dir3
        |   |   |-- text4.txt
        |   |-- img.jpg
        |-- text1.txt
        |-- text2.txt

    Yields:
        str: Path of the temporary directory.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        root = Path(tmp_dir)
        dir1 = root / 'dir1'
        dir2 = root / 'dir2'
        dir3 = dir2 / 'dir3'

        # ``write_text``/``write_bytes`` open, write and close the file in one
        # call, so no file handle is left open (``path.open(...).write(...)``
        # never closes the handle it creates).
        (root / 'text1.txt').write_text('text1')
        (root / 'text2.txt').write_text('text2')
        dir1.mkdir()
        (dir1 / 'text3.txt').write_text('text3')
        dir2.mkdir()
        (dir2 / 'img.jpg').write_bytes(b'img')
        dir3.mkdir()
        (dir3 / 'text4.txt').write_text('text4')
        yield tmp_dir


@contextmanager
def delete_and_reset_method(obj, method):
    """Temporarily remove an attribute from the class of ``obj``.

    The attribute named ``method`` is deleted from ``type(obj)`` for the
    duration of the ``with`` block and put back afterwards, even if the block
    raises.

    Args:
        obj: Instance whose class is modified.
        method (str): Name of the class attribute to remove.
    """
    owner = type(obj)
    saved = deepcopy(getattr(owner, method))
    try:
        delattr(owner, method)
        yield
    finally:
        setattr(owner, method, saved)


class MockS3Client:
    """Stand-in for ``ceph.S3Client`` that serves files from local disk."""

    def __init__(self, enable_mc=True):
        # stored only; nothing in this mock reads it back
        self.enable_mc = enable_mc

    def Get(self, filepath):
        """Return the raw bytes of the local file at ``filepath``."""
        with open(filepath, mode='rb') as stream:
            return stream.read()


class MockPetrelClient:
    """Stand-in for ``petrel_client.client.Client`` backed by local disk.

    ``put``, ``delete``, ``contains`` and ``isdir`` are empty placeholders:
    they only need to exist on the class so the tests can detect, patch or
    temporarily remove them.
    """

    def __init__(self, enable_mc=True, enable_multi_cluster=False):
        # stored only; nothing in this mock reads them back
        self.enable_mc = enable_mc
        self.enable_multi_cluster = enable_multi_cluster

    def Get(self, filepath):
        """Return the raw bytes of the local file at ``filepath``."""
        with open(filepath, 'rb') as f:
            content = f.read()
        return content

    def put(self):
        pass

    def delete(self):
        pass

    def contains(self):
        pass

    def isdir(self):
        pass

    def list(self, dir_path):
        """Yield the entries of the local directory ``dir_path``.

        Files whose name does not start with ``'.'`` are yielded by name;
        directories are yielded with a trailing ``'/'``.
        """
        # The ``with`` block closes the scandir iterator even when the
        # generator is abandoned before it is exhausted.
        with os.scandir(dir_path) as entries:
            for entry in entries:
                if not entry.name.startswith('.') and entry.is_file():
                    yield entry.name
                elif osp.isdir(entry.path):
                    yield entry.name + '/'


class MockMemcachedClient:
    """Stand-in for the memcached client that serves files from local disk."""

    def __init__(self, server_list_cfg, client_cfg):
        # both configuration arguments are accepted and ignored
        pass

    def Get(self, filepath, buffer):
        """Read the local file at ``filepath`` into ``buffer.content``."""
        with open(filepath, mode='rb') as stream:
            buffer.content = stream.read()


class TestFileClient:

    @classmethod
    def setup_class(cls):
        """Set the fixture paths shared by all tests in this class."""
        data_root = Path(__file__).parent / 'data'
        cls.test_data_dir = data_root
        cls.img_path = data_root / 'color.jpg'
        cls.text_path = data_root / 'filelist.txt'
        # shape the tests expect after decoding ``color.jpg``
        cls.img_shape = (300, 400, 3)

    def test_error(self):
        """An unknown backend name must be rejected with ``ValueError``."""
        unknown_backend = 'hadoop'
        with pytest.raises(ValueError):
            FileClient(unknown_backend)

    def test_disk_backend(self):
        """Exercise the ``disk`` backend of ``FileClient``.

        Covers the ``name`` and ``allow_symlink`` attributes, ``get``,
        ``get_text``, ``put``, ``put_text``, ``isfile``, ``remove``,
        ``exists``, ``get_local_path``, ``join_path`` and
        ``list_dir_or_file``.
        """
        disk_backend = FileClient('disk')

        # test `name` attribute
        assert disk_backend.name == 'HardDiskBackend'
        # test `allow_symlink` attribute
        assert disk_backend.allow_symlink
        # test `get`
        # input path is Path object
        img_bytes = disk_backend.get(self.img_path)
        img = mmcv.imfrombytes(img_bytes)
        # `read_bytes`/`read_text` close the file themselves, unlike
        # `open(...).read()` which leaves the handle open
        assert self.img_path.read_bytes() == img_bytes
        assert img.shape == self.img_shape
        # input path is str
        img_bytes = disk_backend.get(str(self.img_path))
        img = mmcv.imfrombytes(img_bytes)
        assert self.img_path.read_bytes() == img_bytes
        assert img.shape == self.img_shape

        # test `get_text`
        # input path is Path object
        value_buf = disk_backend.get_text(self.text_path)
        assert self.text_path.read_text() == value_buf
        # input path is str
        value_buf = disk_backend.get_text(str(self.text_path))
        assert self.text_path.read_text() == value_buf

        with tempfile.TemporaryDirectory() as tmp_dir:
            # test `put`
            filepath1 = Path(tmp_dir) / 'test.jpg'
            disk_backend.put(b'disk', filepath1)
            assert filepath1.read_bytes() == b'disk'
            # test the `mkdir_or_exist` behavior in `put`
            _filepath1 = Path(tmp_dir) / 'not_existed_dir1' / 'test.jpg'
            disk_backend.put(b'disk', _filepath1)
            assert _filepath1.read_bytes() == b'disk'

            # test `put_text`
            filepath2 = Path(tmp_dir) / 'test.txt'
            disk_backend.put_text('disk', filepath2)
            assert filepath2.read_text() == 'disk'
            # test the `mkdir_or_exist` behavior in `put_text`
            _filepath2 = Path(tmp_dir) / 'not_existed_dir2' / 'test.txt'
            disk_backend.put_text('disk', _filepath2)
            assert _filepath2.read_text() == 'disk'

            # test `isfile`
            assert disk_backend.isfile(filepath2)
            assert not disk_backend.isfile(Path(tmp_dir) / 'not/existed/path')

            # test `remove`
            disk_backend.remove(filepath2)

            # test `exists`
            assert not disk_backend.exists(filepath2)

            # test `get_local_path`
            # if the backend is disk, `get_local_path` just returns the input
            with disk_backend.get_local_path(filepath1) as path:
                assert str(filepath1) == path
            # the file must still be there after leaving the context
            assert osp.isfile(filepath1)

        # test `join_path`
        disk_dir = '/path/of/your/directory'
        assert disk_backend.join_path(disk_dir, 'file') == \
            osp.join(disk_dir, 'file')
        assert disk_backend.join_path(disk_dir, 'dir', 'file') == \
            osp.join(disk_dir, 'dir', 'file')

        # test `list_dir_or_file`
        with build_temporary_directory() as tmp_dir:
            # 1. list directories and files
            assert set(disk_backend.list_dir_or_file(tmp_dir)) == set(
                ['dir1', 'dir2', 'text1.txt', 'text2.txt'])
            # 2. list directories and files recursively
            assert set(disk_backend.list_dir_or_file(
                tmp_dir, recursive=True)) == set([
                    'dir1',
                    osp.join('dir1', 'text3.txt'), 'dir2',
                    osp.join('dir2', 'dir3'),
                    osp.join('dir2', 'dir3', 'text4.txt'),
                    osp.join('dir2', 'img.jpg'), 'text1.txt', 'text2.txt'
                ])
            # 3. only list directories
            assert set(
                disk_backend.list_dir_or_file(
                    tmp_dir, list_file=False)) == set(['dir1', 'dir2'])
            with pytest.raises(
                    TypeError,
                    match='`suffix` should be None when `list_dir` is True'):
                # Exception is raised among the `list_dir_or_file` of client,
                # so we need to invoke the client to trigger the exception
                disk_backend.client.list_dir_or_file(
                    tmp_dir, list_file=False, suffix='.txt')
            # 4. only list directories recursively
            assert set(
                disk_backend.list_dir_or_file(
                    tmp_dir, list_file=False, recursive=True)) == set(
                        ['dir1', 'dir2',
                         osp.join('dir2', 'dir3')])
            # 5. only list files
            assert set(disk_backend.list_dir_or_file(
                tmp_dir, list_dir=False)) == set(['text1.txt', 'text2.txt'])
            # 6. only list files recursively
            assert set(
                disk_backend.list_dir_or_file(
                    tmp_dir, list_dir=False, recursive=True)) == set([
                        osp.join('dir1', 'text3.txt'),
                        osp.join('dir2', 'dir3', 'text4.txt'),
                        osp.join('dir2', 'img.jpg'), 'text1.txt', 'text2.txt'
                    ])
            # 7. only list files ending with suffix
            assert set(
                disk_backend.list_dir_or_file(
                    tmp_dir, list_dir=False,
                    suffix='.txt')) == set(['text1.txt', 'text2.txt'])
            # a tuple of suffixes is accepted; there is no top-level .jpg
            assert set(
                disk_backend.list_dir_or_file(
                    tmp_dir, list_dir=False,
                    suffix=('.txt',
                            '.jpg'))) == set(['text1.txt', 'text2.txt'])
            # a list of suffixes is rejected
            with pytest.raises(
                    TypeError,
                    match='`suffix` must be a string or tuple of strings'):
                disk_backend.client.list_dir_or_file(
                    tmp_dir, list_dir=False, suffix=['.txt', '.jpg'])
            # 8. only list files ending with suffix recursively
            assert set(
                disk_backend.list_dir_or_file(
                    tmp_dir, list_dir=False, suffix='.txt',
                    recursive=True)) == set([
                        osp.join('dir1', 'text3.txt'),
                        osp.join('dir2', 'dir3', 'text4.txt'), 'text1.txt',
                        'text2.txt'
                    ])
            # 9. only list files ending with one of several suffixes
            # recursively
            assert set(
                disk_backend.list_dir_or_file(
                    tmp_dir,
                    list_dir=False,
                    suffix=('.txt', '.jpg'),
                    recursive=True)) == set([
                        osp.join('dir1', 'text3.txt'),
                        osp.join('dir2', 'dir3', 'text4.txt'),
                        osp.join('dir2', 'img.jpg'), 'text1.txt', 'text2.txt'
                    ])

    @patch('ceph.S3Client', MockS3Client)
    def test_ceph_backend(self):
        """Exercise the ``ceph`` backend of ``FileClient`` on the mock client.

        Covers ``allow_symlink``, the unimplemented ``get_text``, ``get`` for
        both ``Path`` and ``str`` inputs, and ``path_mapping``.
        """
        backend = FileClient('ceph')

        # test `allow_symlink` attribute
        assert not backend.allow_symlink

        # `get_text` is not implemented, for Path and for str inputs alike
        for text_path in (self.text_path, str(self.text_path)):
            with pytest.raises(NotImplementedError):
                backend.get_text(text_path)

        # `get` accepts a Path object as well as a str
        for img_path in (self.img_path, str(self.img_path)):
            decoded = mmcv.imfrombytes(backend.get(img_path))
            assert decoded.shape == self.img_shape

        # `path_mapping` is either None or dict
        with pytest.raises(AssertionError):
            FileClient('ceph', path_mapping=1)

        # test `path_mapping`
        remote_root = 's3://user/data'
        local_root = str(self.test_data_dir)
        backend = FileClient('ceph', path_mapping={local_root: remote_root})
        raw_client = backend.client._client
        # read the image once through the real mock, then replace `Get` by a
        # MagicMock that returns those bytes and records the path it is given
        raw_client.Get = MagicMock(return_value=raw_client.Get(self.img_path))
        decoded = mmcv.imfrombytes(backend.get(self.img_path))
        assert decoded.shape == self.img_shape
        raw_client.Get.assert_called_with(
            str(self.img_path).replace(local_root, remote_root))

    @patch('petrel_client.client.Client', MockPetrelClient)
    @pytest.mark.parametrize('backend,prefix', [('petrel', None),
                                                (None, 's3')])
    def test_petrel_backend(self, backend, prefix):
        """Exercise the ``petrel`` backend of ``FileClient`` on the mock client.

        The backend is selected either by name (``'petrel'``) or by prefix
        (``'s3'``). Covers ``allow_symlink``, ``get``, ``path_mapping``,
        ``_map_path``, ``_format_path``, ``get_text``, ``put``, ``put_text``,
        ``remove``, ``exists``, ``isdir``, ``isfile``, ``join_path``,
        ``get_local_path`` and ``list_dir_or_file``.
        """
        petrel_backend = FileClient(backend=backend, prefix=prefix)

        # test `allow_symlink` attribute
        assert not petrel_backend.allow_symlink

        # input path is Path object
        img_bytes = petrel_backend.get(self.img_path)
        img = mmcv.imfrombytes(img_bytes)
        assert img.shape == self.img_shape
        # input path is str
        img_bytes = petrel_backend.get(str(self.img_path))
        img = mmcv.imfrombytes(img_bytes)
        assert img.shape == self.img_shape

        # `path_mapping` is either None or dict
        with pytest.raises(AssertionError):
            FileClient('petrel', path_mapping=1)

        # test `_map_path`
        petrel_dir = 's3://user/data'
        petrel_backend = FileClient(
            'petrel', path_mapping={str(self.test_data_dir): petrel_dir})
        assert petrel_backend.client._map_path(str(self.img_path)) == \
            str(self.img_path).replace(str(self.test_data_dir), petrel_dir)

        petrel_path = f'{petrel_dir}/test.jpg'
        petrel_backend = FileClient('petrel')

        # test `_format_path`
        assert petrel_backend.client._format_path('s3://user\\data\\test.jpg')\
            == petrel_path

        # test `get`
        with patch.object(
                petrel_backend.client._client, 'Get',
                return_value=b'petrel') as mock_get:
            assert petrel_backend.get(petrel_path) == b'petrel'
            mock_get.assert_called_once_with(petrel_path)

        # test `get_text`
        with patch.object(
                petrel_backend.client._client, 'Get',
                return_value=b'petrel') as mock_get:
            assert petrel_backend.get_text(petrel_path) == 'petrel'
            mock_get.assert_called_once_with(petrel_path)

        # test `put`
        with patch.object(petrel_backend.client._client, 'put') as mock_put:
            petrel_backend.put(b'petrel', petrel_path)
            mock_put.assert_called_once_with(petrel_path, b'petrel')

        # test `put_text`
        with patch.object(petrel_backend.client._client, 'put') as mock_put:
            petrel_backend.put_text('petrel', petrel_path)
            mock_put.assert_called_once_with(petrel_path, b'petrel')

        # test `remove`
        assert has_method(petrel_backend.client._client, 'delete')
        # raise Exception if `delete` is not implemented
        with delete_and_reset_method(petrel_backend.client._client, 'delete'):
            assert not has_method(petrel_backend.client._client, 'delete')
            with pytest.raises(NotImplementedError):
                petrel_backend.remove(petrel_path)

        with patch.object(petrel_backend.client._client,
                          'delete') as mock_delete:
            petrel_backend.remove(petrel_path)
            mock_delete.assert_called_once_with(petrel_path)

        # test `exists`
        assert has_method(petrel_backend.client._client, 'contains')
        assert has_method(petrel_backend.client._client, 'isdir')
        # raise Exception if `contains` and `isdir` are not implemented
        with delete_and_reset_method(petrel_backend.client._client,
                                     'contains'), delete_and_reset_method(
                                         petrel_backend.client._client,
                                         'isdir'):
            assert not has_method(petrel_backend.client._client, 'contains')
            assert not has_method(petrel_backend.client._client, 'isdir')
            with pytest.raises(NotImplementedError):
                petrel_backend.exists(petrel_path)

        with patch.object(
                petrel_backend.client._client, 'contains',
                return_value=True) as mock_contains:
            assert petrel_backend.exists(petrel_path)
            mock_contains.assert_called_once_with(petrel_path)

        # test `isdir`
        assert has_method(petrel_backend.client._client, 'isdir')
        # raise Exception if `isdir` is not implemented
        with delete_and_reset_method(petrel_backend.client._client, 'isdir'):
            assert not has_method(petrel_backend.client._client, 'isdir')
            with pytest.raises(NotImplementedError):
                petrel_backend.isdir(petrel_path)

        with patch.object(
                petrel_backend.client._client, 'isdir',
                return_value=True) as mock_isdir:
            assert petrel_backend.isdir(petrel_dir)
            mock_isdir.assert_called_once_with(petrel_dir)

        # test `isfile`
        assert has_method(petrel_backend.client._client, 'contains')
        # raise Exception if `contains` is not implemented
        with delete_and_reset_method(petrel_backend.client._client,
                                     'contains'):
            assert not has_method(petrel_backend.client._client, 'contains')
            with pytest.raises(NotImplementedError):
                petrel_backend.isfile(petrel_path)

        with patch.object(
                petrel_backend.client._client, 'contains',
                return_value=True) as mock_contains:
            assert petrel_backend.isfile(petrel_path)
            mock_contains.assert_called_once_with(petrel_path)

        # test `join_path`
        assert petrel_backend.join_path(petrel_dir, 'file') == \
            f'{petrel_dir}/file'
        assert petrel_backend.join_path(f'{petrel_dir}/', 'file') == \
            f'{petrel_dir}/file'
        assert petrel_backend.join_path(petrel_dir, 'dir', 'file') == \
            f'{petrel_dir}/dir/file'

        # test `get_local_path`
        with patch.object(petrel_backend.client._client, 'Get',
                          return_value=b'petrel') as mock_get, \
             patch.object(petrel_backend.client._client, 'contains',
                          return_value=True) as mock_contains:
            with petrel_backend.get_local_path(petrel_path) as path:
                # `read_bytes` closes the file itself, so no handle on the
                # temporary file is left open when the context exits
                assert Path(path).read_bytes() == b'petrel'
            # exit the with block and path will be released
            assert not osp.isfile(path)
            mock_get.assert_called_once_with(petrel_path)
            mock_contains.assert_called_once_with(petrel_path)

        # test `list_dir_or_file`
        assert has_method(petrel_backend.client._client, 'list')
        # raise Exception if `list` is not implemented
        with delete_and_reset_method(petrel_backend.client._client, 'list'):
            assert not has_method(petrel_backend.client._client, 'list')
            with pytest.raises(NotImplementedError):
                list(petrel_backend.list_dir_or_file(petrel_dir))

        with build_temporary_directory() as tmp_dir:
            # 1. list directories and files
            assert set(petrel_backend.list_dir_or_file(tmp_dir)) == set(
                ['dir1', 'dir2', 'text1.txt', 'text2.txt'])
            # 2. list directories and files recursively
            assert set(
                petrel_backend.list_dir_or_file(
                    tmp_dir, recursive=True)) == set([
                        'dir1', '/'.join(('dir1', 'text3.txt')), 'dir2',
                        '/'.join(('dir2', 'dir3')), '/'.join(
                            ('dir2', 'dir3', 'text4.txt')), '/'.join(
                                ('dir2', 'img.jpg')), 'text1.txt', 'text2.txt'
                    ])
            # 3. only list directories
            assert set(
                petrel_backend.list_dir_or_file(
                    tmp_dir, list_file=False)) == set(['dir1', 'dir2'])
            with pytest.raises(
                    TypeError,
                    match=('`list_dir` should be False when `suffix` is not '
                           'None')):
                # Exception is raised among the `list_dir_or_file` of client,
                # so we need to invoke the client to trigger the exception
                petrel_backend.client.list_dir_or_file(
                    tmp_dir, list_file=False, suffix='.txt')
            # 4. only list directories recursively
            assert set(
                petrel_backend.list_dir_or_file(
                    tmp_dir, list_file=False, recursive=True)) == set(
                        ['dir1', 'dir2', '/'.join(('dir2', 'dir3'))])
            # 5. only list files
            assert set(
                petrel_backend.list_dir_or_file(tmp_dir,
                                                list_dir=False)) == set(
                                                    ['text1.txt', 'text2.txt'])
            # 6. only list files recursively
            assert set(
                petrel_backend.list_dir_or_file(
                    tmp_dir, list_dir=False, recursive=True)) == set([
                        '/'.join(('dir1', 'text3.txt')), '/'.join(
                            ('dir2', 'dir3', 'text4.txt')), '/'.join(
                                ('dir2', 'img.jpg')), 'text1.txt', 'text2.txt'
                    ])
            # 7. only list files ending with suffix
            assert set(
                petrel_backend.list_dir_or_file(
                    tmp_dir, list_dir=False,
                    suffix='.txt')) == set(['text1.txt', 'text2.txt'])
            # a tuple of suffixes is accepted; there is no top-level .jpg
            assert set(
                petrel_backend.list_dir_or_file(
                    tmp_dir, list_dir=False,
                    suffix=('.txt',
                            '.jpg'))) == set(['text1.txt', 'text2.txt'])
            # a list of suffixes is rejected
            with pytest.raises(
                    TypeError,
                    match='`suffix` must be a string or tuple of strings'):
                petrel_backend.client.list_dir_or_file(
                    tmp_dir, list_dir=False, suffix=['.txt', '.jpg'])
            # 8. only list files ending with suffix recursively
            assert set(
                petrel_backend.list_dir_or_file(
                    tmp_dir, list_dir=False, suffix='.txt',
                    recursive=True)) == set([
                        '/'.join(('dir1', 'text3.txt')), '/'.join(
                            ('dir2', 'dir3', 'text4.txt')), 'text1.txt',
                        'text2.txt'
                    ])
            # 9. only list files ending with one of several suffixes
            # recursively
            assert set(
                petrel_backend.list_dir_or_file(
                    tmp_dir,
                    list_dir=False,
                    suffix=('.txt', '.jpg'),
                    recursive=True)) == set([
                        '/'.join(('dir1', 'text3.txt')), '/'.join(
                            ('dir2', 'dir3', 'text4.txt')), '/'.join(
                                ('dir2', 'img.jpg')), 'text1.txt', 'text2.txt'
                    ])

    @patch('mc.MemcachedClient.GetInstance', MockMemcachedClient)
    @patch('mc.pyvector', MagicMock)
    @patch('mc.ConvertBuffer', lambda x: x.content)
    def test_memcached_backend(self):
        """Check the memcached backend against the patched ``mc`` names.

        ``get_text`` must raise ``NotImplementedError`` and ``get`` must
        return bytes that decode to an image of ``self.img_shape``; both
        are checked with the path given as-is and wrapped in ``str``.
        """
        backend = FileClient(
            'memcached', server_list_cfg='', client_cfg='', sys_path=None)

        # test `allow_symlink` attribute
        assert not backend.allow_symlink

        # text reading is unsupported: Path object first, then str
        for text_path in (self.text_path, str(self.text_path)):
            with pytest.raises(NotImplementedError):
                backend.get_text(text_path)

        # image bytes are readable: Path object first, then str
        for img_path in (self.img_path, str(self.img_path)):
            decoded = mmcv.imfrombytes(backend.get(img_path))
            assert decoded.shape == self.img_shape

    def test_lmdb_backend(self):
        """Check the lmdb backend built from a ``Path`` and from a ``str``.

        For both forms ``get_text`` must raise ``NotImplementedError`` and
        the ``'baboon'`` key must decode to a ``(120, 125, 3)`` image.
        """
        baboon_shape = (120, 125, 3)
        db_path = self.test_data_dir / 'demo.lmdb'

        # db_path is Path object
        backend = FileClient('lmdb', db_path=db_path)

        # test `allow_symlink` attribute
        assert not backend.allow_symlink

        with pytest.raises(NotImplementedError):
            backend.get_text(self.text_path)
        assert mmcv.imfrombytes(backend.get('baboon')).shape == baboon_shape

        # db_path is str
        backend = FileClient('lmdb', db_path=str(db_path))
        with pytest.raises(NotImplementedError):
            backend.get_text(str(self.text_path))
        assert mmcv.imfrombytes(backend.get('baboon')).shape == baboon_shape

    @pytest.mark.parametrize('backend,prefix', [('http', None),
                                                (None, 'http')])
    def test_http_backend(self, backend, prefix):
        """Check the HTTP backend, selected by backend name or by prefix.

        Args:
            backend (str | None): Backend name passed to ``FileClient``.
            prefix (str | None): Prefix passed to ``FileClient``.

        NOTE(review): the URLs below point at raw.githubusercontent.com, so
        this test presumably needs network access to pass.
        """
        http_backend = FileClient(backend=backend, prefix=prefix)
        img_url = 'https://raw.githubusercontent.com/open-mmlab/mmcv/' \
            'master/tests/data/color.jpg'
        text_url = 'https://raw.githubusercontent.com/open-mmlab/mmcv/' \
            'master/tests/data/filelist.txt'

        # test `allow_symlink` attribute
        assert not http_backend.allow_symlink

        # input is path or Path object
        with pytest.raises(Exception):
            http_backend.get(self.img_path)
        with pytest.raises(Exception):
            http_backend.get(str(self.img_path))
        with pytest.raises(Exception):
            http_backend.get_text(self.text_path)
        with pytest.raises(Exception):
            http_backend.get_text(str(self.text_path))

        # input url is http image
        img_bytes = http_backend.get(img_url)
        img = mmcv.imfrombytes(img_bytes)
        assert img.shape == self.img_shape

        # input url is http text
        value_buf = http_backend.get_text(text_url)
        # read the local reference inside a `with` block so the file handle
        # is closed instead of being left to the garbage collector
        with self.text_path.open('r') as f:
            expected_text = f.read()
        assert expected_text == value_buf

        # test `get_local_path`
        # exit the with block and path will be released
        with http_backend.get_local_path(img_url) as path:
            assert mmcv.imread(path).shape == self.img_shape
        assert not osp.isfile(path)

    def test_new_magic_method(self):
        """Check when ``FileClient`` returns a shared instance.

        Two clients created for a freshly registered backend must be the
        same object; after the same name is re-registered with
        ``force=True`` two clients created for it must be distinct objects.
        """
        backend_name = 'dummy_backend'

        class DummyBackend1(BaseStorageBackend):

            def get(self, filepath):
                return filepath

            def get_text(self, filepath, encoding='utf-8'):
                return filepath

        FileClient.register_backend(backend_name, DummyBackend1)
        first, second = (FileClient(backend=backend_name),
                         FileClient(backend=backend_name))
        assert first is second

        # once a backend has been overwritten, the singleton pattern is
        # disabled for that backend
        class DummyBackend2(BaseStorageBackend):

            def get(self, filepath):
                pass

            def get_text(self, filepath):
                pass

        FileClient.register_backend(backend_name, DummyBackend2, force=True)
        third, fourth = (FileClient(backend=backend_name),
                         FileClient(backend=backend_name))
        assert third is not fourth

    def test_parse_uri_prefix(self):
        """Check the prefix returned by ``FileClient.parse_uri_prefix``."""
        # input path is None, then a list: both must be rejected
        for invalid_uri in (None, []):
            with pytest.raises(AssertionError):
                FileClient.parse_uri_prefix(invalid_uri)

        # input path is a Path object, then a str: no prefix is found
        for local_path in (self.img_path, str(self.img_path)):
            assert FileClient.parse_uri_prefix(local_path) is None

        uri_to_prefix = (
            # input path starts with https
            ('https://raw.githubusercontent.com/open-mmlab/mmcv/'
             'master/tests/data/color.jpg', 'https'),
            # input path starts with s3
            ('s3://your_bucket/img.png', 's3'),
            # input path starts with clusterName:s3
            ('clusterName:s3://your_bucket/img.png', 's3'),
        )
        for uri, expected_prefix in uri_to_prefix:
            assert FileClient.parse_uri_prefix(uri) == expected_prefix

    def test_infer_client(self):
        """Check ``FileClient.infer_client`` for disk and petrel clients.

        Each backend is requested once through explicit client args and
        once through a ``uri`` only.
        """
        # HardDiskBackend
        from_args = FileClient.infer_client({'backend': 'disk'})
        assert from_args.name == 'HardDiskBackend'
        from_uri = FileClient.infer_client(uri=self.img_path)
        assert from_uri.name == 'HardDiskBackend'

        # PetrelBackend
        from_args = FileClient.infer_client({'backend': 'petrel'})
        assert from_args.name == 'PetrelBackend'
        from_uri = FileClient.infer_client(uri='s3://user_data')
        assert from_uri.name == 'PetrelBackend'

    def test_register_backend(self):
        """Exercise ``FileClient.register_backend``.

        Covers argument validation, registration through a plain call and
        through the decorator form, the ``force`` flag, and the ``prefixes``
        argument (a single str and a list of str). The steps mutate the
        shared ``FileClient`` registry, so their order matters.
        """

        # name must be a string
        with pytest.raises(TypeError):

            class TestClass1:
                pass

            FileClient.register_backend(1, TestClass1)

        # module must be a class
        with pytest.raises(TypeError):
            FileClient.register_backend('int', 0)

        # module must be a subclass of BaseStorageBackend
        with pytest.raises(TypeError):

            class TestClass1:
                pass

            FileClient.register_backend('TestClass1', TestClass1)

        # a valid backend that simply echoes the path it is given
        class ExampleBackend(BaseStorageBackend):

            def get(self, filepath):
                return filepath

            def get_text(self, filepath, encoding='utf-8'):
                return filepath

        FileClient.register_backend('example', ExampleBackend)
        example_backend = FileClient('example')
        assert example_backend.get(self.img_path) == self.img_path
        assert example_backend.get_text(self.text_path) == self.text_path
        assert 'example' in FileClient._backends

        # a second backend with distinguishable return values, used to show
        # that re-registering the name 'example' needs force=True
        class Example2Backend(BaseStorageBackend):

            def get(self, filepath):
                return b'bytes2'

            def get_text(self, filepath, encoding='utf-8'):
                return 'text2'

        # force=False
        with pytest.raises(KeyError):
            FileClient.register_backend('example', Example2Backend)

        # force=True replaces the backend registered under 'example'
        FileClient.register_backend('example', Example2Backend, force=True)
        example_backend = FileClient('example')
        assert example_backend.get(self.img_path) == b'bytes2'
        assert example_backend.get_text(self.text_path) == 'text2'

        # decorator form of register_backend
        @FileClient.register_backend(name='example3')
        class Example3Backend(BaseStorageBackend):

            def get(self, filepath):
                return b'bytes3'

            def get_text(self, filepath, encoding='utf-8'):
                return 'text3'

        example_backend = FileClient('example3')
        assert example_backend.get(self.img_path) == b'bytes3'
        assert example_backend.get_text(self.text_path) == 'text3'
        assert 'example3' in FileClient._backends

        # force=False
        with pytest.raises(KeyError):

            @FileClient.register_backend(name='example3')
            class Example4Backend(BaseStorageBackend):

                def get(self, filepath):
                    return b'bytes4'

                def get_text(self, filepath, encoding='utf-8'):
                    return 'text4'

        # decorator form with force=True replaces 'example3'
        @FileClient.register_backend(name='example3', force=True)
        class Example5Backend(BaseStorageBackend):

            def get(self, filepath):
                return b'bytes5'

            def get_text(self, filepath, encoding='utf-8'):
                return 'text5'

        example_backend = FileClient('example3')
        assert example_backend.get(self.img_path) == b'bytes5'
        assert example_backend.get_text(self.text_path) == 'text5'

        # prefixes is a str
        class Example6Backend(BaseStorageBackend):

            def get(self, filepath):
                return b'bytes6'

            def get_text(self, filepath, encoding='utf-8'):
                return 'text6'

        FileClient.register_backend(
            'example4',
            Example6Backend,
            force=True,
            prefixes='example4_prefix')
        # the backend is reachable by name, by prefix, and by both together
        example_backend = FileClient('example4')
        assert example_backend.get(self.img_path) == b'bytes6'
        assert example_backend.get_text(self.text_path) == 'text6'
        example_backend = FileClient(prefix='example4_prefix')
        assert example_backend.get(self.img_path) == b'bytes6'
        assert example_backend.get_text(self.text_path) == 'text6'
        example_backend = FileClient('example4', prefix='example4_prefix')
        assert example_backend.get(self.img_path) == b'bytes6'
        assert example_backend.get_text(self.text_path) == 'text6'

        # prefixes is a list of str
        class Example7Backend(BaseStorageBackend):

            def get(self, filepath):
                return b'bytes7'

            def get_text(self, filepath, encoding='utf-8'):
                return 'text7'

        FileClient.register_backend(
            'example5',
            Example7Backend,
            force=True,
            prefixes=['example5_prefix1', 'example5_prefix2'])
        # the backend is reachable by name and by each listed prefix
        example_backend = FileClient('example5')
        assert example_backend.get(self.img_path) == b'bytes7'
        assert example_backend.get_text(self.text_path) == 'text7'
        example_backend = FileClient(prefix='example5_prefix1')
        assert example_backend.get(self.img_path) == b'bytes7'
        assert example_backend.get_text(self.text_path) == 'text7'
        example_backend = FileClient(prefix='example5_prefix2')
        assert example_backend.get(self.img_path) == b'bytes7'
        assert example_backend.get_text(self.text_path) == 'text7'

        # backend has a higher priority than prefixes
        class Example8Backend(BaseStorageBackend):

            def get(self, filepath):
                return b'bytes8'

            def get_text(self, filepath, encoding='utf-8'):
                return 'text8'

        FileClient.register_backend(
            'example6',
            Example8Backend,
            force=True,
            prefixes='example6_prefix')
        example_backend = FileClient('example6')
        assert example_backend.get(self.img_path) == b'bytes8'
        assert example_backend.get_text(self.text_path) == 'text8'
        # 'example4_prefix' was registered for 'example4' above; the explicit
        # backend name 'example6' is expected to win over that prefix
        example_backend = FileClient('example6', prefix='example4_prefix')
        assert example_backend.get(self.img_path) == b'bytes8'
        assert example_backend.get_text(self.text_path) == 'text8'


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_fileio.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import os
import os.path as osp
import sys
import tempfile
from unittest.mock import MagicMock, patch

import pytest

import mmcv
from mmcv.fileio.file_client import HTTPBackend, PetrelBackend

# Put mocks for `petrel_client` and `petrel_client.client` into `sys.modules`
# so that a later import of either name resolves to the mock.
# NOTE(review): presumably this lets PetrelBackend be used by the tests below
# without the real package being installed -- confirm.
sys.modules['petrel_client'] = MagicMock()
sys.modules['petrel_client.client'] = MagicMock()


def _test_handler(file_format, test_obj, str_checker, mode='r+'):
    """Round-trip ``test_obj`` through ``mmcv.dump`` and ``mmcv.load``.

    The object is dumped to a string, to a named file, to an ``s3://`` path
    (with the PetrelBackend writer patched out), to a file-like object, and
    to a named file whose extension selects the format.

    Args:
        file_format (str): Format name passed as ``file_format``; it is also
            used as the extension when the format is inferred from the name.
        test_obj: Object to dump. Loaded results are compared to it with
            ``==``.
        str_checker (callable): Called with the value ``mmcv.dump`` returns
            when no target file is given.
        mode (str): Mode used to open the temporary file-like object. When it
            contains ``'b'`` the patched PetrelBackend method is ``put``,
            otherwise ``put_text``.
    """

    def _cleanup(path):
        # Runs from `finally` blocks so that a failing dump/load/assert does
        # not leave temporary files behind; tolerate a file never created.
        if path is not None and osp.isfile(path):
            os.remove(path)

    # dump to a string
    dump_str = mmcv.dump(test_obj, file_format=file_format)
    str_checker(dump_str)

    # load/dump with filenames from disk
    tmp_filename = osp.join(tempfile.gettempdir(), 'mmcv_test_dump')
    try:
        mmcv.dump(test_obj, tmp_filename, file_format=file_format)
        assert osp.isfile(tmp_filename)
        load_obj = mmcv.load(tmp_filename, file_format=file_format)
        assert load_obj == test_obj
    finally:
        _cleanup(tmp_filename)

    # load/dump with filename from petrel
    method = 'put' if 'b' in mode else 'put_text'
    with patch.object(PetrelBackend, method, return_value=None) as mock_method:
        filename = 's3://path/of/your/file'
        mmcv.dump(test_obj, filename, file_format=file_format)
    mock_method.assert_called()

    # load/dump with a file-like object
    tmp_filename = None
    try:
        with tempfile.NamedTemporaryFile(mode, delete=False) as f:
            tmp_filename = f.name
            mmcv.dump(test_obj, f, file_format=file_format)
        assert osp.isfile(tmp_filename)
        with open(tmp_filename, mode) as f:
            load_obj = mmcv.load(f, file_format=file_format)
        assert load_obj == test_obj
    finally:
        _cleanup(tmp_filename)

    # automatically infer the file format from the given filename
    tmp_filename = osp.join(tempfile.gettempdir(),
                            'mmcv_test_dump.' + file_format)
    try:
        mmcv.dump(test_obj, tmp_filename)
        assert osp.isfile(tmp_filename)
        load_obj = mmcv.load(tmp_filename)
        assert load_obj == test_obj
    finally:
        _cleanup(tmp_filename)


# Shared fixture (a list holding a dict, an int and a str) that the json,
# yaml and pickle tests pass to `_test_handler`.
obj_for_test = [{'a': 'abc', 'b': 1}, 2, 'c']


def test_json():
    """Round-trip the shared fixture through the json handler."""
    # the dict inside the fixture may be serialized with either key order
    accepted_dumps = (
        '[{"a": "abc", "b": 1}, 2, "c"]',
        '[{"b": 1, "a": "abc"}, 2, "c"]',
    )

    def json_checker(dump_str):
        assert dump_str in accepted_dumps

    _test_handler('json', obj_for_test, json_checker)


def test_yaml():
    """Round-trip the shared fixture through the yaml handler."""
    # four accepted renderings: inline or multi-line mapping, each with
    # either key order
    accepted_dumps = (
        '- {a: abc, b: 1}\n- 2\n- c\n',
        '- {b: 1, a: abc}\n- 2\n- c\n',
        '- a: abc\n  b: 1\n- 2\n- c\n',
        '- b: 1\n  a: abc\n- 2\n- c\n',
    )

    def yaml_checker(dump_str):
        assert dump_str in accepted_dumps

    _test_handler('yaml', obj_for_test, yaml_checker)


def test_pickle():
    """Round-trip the shared fixture through the pickle handler.

    The temporary file-like object is opened in binary mode (``'rb+'``).
    """
    import pickle

    def pickle_checker(dump_str):
        # the dumped value must unpickle back to the fixture
        restored = pickle.loads(dump_str)
        assert restored == obj_for_test

    _test_handler('pickle', obj_for_test, pickle_checker, mode='rb+')


def test_exception():
    """Check the errors ``mmcv.dump`` raises for unusable arguments."""
    test_obj = [{'a': 'abc', 'b': 1}, 2, 'c']

    # no target file and no file_format
    with pytest.raises(ValueError):
        mmcv.dump(test_obj)

    # a filename whose extension has no handler. `.txt` is deliberately not
    # used here: `test_register_handler` in this module registers a `txt`
    # handler, which would make this check depend on test execution order.
    with pytest.raises(TypeError):
        mmcv.dump(test_obj, 'tmp.unsupported_format')


def test_register_handler():
    """Register custom text handlers and use them via ``load``/``dump``.

    ``TxtHandler1`` is registered for ``txt``; ``TxtHandler2`` is registered
    for ``txt1`` and ``txt2`` and writes a newline before the object, which
    is what the final assertion checks.
    """

    @mmcv.register_handler('txt')
    class TxtHandler1(mmcv.BaseFileHandler):

        def load_from_fileobj(self, file):
            return file.read()

        def dump_to_fileobj(self, obj, file):
            file.write(str(obj))

        def dump_to_str(self, obj, **kwargs):
            return str(obj)

    @mmcv.register_handler(['txt1', 'txt2'])
    class TxtHandler2(mmcv.BaseFileHandler):

        def load_from_fileobj(self, file):
            return file.read()

        def dump_to_fileobj(self, obj, file):
            file.write('\n')
            file.write(str(obj))

        def dump_to_str(self, obj, **kwargs):
            return str(obj)

    # loading a `.txt` file goes through TxtHandler1
    source_path = osp.join(osp.dirname(__file__), 'data/filelist.txt')
    loaded = mmcv.load(source_path)
    assert loaded == '1.jpg\n2.jpg\n3.jpg\n4.jpg\n5.jpg'

    # dumping to a `.txt2` file goes through TxtHandler2
    dump_path = osp.join(tempfile.gettempdir(), 'mmcv_test.txt2')
    mmcv.dump(loaded, dump_path)
    with open(dump_path, 'r') as f:
        round_tripped = f.read()
    os.remove(dump_path)
    assert round_tripped == '\n' + loaded


def test_list_from_file():
    """Check ``mmcv.list_from_file`` for disk, http and petrel sources.

    The remote sources are not contacted: ``get_text`` of the backend class
    is patched to return a fixed three-line string.
    """
    # get list from disk
    filename = osp.join(osp.dirname(__file__), 'data/filelist.txt')
    disk_cases = (
        ({}, ['1.jpg', '2.jpg', '3.jpg', '4.jpg', '5.jpg']),
        ({'prefix': 'a/'},
         ['a/1.jpg', 'a/2.jpg', 'a/3.jpg', 'a/4.jpg', 'a/5.jpg']),
        ({'offset': 2}, ['3.jpg', '4.jpg', '5.jpg']),
        ({'max_num': 2}, ['1.jpg', '2.jpg']),
        ({'offset': 3, 'max_num': 3}, ['4.jpg', '5.jpg']),
    )
    for kwargs, expected in disk_cases:
        assert mmcv.list_from_file(filename, **kwargs) == expected

    remote_text = '1.jpg\n2.jpg\n3.jpg'
    remote_expected = ['1.jpg', '2.jpg', '3.jpg']
    remote_cases = (
        # get list from http
        (HTTPBackend, 'http://path/of/your/file', 'http', 'http'),
        # get list from petrel
        (PetrelBackend, 's3://path/of/your/file', 'petrel', 's3'),
    )
    for backend_cls, uri, backend_name, prefix in remote_cases:
        with patch.object(backend_cls, 'get_text', return_value=remote_text):
            # explicit backend, explicit prefix, then inferred from the uri
            for extra in ({'file_client_args': {'backend': backend_name}},
                          {'file_client_args': {'prefix': prefix}},
                          {}):
                assert mmcv.list_from_file(uri, **extra) == remote_expected


def test_dict_from_file():
    """Check ``mmcv.dict_from_file`` for disk, http and petrel sources.

    The remote sources are not contacted: ``get_text`` of the backend class
    is patched to return a fixed three-line string.
    """
    str_keyed = {'1': 'cat', '2': ['dog', 'cow'], '3': 'panda'}

    # get dict from disk
    filename = osp.join(osp.dirname(__file__), 'data/mapping.txt')
    assert mmcv.dict_from_file(filename) == str_keyed
    int_keyed = {1: 'cat', 2: ['dog', 'cow'], 3: 'panda'}
    assert mmcv.dict_from_file(filename, key_type=int) == int_keyed

    remote_text = '1 cat\n2 dog cow\n3 panda'
    remote_cases = (
        # get dict from http
        (HTTPBackend, 'http://path/of/your/file', 'http', 'http'),
        # get dict from petrel
        (PetrelBackend, 's3://path/of/your/file', 'petrel', 's3'),
    )
    for backend_cls, uri, backend_name, prefix in remote_cases:
        with patch.object(backend_cls, 'get_text', return_value=remote_text):
            # explicit backend, explicit prefix, then inferred from the uri
            for extra in ({'file_client_args': {'backend': backend_name}},
                          {'file_client_args': {'prefix': prefix}},
                          {}):
                assert mmcv.dict_from_file(uri, **extra) == str_keyed


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_image/test_colorspace.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import cv2
import numpy as np
import pytest
from numpy.testing import assert_array_almost_equal, assert_array_equal

import mmcv
from mmcv.image.colorspace import (_convert_input_type_range,
                                   _convert_output_type_range)


def test_bgr2gray():
    """Compare ``mmcv.bgr2gray`` with a weighted sum of the B, G, R planes.

    Also checks that passing ``True`` as the second argument keeps a
    trailing channel axis of length 1.
    """
    bgr = np.random.rand(10, 10, 3).astype(np.float32)
    gray = mmcv.bgr2gray(bgr)
    blue, green, red = bgr[:, :, 0], bgr[:, :, 1], bgr[:, :, 2]
    expected_gray = (blue * 0.114 + green * 0.587 + red * 0.299)
    assert_array_almost_equal(gray, expected_gray, decimal=4)

    gray_3d = mmcv.bgr2gray(bgr, True)
    assert gray_3d.shape == (10, 10, 1)
    assert_array_almost_equal(gray_3d[..., 0], gray, decimal=4)


def test_rgb2gray():
    """Compare ``mmcv.rgb2gray`` with a weighted sum of the R, G, B planes.

    Also checks that passing ``True`` as the second argument keeps a
    trailing channel axis of length 1.
    """
    rgb = np.random.rand(10, 10, 3).astype(np.float32)
    gray = mmcv.rgb2gray(rgb)
    red, green, blue = rgb[:, :, 0], rgb[:, :, 1], rgb[:, :, 2]
    expected_gray = (red * 0.299 + green * 0.587 + blue * 0.114)
    assert_array_almost_equal(gray, expected_gray, decimal=4)

    gray_3d = mmcv.rgb2gray(rgb, True)
    assert gray_3d.shape == (10, 10, 1)
    assert_array_almost_equal(gray_3d[..., 0], gray, decimal=4)


def test_gray2bgr():
    """Check that ``mmcv.gray2bgr`` copies the gray plane to 3 channels."""
    gray = np.random.rand(10, 10).astype(np.float32)
    bgr = mmcv.gray2bgr(gray)
    assert bgr.shape == (10, 10, 3)
    channel = 0
    while channel < 3:
        assert_array_almost_equal(bgr[..., channel], gray, decimal=4)
        channel += 1


def test_gray2rgb():
    """Check that ``mmcv.gray2rgb`` copies the gray plane to 3 channels."""
    gray = np.random.rand(10, 10).astype(np.float32)
    rgb = mmcv.gray2rgb(gray)
    assert rgb.shape == (10, 10, 3)
    channel = 0
    while channel < 3:
        assert_array_almost_equal(rgb[..., channel], gray, decimal=4)
        channel += 1


def test_bgr2rgb():
    """Check that ``mmcv.bgr2rgb`` reverses the channel order."""
    bgr = np.random.rand(10, 10, 3).astype(np.float32)
    rgb = mmcv.bgr2rgb(bgr)
    assert rgb.shape == bgr.shape
    # output channel k must equal input channel 2 - k
    for dst_channel, src_channel in ((0, 2), (1, 1), (2, 0)):
        assert_array_equal(rgb[..., dst_channel], bgr[..., src_channel])


def test_rgb2bgr():
    """Check that ``mmcv.rgb2bgr`` reverses the channel order."""
    rgb = np.random.rand(10, 10, 3).astype(np.float32)
    bgr = mmcv.rgb2bgr(rgb)
    assert bgr.shape == rgb.shape
    # output channel k must equal input channel 2 - k
    for dst_channel, src_channel in ((0, 2), (1, 1), (2, 0)):
        assert_array_equal(bgr[..., dst_channel], rgb[..., src_channel])


def test_bgr2hsv():
    """Compare ``mmcv.bgr2hsv`` with a per-pixel reference computation.

    The reference uses V = max, S = (V - min) / V (0 when V is 0) and a hue
    chosen by which channel holds the maximum, wrapped into [0, 360).
    """
    in_img = np.random.rand(10, 10, 3).astype(np.float32)
    out_img = mmcv.bgr2hsv(in_img)
    argmax = in_img.argmax(axis=2)
    computed_hsv = np.empty_like(in_img)
    for i in range(in_img.shape[0]):
        for j in range(in_img.shape[1]):
            b, g, r = in_img[i, j]
            v = max(r, g, b)
            chroma = v - min(r, g, b)
            s = chroma / v if v != 0 else 0
            if chroma == 0:
                # achromatic pixel (r == g == b): the hue formulas below
                # would divide by zero; OpenCV documents H = 0 for this case
                h = 0
            elif argmax[i, j] == 0:
                h = 240 + 60 * (r - g) / chroma
            elif argmax[i, j] == 1:
                h = 120 + 60 * (b - r) / chroma
            else:
                h = 60 * (g - b) / chroma
            if h < 0:
                h += 360
            computed_hsv[i, j, :] = [h, s, v]
    assert_array_almost_equal(out_img, computed_hsv, decimal=2)


def test_convert_input_type_range():
    """Check ``_convert_input_type_range`` for uint64/float32/uint8 input.

    A uint64 image must raise ``TypeError``; float32 and uint8 images must
    both come back as float32 with a mean absolute value below 1.
    """
    with pytest.raises(TypeError):
        # The img type should be np.float32 or np.uint8
        unsupported = np.random.rand(10, 10, 3).astype(np.uint64)
        _convert_input_type_range(unsupported)

    supported_inputs = (
        # np.float32
        np.random.rand(10, 10, 3).astype(np.float32),
        # np.uint8
        (np.random.rand(10, 10, 3) * 255).astype(np.uint8),
    )
    for src in supported_inputs:
        converted = _convert_input_type_range(src)
        assert converted.dtype == np.float32
        assert np.absolute(converted).mean() < 1


def test_convert_output_type_range():
    """Check ``_convert_output_type_range`` for each destination type.

    ``np.uint64`` as destination must raise ``TypeError``. A float32 image
    scaled by 255 must come back with mean absolute value below 1 for a
    float32 destination and above 1 for a uint8 destination.
    """
    with pytest.raises(TypeError):
        # The dst_type should be np.float32 or np.uint8
        src = np.random.rand(10, 10, 3).astype(np.float32)
        _convert_output_type_range(src, np.uint64)

    # np.float32
    src = (np.random.rand(10, 10, 3) * 255).astype(np.float32)
    as_float = _convert_output_type_range(src, np.float32)
    assert as_float.dtype == np.float32
    assert np.absolute(as_float).mean() < 1

    # np.uint8
    src = (np.random.rand(10, 10, 3) * 255).astype(np.float32)
    as_uint8 = _convert_output_type_range(src, np.uint8)
    assert as_uint8.dtype == np.uint8
    assert np.absolute(as_uint8).mean() > 1


def assert_image_almost_equal(x, y, atol=1):
    """Assert two uint8 images differ by at most ``atol`` at every element.

    Args:
        x (np.ndarray): First image; its dtype must be ``np.uint8``.
        y (np.ndarray): Second image; its dtype must be ``np.uint8``.
        atol (int): Largest allowed absolute difference per element.

    Raises:
        AssertionError: If either dtype is not uint8 or any element differs
            by more than ``atol``.
    """
    assert x.dtype == np.uint8
    assert y.dtype == np.uint8
    # widen to int32 before subtracting so uint8 arithmetic cannot wrap
    difference = x.astype(np.int32) - y.astype(np.int32)
    within_tolerance = np.abs(difference) <= atol
    assert np.all(within_tolerance)


def test_rgb2ycbcr():
    """Compare ``mmcv.rgb2ycbcr`` with a per-pixel reference computation.

    Covers a rejected uint64 input, float32 and uint8 inputs, and the
    ``y_only=True`` variant of each.
    """

    def luma(r, g, b):
        return 16 + r * 65.481 + g * 128.553 + b * 24.966

    def chroma(r, g, b):
        cb = 128 - r * 37.797 - g * 74.203 + b * 112.0
        cr = 128 + r * 112.0 - g * 93.786 - b * 18.214
        return cb, cr

    with pytest.raises(TypeError):
        # The img type should be np.float32 or np.uint8
        in_img = np.random.rand(10, 10, 3).astype(np.uint64)
        mmcv.rgb2ycbcr(in_img)

    # float32
    in_img = np.random.rand(10, 10, 3).astype(np.float32)
    out_img = mmcv.rgb2ycbcr(in_img)
    expected = np.empty_like(in_img)
    for row, col in np.ndindex(*in_img.shape[:2]):
        r, g, b = in_img[row, col]
        cb, cr = chroma(r, g, b)
        expected[row, col, :] = [luma(r, g, b), cb, cr]
    expected /= 255.
    assert_array_almost_equal(out_img, expected, decimal=2)
    # y_only=True (same float32 input as above)
    out_img = mmcv.rgb2ycbcr(in_img, y_only=True)
    expected_y = np.empty_like(out_img, dtype=out_img.dtype)
    for row, col in np.ndindex(*in_img.shape[:2]):
        r, g, b = in_img[row, col]
        expected_y[row, col] = luma(r, g, b)
    expected_y /= 255.
    assert_array_almost_equal(out_img, expected_y, decimal=2)

    # uint8
    in_img = (np.random.rand(10, 10, 3) * 255).astype(np.uint8)
    out_img = mmcv.rgb2ycbcr(in_img)
    expected = np.empty_like(in_img)
    in_img = in_img / 255.
    for row, col in np.ndindex(*in_img.shape[:2]):
        r, g, b = in_img[row, col]
        cb, cr = chroma(r, g, b)
        expected[row, col, :] = [luma(r, g, b).round(), cb.round(),
                                 cr.round()]
    assert_image_almost_equal(out_img, expected)
    # y_only=True
    in_img = (np.random.rand(10, 10, 3) * 255).astype(np.uint8)
    out_img = mmcv.rgb2ycbcr(in_img, y_only=True)
    expected_y = np.empty_like(out_img, dtype=out_img.dtype)
    in_img = in_img / 255.
    for row, col in np.ndindex(*in_img.shape[:2]):
        r, g, b = in_img[row, col]
        expected_y[row, col] = luma(r, g, b).round()
    assert_image_almost_equal(out_img, expected_y)


def test_bgr2ycbcr():
    """Compare ``mmcv.bgr2ycbcr`` with a per-pixel reference computation.

    Covers float32 and uint8 inputs and the ``y_only=True`` variant of
    each; every section draws a fresh random image.
    """

    def luma(r, g, b):
        return 16 + r * 65.481 + g * 128.553 + b * 24.966

    def chroma(r, g, b):
        cb = 128 - r * 37.797 - g * 74.203 + b * 112.0
        cr = 128 + r * 112.0 - g * 93.786 - b * 18.214
        return cb, cr

    # float32
    in_img = np.random.rand(10, 10, 3).astype(np.float32)
    out_img = mmcv.bgr2ycbcr(in_img)
    expected = np.empty_like(in_img)
    for row, col in np.ndindex(*in_img.shape[:2]):
        b, g, r = in_img[row, col]
        cb, cr = chroma(r, g, b)
        expected[row, col, :] = [luma(r, g, b), cb, cr]
    expected /= 255.
    assert_array_almost_equal(out_img, expected, decimal=2)
    # y_only=True
    in_img = np.random.rand(10, 10, 3).astype(np.float32)
    out_img = mmcv.bgr2ycbcr(in_img, y_only=True)
    expected_y = np.empty_like(out_img, dtype=out_img.dtype)
    for row, col in np.ndindex(*in_img.shape[:2]):
        b, g, r = in_img[row, col]
        expected_y[row, col] = luma(r, g, b)
    expected_y /= 255.
    assert_array_almost_equal(out_img, expected_y, decimal=2)

    # uint8
    in_img = (np.random.rand(10, 10, 3) * 255).astype(np.uint8)
    out_img = mmcv.bgr2ycbcr(in_img)
    expected = np.empty_like(in_img)
    in_img = in_img / 255.
    for row, col in np.ndindex(*in_img.shape[:2]):
        b, g, r = in_img[row, col]
        cb, cr = chroma(r, g, b)
        expected[row, col, :] = [luma(r, g, b).round(), cb.round(),
                                 cr.round()]
    assert_image_almost_equal(out_img, expected)
    # y_only = True
    in_img = (np.random.rand(10, 10, 3) * 255).astype(np.uint8)
    out_img = mmcv.bgr2ycbcr(in_img, y_only=True)
    expected_y = np.empty_like(out_img, dtype=out_img.dtype)
    in_img = in_img / 255.
    for row, col in np.ndindex(*in_img.shape[:2]):
        b, g, r = in_img[row, col]
        expected_y[row, col] = luma(r, g, b).round()
    assert_image_almost_equal(out_img, expected_y)


def test_ycbcr2rgb():
    """Compare ``mmcv.ycbcr2rgb`` with a per-pixel reference computation.

    Covers a rejected uint64 input, a float32 input (scaled by 255 before
    the reference is computed) and a uint8 input.
    """

    def to_rgb(y, cb, cr):
        r = -222.921 + y * 0.00456621 * 255 + cr * 0.00625893 * 255
        g = 135.576 + y * 0.00456621 * 255 - cb * 0.00153632 * 255 - \
            cr * 0.00318811 * 255
        b = -276.836 + y * 0.00456621 * 255. + cb * 0.00791071 * 255
        return r, g, b

    with pytest.raises(TypeError):
        # The img type should be np.float32 or np.uint8
        in_img = np.random.rand(10, 10, 3).astype(np.uint64)
        mmcv.ycbcr2rgb(in_img)

    # float32
    in_img = np.random.rand(10, 10, 3).astype(np.float32)
    out_img = mmcv.ycbcr2rgb(in_img)
    expected = np.empty_like(in_img)
    in_img *= 255.
    for row, col in np.ndindex(*in_img.shape[:2]):
        y, cb, cr = in_img[row, col]
        r, g, b = to_rgb(y, cb, cr)
        expected[row, col, :] = [r, g, b]
    expected /= 255.
    assert_array_almost_equal(out_img, expected, decimal=2)

    # uint8
    in_img = (np.random.rand(10, 10, 3) * 255).astype(np.uint8)
    out_img = mmcv.ycbcr2rgb(in_img)
    expected = np.empty_like(in_img)
    for row, col in np.ndindex(*in_img.shape[:2]):
        y, cb, cr = in_img[row, col]
        r, g, b = to_rgb(y, cb, cr)
        expected[row, col, :] = [r.round(), g.round(), b.round()]
    assert_image_almost_equal(out_img, expected)

def test_ycbcr2bgr():
    """Compare ``mmcv.ycbcr2bgr`` with a per-pixel reference computation.

    Covers a float32 input (scaled by 255 before the reference is computed)
    and a uint8 input; the reference is stored in B, G, R order.
    """

    def to_rgb(y, cb, cr):
        r = -222.921 + y * 0.00456621 * 255 + cr * 0.00625893 * 255
        g = 135.576 + y * 0.00456621 * 255 - cb * 0.00153632 * 255 - \
            cr * 0.00318811 * 255
        b = -276.836 + y * 0.00456621 * 255. + cb * 0.00791071 * 255
        return r, g, b

    # float32
    in_img = np.random.rand(10, 10, 3).astype(np.float32)
    out_img = mmcv.ycbcr2bgr(in_img)
    expected = np.empty_like(in_img)
    in_img *= 255.
    for row, col in np.ndindex(*in_img.shape[:2]):
        y, cb, cr = in_img[row, col]
        r, g, b = to_rgb(y, cb, cr)
        expected[row, col, :] = [b, g, r]
    expected /= 255.
    assert_array_almost_equal(out_img, expected, decimal=2)

    # uint8
    in_img = (np.random.rand(10, 10, 3) * 255).astype(np.uint8)
    out_img = mmcv.ycbcr2bgr(in_img)
    expected = np.empty_like(in_img)
    for row, col in np.ndindex(*in_img.shape[:2]):
        y, cb, cr = in_img[row, col]
        r, g, b = to_rgb(y, cb, cr)
        expected[row, col, :] = [b.round(), g.round(), r.round()]
    assert_image_almost_equal(out_img, expected)


def test_bgr2hls():
    """Compare ``mmcv.bgr2hls`` with a per-pixel reference HLS conversion.

    For every pixel the lightness is the mean of the largest and smallest
    channel, the saturation depends on which half of the lightness range the
    pixel falls in, and the hue (in degrees) depends on which channel is the
    largest. Achromatic pixels (all channels equal) get hue 0 and
    saturation 0.
    """
    in_img = np.random.rand(10, 10, 3).astype(np.float32)
    out_img = mmcv.bgr2hls(in_img)
    # index of the dominant channel in B, G, R order (2 == R, 1 == G, 0 == B)
    argmax = in_img.argmax(axis=2)
    computed_hls = np.empty_like(in_img)
    for i in range(in_img.shape[0]):
        for j in range(in_img.shape[1]):
            b, g, r = in_img[i, j]
            maxc = max(r, g, b)
            minc = min(r, g, b)
            _l = (minc + maxc) / 2.0
            if minc == maxc:
                # Achromatic pixel: hue and saturation are 0 by definition.
                # Store the result and skip the formulas below, which would
                # otherwise divide by (maxc - minc) == 0 and overwrite it.
                computed_hls[i, j, :] = [0.0, _l, 0.0]
                continue
            if _l <= 0.5:
                s = (maxc - minc) / (maxc + minc)
            else:
                s = (maxc - minc) / (2.0 - maxc - minc)
            if argmax[i, j] == 2:
                h = 60 * (g - b) / (maxc - minc)
            elif argmax[i, j] == 1:
                h = 60 * (2.0 + (b - r) / (maxc - minc))
            else:
                h = 60 * (4.0 + (r - g) / (maxc - minc))
            if h < 0:
                # keep the hue in the [0, 360) range
                h += 360
            computed_hls[i, j, :] = [h, _l, s]
    assert_array_almost_equal(out_img, computed_hls, decimal=2)


@pytest.mark.parametrize('src,dst,ref', [
    ('bgr', 'gray', cv2.COLOR_BGR2GRAY),
    ('rgb', 'gray', cv2.COLOR_RGB2GRAY),
    ('bgr', 'rgb', cv2.COLOR_BGR2RGB),
    ('rgb', 'bgr', cv2.COLOR_RGB2BGR),
    ('bgr', 'hsv', cv2.COLOR_BGR2HSV),
    ('hsv', 'bgr', cv2.COLOR_HSV2BGR),
    ('bgr', 'hls', cv2.COLOR_BGR2HLS),
    ('hls', 'bgr', cv2.COLOR_HLS2BGR),
])
def test_imconvert(src, dst, ref):
    """``mmcv.imconvert`` must give exactly what ``cv2.cvtColor`` gives.

    Args:
        src: Name of the source colour space passed to ``mmcv.imconvert``.
        dst: Name of the destination colour space.
        ref: The OpenCV conversion code used to build the reference result.
    """
    img = np.random.rand(10, 10, 3).astype(np.float32)
    converted = mmcv.imconvert(img, src, dst)
    reference = cv2.cvtColor(img, ref)
    assert_array_equal(converted, reference)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_image/test_geometric.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp

import cv2
import numpy as np
import pytest
from numpy.testing import assert_array_equal

import mmcv


class TestGeometric:

    @classmethod
    def setup_class(cls):
        """Load the colour fixture image once and share it via the class."""
        test_dir = osp.dirname(__file__)
        cls.data_dir = osp.join(test_dir, '../data')
        # the test img resolution is 400x300
        cls.img_path = osp.join(cls.data_dir, 'color.jpg')
        cls.img = cv2.imread(cls.img_path)

    def test_imresize(self):
        """Exercise ``mmcv.imresize``: shape, scales, ``out`` and backends."""
        size = (1000, 600)
        expected_shape = (600, 1000, 3)

        resized = mmcv.imresize(self.img, size)
        assert resized.shape == expected_shape

        # with True as third positional argument three values come back
        resized, w_scale, h_scale = mmcv.imresize(self.img, size, True)
        assert resized.shape == expected_shape
        assert w_scale == 2.5
        assert h_scale == 2.0

        # a preallocated ``out`` array must be the very object returned and
        # must hold the same pixels as a plain resize
        out_buffer = np.empty(expected_shape, dtype=self.img.dtype)
        resized = mmcv.imresize(self.img, size, out=out_buffer)
        assert resized is out_buffer
        assert_array_equal(out_buffer, mmcv.imresize(self.img, size))

        cv2_modes = ('nearest', 'bilinear', 'bicubic', 'area', 'lanczos')
        for mode in cv2_modes:
            resized = mmcv.imresize(self.img, size, interpolation=mode)
            assert resized.shape == expected_shape

        # test pillow resize
        pillow_modes = ('nearest', 'bilinear', 'bicubic', 'box', 'lanczos',
                        'hamming')
        for mode in pillow_modes:
            resized = mmcv.imresize(
                self.img, size, interpolation=mode, backend='pillow')
            assert resized.shape == expected_shape

        # resize backend must be 'cv2' or 'pillow'
        with pytest.raises(ValueError):
            mmcv.imresize(self.img, size, backend='not support')

    def test_imresize_to_multiple(self):
        """Check output shapes and scales of ``mmcv.imresize_to_multiple``.

        The fixture image is 400x300 (width x height), which is why the
        expected scales are expressed relative to 400 and 300.
        """
        # test size and keep_ratio = False
        fixed_size_cases = (
            (16, (528, 512, 3)),
            ((16, 32), (544, 512, 3)),
        )
        for divisor, expected_shape in fixed_size_cases:
            resized = mmcv.imresize_to_multiple(
                self.img, divisor=divisor, size=(511, 513), keep_ratio=False)
            assert resized.shape == expected_shape

        # test size, keep_ratio = True, and return_scale
        keep_ratio_cases = (
            (16, (608, 800, 3)),
            ((18, 16), (608, 810, 3)),
        )
        for divisor, expected_shape in keep_ratio_cases:
            resized, w_scale, h_scale = mmcv.imresize_to_multiple(
                self.img,
                divisor=divisor,
                size=(1000, 600),
                keep_ratio=True,
                return_scale=True)
            out_h, out_w = expected_shape[:2]
            assert resized.shape == expected_shape
            assert h_scale == out_h / 300
            assert w_scale == out_w / 400

        # test scale_factor and return_scale
        scale_factor_cases = (
            (16, 2, (608, 800, 3)),
            (16, (2, 3), (912, 800, 3)),
            ((18, 16), (2, 3), (912, 810, 3)),
        )
        for divisor, factor, expected_shape in scale_factor_cases:
            resized, w_scale, h_scale = mmcv.imresize_to_multiple(
                self.img,
                divisor=divisor,
                scale_factor=factor,
                return_scale=True)
            out_h, out_w = expected_shape[:2]
            assert resized.shape == expected_shape
            assert h_scale == out_h / 300
            assert w_scale == out_w / 400

        # one of size and scale_factor should be given
        invalid_kwargs = (
            dict(size=(1000, 600), scale_factor=2),
            dict(size=None, scale_factor=None),
        )
        for kwargs in invalid_kwargs:
            with pytest.raises(ValueError):
                mmcv.imresize_to_multiple(self.img, divisor=16, **kwargs)

    def test_imresize_like(self):
        """``mmcv.imresize_like`` must return the template array's shape."""
        template = np.zeros((100, 200, 3))
        resized = mmcv.imresize_like(self.img, template)
        assert resized.shape == (100, 200, 3)

    def test_rescale_size(self):
        """Check ``mmcv.rescale_size`` with factors, size tuples and bad input.

        Every call starts from the size (400, 300). The third positional
        argument, when True, makes the call also return the scale factor.
        """
        # scale given as a single factor, scale factor returned as well
        new_size, scale_factor = mmcv.rescale_size((400, 300), 1.5, True)
        assert new_size == (600, 450) and scale_factor == 1.5
        new_size, scale_factor = mmcv.rescale_size((400, 300), 0.934, True)
        assert new_size == (374, 280) and scale_factor == 0.934

        # scale given as a single factor, only the size returned
        new_size = mmcv.rescale_size((400, 300), 1.5)
        assert new_size == (600, 450)
        new_size = mmcv.rescale_size((400, 300), 0.934)
        assert new_size == (374, 280)

        # scale given as a tuple, scale factor returned as well
        new_size, scale_factor = mmcv.rescale_size((400, 300), (1000, 600),
                                                   True)
        assert new_size == (800, 600) and scale_factor == 2.0
        new_size, scale_factor = mmcv.rescale_size((400, 300), (180, 200),
                                                   True)
        assert new_size == (200, 150) and scale_factor == 0.5

        # scale given as a tuple, only the size returned
        new_size = mmcv.rescale_size((400, 300), (1000, 600))
        assert new_size == (800, 600)
        new_size = mmcv.rescale_size((400, 300), (180, 200))
        assert new_size == (200, 150)

        # a negative factor is rejected
        with pytest.raises(ValueError):
            mmcv.rescale_size((400, 300), -0.5)
        # A list is expected to be rejected as a scale. Both arguments are
        # passed in a single call so that the TypeError has to come from the
        # scale-type check and not from calling the function without
        # arguments.
        with pytest.raises(TypeError):
            mmcv.rescale_size((400, 300), [100, 100])

    def test_imrescale(self):
        """Check ``mmcv.imrescale`` with factors, max sizes and bad input."""
        # rescale by a certain factor
        factor_cases = (
            (1.5, (450, 600, 3)),
            (0.934, (280, 374, 3)),
        )
        for factor, expected_shape in factor_cases:
            resized = mmcv.imrescale(self.img, factor)
            assert resized.shape == expected_shape

        # rescale by a certain max_size
        max_size_cases = (
            # resize (400, 300) to (max_1000, max_600)
            ((1000, 600), (600, 800, 3), 2.0),
            # resize (400, 300) to (max_200, max_180)
            ((180, 200), (150, 200, 3), 0.5),
        )
        for max_size, expected_shape, expected_scale in max_size_cases:
            resized = mmcv.imrescale(self.img, max_size)
            assert resized.shape == expected_shape
            resized, scale = mmcv.imrescale(
                self.img, max_size, return_scale=True)
            assert resized.shape == expected_shape
            assert scale == expected_scale

        # test exceptions
        with pytest.raises(ValueError):
            mmcv.imrescale(self.img, -0.5)
        with pytest.raises(TypeError):
            mmcv.imrescale(self.img, [100, 100])

    def test_imflip(self):
        """``mmcv.imflip`` must mirror colour and grayscale images exactly."""
        # direction must be "horizontal" or "vertical" or "diagonal"
        with pytest.raises(AssertionError):
            mmcv.imflip(np.random.rand(80, 60, 3), direction='random')

        # run the same three checks on a colour image, then a grayscale one
        for shape in ((80, 60, 3), (80, 60)):
            img = np.random.rand(*shape)

            # test horizontal flip (no direction given): columns reversed
            flipped_img = mmcv.imflip(img)
            assert flipped_img.shape == img.shape
            assert_array_equal(flipped_img, img[:, ::-1])

            # test vertical flip: rows reversed
            flipped_img = mmcv.imflip(img, direction='vertical')
            assert flipped_img.shape == img.shape
            assert_array_equal(flipped_img, img[::-1])

            # test diagonal flip: rows and columns reversed
            flipped_img = mmcv.imflip(img, direction='diagonal')
            assert flipped_img.shape == img.shape
            assert_array_equal(flipped_img, img[::-1, ::-1])

    def test_imflip_(self):
        """``mmcv.imflip_`` must flip in place and return its own input."""
        # direction must be "horizontal" or "vertical" or "diagonal"
        with pytest.raises(AssertionError):
            mmcv.imflip_(np.random.rand(80, 60, 3), direction='random')

        # run the same checks on a colour image, then a grayscale one
        for shape in ((80, 60, 3), (80, 60)):
            img = np.random.rand(*shape)
            # keyword arguments for the call -> expected view of the
            # untouched original
            flip_cases = (
                # horizontal flip (no direction given)
                ({}, img[:, ::-1]),
                # vertical flip
                ({'direction': 'vertical'}, img[::-1]),
                # diagonal flip
                ({'direction': 'diagonal'}, img[::-1, ::-1]),
            )
            for kwargs, expected in flip_cases:
                # flip a copy so that ``img`` stays available as reference
                img_for_flip = img.copy()
                flipped_img = mmcv.imflip_(img_for_flip, **kwargs)
                assert flipped_img.shape == img.shape
                assert flipped_img.shape == img_for_flip.shape
                assert flipped_img is img_for_flip
                assert_array_equal(flipped_img, expected)
                assert_array_equal(flipped_img, img_for_flip)

    def test_imcrop(self):
        """Compare ``mmcv.imcrop`` results with the stored ``.npy`` patches."""
        # yapf: disable
        bboxes = np.array([[100, 100, 199, 199],  # center
                           [0, 0, 150, 100],  # left-top corner
                           [250, 200, 399, 299],  # right-bottom corner
                           [0, 100, 399, 199],  # wide
                           [150, 0, 299, 299]])  # tall
        # yapf: enable

        # crop one bbox: a 1-D box yields one array, a (1, 4) box a list
        single_patch = mmcv.imcrop(self.img, bboxes[0, :])
        patch_list = mmcv.imcrop(self.img, bboxes[[0], :])
        assert single_patch.shape == (100, 100, 3)
        patch_path = osp.join(self.data_dir, 'patches')
        first_ref = np.load(patch_path + '/0.npy')
        assert_array_equal(single_patch, first_ref)
        assert isinstance(patch_list, list) and len(patch_list) == 1
        assert_array_equal(patch_list[0], first_ref)

        # crop with no scaling and padding
        patch_list = mmcv.imcrop(self.img, bboxes)
        assert len(patch_list) == bboxes.shape[0]
        for i, cropped in enumerate(patch_list):
            assert_array_equal(cropped, np.load(patch_path + f'/{i}.npy'))

        # crop with scaling and no padding
        patch_list = mmcv.imcrop(self.img, bboxes, 1.2)
        for i, cropped in enumerate(patch_list):
            assert_array_equal(cropped,
                               np.load(patch_path + f'/scale_{i}.npy'))

        # crop with scaling and padding
        patch_list = mmcv.imcrop(
            self.img, bboxes, 1.2, pad_fill=[255, 255, 0])
        for i, cropped in enumerate(patch_list):
            assert_array_equal(cropped, np.load(patch_path + f'/pad_{i}.npy'))
        patch_list = mmcv.imcrop(self.img, bboxes, 1.2, pad_fill=0)
        for i, cropped in enumerate(patch_list):
            assert_array_equal(cropped,
                               np.load(patch_path + f'/pad0_{i}.npy'))

    def test_impad(self):
        """Check ``mmcv.impad`` with explicit padding, target shape and modes.

        The padded result is compared region by region: the original pixels
        must be preserved and every padded region must have the expected
        shape and fill value.
        """

        def expect_filled(region, shape, fill, dtype):
            # ``region`` must have exactly ``shape`` and hold only ``fill``
            # (a scalar, or one value per channel along the last axis)
            assert_array_equal(np.full(shape, fill, dtype=dtype), region)

        rgb_fill = (100, 110, 120)
        modes = ('constant', 'edge', 'reflect', 'symmetric')

        # grayscale image
        img = np.random.rand(10, 10).astype(np.float32)
        padded_img = mmcv.impad(img, padding=(0, 0, 2, 5), pad_val=0)
        assert_array_equal(img, padded_img[:10, :10])
        expect_filled(padded_img[10:, :], (5, 12), 0, 'float32')
        expect_filled(padded_img[:, 10:], (15, 2), 0, 'float32')

        # RGB image
        img = np.random.rand(10, 10, 3).astype(np.float32)
        padded_img = mmcv.impad(img, padding=(0, 0, 2, 5), pad_val=0)
        assert_array_equal(img, padded_img[:10, :10, :])
        expect_filled(padded_img[10:, :, :], (5, 12, 3), 0, 'float32')
        expect_filled(padded_img[:, 10:, :], (15, 2, 3), 0, 'float32')

        # RGB image with different values for three channels.
        img = np.random.randint(256, size=(10, 10, 3)).astype('uint8')
        padded_img = mmcv.impad(
            img, padding=(0, 0, 2, 5), pad_val=rgb_fill)
        assert_array_equal(img, padded_img[:10, :10, :])
        expect_filled(padded_img[10:, :, :], (5, 12, 3), rgb_fill, 'uint8')
        expect_filled(padded_img[:, 10:, :], (15, 2, 3), rgb_fill, 'uint8')

        # Pad the grayscale image to shape (15, 12)
        img = np.random.rand(10, 10).astype(np.float32)
        padded_img = mmcv.impad(img, shape=(15, 12))
        assert_array_equal(img, padded_img[:10, :10])
        expect_filled(padded_img[10:, :], (5, 12), 0, 'float32')
        expect_filled(padded_img[:, 10:], (15, 2), 0, 'float32')

        # Pad the RGB image to shape (15, 12)
        img = np.random.rand(10, 10, 3).astype(np.float32)
        padded_img = mmcv.impad(img, shape=(15, 12))
        assert_array_equal(img, padded_img[:10, :10, :])
        expect_filled(padded_img[10:, :, :], (5, 12, 3), 0, 'float32')
        expect_filled(padded_img[:, 10:, :], (15, 2, 3), 0, 'float32')

        # Pad the RGB image to shape (15, 12) with different values for
        # three channels.
        img = np.random.randint(256, size=(10, 10, 3)).astype('uint8')
        padded_img = mmcv.impad(img, shape=(15, 12), pad_val=rgb_fill)
        assert_array_equal(img, padded_img[:10, :10, :])
        expect_filled(padded_img[10:, :, :], (5, 12, 3), rgb_fill, 'uint8')
        expect_filled(padded_img[:, 10:, :], (15, 2, 3), rgb_fill, 'uint8')

        # RGB image with padding=[5, 2]
        img = np.random.rand(10, 10, 3).astype(np.float32)
        padded_img = mmcv.impad(img, padding=(5, 2), pad_val=0)

        assert padded_img.shape == (14, 20, 3)
        assert_array_equal(img, padded_img[2:12, 5:15, :])
        # the four corner regions must all be zero
        corners = (
            padded_img[:2, :5, :],
            padded_img[12:, :5, :],
            padded_img[:2, 15:, :],
            padded_img[12:, 15:, :],
        )
        for corner in corners:
            expect_filled(corner, (2, 5, 3), 0, 'float32')

        # RGB image with type(pad_val) = tuple
        pad_val = (0, 1, 2)
        img = np.random.rand(10, 10, 3).astype(np.float32)
        padded_img = mmcv.impad(img, padding=(0, 0, 5, 2), pad_val=pad_val)

        assert padded_img.shape == (12, 15, 3)
        assert_array_equal(img, padded_img[:10, :10, :])
        # bottom strip, checked channel by channel
        for channel, value in enumerate(pad_val):
            expect_filled(padded_img[10:, :, channel:channel + 1],
                          (2, 15, 1), value, 'float32')
        # right strip, checked channel by channel
        for channel, value in enumerate(pad_val):
            expect_filled(padded_img[:, 10:, channel:channel + 1],
                          (12, 5, 1), value, 'float32')

        # test different padding mode with channel number = 3
        for mode in modes:
            img = np.random.rand(10, 10, 3).astype(np.float32)
            padded_img = mmcv.impad(
                img, padding=(0, 0, 5, 2), pad_val=pad_val, padding_mode=mode)
            assert padded_img.shape == (12, 15, 3)

        # test different padding mode with channel number = 1
        for mode in modes:
            img = np.random.rand(10, 10).astype(np.float32)
            padded_img = mmcv.impad(
                img, padding=(0, 0, 5, 2), pad_val=0, padding_mode=mode)
            assert padded_img.shape == (12, 15)

        # Padding must be a int or a 2, or 4 element tuple.
        with pytest.raises(ValueError):
            mmcv.impad(img, padding=(1, 1, 1))

        # pad_val must be a int or a tuple
        with pytest.raises(TypeError):
            mmcv.impad(img, padding=(1, 1, 1, 1), pad_val='wrong')

        # When pad_val is a tuple,
        # len(pad_val) should be equal to img.shape[-1]
        img = np.random.rand(10, 10, 3).astype(np.float32)
        invalid_kwargs = (
            dict(padding=3, pad_val=(100, 200)),
            dict(padding=2, pad_val=0, padding_mode='unknown'),
            dict(shape=(12, 15), padding=(0, 0, 5, 2)),
        )
        for kwargs in invalid_kwargs:
            with pytest.raises(AssertionError):
                mmcv.impad(img, **kwargs)

    def test_impad_to_multiple(self):
        """``mmcv.impad_to_multiple`` must return the expected padded shapes."""
        # (input shape, divisor, expected output shape)
        cases = (
            ((11, 14, 3), 4, (12, 16, 3)),
            ((20, 12), 5, (20, 15)),
            ((20, 12), 2, (20, 12)),
        )
        for in_shape, divisor, out_shape in cases:
            img = np.random.rand(*in_shape).astype(np.float32)
            padded_img = mmcv.impad_to_multiple(img, divisor)
            assert padded_img.shape == out_shape

    def test_cutout(self):
        """Check ``mmcv.cutout`` argument validation and its output on a 3x3
        single-channel uint8 image.
        """
        img = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]).astype(np.uint8)

        # shape must be int or tuple
        with pytest.raises(AssertionError):
            mmcv.cutout(img, 2.5)
        # pad_val must be int or float or tuple with the same length
        # of img channels
        with pytest.raises(AssertionError):
            mmcv.cutout(img, 1, (1, 2, 3))
        with pytest.raises(TypeError):
            mmcv.cutout(img, 1, None)

        # test cutout the whole img
        assert_array_equal(mmcv.cutout(img, 6), np.zeros_like(img))
        # test not cutout
        assert_array_equal(mmcv.cutout(img, 0), img)
        # test cutout when shape is int
        # NOTE(review): the RNG is seeded because the expected arrays below
        # presumably depend on random draws made inside ``mmcv.cutout``; the
        # order of the calls after each seed therefore must not change.
        np.random.seed(0)
        img_cutout = np.array([[1, 2, 3], [4, 0, 6], [7, 8,
                                                      9]]).astype(np.uint8)
        assert_array_equal(mmcv.cutout(img, 1), img_cutout)
        # same cut-out position expected, filled with pad_val instead of 0
        img_cutout = np.array([[1, 2, 3], [4, 10, 6], [7, 8,
                                                       9]]).astype(np.uint8)
        assert_array_equal(mmcv.cutout(img, 1, pad_val=10), img_cutout)
        # test cutout when shape is tuple
        # re-seed so this pair of calls starts from the same RNG state
        np.random.seed(0)
        img_cutout = np.array([[1, 2, 3], [0, 0, 6], [7, 8,
                                                      9]]).astype(np.uint8)
        assert_array_equal(mmcv.cutout(img, (1, 2)), img_cutout)
        img_cutout = np.array([[1, 2, 3], [10, 10, 6], [7, 8,
                                                        9]]).astype(np.uint8)
        assert_array_equal(mmcv.cutout(img, (1, 2), pad_val=10), img_cutout)

    def test_imrotate(self):
        """Check ``mmcv.imrotate`` against hand-written expected matrices."""
        square = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]).astype(np.uint8)
        # an angle of 0 leaves the image unchanged
        assert_array_equal(mmcv.imrotate(square, 0), square)
        # (angle, expected result) for the 3x3 image
        square_cases = (
            (90, [[7, 4, 1], [8, 5, 2], [9, 6, 3]]),
            (-90, [[3, 6, 9], [2, 5, 8], [1, 4, 7]]),
        )
        for angle, expected in square_cases:
            assert_array_equal(
                mmcv.imrotate(square, angle), np.array(expected))

        wide = np.array([[1, 2, 3, 4], [5, 6, 7, 8]]).astype(np.uint8)
        # (extra keyword arguments, expected result) for a 90 degree
        # rotation of the 2x4 image
        wide_cases = (
            ({}, [[0, 6, 2, 0], [0, 7, 3, 0]]),
            ({'center': (0, 0)}, [[1, 0, 0, 0], [2, 0, 0, 0]]),
            ({'border_value': 255}, [[255, 6, 2, 255], [255, 7, 3, 255]]),
            ({'auto_bound': True}, [[5, 1], [6, 2], [7, 3], [8, 4]]),
        )
        for kwargs, expected in wide_cases:
            assert_array_equal(
                mmcv.imrotate(wide, 90, **kwargs), np.array(expected))

        # ``center`` and ``auto_bound`` given together must be rejected
        with pytest.raises(ValueError):
            mmcv.imrotate(wide, 90, center=(0, 0), auto_bound=True)

    def test_imshear(self):
        """Check ``mmcv.imshear`` results and its argument validation."""
        img = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]).astype(np.uint8)
        # a magnitude of 0 leaves the image unchanged
        assert_array_equal(mmcv.imshear(img, 0), img)

        # magnitude=1, horizontal
        expected = np.array([[1, 2, 3], [0, 4, 5], [0, 0, 7]], dtype=np.uint8)
        assert_array_equal(mmcv.imshear(img, 1), expected)

        # magnitude=-1, vertical
        expected = np.array([[1, 5, 9], [4, 8, 0], [7, 0, 0]], dtype=np.uint8)
        assert_array_equal(mmcv.imshear(img, -1, 'vertical'), expected)

        # magnitude=1, vertical, borderValue=100
        fill = 100
        expected = np.array([[1, fill, fill], [4, 2, fill], [7, 5, 3]],
                            dtype=np.uint8)
        sheared = mmcv.imshear(img, 1, 'vertical', fill)
        assert_array_equal(sheared, expected)

        # magnitude=1, vertical, borderValue=100, img shape (h,w,3)
        img = np.stack([img] * 3, axis=-1)
        expected = np.stack([expected] * 3, axis=-1)
        sheared = mmcv.imshear(img, 1, 'vertical', fill)
        assert_array_equal(sheared, expected)

        # test tuple format of borderValue
        sheared = mmcv.imshear(img, 1, 'vertical', (fill, fill, fill))
        assert_array_equal(sheared, expected)

        # test invalid length of borderValue
        with pytest.raises(AssertionError):
            mmcv.imshear(img, 0.5, 'horizontal', (fill, ))

        # test invalid type of borderValue
        with pytest.raises(ValueError):
            mmcv.imshear(img, 0.5, 'horizontal', [fill])

        # test invalid value of direction
        with pytest.raises(AssertionError):
            mmcv.imshear(img, 0.5, 'diagonal')

    def test_imtranslate(self):
        """Check ``mmcv.imtranslate`` results and its argument validation."""
        img = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=np.uint8)
        # an offset of 0 leaves the image unchanged
        assert_array_equal(mmcv.imtranslate(img, 0), img)

        # offset=1, horizontal
        expected = np.array([[128, 1, 2], [128, 4, 5], [128, 7, 8]],
                            dtype=np.uint8)
        translated = mmcv.imtranslate(img, 1, border_value=128)
        assert_array_equal(translated, expected)

        # offset=-1, vertical
        expected = np.array([[4, 5, 6], [7, 8, 9], [0, 0, 0]], dtype=np.uint8)
        translated = mmcv.imtranslate(img, -1, 'vertical')
        assert_array_equal(translated, expected)

        # offset=-2, horizontal
        plane = np.array([[1, 2, 3, 4], [5, 6, 7, 8]], dtype=np.uint8)
        img = np.stack([plane] * 3, axis=-1)
        expected_plane = [[3, 4, 128, 128], [7, 8, 128, 128]]
        expected = np.stack([expected_plane] * 3, axis=-1)
        translated = mmcv.imtranslate(img, -2, border_value=128)
        assert_array_equal(translated, expected)

        # offset=2, vertical
        # every pixel is expected to hold the per-channel border value
        border_value = (110, 120, 130)
        channel_planes = [np.ones((2, 4)) * value for value in border_value]
        expected = np.stack(channel_planes, axis=-1).astype(np.uint8)
        translated = mmcv.imtranslate(img, 2, 'vertical', border_value)
        assert_array_equal(translated, expected)

        # test invalid number elements in border_value
        with pytest.raises(AssertionError):
            mmcv.imtranslate(img, 1, border_value=(1, ))
        # test invalid type of border_value
        with pytest.raises(ValueError):
            mmcv.imtranslate(img, 1, border_value=[1, 2, 3])
        # test invalid value of direction
        with pytest.raises(AssertionError):
            mmcv.imtranslate(img, 1, 'diagonal')


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_image/test_image_misc.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import numpy as np
import pytest
from numpy.testing import assert_array_equal

import mmcv

try:
    import torch
except ImportError:
    torch = None


@pytest.mark.skipif(torch is None, reason='requires torch library')
def test_tensor2imgs():
    """Check ``mmcv.tensor2imgs`` argument validation and its conversions.

    Every invalid call sits in its own ``pytest.raises`` block: a ``with``
    body stops at the first raising statement, so a second call placed in
    the same block would never be executed.
    """

    # test tensor obj
    tensor = np.random.rand(2, 3, 3)
    with pytest.raises(AssertionError):
        mmcv.tensor2imgs(tensor)

    # test tensor ndim
    tensor = torch.randn(2, 3, 3)
    with pytest.raises(AssertionError):
        mmcv.tensor2imgs(tensor)

    # test tensor dim-1
    tensor = torch.randn(2, 4, 3, 3)
    with pytest.raises(AssertionError):
        mmcv.tensor2imgs(tensor)

    # test mean length
    tensor = torch.randn(2, 3, 5, 5)
    with pytest.raises(AssertionError):
        mmcv.tensor2imgs(tensor, mean=(1, ))
    tensor = torch.randn(2, 1, 5, 5)
    with pytest.raises(AssertionError):
        mmcv.tensor2imgs(tensor, mean=(0, 0, 0))

    # test std length
    tensor = torch.randn(2, 3, 5, 5)
    with pytest.raises(AssertionError):
        mmcv.tensor2imgs(tensor, std=(1, ))
    tensor = torch.randn(2, 1, 5, 5)
    with pytest.raises(AssertionError):
        mmcv.tensor2imgs(tensor, std=(1, 1, 1))

    # test to_rgb
    tensor = torch.randn(2, 1, 5, 5)
    with pytest.raises(AssertionError):
        mmcv.tensor2imgs(tensor, mean=(0, ), std=(1, ), to_rgb=True)

    # test rgb=True
    # the expected images have their channel order reversed
    tensor = torch.randn(2, 3, 5, 5)
    gts = [
        t.cpu().numpy().transpose(1, 2, 0).astype(np.uint8)
        for t in tensor.flip(1)
    ]
    outputs = mmcv.tensor2imgs(tensor, to_rgb=True)
    for gt, output in zip(gts, outputs):
        assert_array_equal(gt, output)

    # test rgb=False
    tensor = torch.randn(2, 3, 5, 5)
    gts = [t.cpu().numpy().transpose(1, 2, 0).astype(np.uint8) for t in tensor]
    outputs = mmcv.tensor2imgs(tensor, to_rgb=False)
    for gt, output in zip(gts, outputs):
        assert_array_equal(gt, output)

    # test tensor channel 1 and rgb=False
    # single-channel input is expected back as 2-D images
    tensor = torch.randn(2, 1, 5, 5)
    gts = [t.squeeze(0).cpu().numpy().astype(np.uint8) for t in tensor]
    outputs = mmcv.tensor2imgs(tensor, to_rgb=False)
    for gt, output in zip(gts, outputs):
        assert_array_equal(gt, output)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_image/test_io.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import os
import os.path as osp
import sys
import tempfile
from pathlib import Path
from unittest.mock import MagicMock, patch

import cv2
import numpy as np
import pytest
from numpy.testing import assert_allclose, assert_array_equal

import mmcv
from mmcv.fileio.file_client import HTTPBackend, PetrelBackend


class TestIO:

    @classmethod
    def setup_class(cls):
        """Resolve fixture paths, load the reference image and mock petrel.

        All image fixtures are looked up in the ``../data`` directory next to
        this test module. The ``petrel_client`` modules are replaced with
        mocks in ``sys.modules``.
        """
        data_dir = osp.join(osp.dirname(__file__), '../data')
        cls.data_dir = data_dir

        def in_data_dir(filename):
            return osp.join(data_dir, filename)

        # the test img resolution is 400x300
        cls.img_path = in_data_dir('color.jpg')
        cls.img_path_obj = Path(cls.img_path)
        cls.gray_img_path = in_data_dir('grayscale.jpg')
        cls.gray_img_path_obj = Path(cls.gray_img_path)
        cls.gray_img_dim3_path = in_data_dir('grayscale_dim3.jpg')
        cls.gray_alpha_img_path = in_data_dir('gray_alpha.png')
        cls.palette_img_path = in_data_dir('palette.gif')
        cls.exif_img_path = in_data_dir('color_exif.jpg')
        cls.img = cv2.imread(cls.img_path)
        cls.tiff_path = in_data_dir('uint16-5channel.tif')

        # placeholder remote locations (petrel s3 and http)
        cls.s3_path = 's3://path/of/your/file.jpg'
        cls.http_path = 'http://path/of/your/file.jpg'

        # add mock package
        for mocked_module in ('petrel_client', 'petrel_client.client'):
            sys.modules[mocked_module] = MagicMock()

    @classmethod
    def teardown_class(cls):
        """Reset the ``FileClient`` instance cache after this class ran."""
        # rebind to a fresh, empty cache so that other unittests are not
        # influenced by instances created here
        mmcv.FileClient._instances = dict()

    def assert_img_equal(self, img, ref_img, ratio_thr=0.999):
        assert img.shape == ref_img.shape
        assert img.dtype == ref_img.dtype
        area = ref_img.shape[0] * ref_img.shape[1]
        diff = np.abs(img.astype('int32') - ref_img.astype('int32'))
        assert np.sum(diff <= 1) / float(area) > ratio_thr

    def test_imread(self):
        """Exercise ``mmcv.imread`` across backends and input types.

        Covers the cv2, pillow, turbojpeg and tifffile backends (selected
        globally via ``mmcv.use_backend`` or per call via ``backend=``),
        ``str`` / ``Path`` / ndarray inputs, the mocked Petrel and HTTP
        file backends, and the EXIF-orientation related flags. The global
        backend is switched several times, so the statement order matters.
        """
        # backend cv2
        mmcv.use_backend('cv2')

        # HardDiskBackend
        img_cv2_color_bgr = mmcv.imread(self.img_path)
        assert img_cv2_color_bgr.shape == (300, 400, 3)
        img_cv2_color_rgb = mmcv.imread(self.img_path, channel_order='rgb')
        assert img_cv2_color_rgb.shape == (300, 400, 3)
        assert_array_equal(img_cv2_color_rgb[:, :, ::-1], img_cv2_color_bgr)
        img_cv2_grayscale1 = mmcv.imread(self.img_path, 'grayscale')
        assert img_cv2_grayscale1.shape == (300, 400)
        img_cv2_grayscale2 = mmcv.imread(self.gray_img_path)
        assert img_cv2_grayscale2.shape == (300, 400, 3)
        img_cv2_unchanged = mmcv.imread(self.gray_img_path, 'unchanged')
        assert img_cv2_unchanged.shape == (300, 400)
        # an ndarray is also accepted as input and must yield an equal array
        img_cv2_unchanged = mmcv.imread(img_cv2_unchanged)
        assert_array_equal(img_cv2_unchanged, mmcv.imread(img_cv2_unchanged))

        # same checks again with pathlib.Path inputs
        img_cv2_color_bgr = mmcv.imread(self.img_path_obj)
        assert img_cv2_color_bgr.shape == (300, 400, 3)
        img_cv2_color_rgb = mmcv.imread(self.img_path_obj, channel_order='rgb')
        assert img_cv2_color_rgb.shape == (300, 400, 3)
        assert_array_equal(img_cv2_color_rgb[:, :, ::-1], img_cv2_color_bgr)
        img_cv2_grayscale1 = mmcv.imread(self.img_path_obj, 'grayscale')
        assert img_cv2_grayscale1.shape == (300, 400)
        img_cv2_grayscale2 = mmcv.imread(self.gray_img_path_obj)
        assert img_cv2_grayscale2.shape == (300, 400, 3)
        img_cv2_unchanged = mmcv.imread(self.gray_img_path_obj, 'unchanged')
        assert img_cv2_unchanged.shape == (300, 400)
        # inputs that are neither str, Path nor ndarray are rejected
        with pytest.raises(TypeError):
            mmcv.imread(1)

        # PetrelBackend
        # ``get`` is patched, so no real petrel storage is contacted
        img_cv2_color_bgr = mmcv.imread(self.img_path)
        with patch.object(
                PetrelBackend, 'get',
                return_value=img_cv2_color_bgr) as mock_method:
            img_cv2_color_bgr_petrel = mmcv.imread(self.s3_path, backend='cv2')
            img_cv2_color_bgr_petrel_with_args = mmcv.imread(
                self.s3_path,
                backend='cv2',
                file_client_args={'backend': 'petrel'})
            mock_method.assert_called()
            assert_array_equal(img_cv2_color_bgr_petrel,
                               img_cv2_color_bgr_petrel_with_args)

        # HTTPBackend
        # ``get`` is patched, so no real HTTP request is made
        img_cv2_color_bgr = mmcv.imread(self.img_path)
        with patch.object(
                HTTPBackend, 'get',
                return_value=img_cv2_color_bgr) as mock_method:
            img_cv2_color_bgr_http = mmcv.imread(self.http_path, backend='cv2')
            img_cv2_color_bgr_http_with_args = mmcv.imread(
                self.http_path,
                backend='cv2',
                file_client_args={'backend': 'http'})
            mock_method.assert_called()
            assert_array_equal(img_cv2_color_bgr_http,
                               img_cv2_color_bgr_http_with_args)

        with pytest.raises(FileNotFoundError):
            mmcv.imread('/not/exists/' + self.img_path)

        # test arg backend pillow
        # (global backend is still cv2; pillow is requested per call)
        img_pil_gray_alpha = mmcv.imread(
            self.gray_alpha_img_path, 'grayscale', backend='pillow')
        assert img_pil_gray_alpha.shape == (400, 500)
        # the region [300:, 400:] is expected to be constant-valued
        mean = img_pil_gray_alpha[300:, 400:].mean()
        assert_allclose(img_pil_gray_alpha[300:, 400:] - mean, 0)
        img_pil_gray_alpha = mmcv.imread(
            self.gray_alpha_img_path, backend='pillow')
        mean = img_pil_gray_alpha[300:, 400:].mean(axis=(0, 1))
        assert_allclose(img_pil_gray_alpha[300:, 400:] - mean, 0)
        assert img_pil_gray_alpha.shape == (400, 500, 3)
        img_pil_gray_alpha = mmcv.imread(
            self.gray_alpha_img_path, 'unchanged', backend='pillow')
        assert img_pil_gray_alpha.shape == (400, 500, 2)
        img_pil_palette = mmcv.imread(
            self.palette_img_path, 'grayscale', backend='pillow')
        assert img_pil_palette.shape == (300, 400)
        img_pil_palette = mmcv.imread(self.palette_img_path, backend='pillow')
        assert img_pil_palette.shape == (300, 400, 3)
        img_pil_palette = mmcv.imread(
            self.palette_img_path, 'unchanged', backend='pillow')
        assert img_pil_palette.shape == (300, 400)

        # backend pillow
        mmcv.use_backend('pillow')
        img_pil_grayscale1 = mmcv.imread(self.img_path, 'grayscale')
        assert img_pil_grayscale1.shape == (300, 400)
        img_pil_gray_alpha = mmcv.imread(self.gray_alpha_img_path, 'grayscale')
        assert img_pil_gray_alpha.shape == (400, 500)
        mean = img_pil_gray_alpha[300:, 400:].mean()
        assert_allclose(img_pil_gray_alpha[300:, 400:] - mean, 0)
        img_pil_gray_alpha = mmcv.imread(self.gray_alpha_img_path)
        mean = img_pil_gray_alpha[300:, 400:].mean(axis=(0, 1))
        assert_allclose(img_pil_gray_alpha[300:, 400:] - mean, 0)
        assert img_pil_gray_alpha.shape == (400, 500, 3)
        img_pil_gray_alpha = mmcv.imread(self.gray_alpha_img_path, 'unchanged')
        assert img_pil_gray_alpha.shape == (400, 500, 2)
        img_pil_palette = mmcv.imread(self.palette_img_path, 'grayscale')
        assert img_pil_palette.shape == (300, 400)
        img_pil_palette = mmcv.imread(self.palette_img_path)
        assert img_pil_palette.shape == (300, 400, 3)
        img_pil_palette = mmcv.imread(self.palette_img_path, 'unchanged')
        assert img_pil_palette.shape == (300, 400)
        img_pil_grayscale2 = mmcv.imread(self.gray_img_path)
        assert img_pil_grayscale2.shape == (300, 400, 3)
        img_pil_unchanged = mmcv.imread(self.gray_img_path, 'unchanged')
        assert img_pil_unchanged.shape == (300, 400)
        img_pil_unchanged = mmcv.imread(img_pil_unchanged)
        assert_array_equal(img_pil_unchanged, mmcv.imread(img_pil_unchanged))

        img_pil_color_bgr = mmcv.imread(self.img_path_obj)
        assert img_pil_color_bgr.shape == (300, 400, 3)
        img_pil_color_rgb = mmcv.imread(self.img_path_obj, channel_order='rgb')
        assert img_pil_color_rgb.shape == (300, 400, 3)
        # pillow and cv2 results are only required to agree on more than
        # half of the array elements
        assert (img_pil_color_rgb == img_cv2_color_rgb).sum() / float(
            img_cv2_color_rgb.size) > 0.5
        assert_array_equal(img_pil_color_rgb[:, :, ::-1], img_pil_color_bgr)
        img_pil_grayscale1 = mmcv.imread(self.img_path_obj, 'grayscale')
        assert img_pil_grayscale1.shape == (300, 400)
        img_pil_grayscale2 = mmcv.imread(self.gray_img_path_obj)
        assert img_pil_grayscale2.shape == (300, 400, 3)
        img_pil_unchanged = mmcv.imread(self.gray_img_path_obj, 'unchanged')
        assert img_pil_unchanged.shape == (300, 400)
        with pytest.raises(TypeError):
            mmcv.imread(1)

        # backend turbojpeg
        # results are compared element-wise against the cv2 results above
        mmcv.use_backend('turbojpeg')

        img_turbojpeg_color_bgr = mmcv.imread(self.img_path)
        assert img_turbojpeg_color_bgr.shape == (300, 400, 3)
        assert_array_equal(img_turbojpeg_color_bgr, img_cv2_color_bgr)

        img_turbojpeg_color_rgb = mmcv.imread(
            self.img_path, channel_order='rgb')
        assert img_turbojpeg_color_rgb.shape == (300, 400, 3)
        assert_array_equal(img_turbojpeg_color_rgb, img_cv2_color_rgb)

        with pytest.raises(ValueError):
            mmcv.imread(self.img_path, channel_order='unsupport_order')

        img_turbojpeg_grayscale1 = mmcv.imread(self.img_path, flag='grayscale')
        assert img_turbojpeg_grayscale1.shape == (300, 400)
        assert_array_equal(img_turbojpeg_grayscale1, img_cv2_grayscale1)

        img_turbojpeg_grayscale2 = mmcv.imread(self.gray_img_path)
        assert img_turbojpeg_grayscale2.shape == (300, 400, 3)
        assert_array_equal(img_turbojpeg_grayscale2, img_cv2_grayscale2)

        img_turbojpeg_grayscale2 = mmcv.imread(img_turbojpeg_grayscale2)
        assert_array_equal(img_turbojpeg_grayscale2,
                           mmcv.imread(img_turbojpeg_grayscale2))

        # the 'unchanged' flag is expected to be rejected with this backend
        with pytest.raises(ValueError):
            mmcv.imread(self.gray_img_path, 'unchanged')

        with pytest.raises(TypeError):
            mmcv.imread(1)

        with pytest.raises(AssertionError):
            mmcv.use_backend('unsupport_backend')

        # NOTE(review): 'unsupported_backend' is passed as the second
        # positional argument, which is where this test passes flags such as
        # 'grayscale' elsewhere, rather than as ``backend=`` -- confirm
        # whether an unsupported flag or backend is meant to be tested.
        with pytest.raises(ValueError):
            mmcv.imread(self.img_path, 'unsupported_backend')

        # backend tifffile, multi channel tiff file(> 4 channels).
        mmcv.use_backend('tifffile')
        img_tifffile = mmcv.imread(self.tiff_path)
        assert img_tifffile.shape == (200, 150, 5)

        # switch the global backend back to cv2 for the remaining checks
        mmcv.use_backend('cv2')

        # consistent exif behaviour
        # default flag: both backends yield (400, 300, 3) for the exif image,
        # while the 'unchanged' and '*_ignore_orientation' flags yield
        # (300, 400[, 3])
        img_cv2_exif = mmcv.imread(self.exif_img_path)
        img_pil_exif = mmcv.imread(self.exif_img_path, backend='pillow')
        assert img_cv2_exif.shape == (400, 300, 3)
        assert img_pil_exif.shape == (400, 300, 3)
        img_cv2_exif_unchanged = mmcv.imread(
            self.exif_img_path, flag='unchanged')
        img_pil_exif_unchanged = mmcv.imread(
            self.exif_img_path, backend='pillow', flag='unchanged')
        assert img_cv2_exif_unchanged.shape == (300, 400, 3)
        assert img_pil_exif_unchanged.shape == (300, 400, 3)
        img_cv2_color_ignore_exif = mmcv.imread(
            self.exif_img_path, flag='color_ignore_orientation')
        img_pil_color_ignore_exif = mmcv.imread(
            self.exif_img_path,
            backend='pillow',
            flag='color_ignore_orientation')
        assert img_cv2_color_ignore_exif.shape == (300, 400, 3)
        assert img_pil_color_ignore_exif.shape == (300, 400, 3)
        img_cv2_grayscale_ignore_exif = mmcv.imread(
            self.exif_img_path, flag='grayscale_ignore_orientation')
        img_pil_grayscale_ignore_exif = mmcv.imread(
            self.exif_img_path,
            backend='pillow',
            flag='grayscale_ignore_orientation')
        assert img_cv2_grayscale_ignore_exif.shape == (300, 400)
        assert img_pil_grayscale_ignore_exif.shape == (300, 400)

    def test_imfrombytes(self):
        """Decode raw image bytes with the cv2, pillow and turbojpeg backends.

        The cv2 results serve as reference for the other backends. The
        global backend is switched along the way and reset to cv2 before the
        final unsupported-backend check.
        """

        def load_bytes(path):
            # raw, undecoded content of the file at ``path``
            with open(path, 'rb') as f:
                return f.read()

        # backend cv2, channel order: bgr
        mmcv.use_backend('cv2')
        img_cv2 = mmcv.imfrombytes(load_bytes(self.img_path))
        assert img_cv2.shape == (300, 400, 3)

        # backend cv2, channel order: rgb
        mmcv.use_backend('cv2')
        img_rgb_cv2 = mmcv.imfrombytes(
            load_bytes(self.img_path), channel_order='rgb')
        assert img_rgb_cv2.shape == (300, 400, 3)
        assert_array_equal(img_rgb_cv2, img_cv2[:, :, ::-1])

        # backend cv2, grayscale, decode as 3 channels
        gray_img_rgb_cv2 = mmcv.imfrombytes(load_bytes(self.gray_img_path))
        assert gray_img_rgb_cv2.shape == (300, 400, 3)

        # backend cv2, grayscale
        gray_img_cv2 = mmcv.imfrombytes(
            load_bytes(self.gray_img_path), flag='grayscale')
        assert gray_img_cv2.shape == (300, 400)

        # backend cv2, grayscale dim3
        gray_img_dim3_cv2 = mmcv.imfrombytes(
            load_bytes(self.gray_img_dim3_path), flag='grayscale')
        assert gray_img_dim3_cv2.shape == (300, 400)

        # arg backend pillow, channel order: bgr
        img_pillow = mmcv.imfrombytes(
            load_bytes(self.img_path), backend='pillow')
        assert img_pillow.shape == (300, 400, 3)
        # Pillow and opencv decoding may not be the same
        matching = (img_cv2 == img_pillow).sum()
        assert matching / float(img_cv2.size) > 0.5

        # backend pillow, channel order: bgr
        mmcv.use_backend('pillow')
        img_pillow = mmcv.imfrombytes(load_bytes(self.img_path))
        assert img_pillow.shape == (300, 400, 3)
        # Pillow and opencv decoding may not be the same
        matching = (img_cv2 == img_pillow).sum()
        assert matching / float(img_cv2.size) > 0.5

        # backend turbojpeg, channel order: bgr
        mmcv.use_backend('turbojpeg')
        img_turbojpeg = mmcv.imfrombytes(load_bytes(self.img_path))
        assert img_turbojpeg.shape == (300, 400, 3)
        assert_array_equal(img_cv2, img_turbojpeg)

        # backend turbojpeg, channel order: rgb
        img_rgb_turbojpeg = mmcv.imfrombytes(
            load_bytes(self.img_path), channel_order='rgb')
        assert img_rgb_turbojpeg.shape == (300, 400, 3)
        assert_array_equal(img_rgb_turbojpeg, img_cv2[:, :, ::-1])

        # backend turbojpeg, grayscale, decode as 3 channels
        gray_img_turbojpeg = mmcv.imfrombytes(load_bytes(self.gray_img_path))
        assert gray_img_turbojpeg.shape == (300, 400, 3)
        assert_array_equal(gray_img_rgb_cv2, gray_img_turbojpeg)

        # backend turbojpeg, grayscale
        gray_img_turbojpeg = mmcv.imfrombytes(
            load_bytes(self.gray_img_path), flag='grayscale')
        assert gray_img_turbojpeg.shape == (300, 400)
        assert_array_equal(gray_img_cv2, gray_img_turbojpeg)

        # backend turbojpeg, grayscale dim3
        gray_img_dim3_turbojpeg = mmcv.imfrombytes(
            load_bytes(self.gray_img_dim3_path), flag='grayscale')
        assert gray_img_dim3_turbojpeg.shape == (300, 400)
        assert_array_equal(gray_img_dim3_cv2, gray_img_dim3_turbojpeg)

        mmcv.use_backend('cv2')

        # an unknown per-call backend must be rejected
        with pytest.raises(ValueError):
            mmcv.imfrombytes(
                load_bytes(self.img_path), backend='unsupported_backend')

    def test_imwrite(self):
        """Round-trip an image through ``mmcv.imwrite`` / ``mmcv.imread``.

        Also checks that writing through the (mocked) petrel backend reports
        success and that an unknown file extension raises ``cv2.error``.
        """
        img = mmcv.imread(self.img_path)
        # Write into a private temporary directory instead of a fixed file
        # name in the shared temp dir: concurrent test runs cannot clash and
        # the file is removed even if writing or re-reading fails.
        with tempfile.TemporaryDirectory() as tmp_dir:
            out_file = osp.join(tmp_dir, 'mmcv_test.jpg')
            mmcv.imwrite(img, out_file)
            rewrite_img = mmcv.imread(out_file)
        self.assert_img_equal(img, rewrite_img)

        # test petrel client
        # ``put`` is patched, so nothing is actually uploaded
        with patch.object(
                PetrelBackend, 'put', return_value=None) as mock_method:
            ret = mmcv.imwrite(img, self.s3_path)
            ret_with_args = mmcv.imwrite(
                img, self.s3_path, file_client_args={'backend': 'petrel'})
            assert ret
            assert ret_with_args
            mock_method.assert_called()

        with pytest.raises(cv2.error):
            mmcv.imwrite(img, 'error_file.jppg')

    @patch('mmcv.image.io.TurboJPEG', None)
    def test_no_turbojpeg(self):
        """Selecting turbojpeg must raise ImportError when it is missing.

        ``mmcv.image.io.TurboJPEG`` is patched to ``None`` for the duration
        of the test.
        """
        try:
            with pytest.raises(ImportError):
                mmcv.use_backend('turbojpeg')
        finally:
            # Restore the global backend even when the expectation above
            # fails, so a failure here cannot leak into later tests.
            mmcv.use_backend('cv2')

    @patch('mmcv.image.io.Image', None)
    def test_no_pillow(self):
        """Selecting pillow must raise ImportError when it is missing.

        ``mmcv.image.io.Image`` is patched to ``None`` for the duration of
        the test.
        """
        try:
            with pytest.raises(ImportError):
                mmcv.use_backend('pillow')
        finally:
            # Restore the global backend even when the expectation above
            # fails, so a failure here cannot leak into later tests.
            mmcv.use_backend('cv2')


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_image/test_photometric.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp

import cv2
import numpy as np
import pytest
from numpy.testing import assert_array_equal

import mmcv


class TestPhotometric:

    @classmethod
    def setup_class(cls):
        """Load the shared colour image and the per-channel statistics."""
        tests_dir = osp.dirname(__file__)
        # the test img resolution is 400x300
        cls.img_path = osp.join(tests_dir, '../data/color.jpg')
        cls.img = cv2.imread(cls.img_path)

        # per-channel mean / std used by the (de)normalization tests
        mean_values = [123.675, 116.28, 103.53]
        std_values = [58.395, 57.12, 57.375]
        cls.mean = np.array(mean_values, dtype=np.float32)
        cls.std = np.array(std_values, dtype=np.float32)

    def test_imnormalize(self):
        """``imnormalize`` returns a new, normalized array.

        The expected result is the channel-reversed fixture normalized with
        ``mean`` / ``std``; it must be reached both from the original image
        with default arguments and from the channel-reversed image with
        ``to_rgb=False``.
        """
        rgb_input = self.img[:, :, ::-1]
        expected = (rgb_input - self.mean) / self.std

        from_bgr = mmcv.imnormalize(self.img, self.mean, self.std)
        assert np.allclose(from_bgr, expected)
        # the result must be a different object than the input
        assert from_bgr is not self.img

        from_rgb = mmcv.imnormalize(
            rgb_input, self.mean, self.std, to_rgb=False)
        assert np.allclose(from_rgb, expected)
        assert from_rgb is not rgb_input

    def test_imnormalize_(self):
        """``imnormalize_`` normalizes in place and returns its input."""
        bgr_buffer = np.float32(self.img)
        rgb_buffer = np.float32(self.img[:, :, ::-1])
        # computed before either buffer is modified in place
        expected = (rgb_buffer - self.mean) / self.std

        result = mmcv.imnormalize_(bgr_buffer, self.mean, self.std)
        assert np.allclose(bgr_buffer, expected)
        # the very same object is handed back
        assert result is bgr_buffer

        result = mmcv.imnormalize_(
            rgb_buffer, self.mean, self.std, to_rgb=False)
        assert np.allclose(result, expected)
        assert result is rgb_buffer

    def test_imdenormalize(self):
        """``imdenormalize`` undoes the normalization.

        With default arguments the result is expected in reversed channel
        order; with ``to_bgr=False`` the channel order is kept.
        """
        normalized = (self.img[:, :, ::-1] - self.mean) / self.std
        expected_rgb = normalized * self.std + self.mean
        expected_bgr = expected_rgb[:, :, ::-1]

        restored = mmcv.imdenormalize(normalized, self.mean, self.std)
        assert np.allclose(restored, expected_bgr)

        restored = mmcv.imdenormalize(
            normalized, self.mean, self.std, to_bgr=False)
        assert np.allclose(restored, expected_rgb)

    def test_iminvert(self):
        """``iminvert`` maps every value ``v`` of the sample to ``255 - v``."""
        source = np.array([[0, 128, 255], [1, 127, 254], [2, 129, 253]],
                          dtype=np.uint8)
        expected = np.array([[255, 127, 0], [254, 128, 1], [253, 126, 2]],
                            dtype=np.uint8)
        inverted = mmcv.iminvert(source)
        assert_array_equal(inverted, expected)

    def test_solarize(self):
        """``solarize`` with the default and with an explicit threshold."""
        source = np.array([[0, 128, 255], [1, 127, 254], [2, 129, 253]],
                          dtype=np.uint8)
        # (extra positional arguments, expected rows)
        cases = (
            ((), [[0, 127, 0], [1, 127, 1], [2, 126, 2]]),
            ((100, ), [[0, 127, 0], [1, 128, 1], [2, 126, 2]]),
        )
        for extra_args, expected_rows in cases:
            expected = np.array(expected_rows, dtype=np.uint8)
            assert_array_equal(mmcv.solarize(source, *extra_args), expected)

    def test_posterize(self):
        """``posterize`` with 1 and with 3 bits on a fixed sample."""
        source = np.array([[0, 128, 255], [1, 127, 254], [2, 129, 253]],
                          dtype=np.uint8)
        # (bits, expected rows)
        cases = (
            (1, [[0, 128, 128], [0, 0, 128], [0, 128, 128]]),
            (3, [[0, 128, 224], [0, 96, 224], [0, 128, 224]]),
        )
        for bits, expected_rows in cases:
            expected = np.array(expected_rows, dtype=np.uint8)
            assert_array_equal(mmcv.posterize(source, bits), expected)

    def test_adjust_color(self):
        """``adjust_color`` blends an image with its grayscale version.

        The expected values are built from the image, the stacked result of
        ``mmcv.bgr2gray`` and the weights / gamma passed to the call, clipped
        to ``[0, 255]`` and rounded.
        """
        plane = np.array([[0, 128, 255], [1, 127, 254], [2, 129, 253]],
                         dtype=np.uint8)
        img = np.stack([plane, plane, plane], axis=-1)
        out_dtype = img.dtype

        def quantize(values):
            # clip to the valid range, round and cast back to the image dtype
            return np.round(np.clip(values, 0, 255)).astype(out_dtype)

        # default arguments leave the image untouched
        assert_array_equal(mmcv.adjust_color(img), img)

        img_gray = mmcv.bgr2gray(img)
        img_r = np.stack([img_gray, img_gray, img_gray], axis=-1)
        assert_array_equal(mmcv.adjust_color(img, 0), img_r)
        assert_array_equal(mmcv.adjust_color(img, 0, 1), img_r)

        adjusted = mmcv.adjust_color(img, 0.5, 0.5)
        assert_array_equal(adjusted, quantize((img * 0.5 + img_r * 0.5)))

        adjusted = mmcv.adjust_color(img, 1, 1.5)
        assert_array_equal(adjusted, quantize(img * 1 + img_r * 1.5))

        adjusted = mmcv.adjust_color(img, 0.8, -0.6, gamma=2)
        assert_array_equal(adjusted, quantize(img * 0.8 - 0.6 * img_r + 2))

        adjusted = mmcv.adjust_color(img, 0.8, -0.6, gamma=-0.6)
        assert_array_equal(adjusted, quantize(img * 0.8 - 0.6 * img_r - 0.6))

        # test float type of image
        img = img.astype(np.float32)
        adjusted = mmcv.adjust_color(img, 0.8, -0.6, gamma=-0.6)
        reference = np.clip(img * 0.8 - 0.6 * img_r - 0.6, 0, 255)
        assert_array_equal(np.round(adjusted), np.round(reference))

    def test_imequalize(self, nb_rand_test=100):
        """``imequalize`` must match ``PIL.ImageOps.equalize`` exactly.

        Args:
            nb_rand_test (int): Number of randomly sampled images to check.
        """

        def pil_equalize(array):
            # reference implementation: PIL.ImageOps.equalize
            from PIL import Image, ImageOps
            return np.asarray(ImageOps.equalize(Image.fromarray(array)))

        plane = np.array([[0, 128, 255], [1, 127, 254], [2, 129, 253]],
                         dtype=np.uint8)
        img = np.stack([plane, plane, plane], axis=-1)
        equalized_img = mmcv.imequalize(img)
        assert_array_equal(equalized_img, pil_equalize(img))

        # test equalize with case step=0
        plane = np.array([[0, 0, 0], [120, 120, 120], [255, 255, 255]],
                         dtype=np.uint8)
        img = np.stack([plane, plane, plane], axis=-1)
        assert_array_equal(mmcv.imequalize(img), img)

        # test equalize with randomly sampled image.
        for _ in range(nb_rand_test):
            noise = np.random.normal(0, 1, (256, 256, 3)) * 260
            img = np.clip(noise, 0, 255).astype(np.uint8)
            equalized_img = mmcv.imequalize(img)
            assert_array_equal(equalized_img, pil_equalize(img))

    def test_adjust_brightness(self, nb_rand_test=100):
        """``adjust_brightness`` versus ``PIL.ImageEnhance.Brightness``.

        Random cases may differ from the PIL reference by at most 1.

        Args:
            nb_rand_test (int): Number of random image/factor pairs to check.
        """

        def pil_brightness(array, factor):
            # reference implementation: PIL.ImageEnhance.Brightness
            from PIL import Image
            from PIL.ImageEnhance import Brightness
            enhancer = Brightness(Image.fromarray(array))
            return np.asarray(enhancer.enhance(factor))

        plane = np.array([[0, 128, 255], [1, 127, 254], [2, 129, 253]],
                         dtype=np.uint8)
        img = np.stack([plane, plane, plane], axis=-1)
        # test case with factor 1.0
        assert_array_equal(mmcv.adjust_brightness(img, 1.), img)
        # test case with factor 0.0
        assert_array_equal(mmcv.adjust_brightness(img, 0.), np.zeros_like(img))
        # test adjust_brightness with randomly sampled images and factors.
        for _ in range(nb_rand_test):
            samples = np.random.uniform(0, 1, (1000, 1200, 3)) * 260
            img = np.clip(samples, 0, 255).astype(np.uint8)
            factor = np.random.uniform() + np.random.choice([0, 1])
            actual = mmcv.adjust_brightness(img, factor).astype(np.int32)
            desired = pil_brightness(img, factor).astype(np.int32)
            np.testing.assert_allclose(actual, desired, rtol=0, atol=1)

    def test_adjust_contrast(self, nb_rand_test=100):
        """``adjust_contrast`` versus ``PIL.ImageEnhance.Contrast``.

        Random cases may differ from the PIL reference by at most 1.

        Args:
            nb_rand_test (int): Number of random image/factor pairs to check.
        """

        def pil_contrast(bgr, factor):
            from PIL import Image
            from PIL.ImageEnhance import Contrast

            # Image.fromarray defaultly supports RGB, not BGR.
            # convert from BGR to RGB
            rgb = Image.fromarray(bgr[..., ::-1], mode='RGB')
            enhanced = Contrast(rgb).enhance(factor)
            # convert from RGB to BGR
            return np.asarray(enhanced)[..., ::-1]

        plane = np.array([[0, 128, 255], [1, 127, 254], [2, 129, 253]],
                         dtype=np.uint8)
        img = np.stack([plane, plane, plane], axis=-1)
        # test case with factor 1.0
        assert_array_equal(mmcv.adjust_contrast(img, 1.), img)
        # test case with factor 0.0
        assert_array_equal(
            mmcv.adjust_contrast(img, 0.), pil_contrast(img, 0.))
        # test adjust_contrast with randomly sampled images and factors.
        for _ in range(nb_rand_test):
            samples = np.random.uniform(0, 1, (1200, 1000, 3)) * 260
            img = np.clip(samples, 0, 255).astype(np.uint8)
            factor = np.random.uniform() + np.random.choice([0, 1])
            # Note the gap (less_equal 1) between PIL.ImageEnhance.Contrast
            # and mmcv.adjust_contrast comes from the gap that converts from
            # a color image to gray image using mmcv or PIL.
            actual = mmcv.adjust_contrast(img, factor).astype(np.int32)
            desired = pil_contrast(img, factor).astype(np.int32)
            np.testing.assert_allclose(actual, desired, rtol=0, atol=1)

    def test_auto_contrast(self, nb_rand_test=100):
        """``auto_contrast`` must match ``PIL.ImageOps.autocontrast``.

        Args:
            nb_rand_test (int): Number of random image/cutoff pairs to check.
        """

        def pil_autocontrast(bgr, cutoff=0):
            from PIL import Image
            from PIL.ImageOps import autocontrast

            # Image.fromarray defaultly supports RGB, not BGR.
            # convert from BGR to RGB
            rgb = Image.fromarray(bgr[..., ::-1], mode='RGB')
            # convert from RGB to BGR
            return np.asarray(autocontrast(rgb, cutoff))[..., ::-1]

        plane = np.array([[0, 128, 255], [1, 127, 254], [2, 129, 253]],
                         dtype=np.uint8)
        img = np.stack([plane, plane, plane], axis=-1)

        # (extra args for mmcv.auto_contrast, extra args for the reference)
        fixed_cases = (
            # test case without cut-off
            ((), ()),
            # test case with cut-off as int
            ((10, ), (10, )),
            # test case with cut-off as float
            ((12.5, ), (12.5, )),
            # test case with cut-off as tuple
            (((10, 10), ), (10, )),
            # test case with cut-off with sum over 100
            ((60, ), (60, )),
        )
        for mmcv_args, pil_args in fixed_cases:
            assert_array_equal(
                mmcv.auto_contrast(img, *mmcv_args),
                pil_autocontrast(img, *pil_args))

        # test auto_contrast with randomly sampled images and factors.
        for _ in range(nb_rand_test):
            samples = np.random.uniform(0, 1, (1200, 1000, 3)) * 260
            img = np.clip(samples, 0, 255).astype(np.uint8)
            # cut-offs are not set as tuple since in `build.yml`, pillow 6.2.2
            # is installed, which does not support setting low cut-off and high
            #  cut-off differently.
            # With pillow above 8.0.0, cutoff can be set as tuple
            cutoff = np.random.rand() * 100
            assert_array_equal(
                mmcv.auto_contrast(img, cutoff), pil_autocontrast(img, cutoff))

    def test_adjust_sharpness(self, nb_rand_test=100):
        """``adjust_sharpness`` versus ``PIL.ImageEnhance.Sharpness``.

        Invalid kernels must raise ``AssertionError``. Random cases are
        compared on the image interior only, with a tolerance of 1.

        Args:
            nb_rand_test (int): Number of random image/factor pairs to check.
        """

        def pil_sharpness(array, factor):
            # reference implementation: PIL.ImageEnhance.Sharpness
            from PIL import Image
            from PIL.ImageEnhance import Sharpness
            enhancer = Sharpness(Image.fromarray(array))
            return np.asarray(enhancer.enhance(factor))

        plane = np.array([[0, 128, 255], [1, 127, 254], [2, 129, 253]],
                         dtype=np.uint8)
        img = np.stack([plane, plane, plane], axis=-1)

        # test case with invalid type of kernel
        with pytest.raises(AssertionError):
            mmcv.adjust_sharpness(img, 1., kernel=1.)
        # test case with invalid shape of kernel
        bad_kernel = np.ones((3, 3, 3))
        with pytest.raises(AssertionError):
            mmcv.adjust_sharpness(img, 1., kernel=bad_kernel)
        # test case with all-zero kernel, factor 0.0
        zero_kernel = np.zeros((3, 3))
        assert_array_equal(
            mmcv.adjust_sharpness(img, 0., kernel=zero_kernel),
            np.zeros_like(img))

        # test case with factor 1.0
        assert_array_equal(mmcv.adjust_sharpness(img, 1.), img)

        # drop the one-pixel border before comparing
        interior = (slice(1, -1), slice(1, -1))
        # test adjust_sharpness with randomly sampled images and factors.
        for _ in range(nb_rand_test):
            samples = np.random.uniform(0, 1, (1000, 1200, 3)) * 260
            img = np.clip(samples, 0, 255).astype(np.uint8)
            factor = np.random.uniform()
            # Note the gap between PIL.ImageEnhance.Sharpness and
            # mmcv.adjust_sharpness mainly comes from the difference ways of
            # handling img edges when applying filters
            actual = mmcv.adjust_sharpness(img, factor).astype(np.int32)
            desired = pil_sharpness(img, factor).astype(np.int32)
            np.testing.assert_allclose(
                actual[interior], desired[interior], rtol=0, atol=1)

    def test_adjust_lighting(self):
        """Argument validation and the ``alphastd=0`` case of
        ``adjust_lighting``."""
        plane = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]).astype(np.uint8)
        img = np.stack([plane, plane, plane], axis=-1)

        invalid_pairs = (
            # eigval and eigvec must be np.ndarray
            (1, np.ones((3, 1))),
            (np.array([1]), (1, 1, 1)),
            # we must have the same number of eigval and eigvec
            (np.array([1]), np.eye(2)),
            (np.array([1]), np.array([1])),
        )
        for eigval, eigvec in invalid_pairs:
            with pytest.raises(AssertionError):
                mmcv.adjust_lighting(img, eigval, eigvec)

        # with alphastd=0. the image is expected to come back unchanged
        eigval = np.random.normal(0, 1, 2)
        eigvec = np.random.normal(0, 1, (3, 2))
        img_adjusted = mmcv.adjust_lighting(img, eigval, eigvec, alphastd=0.)
        assert_array_equal(img_adjusted, img)

    def test_lut_transform(self):
        """``lut_transform`` validates its inputs and matches ``cv2.LUT``."""
        lut_table = np.array(list(range(256)))

        invalid_calls = (
            # test assertion image values should between 0 and 255.
            (np.array([256]), lut_table),
            (np.array([-1]), lut_table),
            # test assertion lut_table should be ndarray with shape (256, )
            (np.array([0]), list(range(256))),
            (np.array([1]), np.array(list(range(257)))),
        )
        for bad_img, bad_table in invalid_calls:
            with pytest.raises(AssertionError):
                mmcv.lut_transform(bad_img, bad_table)

        # the uint8 fixture image
        transformed = mmcv.lut_transform(self.img, lut_table)
        expected = cv2.LUT(self.img, lut_table)
        assert np.allclose(transformed, expected)

        # float input
        input_img = np.array(
            [[[0, 128, 255], [255, 128, 0]], [[0, 128, 255], [255, 128, 0]]],
            dtype=float)
        transformed = mmcv.lut_transform(input_img, lut_table)
        expected = cv2.LUT(np.array(input_img, dtype=np.uint8), lut_table)
        assert np.allclose(transformed, expected)

        # random integer input with five dimensions
        input_img = np.random.randint(0, 256, size=(7, 8, 9, 10, 11))
        transformed = mmcv.lut_transform(input_img, lut_table)
        expected = cv2.LUT(np.array(input_img, dtype=np.uint8), lut_table)
        assert np.allclose(transformed, expected)

    def test_clahe(self):
        """``mmcv.clahe`` validates its inputs and matches ``cv2`` CLAHE.

        Each channel of the fixture image is processed with the default clip
        limit and with ``clip_limit=1.2`` and compared against
        ``cv2.createCLAHE(...).apply``. The results must not share memory
        with the fixture image.
        """

        def _clahe(img, clip_limit=40.0, tile_grid_size=(8, 8)):
            # reference implementation based directly on cv2
            clahe = cv2.createCLAHE(clip_limit, tile_grid_size)
            return clahe.apply(np.array(img, dtype=np.uint8))

        # test assertion image should have the right shape
        with pytest.raises(AssertionError):
            mmcv.clahe(self.img)

        # test assertion tile_grid_size should be a tuple with 2 integers
        with pytest.raises(AssertionError):
            mmcv.clahe(self.img[:, :, 0], tile_grid_size=(8.0, 8.0))
        with pytest.raises(AssertionError):
            mmcv.clahe(self.img[:, :, 0], tile_grid_size=(8, 8, 8))
        with pytest.raises(AssertionError):
            mmcv.clahe(self.img[:, :, 0], tile_grid_size=[8, 8])

        # test with different channels: first with the default clip limit,
        # then with clip_limit=1.2
        for extra_args in ((), (1.2, )):
            for i in range(self.img.shape[-1]):
                channel = self.img[:, :, i]
                img = mmcv.clahe(channel, *extra_args)
                img_std = _clahe(channel, *extra_args)
                assert np.allclose(img, img_std)
                # Comparing ``id(img)`` with the id of a freshly created
                # slice is always unequal and therefore proves nothing;
                # check for shared memory with the fixture instead.
                assert not np.shares_memory(img, self.img)
                assert not np.shares_memory(img_std, self.img)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_load_model_zoo.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import os
import os.path as osp
from unittest.mock import patch

import pytest

import mmcv
from mmcv.runner.checkpoint import (DEFAULT_CACHE_DIR, ENV_MMCV_HOME,
                                    ENV_XDG_CACHE_HOME, _get_mmcv_home,
                                    _load_checkpoint,
                                    get_deprecated_model_names,
                                    get_external_models)
from mmcv.utils import TORCH_VERSION


@patch('mmcv.__path__', [osp.join(osp.dirname(__file__), 'data/')])
def test_set_mmcv_home():
    """``_get_mmcv_home`` returns the value stored under ``ENV_MMCV_HOME``."""
    tests_dir = osp.dirname(__file__)
    custom_home = osp.join(tests_dir, 'data/model_zoo/mmcv_home/')

    # start from a clean slate, then point the variable at the test data
    os.environ.pop(ENV_MMCV_HOME, None)
    os.environ[ENV_MMCV_HOME] = custom_home

    assert _get_mmcv_home() == custom_home


@patch('mmcv.__path__', [osp.join(osp.dirname(__file__), 'data/')])
def test_default_mmcv_home():
    """Without either env variable set, the default cache location is used.

    Also checks that ``get_external_models`` then returns exactly the content
    of ``model_zoo/open_mmlab.json`` under the (patched) ``mmcv.__path__``.
    """
    # make sure neither variable can influence the lookup
    for env_name in (ENV_MMCV_HOME, ENV_XDG_CACHE_HOME):
        os.environ.pop(env_name, None)

    default_home = os.path.expanduser(os.path.join(DEFAULT_CACHE_DIR, 'mmcv'))
    assert _get_mmcv_home() == default_home

    bundled_json = osp.join(mmcv.__path__[0], 'model_zoo/open_mmlab.json')
    assert get_external_models() == mmcv.load(bundled_json)


@patch('mmcv.__path__', [osp.join(osp.dirname(__file__), 'data/')])
def test_get_external_models():
    """``get_external_models`` returns the expected name-to-location mapping
    when ``ENV_MMCV_HOME`` points at the ``mmcv_home`` test-data directory."""
    custom_home = osp.join(
        osp.dirname(__file__), 'data/model_zoo/mmcv_home/')
    os.environ.pop(ENV_MMCV_HOME, None)
    os.environ[ENV_MMCV_HOME] = custom_home

    expected_urls = {
        'train': 'https://localhost/train.pth',
        'test': 'test.pth',
        'val': 'val.pth',
        'train_empty': 'train.pth'
    }
    assert get_external_models() == expected_urls


@patch('mmcv.__path__', [osp.join(osp.dirname(__file__), 'data/')])
def test_get_deprecated_models():
    """``get_deprecated_model_names`` maps each old model name to its
    replacement when ``ENV_MMCV_HOME`` points at the test-data directory."""
    custom_home = osp.join(
        osp.dirname(__file__), 'data/model_zoo/mmcv_home/')
    os.environ.pop(ENV_MMCV_HOME, None)
    os.environ[ENV_MMCV_HOME] = custom_home

    expected_names = {
        'train_old': 'train',
        'test_old': 'test',
    }
    assert get_deprecated_model_names() == expected_names


def load_from_http(url, map_location=None):
    """Stub loader: tag ``url`` with a ``'url:'`` prefix instead of fetching.

    ``map_location`` is accepted for signature compatibility and ignored.
    """
    tag = 'url:'
    return tag + url


def load_url(url, map_location=None, model_dir=None):
    """Stub loader: delegate to ``load_from_http`` with the URL only.

    ``map_location`` and ``model_dir`` are accepted for signature
    compatibility and are not forwarded.
    """
    tagged = load_from_http(url)
    return tagged


def load(filepath, map_location=None):
    """Stub loader: tag ``filepath`` with a ``'local:'`` prefix instead of
    reading it.

    ``map_location`` is accepted for signature compatibility and ignored.
    """
    tag = 'local:'
    return tag + filepath


@patch('mmcv.__path__', [osp.join(osp.dirname(__file__), 'data/')])
@patch('mmcv.runner.checkpoint.load_from_http', load_from_http)
@patch('mmcv.runner.checkpoint.load_url', load_url)
@patch('torch.load', load)
def test_load_external_url():
    """Check how ``_load_checkpoint`` resolves each supported location.

    The HTTP loaders and ``torch.load`` are patched with the stub loaders of
    this module, so the returned string shows which loader was selected
    (``'url:'`` or ``'local:'`` prefix) and which location it received.
    """
    # Local import so the block only relies on names it brings in itself.
    import re

    # The resnet50 checkpoint file was renamed in torch 1.9.0. Compare the
    # (major, minor) numbers rather than the raw strings: lexicographically
    # '1.10.0' < '1.9.0' is True, which would expect the old file name on
    # torch >= 1.10. A version string without a leading "<int>.<int>" keeps
    # selecting the new name.
    version_match = re.match(r'(\d+)\.(\d+)', TORCH_VERSION)
    torch_before_1_9 = (
        version_match is not None
        and tuple(int(part) for part in version_match.groups()) < (1, 9))
    if torch_before_1_9:
        resnet50_url = ('url:https://download.pytorch.org/models/'
                        'resnet50-19c8e357.pth')
    else:
        # filename of checkpoint is renamed in torch1.9.0
        resnet50_url = ('url:https://download.pytorch.org/models/'
                        'resnet50-0676ba61.pth')

    # test modelzoo://
    url = _load_checkpoint('modelzoo://resnet50')
    assert url == resnet50_url

    # test torchvision://
    url = _load_checkpoint('torchvision://resnet50')
    assert url == resnet50_url

    # test open-mmlab:// with default MMCV_HOME
    os.environ.pop(ENV_MMCV_HOME, None)
    os.environ.pop(ENV_XDG_CACHE_HOME, None)
    url = _load_checkpoint('open-mmlab://train')
    assert url == 'url:https://localhost/train.pth'

    # test open-mmlab:// with deprecated model name
    os.environ.pop(ENV_MMCV_HOME, None)
    os.environ.pop(ENV_XDG_CACHE_HOME, None)
    with pytest.warns(
            Warning,
            match='open-mmlab://train_old is deprecated in favor of '
            'open-mmlab://train'):
        url = _load_checkpoint('open-mmlab://train_old')
        assert url == 'url:https://localhost/train.pth'

    # test openmmlab:// with deprecated model name
    os.environ.pop(ENV_MMCV_HOME, None)
    os.environ.pop(ENV_XDG_CACHE_HOME, None)
    with pytest.warns(
            Warning,
            match='openmmlab://train_old is deprecated in favor of '
            'openmmlab://train'):
        url = _load_checkpoint('openmmlab://train_old')
        assert url == 'url:https://localhost/train.pth'

    # test open-mmlab:// with user-defined MMCV_HOME
    os.environ.pop(ENV_MMCV_HOME, None)
    mmcv_home = osp.join(osp.dirname(__file__), 'data/model_zoo/mmcv_home')
    os.environ[ENV_MMCV_HOME] = mmcv_home
    url = _load_checkpoint('open-mmlab://train')
    assert url == 'url:https://localhost/train.pth'
    with pytest.raises(FileNotFoundError, match='train.pth can not be found.'):
        _load_checkpoint('open-mmlab://train_empty')
    url = _load_checkpoint('open-mmlab://test')
    assert url == f'local:{osp.join(_get_mmcv_home(), "test.pth")}'
    url = _load_checkpoint('open-mmlab://val')
    assert url == f'local:{osp.join(_get_mmcv_home(), "val.pth")}'

    # test http:// https://
    url = _load_checkpoint('http://localhost/train.pth')
    assert url == 'url:http://localhost/train.pth'

    # test local file
    with pytest.raises(FileNotFoundError, match='train.pth can not be found.'):
        _load_checkpoint('train.pth')
    url = _load_checkpoint(osp.join(_get_mmcv_home(), 'test.pth'))
    assert url == f'local:{osp.join(_get_mmcv_home(), "test.pth")}'


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_ops/test_active_rotated_filter.py
================================================
import numpy as np
import pytest
import torch

from mmcv.ops import active_rotated_filter

# Input fixture for ``test_active_rotated_filter``, where it is converted to
# a float tensor with ``requires_grad=True``. The literal has eight outermost
# entries, each wrapping one 3x3 block in two singleton levels, i.e. the
# array has shape (8, 1, 1, 3, 3).
np_feature = np.array([[[[[-1.4934e-01, 1.1341e+00, -1.6241e-01],
                          [-1.0986e+00, -1.1463e+00, -1.3176e+00],
                          [1.4808e+00, 7.6572e-01, -1.4548e+00]]]],
                       [[[[1.9370e+00, 6.2799e-01, 2.5834e-02],
                          [-1.4242e+00, 7.6566e-01, 1.0015e+00],
                          [9.8669e-01, 4.1356e-01, 6.1068e-01]]]],
                       [[[[1.4565e+00, 1.4960e+00, 2.4339e-01],
                          [-2.2484e-01, 7.5942e-01, -8.1184e-01],
                          [-1.7077e+00, 1.0658e+00, 3.8311e-01]]]],
                       [[[[8.4734e-01, 1.0904e+00, 2.4356e+00],
                          [9.5822e-01, 2.2260e-01, -2.4450e-01],
                          [-1.5078e+00, 7.0902e-02, -1.5921e+00]]]],
                       [[[[2.1173e+00, -7.3524e-01, 1.8888e+00],
                          [1.0169e+00, 4.7033e-01, -1.0875e+00],
                          [-1.0736e+00, -5.2245e-01, -2.8733e-01]]]],
                       [[[[-5.6433e-01, 1.5835e+00, -1.5826e+00],
                          [-8.8974e-01, -4.3128e-01, -2.2423e-01],
                          [1.6552e-03, -1.7292e+00, 2.6639e-01]]]],
                       [[[[-1.2951e-01, 1.3493e+00, -1.9329e+00],
                          [5.6248e-01, -5.1189e-01, 1.3614e+00],
                          [3.3680e-01, -8.7148e-01, 5.0592e-01]]]],
                       [[[[1.6781e-02, -8.3929e-01, 1.2060e+00],
                          [-1.0764e+00, 4.7821e-01, 1.5342e+00],
                          [-4.4542e-01, -1.8606e+00, 3.0827e-01]]]]])

# Index fixture passed (as an int tensor) as the second argument of
# ``active_rotated_filter``; the array has shape (1, 3, 3, 8). Every
# innermost row holds eight values taken from 1..9, and the centre row is
# constant 5.
# NOTE(review): presumably these are 1-based positions inside a flattened 3x3
# kernel, one per orientation -- confirm against the op's documentation.
np_indices = np.array([[[[1, 2, 3, 6, 9, 8, 7, 4], [2, 3, 6, 9, 8, 7, 4, 1],
                         [3, 6, 9, 8, 7, 4, 1, 2]],
                        [[4, 1, 2, 3, 6, 9, 8, 7], [5, 5, 5, 5, 5, 5, 5, 5],
                         [6, 9, 8, 7, 4, 1, 2, 3]],
                        [[7, 4, 1, 2, 3, 6, 9, 8], [8, 7, 4, 1, 2, 3, 6, 9],
                         [9, 8, 7, 4, 1, 2, 3, 6]]]])

# Expected forward result, compared with ``atol=1e-3`` in
# ``test_active_rotated_filter``; the array has shape (64, 1, 3, 3).
# Every run of eight consecutive 3x3 blocks reuses the nine values of one
# ``np_feature`` block: the first block of a run equals that input block and
# the remaining seven are rearrangements that keep the centre value in place.
expected_output = np.array([[[[-1.4934e-01, 1.1341e+00, -1.6241e-01],
                              [-1.0986e+00, -1.1463e+00, -1.3176e+00],
                              [1.4808e+00, 7.6572e-01, -1.4548e+00]]],
                            [[[-1.0986e+00, -1.4934e-01, 1.1341e+00],
                              [1.4808e+00, -1.1463e+00, -1.6241e-01],
                              [7.6572e-01, -1.4548e+00, -1.3176e+00]]],
                            [[[1.4808e+00, -1.0986e+00, -1.4934e-01],
                              [7.6572e-01, -1.1463e+00, 1.1341e+00],
                              [-1.4548e+00, -1.3176e+00, -1.6241e-01]]],
                            [[[7.6572e-01, 1.4808e+00, -1.0986e+00],
                              [-1.4548e+00, -1.1463e+00, -1.4934e-01],
                              [-1.3176e+00, -1.6241e-01, 1.1341e+00]]],
                            [[[-1.4548e+00, 7.6572e-01, 1.4808e+00],
                              [-1.3176e+00, -1.1463e+00, -1.0986e+00],
                              [-1.6241e-01, 1.1341e+00, -1.4934e-01]]],
                            [[[-1.3176e+00, -1.4548e+00, 7.6572e-01],
                              [-1.6241e-01, -1.1463e+00, 1.4808e+00],
                              [1.1341e+00, -1.4934e-01, -1.0986e+00]]],
                            [[[-1.6241e-01, -1.3176e+00, -1.4548e+00],
                              [1.1341e+00, -1.1463e+00, 7.6572e-01],
                              [-1.4934e-01, -1.0986e+00, 1.4808e+00]]],
                            [[[1.1341e+00, -1.6241e-01, -1.3176e+00],
                              [-1.4934e-01, -1.1463e+00, -1.4548e+00],
                              [-1.0986e+00, 1.4808e+00, 7.6572e-01]]],
                            [[[1.9370e+00, 6.2799e-01, 2.5834e-02],
                              [-1.4242e+00, 7.6566e-01, 1.0015e+00],
                              [9.8669e-01, 4.1356e-01, 6.1068e-01]]],
                            [[[-1.4242e+00, 1.9370e+00, 6.2799e-01],
                              [9.8669e-01, 7.6566e-01, 2.5834e-02],
                              [4.1356e-01, 6.1068e-01, 1.0015e+00]]],
                            [[[9.8669e-01, -1.4242e+00, 1.9370e+00],
                              [4.1356e-01, 7.6566e-01, 6.2799e-01],
                              [6.1068e-01, 1.0015e+00, 2.5834e-02]]],
                            [[[4.1356e-01, 9.8669e-01, -1.4242e+00],
                              [6.1068e-01, 7.6566e-01, 1.9370e+00],
                              [1.0015e+00, 2.5834e-02, 6.2799e-01]]],
                            [[[6.1068e-01, 4.1356e-01, 9.8669e-01],
                              [1.0015e+00, 7.6566e-01, -1.4242e+00],
                              [2.5834e-02, 6.2799e-01, 1.9370e+00]]],
                            [[[1.0015e+00, 6.1068e-01, 4.1356e-01],
                              [2.5834e-02, 7.6566e-01, 9.8669e-01],
                              [6.2799e-01, 1.9370e+00, -1.4242e+00]]],
                            [[[2.5834e-02, 1.0015e+00, 6.1068e-01],
                              [6.2799e-01, 7.6566e-01, 4.1356e-01],
                              [1.9370e+00, -1.4242e+00, 9.8669e-01]]],
                            [[[6.2799e-01, 2.5834e-02, 1.0015e+00],
                              [1.9370e+00, 7.6566e-01, 6.1068e-01],
                              [-1.4242e+00, 9.8669e-01, 4.1356e-01]]],
                            [[[1.4565e+00, 1.4960e+00, 2.4339e-01],
                              [-2.2484e-01, 7.5942e-01, -8.1184e-01],
                              [-1.7077e+00, 1.0658e+00, 3.8311e-01]]],
                            [[[-2.2484e-01, 1.4565e+00, 1.4960e+00],
                              [-1.7077e+00, 7.5942e-01, 2.4339e-01],
                              [1.0658e+00, 3.8311e-01, -8.1184e-01]]],
                            [[[-1.7077e+00, -2.2484e-01, 1.4565e+00],
                              [1.0658e+00, 7.5942e-01, 1.4960e+00],
                              [3.8311e-01, -8.1184e-01, 2.4339e-01]]],
                            [[[1.0658e+00, -1.7077e+00, -2.2484e-01],
                              [3.8311e-01, 7.5942e-01, 1.4565e+00],
                              [-8.1184e-01, 2.4339e-01, 1.4960e+00]]],
                            [[[3.8311e-01, 1.0658e+00, -1.7077e+00],
                              [-8.1184e-01, 7.5942e-01, -2.2484e-01],
                              [2.4339e-01, 1.4960e+00, 1.4565e+00]]],
                            [[[-8.1184e-01, 3.8311e-01, 1.0658e+00],
                              [2.4339e-01, 7.5942e-01, -1.7077e+00],
                              [1.4960e+00, 1.4565e+00, -2.2484e-01]]],
                            [[[2.4339e-01, -8.1184e-01, 3.8311e-01],
                              [1.4960e+00, 7.5942e-01, 1.0658e+00],
                              [1.4565e+00, -2.2484e-01, -1.7077e+00]]],
                            [[[1.4960e+00, 2.4339e-01, -8.1184e-01],
                              [1.4565e+00, 7.5942e-01, 3.8311e-01],
                              [-2.2484e-01, -1.7077e+00, 1.0658e+00]]],
                            [[[8.4734e-01, 1.0904e+00, 2.4356e+00],
                              [9.5822e-01, 2.2260e-01, -2.4450e-01],
                              [-1.5078e+00, 7.0902e-02, -1.5921e+00]]],
                            [[[9.5822e-01, 8.4734e-01, 1.0904e+00],
                              [-1.5078e+00, 2.2260e-01, 2.4356e+00],
                              [7.0902e-02, -1.5921e+00, -2.4450e-01]]],
                            [[[-1.5078e+00, 9.5822e-01, 8.4734e-01],
                              [7.0902e-02, 2.2260e-01, 1.0904e+00],
                              [-1.5921e+00, -2.4450e-01, 2.4356e+00]]],
                            [[[7.0902e-02, -1.5078e+00, 9.5822e-01],
                              [-1.5921e+00, 2.2260e-01, 8.4734e-01],
                              [-2.4450e-01, 2.4356e+00, 1.0904e+00]]],
                            [[[-1.5921e+00, 7.0902e-02, -1.5078e+00],
                              [-2.4450e-01, 2.2260e-01, 9.5822e-01],
                              [2.4356e+00, 1.0904e+00, 8.4734e-01]]],
                            [[[-2.4450e-01, -1.5921e+00, 7.0902e-02],
                              [2.4356e+00, 2.2260e-01, -1.5078e+00],
                              [1.0904e+00, 8.4734e-01, 9.5822e-01]]],
                            [[[2.4356e+00, -2.4450e-01, -1.5921e+00],
                              [1.0904e+00, 2.2260e-01, 7.0902e-02],
                              [8.4734e-01, 9.5822e-01, -1.5078e+00]]],
                            [[[1.0904e+00, 2.4356e+00, -2.4450e-01],
                              [8.4734e-01, 2.2260e-01, -1.5921e+00],
                              [9.5822e-01, -1.5078e+00, 7.0902e-02]]],
                            [[[2.1173e+00, -7.3524e-01, 1.8888e+00],
                              [1.0169e+00, 4.7033e-01, -1.0875e+00],
                              [-1.0736e+00, -5.2245e-01, -2.8733e-01]]],
                            [[[1.0169e+00, 2.1173e+00, -7.3524e-01],
                              [-1.0736e+00, 4.7033e-01, 1.8888e+00],
                              [-5.2245e-01, -2.8733e-01, -1.0875e+00]]],
                            [[[-1.0736e+00, 1.0169e+00, 2.1173e+00],
                              [-5.2245e-01, 4.7033e-01, -7.3524e-01],
                              [-2.8733e-01, -1.0875e+00, 1.8888e+00]]],
                            [[[-5.2245e-01, -1.0736e+00, 1.0169e+00],
                              [-2.8733e-01, 4.7033e-01, 2.1173e+00],
                              [-1.0875e+00, 1.8888e+00, -7.3524e-01]]],
                            [[[-2.8733e-01, -5.2245e-01, -1.0736e+00],
                              [-1.0875e+00, 4.7033e-01, 1.0169e+00],
                              [1.8888e+00, -7.3524e-01, 2.1173e+00]]],
                            [[[-1.0875e+00, -2.8733e-01, -5.2245e-01],
                              [1.8888e+00, 4.7033e-01, -1.0736e+00],
                              [-7.3524e-01, 2.1173e+00, 1.0169e+00]]],
                            [[[1.8888e+00, -1.0875e+00, -2.8733e-01],
                              [-7.3524e-01, 4.7033e-01, -5.2245e-01],
                              [2.1173e+00, 1.0169e+00, -1.0736e+00]]],
                            [[[-7.3524e-01, 1.8888e+00, -1.0875e+00],
                              [2.1173e+00, 4.7033e-01, -2.8733e-01],
                              [1.0169e+00, -1.0736e+00, -5.2245e-01]]],
                            [[[-5.6433e-01, 1.5835e+00, -1.5826e+00],
                              [-8.8974e-01, -4.3128e-01, -2.2423e-01],
                              [1.6552e-03, -1.7292e+00, 2.6639e-01]]],
                            [[[-8.8974e-01, -5.6433e-01, 1.5835e+00],
                              [1.6552e-03, -4.3128e-01, -1.5826e+00],
                              [-1.7292e+00, 2.6639e-01, -2.2423e-01]]],
                            [[[1.6552e-03, -8.8974e-01, -5.6433e-01],
                              [-1.7292e+00, -4.3128e-01, 1.5835e+00],
                              [2.6639e-01, -2.2423e-01, -1.5826e+00]]],
                            [[[-1.7292e+00, 1.6552e-03, -8.8974e-01],
                              [2.6639e-01, -4.3128e-01, -5.6433e-01],
                              [-2.2423e-01, -1.5826e+00, 1.5835e+00]]],
                            [[[2.6639e-01, -1.7292e+00, 1.6552e-03],
                              [-2.2423e-01, -4.3128e-01, -8.8974e-01],
                              [-1.5826e+00, 1.5835e+00, -5.6433e-01]]],
                            [[[-2.2423e-01, 2.6639e-01, -1.7292e+00],
                              [-1.5826e+00, -4.3128e-01, 1.6552e-03],
                              [1.5835e+00, -5.6433e-01, -8.8974e-01]]],
                            [[[-1.5826e+00, -2.2423e-01, 2.6639e-01],
                              [1.5835e+00, -4.3128e-01, -1.7292e+00],
                              [-5.6433e-01, -8.8974e-01, 1.6552e-03]]],
                            [[[1.5835e+00, -1.5826e+00, -2.2423e-01],
                              [-5.6433e-01, -4.3128e-01, 2.6639e-01],
                              [-8.8974e-01, 1.6552e-03, -1.7292e+00]]],
                            [[[-1.2951e-01, 1.3493e+00, -1.9329e+00],
                              [5.6248e-01, -5.1189e-01, 1.3614e+00],
                              [3.3680e-01, -8.7148e-01, 5.0592e-01]]],
                            [[[5.6248e-01, -1.2951e-01, 1.3493e+00],
                              [3.3680e-01, -5.1189e-01, -1.9329e+00],
                              [-8.7148e-01, 5.0592e-01, 1.3614e+00]]],
                            [[[3.3680e-01, 5.6248e-01, -1.2951e-01],
                              [-8.7148e-01, -5.1189e-01, 1.3493e+00],
                              [5.0592e-01, 1.3614e+00, -1.9329e+00]]],
                            [[[-8.7148e-01, 3.3680e-01, 5.6248e-01],
                              [5.0592e-01, -5.1189e-01, -1.2951e-01],
                              [1.3614e+00, -1.9329e+00, 1.3493e+00]]],
                            [[[5.0592e-01, -8.7148e-01, 3.3680e-01],
                              [1.3614e+00, -5.1189e-01, 5.6248e-01],
                              [-1.9329e+00, 1.3493e+00, -1.2951e-01]]],
                            [[[1.3614e+00, 5.0592e-01, -8.7148e-01],
                              [-1.9329e+00, -5.1189e-01, 3.3680e-01],
                              [1.3493e+00, -1.2951e-01, 5.6248e-01]]],
                            [[[-1.9329e+00, 1.3614e+00, 5.0592e-01],
                              [1.3493e+00, -5.1189e-01, -8.7148e-01],
                              [-1.2951e-01, 5.6248e-01, 3.3680e-01]]],
                            [[[1.3493e+00, -1.9329e+00, 1.3614e+00],
                              [-1.2951e-01, -5.1189e-01, 5.0592e-01],
                              [5.6248e-01, 3.3680e-01, -8.7148e-01]]],
                            [[[1.6781e-02, -8.3929e-01, 1.2060e+00],
                              [-1.0764e+00, 4.7821e-01, 1.5342e+00],
                              [-4.4542e-01, -1.8606e+00, 3.0827e-01]]],
                            [[[-1.0764e+00, 1.6781e-02, -8.3929e-01],
                              [-4.4542e-01, 4.7821e-01, 1.2060e+00],
                              [-1.8606e+00, 3.0827e-01, 1.5342e+00]]],
                            [[[-4.4542e-01, -1.0764e+00, 1.6781e-02],
                              [-1.8606e+00, 4.7821e-01, -8.3929e-01],
                              [3.0827e-01, 1.5342e+00, 1.2060e+00]]],
                            [[[-1.8606e+00, -4.4542e-01, -1.0764e+00],
                              [3.0827e-01, 4.7821e-01, 1.6781e-02],
                              [1.5342e+00, 1.2060e+00, -8.3929e-01]]],
                            [[[3.0827e-01, -1.8606e+00, -4.4542e-01],
                              [1.5342e+00, 4.7821e-01, -1.0764e+00],
                              [1.2060e+00, -8.3929e-01, 1.6781e-02]]],
                            [[[1.5342e+00, 3.0827e-01, -1.8606e+00],
                              [1.2060e+00, 4.7821e-01, -4.4542e-01],
                              [-8.3929e-01, 1.6781e-02, -1.0764e+00]]],
                            [[[1.2060e+00, 1.5342e+00, 3.0827e-01],
                              [-8.3929e-01, 4.7821e-01, -1.8606e+00],
                              [1.6781e-02, -1.0764e+00, -4.4542e-01]]],
                            [[[-8.3929e-01, 1.2060e+00, 1.5342e+00],
                              [1.6781e-02, 4.7821e-01, 3.0827e-01],
                              [-1.0764e+00, -4.4542e-01, -1.8606e+00]]]])

# Expected gradient of the input feature: a float64 array with the same
# (8, 1, 1, 3, 3) layout as ``np_feature`` in which every entry is 8.
expected_grad = np.full((8, 1, 1, 3, 3), 8.)


@pytest.mark.parametrize('device', [
    'cpu',
    pytest.param(
        'cuda',
        marks=pytest.mark.skipif(
            not torch.cuda.is_available(), reason='requires CUDA support')),
])
def test_active_rotated_filter(device):
    """Run ``active_rotated_filter`` forward and backward on ``device`` and
    compare both results with the stored reference arrays."""
    indices = torch.tensor(np_indices, dtype=torch.int, device=device)
    feature = torch.tensor(
        np_feature, dtype=torch.float, device=device, requires_grad=True)

    output = active_rotated_filter(feature, indices)
    # back-propagate a gradient of ones through the op
    output.backward(torch.ones_like(output))

    actual_output = output.data.cpu().numpy()
    actual_grad = feature.grad.data.cpu().numpy()
    assert np.allclose(actual_output, expected_output, atol=1e-3)
    assert np.allclose(actual_grad, expected_grad, atol=1e-3)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_ops/test_assign_score_withk.py
================================================
import pytest
import torch

from mmcv.ops import assign_score_withk


@pytest.mark.skipif(
    not torch.cuda.is_available(), reason='requires CUDA support')
def test_paconv_assign_scores():
    """Compare ``assign_score_withk`` (``'sum'`` aggregation) on CUDA with
    stored reference values, for the forward output and for the gradients
    of ``scores``, ``points`` and ``centers``."""
    # ---- inputs: float CUDA tensors that record gradients ----
    scores = torch.tensor([[[[0.06947571, 0.6065746], [0.28462553, 0.8378516],
                             [0.7595994, 0.97220325], [0.519155, 0.766185]],
                            [[0.15348864, 0.6051019], [0.21510637, 0.31916398],
                             [0.00236845, 0.5842595], [0.6783676, 0.5216348]]],
                           [[[0.23089725, 0.5568468], [0.7405102, 0.06438422],
                             [0.6887394, 0.22089851], [0.0502342, 0.79228795]],
                            [[0.44883424, 0.15427643],
                             [0.13817799, 0.34856772], [0.7989621, 0.33788306],
                             [0.15699774, 0.7693662]]]]).float().cuda()
    scores.requires_grad_()
    points = torch.tensor([[[[0.06001121, 0.92963666, 0.5753327, 0.7251477],
                             [0.53563064, 0.23129565, 0.92366195, 0.44261628]],
                            [[0.5770022, 0.56625944, 0.23560429, 0.11178821],
                             [0.7735967, 0.95678777, 0.25468266, 0.02895975]],
                            [[0.0589869, 0.09017515, 0.5977862, 0.02797985],
                             [0.603862, 0.35991007, 0.85761684, 0.3096559]],
                            [[0.22359002, 0.13983732, 0.5544243, 0.68863827],
                             [0.85646236, 0.75651926, 0.8638947, 0.83600986]],
                            [[0.45424145, 0.27458847, 0.6456112, 0.47162914],
                             [0.15773582, 0.47645122, 0.79964715, 0.3323908]],
                            [[0.8351399, 0.84696376, 0.9431732, 0.29418713],
                             [0.77168906, 0.6996871, 0.19354361, 0.03392768]],
                            [[0.30976456, 0.7074133, 0.581795, 0.976677],
                             [0.69656056, 0.07199162, 0.4708506, 0.29117996]],
                            [[0.5829035, 0.30201727, 0.76556486, 0.0935446],
                             [0.88030535, 0.16129416, 0.9242525, 0.49545723]]],
                           [[[0.50899494, 0.06482804, 0.44939405, 0.37704808],
                             [0.47028124, 0.11969638, 0.62823206, 0.28560323]],
                            [[0.40690207, 0.689753, 0.51636654, 0.23040164],
                             [0.06935787, 0.00488842, 0.22462702, 0.09182382]],
                            [[0.26611632, 0.00184339, 0.7730655, 0.5228131],
                             [0.87776035, 0.77895886, 0.2787183, 0.16620636]],
                            [[0.502574, 0.04039001, 0.5368497, 0.98379374],
                             [0.40973026, 0.3238272, 0.9733018, 0.13988364]],
                            [[0.04586202, 0.20983845, 0.20662665, 0.22270602],
                             [0.60387236, 0.5155574, 0.51237285, 0.6528438]],
                            [[0.45735973, 0.86821306, 0.61054605, 0.8370336],
                             [0.45193362, 0.3734138, 0.7825672, 0.5699416]],
                            [[0.44591594, 0.12447512, 0.09282011, 0.7055254],
                             [0.25223452, 0.46696228, 0.7051136, 0.892151]],
                            [[0.49615085, 0.47321403, 0.93138885, 0.7652197],
                             [0.38766378, 0.30332977, 0.23131835,
                              0.02863514]]]]).float().cuda()
    points.requires_grad_()
    centers = torch.tensor([[[[0.83878064, 0.96658987, 0.8033424, 0.9598312],
                              [0.45035273, 0.8768925, 0.977736, 0.54547966]],
                             [[0.01041394, 0.597893, 0.36212963, 0.4410367],
                              [0.94879234, 0.8372817, 0.21237361, 0.67945415]],
                             [[0.5096087, 0.26401454, 0.60034937, 0.5417416],
                              [0.87591463, 0.546456, 0.4096033, 0.16373193]],
                             [[0.79547447, 0.1482386, 0.12840575, 0.45384115],
                              [0.5640288, 0.944541, 0.5745328, 0.73229736]],
                             [[0.93011934, 0.7406011, 0.62621707, 0.8677915],
                              [0.91563636, 0.3595413, 0.6678378, 0.6085383]],
                             [[0.22431666, 0.65617776, 0.7483924, 0.6263364],
                              [0.30968404, 0.78204364, 0.14899081,
                               0.09628749]],
                             [[0.73675203, 0.72104895, 0.4648038, 0.6101647],
                              [0.7817645, 0.16572917, 0.3311919, 0.43407398]],
                             [[0.8193154, 0.09559608, 0.05978829, 0.90262103],
                              [0.4256065, 0.8165596, 0.8206446, 0.6604721]]],
                            [[[0.7159653, 0.18600845, 0.21433902, 0.3159626],
                              [0.3921569, 0.33221376, 0.5061177, 0.7961841]],
                             [[0.95338356, 0.04785997, 0.67185795, 0.6538394],
                              [0.4729132, 0.33404195, 0.17750603, 0.8445621]],
                             [[0.6755793, 0.16193843, 0.75943846, 0.92123103],
                              [0.2781859, 0.03114432, 0.710638, 0.52729136]],
                             [[0.8376105, 0.10858494, 0.13208169, 0.365772],
                              [0.5930795, 0.27390373, 0.14036089, 0.170403]],
                             [[0.3479789, 0.89855295, 0.04844379, 0.9871029],
                              [0.29781651, 0.0244137, 0.9179047, 0.8081611]],
                             [[0.12460887, 0.44991326, 0.19382608, 0.35037738],
                              [0.2773472, 0.4362057, 0.36757517, 0.5993509]],
                             [[0.29630446, 0.90046406, 0.5417113, 0.13510644],
                              [0.09623539, 0.04226565, 0.32001644,
                               0.44358212]],
                             [[0.5274848, 0.82096446, 0.9415489, 0.7123748],
                              [0.7537517, 0.8086482, 0.85345286,
                               0.7472754]]]]).float().cuda()
    centers.requires_grad_()
    knn_idx = torch.tensor([[[6, 7, 4, 6], [2, 4, 2, 4]],
                            [[7, 1, 3, 2], [6, 0, 2, 6]]]).long().cuda()
    aggregate_mode = 'sum'

    # ---- forward pass ----
    expected_output = torch.tensor(
        [[[[-0.08134781, 0.03877336, -0.8212776, -0.2869547],
           [-0.23378491, -0.24112664, -0.1600166, -0.4121864]],
          [[-0.05780616, -0.12298299, -0.0370461, -0.07889931],
           [-0.13956165, -0.02006848, -0.10940295, -0.0293439]],
          [[0.09284145, 0.58250105, 0.5927749, 0.16774094],
           [0.27070042, 0.13422406, 0.2617501, 0.23416464]],
          [[-0.06121218, -0.09561322, -0.20408826, 0.08079343],
           [0.00944228, 0.03874819, 0.08404065, 0.04041629]]],
         [[[-0.2110898, -0.13335688, -0.09315082, 0.08512095],
           [0.09121774, 0.15976946, 0.23994486, 0.14350912]],
          [[-0.36167958, -0.14891288, -0.64470863, -0.0646704],
           [-0.28276974, -0.08847666, -0.46904767, 0.20491874]],
          [[-0.34877953, -0.35533834, -0.25225785, -0.4638189],
           [-0.1420663, 0.09467781, 0.17088932, 0.22580585]],
          [[-0.3879708, -0.3991068, 0.05276498, -0.46989647],
           [0.32522714, -0.02163534, 0.21604237, 0.4346682]]]]).float()
    output = assign_score_withk(scores, points, centers, knn_idx,
                                aggregate_mode)
    assert torch.allclose(output.detach().cpu(), expected_output, atol=1e-6)

    # ---- backward pass: differentiate the summed output ----
    output.sum().backward()
    expected_scores_grad = torch.tensor([[[[0.04288036, -0.18217683],
                                           [-0.78873926, 0.7485497],
                                           [-0.6866992, 0.05346543],
                                           [0.04288036, -0.18217683]],
                                          [[-1.1407862, 0.13533896],
                                           [-0.06964391, -0.22948086],
                                           [-1.1407862, 0.13533896],
                                           [-0.06964391, -0.22948086]]],
                                         [[[-0.3363995, -2.212181],
                                           [-1.1589496, -2.7724311],
                                           [-0.9387654, -1.3163853],
                                           [-1.4385346, -1.0614843]],
                                          [[-0.5048497, 1.4143617],
                                           [-0.47332114, 0.6017133],
                                           [-0.30974793, 1.1995442],
                                           [-0.5048497, 1.4143617]]]]).float()
    expected_points_grad = torch.tensor(
        [[[[0., 0., 0., 0.], [0., 0., 0., 0.]],
          [[0., 0., 0., 0.], [0., 0., 0., 0.]],
          [[0.15585709, 0.15585709, 0.15585709, 0.15585709],
           [1.1893613, 1.1893613, 1.1893613, 1.1893613]],
          [[0., 0., 0., 0.], [0., 0., 0., 0.]],
          [[1.6530733, 1.6530733, 1.6530733, 1.6530733],
           [1.8130021, 1.8130021, 1.8130021, 1.8130021]],
          [[0., 0., 0., 0.], [0., 0., 0., 0.]],
          [[0.58863074, 0.58863074, 0.58863074, 0.58863074],
           [1.3727596, 1.3727596, 1.3727596, 1.3727596]],
          [[0.28462553, 0.28462553, 0.28462553, 0.28462553],
           [0.8378516, 0.8378516, 0.8378516, 0.8378516]]],
         [[[0.13817799, 0.13817799, 0.13817799, 0.13817799],
           [0.34856772, 0.34856772, 0.34856772, 0.34856772]],
          [[0.7405102, 0.7405102, 0.7405102, 0.7405102],
           [0.06438422, 0.06438422, 0.06438422, 0.06438422]],
          [[0.8491963, 0.8491963, 0.8491963, 0.8491963],
           [1.1301711, 1.1301711, 1.1301711, 1.1301711]],
          [[0.6887394, 0.6887394, 0.6887394, 0.6887394],
           [0.22089851, 0.22089851, 0.22089851, 0.22089851]],
          [[0., 0., 0., 0.], [0., 0., 0., 0.]],
          [[0., 0., 0., 0.], [0., 0., 0., 0.]],
          [[0.605832, 0.605832, 0.605832, 0.605832],
           [0.92364264, 0.92364264, 0.92364264, 0.92364264]],
          [[0.23089725, 0.23089725, 0.23089725, 0.23089725],
           [0.5568468, 0.5568468, 0.5568468, 0.5568468]]]]).float()
    expected_centers_grad = torch.tensor(
        [[[[0., 0., 0., 0.], [0., 0., 0., 0.]],
          [[0., 0., 0., 0.], [0., 0., 0., 0.]],
          [[-1.0493311, -1.0493311, -1.0493311, -1.0493311],
           [-2.0301602, -2.0301602, -2.0301602, -2.0301602]],
          [[0., 0., 0., 0.], [0., 0., 0., 0.]],
          [[0., 0., 0., 0.], [0., 0., 0., 0.]],
          [[0., 0., 0., 0.], [0., 0., 0., 0.]],
          [[-1.6328557, -1.6328557, -1.6328557, -1.6328557],
           [-3.1828144, -3.1828144, -3.1828144, -3.1828144]],
          [[0., 0., 0., 0.], [0., 0., 0., 0.]]],
         [[[0., 0., 0., 0.], [0., 0., 0., 0.]],
          [[0., 0., 0., 0.], [0., 0., 0., 0.]],
          [[0., 0., 0., 0.], [0., 0., 0., 0.]],
          [[0., 0., 0., 0.], [0., 0., 0., 0.]],
          [[0., 0., 0., 0.], [0., 0., 0., 0.]],
          [[0., 0., 0., 0.], [0., 0., 0., 0.]],
          [[-1.5429721, -1.5429721, -1.5429721, -1.5429721],
           [-1.6100934, -1.6100934, -1.6100934, -1.6100934]],
          [[-1.7103812, -1.7103812, -1.7103812, -1.7103812],
           [-1.6344175, -1.6344175, -1.6344175, -1.6344175]]]]).float()

    # each input's accumulated gradient must match its reference, checked in
    # the order scores, points, centers
    grad_checks = (
        (scores, expected_scores_grad),
        (points, expected_points_grad),
        (centers, expected_centers_grad),
    )
    for leaf, reference_grad in grad_checks:
        assert torch.allclose(
            leaf.grad.detach().cpu(), reference_grad, atol=1e-6)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_ops/test_ball_query.py
================================================
import pytest
import torch

from mmcv.ops import ball_query


@pytest.mark.skipif(
    not torch.cuda.is_available(), reason='requires CUDA support')
def test_ball_query():
    """Check the indices returned by ``ball_query`` on two small batches.

    The op is called twice with the same point clouds: once with the first
    two arguments set to ``0`` and ``0.2`` and once with ``0.2`` and ``0.4``
    (the "dilated" query). Both results must match hard-coded indices.
    """
    # NOTE(review): the first two positional arguments are presumably the
    # minimum and maximum search radius -- confirm against the op's docs.

    # Query points: 2 batches x 5 points x 3 coordinates.
    query_points = torch.tensor([
        [[-0.0740, 1.3147, -1.3625],
         [-2.2769, 2.7817, -0.2334],
         [-0.4003, 2.4666, -0.5116],
         [-0.0740, 1.3147, -1.3625],
         [-0.0740, 1.3147, -1.3625]],
        [[-2.0289, 2.4952, -0.1708],
         [-2.0668, 6.0278, -0.4875],
         [0.4066, 1.4211, -0.2947],
         [-2.0289, 2.4952, -0.1708],
         [-2.0289, 2.4952, -0.1708]],
    ]).cuda()

    # Candidate points: 2 batches x 10 points x 3 coordinates.
    cloud = torch.tensor([
        [[-0.0740, 1.3147, -1.3625],
         [0.5555, 1.0399, -1.3634],
         [-0.4003, 2.4666, -0.5116],
         [-0.5251, 2.4379, -0.8466],
         [-0.9691, 1.1418, -1.3733],
         [-0.2232, 0.9561, -1.3626],
         [-2.2769, 2.7817, -0.2334],
         [-0.2822, 1.3192, -1.3645],
         [0.1533, 1.5024, -1.0432],
         [0.4917, 1.1529, -1.3496]],
        [[-2.0289, 2.4952, -0.1708],
         [-0.7188, 0.9956, -0.5096],
         [-2.0668, 6.0278, -0.4875],
         [-1.9304, 3.3092, 0.6610],
         [0.0949, 1.4332, 0.3140],
         [-1.2879, 2.0008, -0.7791],
         [-0.7252, 0.9611, -0.6371],
         [0.4066, 1.4211, -0.2947],
         [0.3220, 1.4447, 0.3548],
         [-0.9744, 2.3856, -1.2000]],
    ]).cuda()

    found = ball_query(0, 0.2, 5, cloud, query_points)
    wanted = torch.tensor([
        [[0, 0, 0, 0, 0],
         [6, 6, 6, 6, 6],
         [2, 2, 2, 2, 2],
         [0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0]],
        [[0, 0, 0, 0, 0],
         [2, 2, 2, 2, 2],
         [7, 7, 7, 7, 7],
         [0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0]],
    ]).cuda()
    assert (found == wanted).all()

    # test dilated ball query
    found = ball_query(0.2, 0.4, 5, cloud, query_points)
    wanted = torch.tensor([
        [[0, 5, 7, 0, 0],
         [6, 6, 6, 6, 6],
         [2, 3, 2, 2, 2],
         [0, 5, 7, 0, 0],
         [0, 5, 7, 0, 0]],
        [[0, 0, 0, 0, 0],
         [2, 2, 2, 2, 2],
         [7, 7, 7, 7, 7],
         [0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0]],
    ]).cuda()
    assert (found == wanted).all()


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_ops/test_bbox.py
================================================
import numpy as np
import pytest
import torch


@pytest.mark.skipif(
    not torch.cuda.is_available(), reason='requires CUDA support')
class TestBBox(object):
    """CUDA tests for ``mmcv.ops.bbox_overlaps`` in float and half precision."""

    def _test_bbox_overlaps(self, dtype=torch.float):
        """Compare ``bbox_overlaps`` with hand-written expected overlaps.

        Three scenarios are covered, all with ``offset=1``: a 3x2 pairwise
        call, an ``aligned=True`` call, and a single box against four boxes.

        Args:
            dtype (torch.dtype): Type the box tensors are cast to once they
                are on the GPU.
        """
        from mmcv.ops import bbox_overlaps

        # Pairwise overlaps: every row of b1 against every row of b2.
        b1 = torch.tensor([[1.0, 1.0, 3.0, 4.0], [2.0, 2.0, 3.0, 4.0],
                           [7.0, 7.0, 8.0, 8.0]]).cuda().type(dtype)
        b2 = torch.tensor([[0.0, 2.0, 2.0, 5.0], [2.0, 1.0, 3.0,
                                                  3.0]]).cuda().type(dtype)
        should_output = np.array([[0.33333334, 0.5], [0.2, 0.5], [0.0, 0.0]])
        out = bbox_overlaps(b1, b2, offset=1)
        # The third positional argument of np.allclose is rtol; spell it out.
        assert np.allclose(out.cpu().numpy(), should_output, rtol=1e-2)

        # Aligned overlaps: row i of b1 against row i of b2 only.
        b1 = torch.tensor([[1.0, 1.0, 3.0, 4.0], [2.0, 2.0, 3.0,
                                                  4.0]]).cuda().type(dtype)
        b2 = torch.tensor([[0.0, 2.0, 2.0, 5.0], [2.0, 1.0, 3.0,
                                                  3.0]]).cuda().type(dtype)
        should_output = np.array([0.33333334, 0.5])
        out = bbox_overlaps(b1, b2, aligned=True, offset=1)
        assert np.allclose(out.cpu().numpy(), should_output, rtol=1e-2)

        # One box against four boxes.
        b1 = torch.tensor([[0.0, 0.0, 3.0, 3.0]]).cuda().type(dtype)
        b2 = torch.tensor([[4.0, 0.0, 5.0, 3.0], [3.0, 0.0, 4.0, 3.0],
                           [2.0, 0.0, 3.0, 3.0], [1.0, 0.0, 2.0,
                                                  3.0]]).cuda().type(dtype)
        should_output = np.array([0, 0.2, 0.5, 0.5])
        out = bbox_overlaps(b1, b2, offset=1)
        assert np.allclose(out.cpu().numpy(), should_output, rtol=1e-2)

    def test_bbox_overlaps_float(self):
        """Run the overlap checks with ``torch.float`` boxes."""
        self._test_bbox_overlaps(torch.float)

    def test_bbox_overlaps_half(self):
        """Run the overlap checks with ``torch.half`` boxes."""
        self._test_bbox_overlaps(torch.half)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_ops/test_bilinear_grid_sample.py
================================================
import numpy as np
import torch
import torch.nn.functional as F


class TestBilinearGridSample(object):

    def _test_bilinear_grid_sample(self,
                                   dtype=torch.float,
                                   align_corners=False,
                                   multiplier=1,
                                   precision=1e-3):
        from mmcv.ops.point_sample import bilinear_grid_sample

        input = torch.rand(1, 1, 20, 20, dtype=dtype)
        grid = torch.Tensor([[[1, 0, 0], [0, 1, 0]]])
        grid = F.affine_grid(
            grid, (1, 1, 15, 15), align_corners=align_corners).type_as(input)
        grid *= multiplier

        out = bilinear_grid_sample(input, grid, align_corners=align_corners)
        ref_out = F.grid_sample(input, grid, align_corners=align_corners)

        assert np.allclose(out.data.detach().cpu().numpy(),
                           ref_out.data.detach().cpu().numpy(), precision)

    def test_bilinear_grid_sample(self):
        self._test_bilinear_grid_sample(torch.double, False)
        self._test_bilinear_grid_sample(torch.double, True)
        self._test_bilinear_grid_sample(torch.float, False)
        self._test_bilinear_grid_sample(torch.float, True)
        self._test_bilinear_grid_sample(torch.float, False)
        self._test_bilinear_grid_sample(torch.float, True, 5)
        self._test_bilinear_grid_sample(torch.float, False, 10)
        self._test_bilinear_grid_sample(torch.float, True, -6)
        self._test_bilinear_grid_sample(torch.float, False, -10)
        self._test_bilinear_grid_sample(torch.double, True, 5)
        self._test_bilinear_grid_sample(torch.double, False, 10)
        self._test_bilinear_grid_sample(torch.double, True, -6)
        self._test_bilinear_grid_sample(torch.double, False, -10)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_ops/test_border_align.py
================================================
import copy

import numpy as np
import pytest
import torch

# Fixtures shared by the border_align tests below.
# Input feature map as nested lists (here 1 x 4 x 3 x 4).
# [1,4c,h,w]
input_arr = [[[[1., 2., 3., 4.], [5., 6., 7., 8.], [9., 10., 11., 12.]],
              [[6, 7, 5, 8], [2, 1, 3, 4], [12, 9, 11, 10]],
              [[-2, -3, 2, 0], [-4, -5, 1, -1], [-1, -1, -1, -1]],
              [[0, -1, 2, 1], [-4, -3, -2, -1], [-1, -2, -3, -4]]]]
# Twelve boxes of four values each (here 1 x 12 x 4).
# [1,h*w,4]
boxes_arr = [[[0, 0, 2, 1], [1, 0, 3, 1], [1, 0, 2, 1], [0, 0, 3, 1],
              [0, 0, 1, 2], [0, 0, 2, 2], [1, 0, 2, 1], [1, 0, 3, 1],
              [0, 1, 1, 2], [0, 0, 3, 2], [1, 0, 3, 2], [2, 0, 3, 2]]]
# Expected forward results, keyed by pool_size.
output_dict = {
    # [1,c,h*w,4] for each value,
    # the output is manually checked for its correctness

    # pool_size=1
    1: [[[[3., 6., 1., 2.], [4., 7., -1., 1.], [3., 7., 1., 2.],
          [4., 6., -1., 1.], [2., 12., -1., -1.], [3., 12., -1., 2.],
          [3., 7., 1., 2.], [4., 7., -1., 1.], [6., 12., -1., -2.],
          [4., 12., -1., 1.], [4., 9., -1., 1.], [4., 11., -1., 1.]]]],

    # pool_size=2
    2: [[[[3., 6., 1., 2.], [4., 7., 1., 1.], [3., 7., 1., 2.],
          [4., 6., -1., 1.], [2., 12., -1., -1.], [3., 12., -1., 2.],
          [3., 7., 1., 2.], [4., 7., 1., 1.], [6., 12., -1., -2.],
          [4., 12., -1., 1.], [4., 9., -1., 1.], [4., 11., -1., 1.]]]],
}
# Expected gradient w.r.t. the input when the output is back-propagated
# with an all-ones gradient, keyed by pool_size.
input_grad_dict = {
    # [1,4c,h,w] for each value
    # the grad is manually checked for its correctness

    # pool_size=1
    1: [[[[0., 1., 4., 6.], [0., 1., 0., 0.], [0., 0., 0., 0.]],
         [[2., 4., 0., 0.], [0., 0., 0., 0.], [4., 1., 1., 0.]],
         [[0., 0., 0., 0.], [0., 0., 3., 3.], [0., 2., 1., 3.]],
         [[0., 1., 4., 6.], [0., 0., 0., 0.], [0., 1., 0., 0.]]]],

    # pool_size=2
    2: [[[[0., 1., 4., 6.], [0., 1., 0., 0.], [0., 0., 0., 0.]],
         [[2., 4., 0., 0.], [0., 0., 0., 0.], [4., 1., 1., 0.]],
         [[0., 0., 0., 0.], [0., 0., 5., 1.], [0., 2., 1., 3.]],
         [[0., 1., 4., 6.], [0., 0., 0., 0.], [0., 1., 0., 0.]]]],
}


def _test_border_align_allclose(device, dtype, pool_size):
    """Check ``border_align`` and ``BorderAlign`` against stored fixtures.

    The functional form runs on a deep copy of the input and the module
    form on the original tensor. For each, the forward result and the input
    gradient (from an all-ones upstream gradient) are compared with the
    entries of ``output_dict`` / ``input_grad_dict`` for ``pool_size``.

    Args:
        device (str): Device the tensors are created on; the test is
            skipped when it is ``'cuda'`` and CUDA is unavailable.
        dtype (torch.dtype): Type of the input and box tensors.
        pool_size (int): Key into the fixture dicts, also passed to the op.
    """
    if device == 'cuda' and not torch.cuda.is_available():
        pytest.skip('test requires GPU')
    try:
        from mmcv.ops import border_align, BorderAlign
    except ModuleNotFoundError:
        pytest.skip('BorderAlign op is not successfully compiled')

    expected_output = np.array(output_dict[pool_size])
    expected_grad = np.array(input_grad_dict[pool_size])

    features = torch.tensor(
        np.array(input_arr), dtype=dtype, device=device, requires_grad=True)
    boxes = torch.tensor(np.array(boxes_arr), dtype=dtype, device=device)

    def run_and_compare(forward, feats):
        # Forward, back-propagate ones, then compare result and gradient.
        result = forward(feats, boxes)
        result.backward(torch.ones_like(result))
        assert np.allclose(
            result.data.type(dtype).cpu().numpy(),
            expected_output,
            atol=1e-5)
        assert np.allclose(
            feats.grad.data.type(dtype).cpu().numpy(),
            expected_grad,
            atol=1e-5)

    # test for border_align
    features_copy = copy.deepcopy(features)
    run_and_compare(
        lambda feats, rois: border_align(feats, rois, pool_size),
        features_copy)

    # test for BorderAlign
    run_and_compare(BorderAlign(pool_size), features)


@pytest.mark.parametrize('device', ['cuda'])
@pytest.mark.parametrize('dtype', [torch.float, torch.half, torch.double])
@pytest.mark.parametrize('pool_size', [1, 2])
def test_border_align(device, dtype, pool_size):
    """Run the border-align fixture check for every dtype / pool_size pair.

    Only the ``'cuda'`` device is parametrized; the helper skips the test
    when CUDA is not available.
    """
    _test_border_align_allclose(device, dtype, pool_size)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_ops/test_box_iou_rotated.py
================================================
import numpy as np
import pytest
import torch


class TestBoxIoURotated(object):
    """Tests for ``mmcv.ops.box_iou_rotated`` (default and ``'iof'`` mode).

    The four public tests use the same two sets of three rotated boxes and
    differ only in the device and in the expected overlap values.
    """

    # Expected pairwise / aligned overlaps when no ``mode`` is passed.
    _EXPECT_IOU = [[0.3708, 0.4351, 0.0000], [0.1104, 0.4487, 0.0424],
                   [0.0000, 0.0000, 0.3622]]
    _EXPECT_IOU_ALIGNED = [0.3708, 0.4487, 0.3622]
    # Expected pairwise / aligned overlaps for ``mode='iof'``.
    _EXPECT_IOF = [[0.4959, 0.5306, 0.0000], [0.1823, 0.5420, 0.1832],
                   [0.0000, 0.0000, 0.4404]]
    _EXPECT_IOF_ALIGNED = [0.4959, 0.5420, 0.4404]

    def _check_box_iou_rotated(self, expect_pairwise, expect_aligned,
                               use_cuda, **mode_kwargs):
        """Run the cw and ccw checks for one device / mode combination.

        Args:
            expect_pairwise (list): Expected 3x3 overlaps.
            expect_aligned (list): Expected overlaps for ``aligned=True``.
            use_cuda (bool): Move the box tensors to the GPU first.
            **mode_kwargs: Extra keyword arguments forwarded unchanged to
                every ``box_iou_rotated`` call (empty, or ``mode='iof'``).
        """
        from mmcv.ops import box_iou_rotated

        # Build the arrays on every call: torch.from_numpy shares memory
        # with them and the last column is negated in place further down.
        np_boxes1 = np.asarray(
            [[1.0, 1.0, 3.0, 4.0, 0.5], [2.0, 2.0, 3.0, 4.0, 0.6],
             [7.0, 7.0, 8.0, 8.0, 0.4]],
            dtype=np.float32)
        np_boxes2 = np.asarray(
            [[0.0, 2.0, 2.0, 5.0, 0.3], [2.0, 1.0, 3.0, 3.0, 0.5],
             [5.0, 5.0, 6.0, 7.0, 0.4]],
            dtype=np.float32)
        np_expect_ious = np.asarray(expect_pairwise, dtype=np.float32)
        np_expect_ious_aligned = np.asarray(expect_aligned, dtype=np.float32)

        boxes1 = torch.from_numpy(np_boxes1)
        boxes2 = torch.from_numpy(np_boxes2)
        if use_cuda:
            boxes1 = boxes1.cuda()
            boxes2 = boxes2.cuda()

        def verify(**angle_kwargs):
            # Pairwise call first, then the aligned call.
            ious = box_iou_rotated(boxes1, boxes2, **mode_kwargs,
                                   **angle_kwargs)
            assert np.allclose(ious.cpu().numpy(), np_expect_ious, atol=1e-4)

            ious = box_iou_rotated(
                boxes1, boxes2, aligned=True, **mode_kwargs, **angle_kwargs)
            assert np.allclose(
                ious.cpu().numpy(), np_expect_ious_aligned, atol=1e-4)

        # test cw angle definition
        verify()

        # test ccw angle definition
        boxes1[..., -1] *= -1
        boxes2[..., -1] *= -1
        verify(clockwise=False)

    def test_box_iou_rotated_cpu(self):
        self._check_box_iou_rotated(
            self._EXPECT_IOU, self._EXPECT_IOU_ALIGNED, use_cuda=False)

    @pytest.mark.skipif(
        not torch.cuda.is_available(), reason='requires CUDA support')
    def test_box_iou_rotated_cuda(self):
        self._check_box_iou_rotated(
            self._EXPECT_IOU, self._EXPECT_IOU_ALIGNED, use_cuda=True)

    def test_box_iou_rotated_iof_cpu(self):
        self._check_box_iou_rotated(
            self._EXPECT_IOF,
            self._EXPECT_IOF_ALIGNED,
            use_cuda=False,
            mode='iof')

    @pytest.mark.skipif(
        not torch.cuda.is_available(), reason='requires CUDA support')
    def test_box_iou_rotated_iof_cuda(self):
        self._check_box_iou_rotated(
            self._EXPECT_IOF,
            self._EXPECT_IOF_ALIGNED,
            use_cuda=True,
            mode='iof')


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_ops/test_carafe.py
================================================
import torch
from torch.autograd import gradcheck


class TestCarafe(object):
    """Numerical gradient checks for ``CARAFENaive`` and ``CARAFE`` on CUDA.

    NOTE(review): without CUDA both tests return silently, so they are
    reported as passed rather than skipped -- consider ``pytest.skip``.
    """

    @staticmethod
    def _run_gradcheck(op_cls):
        """Run ``gradcheck`` on ``op_cls(5, 4, 2)`` with random CUDA inputs."""
        features = torch.randn(
            2, 64, 3, 3, requires_grad=True, device='cuda').double()
        masks = torch.randn(
            2, 100, 6, 6, requires_grad=True,
            device='cuda').sigmoid().double()
        gradcheck(op_cls(5, 4, 2), (features, masks), atol=1e-4, eps=1e-4)

    def test_carafe_naive_gradcheck(self):
        if not torch.cuda.is_available():
            return
        from mmcv.ops import CARAFENaive
        self._run_gradcheck(CARAFENaive)

    def test_carafe_gradcheck(self):
        if not torch.cuda.is_available():
            return
        from mmcv.ops import CARAFE
        self._run_gradcheck(CARAFE)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_ops/test_cc_attention.py
================================================
import numpy as np
import torch
import torch.nn as nn


class Loss(nn.Module):
    """Mean of the signed element-wise difference ``input - target``."""

    def __init__(self):
        super(Loss, self).__init__()

    def forward(self, input, target):
        """Flatten both tensors with ``view(-1)`` and average the difference.

        Args:
            input (torch.Tensor): Predicted values.
            target (torch.Tensor): Reference values with the same number of
                elements.

        Returns:
            torch.Tensor: Scalar tensor holding ``mean(input - target)``.
        """
        flat_input, flat_target = input.view(-1), target.view(-1)
        return (flat_input - flat_target).mean()


class TestCrissCrossAttention(object):
    """Regression test for ``CrissCrossAttention`` against stored tensors."""

    def test_cc_attention(self):
        """Forward/backward a stored input and compare with a stored output.

        Input and reference output are read as float32 from ``.bin`` files
        under ``tests/data/for_ccattention`` (paths are relative to the
        working directory) and reshaped to ``(1, 32, 45, 45)``.
        """
        if torch.cuda.is_available():
            device = torch.device('cuda:0')
        else:
            device = torch.device('cpu')

        from mmcv.ops import CrissCrossAttention
        criterion = Loss()

        tensor_shape = (1, 32, 45, 45)
        raw_input = np.fromfile(
            'tests/data/for_ccattention/ccattention_input.bin',
            dtype=np.float32)
        raw_reference = np.fromfile(
            'tests/data/for_ccattention/ccattention_output.bin',
            dtype=np.float32)
        raw_input = raw_input.reshape(tensor_shape)
        raw_reference = raw_reference.reshape(tensor_shape)
        label = torch.ones(tensor_shape)

        input = torch.FloatTensor(raw_input)
        reference = torch.FloatTensor(raw_reference)

        input.requires_grad = True

        shape = input.shape
        # The module is built with the channel dimension of the input.
        cca = CrissCrossAttention(shape[1])
        cca.to(device)
        input = input.to(device)
        label = label.to(device)
        cca.train()

        prediction = cca(input)
        criterion(prediction, label).backward()

        prediction = prediction.detach().cpu().numpy()
        reference = reference.numpy()

        assert np.allclose(prediction, reference)
        assert prediction.shape == shape


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_ops/test_contour_expand.py
================================================
import numpy as np
import torch


def test_contour_expand():
    """Check ``contour_expand`` on a 10x10 map with two labelled kernels.

    Label 1 occupies rows 3-6, columns 2-3 and label 2 occupies rows 3-6,
    column 8. The larger mask covers rows 3-6, columns 2-8. The expected
    result assigns columns 2-5 to label 1 and columns 6-8 to label 2. The
    op is called once with numpy inputs and once with torch tensors.
    """
    from mmcv.ops import contour_expand

    grid_shape = (10, 10)
    band = slice(3, 7)  # rows 3..6 hold all non-zero entries

    np_internal_kernel_label = np.zeros(grid_shape, dtype=np.int32)
    np_internal_kernel_label[band, 2:4] = 1
    np_internal_kernel_label[band, 8] = 2

    np_kernel_mask1 = np.zeros(grid_shape, dtype=np.uint8)
    np_kernel_mask1[band, 2:9] = 1
    np_kernel_mask2 = (np_internal_kernel_label > 0).astype(np.uint8)
    np_kernel_mask = np.stack([np_kernel_mask1, np_kernel_mask2])

    min_area = 1
    kernel_region_num = 3

    expected = np.zeros(grid_shape, dtype=int)
    expected[band, 2:6] = 1
    expected[band, 6:9] = 2
    gt = expected.tolist()

    # numpy inputs
    result = contour_expand(np_kernel_mask, np_internal_kernel_label, min_area,
                            kernel_region_num)
    assert np.allclose(result, gt)

    # torch inputs built from the same arrays
    result = contour_expand(
        torch.from_numpy(np_kernel_mask),
        torch.from_numpy(np_internal_kernel_label), min_area,
        kernel_region_num)
    assert np.allclose(result, gt)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_ops/test_convex_iou.py
================================================
import numpy as np
import pytest
import torch

from mmcv.ops import convex_giou, convex_iou

# Fixtures for the convex IoU / GIoU tests.

# Two point sets of 18 values each.
# NOTE(review): presumably nine (x, y) pairs per row -- confirm against the
# convex_iou documentation.
np_pointsets = np.asarray([
    [
        1.0, 1.0, 2.0, 2.0, 1.0, 2.0, 2.0, 1.0, 1.0,
        3.0, 3.0, 1.0, 2.0, 3.0, 3.0, 2.0, 1.5, 1.5,
    ],
    [
        1.5, 1.5, 2.5, 2.5, 1.5, 2.5, 2.5, 1.5, 1.5,
        3.5, 3.5, 1.5, 2.5, 3.5, 3.5, 2.5, 2.0, 2.0,
    ],
])

# Two polygons of eight values each.
np_polygons = np.asarray([
    [1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 1.0],
    [1.0, 1.0, 1.0, 3.0, 3.0, 3.0, 3.0, 1.0],
])

# Expected convex_iou result (2 x 2).
np_expected_iou = np.asarray([
    [0.2857, 0.8750],
    [0.0588, 0.4286],
])

# Expected first output of convex_giou (one value per point set).
np_expected_giou = np.asarray([0.2857, 0.3831])

# Expected second output of convex_giou (same shape as np_pointsets).
np_expected_grad = np.asarray([
    [
        0.0204, 0.0408, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0612, -0.0408, -0.0408, 0.0816,
        -0.0408, -0.0816, -0.0816, -0.0408, 0.0000, 0.0000,
    ],
    [
        -0.1848, -0.1848, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, -0.1076, -0.0801, -0.0801, -0.1076,
        -0.0367, -0.0734, -0.0734, -0.0367, 0.0000, 0.0000,
    ],
])


@pytest.mark.skipif(
    not torch.cuda.is_available(), reason='requires CUDA support')
def test_convex_iou():
    """Compare ``convex_iou`` on the module fixtures with the stored IoUs."""
    # Move every fixture to the GPU as float32.
    pointsets, polygons, expected_iou = (
        torch.from_numpy(fixture).cuda().float()
        for fixture in (np_pointsets, np_polygons, np_expected_iou))
    ious = convex_iou(pointsets, polygons)
    assert torch.allclose(ious, expected_iou, atol=1e-3)


@pytest.mark.skipif(
    not torch.cuda.is_available(), reason='requires CUDA support')
def test_convex_giou():
    """Compare both outputs of ``convex_giou`` with the stored fixtures."""
    # Move every fixture to the GPU as float32.
    pointsets, polygons, expected_giou, expected_grad = (
        torch.from_numpy(fixture).cuda().float()
        for fixture in (np_pointsets, np_polygons, np_expected_giou,
                        np_expected_grad))
    giou, grad = convex_giou(pointsets, polygons)
    assert torch.allclose(giou, expected_giou, atol=1e-3)
    assert torch.allclose(grad, expected_grad, atol=1e-3)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_ops/test_corner_pool.py
================================================
"""
CommandLine:
    pytest tests/test_ops/test_corner_pool.py
"""
import pytest
import torch

from mmcv.ops import CornerPool


def test_corner_pool_device_and_dtypes_cpu():
    """Check the four ``CornerPool`` modes on small integer CPU tensors.

    An unknown mode must raise ``AssertionError``. For each valid mode the
    pooled tensor must keep the input's tensor type and equal the expected
    answer exactly.

    CommandLine:
        xdoctest -m tests/test_ops/test_corner_pool.py \
            test_corner_pool_device_and_dtypes_cpu
    """
    with pytest.raises(AssertionError):
        # pool mode must in ['bottom', 'left', 'right', 'top']
        CornerPool('corner')

    # Input for the horizontal modes ('left' / 'right').
    lr_tensor = torch.tensor([[[[0, 0, 0, 0, 0],
                                [2, 1, 3, 0, 2],
                                [5, 4, 1, 1, 6],
                                [0, 0, 0, 0, 0],
                                [0, 0, 0, 0, 0]]]])
    # Input for the vertical modes ('top' / 'bottom').
    tb_tensor = torch.tensor([[[[0, 3, 1, 0, 0],
                                [0, 1, 1, 0, 0],
                                [0, 3, 4, 0, 0],
                                [0, 2, 2, 0, 0],
                                [0, 0, 2, 0, 0]]]])

    # (mode, input, expected output), in the order they are checked.
    cases = [
        ('left', lr_tensor,
         torch.tensor([[[[0, 0, 0, 0, 0],
                         [3, 3, 3, 2, 2],
                         [6, 6, 6, 6, 6],
                         [0, 0, 0, 0, 0],
                         [0, 0, 0, 0, 0]]]])),
        ('right', lr_tensor,
         torch.tensor([[[[0, 0, 0, 0, 0],
                         [2, 2, 3, 3, 3],
                         [5, 5, 5, 5, 6],
                         [0, 0, 0, 0, 0],
                         [0, 0, 0, 0, 0]]]])),
        ('top', tb_tensor,
         torch.tensor([[[[0, 3, 4, 0, 0],
                         [0, 3, 4, 0, 0],
                         [0, 3, 4, 0, 0],
                         [0, 2, 2, 0, 0],
                         [0, 0, 2, 0, 0]]]])),
        ('bottom', tb_tensor,
         torch.tensor([[[[0, 3, 1, 0, 0],
                         [0, 3, 1, 0, 0],
                         [0, 3, 4, 0, 0],
                         [0, 3, 4, 0, 0],
                         [0, 3, 4, 0, 0]]]])),
    ]
    for mode, source, answer in cases:
        pooled = CornerPool(mode)(source)
        assert pooled.type() == source.type()
        assert torch.equal(pooled, answer)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_ops/test_correlation.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import pytest
import torch

from mmcv.ops import Correlation

# Two inputs for the Correlation layer, nested as 1 x 1 x 3 x 3 lists.
_input1 = [[[[1., 2., 3.], [0., 1., 2.], [3., 5., 2.]]]]
_input2 = [[[[1., 2., 3.], [3., 1., 2.], [8., 5., 2.]]]]

# NOTE(review): gt_out_shape and gt_input1_grad are not referenced by the
# test class below, which compares the gradients with the inputs directly.
gt_out_shape = (1, 1, 1, 3, 3)
# Expected output: the element-wise product of _input1 and _input2.
_gt_out = [[[[[1., 4., 9.], [0., 1., 4.], [24., 25., 4.]]]]]
# Holds the same values as _input2.
gt_input1_grad = [[[[1., 2., 3.], [3., 1., 2.], [8., 5., 2.]]]]


def assert_equal_tensor(tensor_a, tensor_b):
    """Assert that two tensors are equal in every element.

    Args:
        tensor_a (torch.Tensor): First tensor.
        tensor_b (torch.Tensor): Second tensor, compared via ``Tensor.eq``.

    Raises:
        AssertionError: If any pair of elements differs.
    """
    elementwise_equal = tensor_a.eq(tensor_b)
    assert elementwise_equal.all()


class TestCorrelation:
    """CUDA tests for the ``Correlation`` layer with ``max_displacement=0``."""

    def _test_correlation(self, dtype=torch.float):
        """Check forward output and input gradients for one dtype.

        With an all-ones upstream gradient, the gradient of each input is
        expected to equal the other input.

        Args:
            dtype (torch.dtype): Type of the input and expected tensors.
        """
        correlation = Correlation(max_displacement=0)

        first = torch.tensor(_input1, dtype=dtype).cuda()
        second = torch.tensor(_input2, dtype=dtype).cuda()
        for leaf in (first, second):
            leaf.requires_grad = True

        result = correlation(first, second)
        result.backward(torch.ones_like(result))

        # `eq_cpu` is not implemented for 'Half' in torch1.5.0,
        # so we need to make a comparison for cuda tensor
        # rather than cpu tensor
        expected = torch.tensor(_gt_out, dtype=dtype).cuda()
        assert_equal_tensor(result, expected)
        assert_equal_tensor(first.grad.detach(), second)
        assert_equal_tensor(second.grad.detach(), first)

    @pytest.mark.skipif(
        not torch.cuda.is_available(), reason='requires CUDA support')
    def test_correlation(self):
        """Run the check for float, double and half precision, in order."""
        for dtype in (torch.float, torch.double, torch.half):
            self._test_correlation(dtype)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_ops/test_deform_conv.py
================================================
import numpy as np
import pytest
import torch

from mmcv.utils import TORCH_VERSION, digit_version

try:
    # If PyTorch version >= 1.6.0 and fp16 is enabled, torch.cuda.amp.autocast
    # would be imported and used; we should test if our modules support it.
    from torch.cuda.amp import autocast
except ImportError:
    pass

# Fixtures for the deformable-convolution tests.

# Single 1 x 1 x 3 x 3 input sample; the test repeats it along the batch axis.
input = [[[[1., 2., 3.], [0., 1., 2.], [3., 5., 2.]]]]
# Offset-conv weights; reshaped to (8, 1, 2, 2) when loaded into the model.
offset_weight = [[[0.1, 0.4, 0.6, 0.1]], [[0.3, 0.2, 0.1, 0.3]],
                 [[0.5, 0.5, 0.2, 0.8]], [[0.8, 0.3, 0.9, 0.1]],
                 [[0.3, 0.1, 0.2, 0.5]], [[0.3, 0.7, 0.5, 0.3]],
                 [[0.6, 0.2, 0.5, 0.3]], [[0.4, 0.1, 0.8, 0.4]]]
# Offset-conv bias, one value per offset channel (reshaped to (8,)).
offset_bias = [0.7, 0.1, 0.8, 0.5, 0.6, 0.5, 0.4, 0.7]
# Deformable-conv weight; reshaped to (1, 1, 2, 2) when loaded.
deform_weight = [[[0.4, 0.2, 0.1, 0.9]]]

# Expected forward output and gradients for a single sample.
gt_out = [[[[1.650, 0.], [0.000, 0.]]]]
gt_x_grad = [[[[-0.666, 0.204, 0.000], [0.030, -0.416, 0.012],
               [0.000, 0.252, 0.129]]]]
gt_offset_weight_grad = [[[[1.44, 2.88], [0.00, 1.44]]],
                         [[[-0.72, -1.44], [0.00, -0.72]]],
                         [[[0.00, 0.00], [0.00, 0.00]]],
                         [[[0.00, 0.00], [0.00, 0.00]]],
                         [[[-0.10, -0.20], [0.00, -0.10]]],
                         [[[-0.08, -0.16], [0.00, -0.08]]],
                         [[[-0.54, -1.08], [0.00, -0.54]]],
                         [[[-0.54, -1.08], [0.00, -0.54]]]]
# A plain list like the other fixtures: no trailing comma, which would wrap
# it in a 1-tuple.
gt_offset_bias_grad = [1.44, -0.72, 0., 0., -0.10, -0.08, -0.54, -0.54]
gt_deform_weight_grad = [[[[3.62, 0.], [0.40, 0.18]]]]


class TestDeformconv(object):

    def _test_deformconv(self,
                         dtype=torch.float,
                         threshold=1e-3,
                         device='cuda',
                         batch_size=10,
                         im2col_step=2):
        if not torch.cuda.is_available() and device == 'cuda':
            pytest.skip('test requires GPU')
        from mmcv.ops import DeformConv2dPack
        c_in = 1
        c_out = 1
        batch_size = 10
        repeated_input = np.repeat(input, batch_size, axis=0)
        repeated_gt_out = np.repeat(gt_out, batch_size, axis=0)
        repeated_gt_x_grad = np.repeat(gt_x_grad, batch_size, axis=0)
        x = torch.tensor(repeated_input, device=device, dtype=dtype)
        x.requires_grad = True
        model = DeformConv2dPack(
            in_channels=c_in,
            out_channels=c_out,
            kernel_size=2,
            stride=1,
            padding=0,
            im2col_step=im2col_step)
        model.conv_offset.weight.data = torch.nn.Parameter(
            torch.Tensor(offset_weight).reshape(8, 1, 2, 2))
        model.conv_offset.bias.data = torch.nn.Parameter(
            torch.Tensor(offset_bias).reshape(8))
        model.weight.data = torch.nn.Parameter(
            torch.Tensor(deform_weight).reshape(1, 1, 2, 2))
        if device == 'cuda':
            model.cuda()
        model.type(dtype)

        out = model(x)
        out.backward(torch.ones_like(out))

        assert np.allclose(out.data.detach().cpu().numpy(), repeated_gt_out,
                           threshold)
        assert np.allclose(x.grad.detach().cpu().numpy(), repeated_gt_x_grad,
                           threshold)
        # the batch size of the input is increased which results in
        # a larger gradient so we need to divide by the batch_size
        assert np.allclose(
            model.conv_offset.weight.grad.detach().cpu().numpy() / batch_size,
            gt_offset_weight_grad, threshold)
        assert np.allclose(
            model.conv_offset.bias.grad.detach().cpu().numpy() / batch_size,
            gt_offset_bias_grad, threshold)
        assert np.allclose(
            model.weight.grad.detach().cpu().numpy() / batch_size,
            gt_deform_weight_grad, threshold)

        from mmcv.ops import DeformConv2d

        # test bias
        model = DeformConv2d(1, 1, 2, stride=1, padding=0)
        assert not hasattr(model, 'bias')
        # test bias=True
        with pytest.raises(AssertionError):
            model = DeformConv2d(1, 1, 2, stride=1, padding=0, bias=True)
        # test in_channels % group != 0
        with pytest.raises(AssertionError):
            model = DeformConv2d(3, 2, 3, groups=2)
        # test out_channels % group != 0
        with pytest.raises(AssertionError):
            model = DeformConv2d(3, 4, 3, groups=3)

    def _test_amp_deformconv(self,
                             input_dtype,
                             threshold=1e-3,
                             batch_size=10,
                             im2col_step=2):
        """The function to test amp released on pytorch 1.6.0.

        The type of input data might be torch.float or torch.half,
        so we should test deform_conv in both cases. With amp, the
        data type of model will NOT be set manually.

        Args:
            input_dtype: torch.float or torch.half.
            threshold: the same as above function.
        """
        if not torch.cuda.is_available():
            return
        from mmcv.ops import DeformConv2dPack
        c_in = 1
        c_out = 1
        repeated_input = np.repeat(input, batch_size, axis=0)
        repeated_gt_out = np.repeat(gt_out, batch_size, axis=0)
        repeated_gt_x_grad = np.repeat(gt_x_grad, batch_size, axis=0)
        x = torch.Tensor(repeated_input).cuda().type(input_dtype)
        x.requires_grad = True
        model = DeformConv2dPack(
            in_channels=c_in,
            out_channels=c_out,
            kernel_size=2,
            stride=1,
            padding=0,
            im2col_step=im2col_step)
        model.conv_offset.weight.data = torch.nn.Parameter(
            torch.Tensor(offset_weight).reshape(8, 1, 2, 2))
        model.conv_offset.bias.data = torch.nn.Parameter(
            torch.Tensor(offset_bias).reshape(8))
        model.weight.data = torch.nn.Parameter(
            torch.Tensor(deform_weight).reshape(1, 1, 2, 2))
        model.cuda()

        out = model(x)
        out.backward(torch.ones_like(out))

        assert np.allclose(out.data.detach().cpu().numpy(), repeated_gt_out,
                           threshold)
        assert np.allclose(x.grad.detach().cpu().numpy(), repeated_gt_x_grad,
                           threshold)
        assert np.allclose(
            model.conv_offset.weight.grad.detach().cpu().numpy() / batch_size,
            gt_offset_weight_grad, threshold)
        assert np.allclose(
            model.conv_offset.bias.grad.detach().cpu().numpy() / batch_size,
            gt_offset_bias_grad, threshold)
        assert np.allclose(
            model.weight.grad.detach().cpu().numpy() / batch_size,
            gt_deform_weight_grad, threshold)

        from mmcv.ops import DeformConv2d

        # test bias
        model = DeformConv2d(1, 1, 2, stride=1, padding=0)
        assert not hasattr(model, 'bias')
        # test bias=True
        with pytest.raises(AssertionError):
            model = DeformConv2d(1, 1, 2, stride=1, padding=0, bias=True)
        # test in_channels % group != 0
        with pytest.raises(AssertionError):
            model = DeformConv2d(3, 2, 3, groups=2)
        # test out_channels % group != 0
        with pytest.raises(AssertionError):
            model = DeformConv2d(3, 4, 3, groups=3)

    def test_deformconv(self):
        self._test_deformconv(torch.double, device='cpu')
        self._test_deformconv(torch.float, device='cpu', threshold=1e-1)
        self._test_deformconv(torch.double)
        self._test_deformconv(torch.float)
        self._test_deformconv(torch.half, threshold=1e-1)
        # test batch_size < im2col_step
        self._test_deformconv(torch.float, batch_size=1, im2col_step=2)
        # test bach_size % im2col_step != 0
        with pytest.raises(
                AssertionError,
                match='batch size must be divisible by im2col_step'):
            self._test_deformconv(torch.float, batch_size=10, im2col_step=3)

        # test amp when torch version >= '1.6.0', the type of
        # input data for deformconv might be torch.float or torch.half
        if (TORCH_VERSION != 'parrots'
                and digit_version(TORCH_VERSION) >= digit_version('1.6.0')):
            with autocast(enabled=True):
                self._test_amp_deformconv(torch.float, 1e-1)
                self._test_amp_deformconv(torch.half, 1e-1)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_ops/test_deform_roi_pool.py
================================================
import os

import numpy as np
import torch

_USING_PARROTS = True
try:
    from parrots.autograd import gradcheck
except ImportError:
    from torch.autograd import gradcheck
    _USING_PARROTS = False

# Directory containing this test module (not referenced by the tests in this
# file).
cur_dir = os.path.dirname(os.path.abspath(__file__))

# Test cases as (feature map, rois) pairs of nested lists. The tests below
# convert both members to float CUDA tensors and use the size of the feature
# map's second axis as the pooling layer's output channel count.
inputs = [([[[[1., 2.], [3., 4.]]]], [[0., 0., 0., 1., 1.]]),
          ([[[[1., 2.], [3., 4.]], [[4., 3.], [2.,
                                               1.]]]], [[0., 0., 0., 1., 1.]]),
          ([[[[1., 2., 5., 6.], [3., 4., 7., 8.], [9., 10., 13., 14.],
              [11., 12., 15., 16.]]]], [[0., 0., 0., 3., 3.]])]
# One pair of nested lists per entry of ``inputs``.
# NOTE(review): not referenced by the gradcheck tests in this file; presumably
# (expected output, expected input gradient) reference values -- confirm
# before relying on them.
outputs = [([[[[1, 1.25], [1.5, 1.75]]]], [[[[3.0625, 0.4375],
                                             [0.4375, 0.0625]]]]),
           ([[[[1., 1.25], [1.5, 1.75]], [[4, 3.75],
                                          [3.5, 3.25]]]], [[[[3.0625, 0.4375],
                                                             [0.4375, 0.0625]],
                                                            [[3.0625, 0.4375],
                                                             [0.4375,
                                                              0.0625]]]]),
           ([[[[1.9375, 4.75],
               [7.5625,
                10.375]]]], [[[[0.47265625, 0.4296875, 0.4296875, 0.04296875],
                               [0.4296875, 0.390625, 0.390625, 0.0390625],
                               [0.4296875, 0.390625, 0.390625, 0.0390625],
                               [0.04296875, 0.0390625, 0.0390625,
                                0.00390625]]]])]


class TestDeformRoIPool(object):
    """Numerical gradient checks for the deformable RoI pooling layers."""

    def _gradcheck_pool(self, pool_cls):
        """Run ``gradcheck`` on ``pool_cls`` for every case in ``inputs``.

        Args:
            pool_cls: pooling layer class, constructed with a 2x2 output
                size, spatial scale 1.0 and sampling ratio 2.
        """
        out_size = (2, 2)
        scale = 1.0
        ratio = 2

        for feat, boxes in inputs:
            x = torch.tensor(
                np.array(feat),
                device='cuda',
                dtype=torch.float,
                requires_grad=True)
            rois = torch.tensor(
                np.array(boxes), device='cuda', dtype=torch.float)

            # one output channel per input channel
            layer = pool_cls(
                out_size,
                x.size(1),
                spatial_scale=scale,
                sampling_ratio=ratio).cuda()

            if not _USING_PARROTS:
                gradcheck(layer, (x, rois), eps=1e-2, atol=1e-2)
            else:
                gradcheck(layer, (x, rois), no_grads=[rois])

    def test_deform_roi_pool_gradcheck(self):
        """Gradient check for ``DeformRoIPoolPack`` (no-op without CUDA)."""
        if not torch.cuda.is_available():
            return
        from mmcv.ops import DeformRoIPoolPack
        self._gradcheck_pool(DeformRoIPoolPack)

    def test_modulated_deform_roi_pool_gradcheck(self):
        """Gradient check for ``ModulatedDeformRoIPoolPack`` (no-op without
        CUDA)."""
        if not torch.cuda.is_available():
            return
        from mmcv.ops import ModulatedDeformRoIPoolPack
        self._gradcheck_pool(ModulatedDeformRoIPoolPack)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_ops/test_focal_loss.py
================================================
import numpy as np
import torch

_USING_PARROTS = True
try:
    from parrots.autograd import gradcheck
except ImportError:
    from torch.autograd import gradcheck
    _USING_PARROTS = False

# torch.set_printoptions(precision=8, threshold=100)

# Test cases as (x, y) pairs: ``x`` is a 2-D nested list that the tests turn
# into a CUDA tensor of the dtype under test, ``y`` is a list of integers that
# becomes a ``long`` CUDA tensor and is passed as the second loss argument.
inputs = [
    ([[1., 0], [0, 1.]], [0, 1]),
    ([[1., 0, -1.], [0, 1., 2.]], [2, 1]),
    ([[1e-6, 2e-6, 3e-6], [4e-6, 5e-5, 6e-4], [7e-3, 8e-2, 9e-1]], [1, 2, 0]),
]

# Reference values for ``softmax_focal_loss`` (alpha=0.25, gamma=2.0,
# 'mean' reduction), one (loss, gradient w.r.t. x) pair per entry of
# ``inputs``.
softmax_outputs = [(0.00566451, [[-0.00657264, 0.00657264],
                                 [0.00657264, -0.00657264]]),
                   (0.34956908, [[0.10165970, 0.03739851, -0.13905823],
                                 [0.01227554, -0.10298023, 0.09070466]]),
                   (0.15754992, [[0.02590877, -0.05181759, 0.02590882],
                                 [0.02589641, 0.02589760, -0.05179400],
                                 [-0.07307514, 0.02234372, 0.05073142]])]

# Reference values for ``sigmoid_focal_loss`` with the same settings and
# layout as ``softmax_outputs``.
sigmoid_outputs = [(0.13562961, [[-0.00657264, 0.11185755],
                                 [0.11185755, -0.00657264]]),
                   (1.10251057, [[0.28808805, 0.11185755, -0.09602935],
                                 [0.11185755, -0.00657264, 0.40376765]]),
                   (0.42287254, [[0.07457182, -0.02485716, 0.07457201],
                                 [0.07457211, 0.07457669, -0.02483728],
                                 [-0.02462499, 0.08277918, 0.18050370]])]


class Testfocalloss(object):
    """Value and gradient tests for the softmax / sigmoid focal losses.

    Every helper returns immediately when CUDA is unavailable, so the public
    tests pass trivially on CPU-only machines.
    """

    def _test_softmax(self, dtype=torch.float):
        """Compare ``softmax_focal_loss`` and its gradient with references."""
        if not torch.cuda.is_available():
            return
        from mmcv.ops import softmax_focal_loss
        gamma, alpha = 2.0, 0.25
        for (logits, labels), (ref_loss, ref_grad) in zip(
                inputs, softmax_outputs):
            expected_grad = np.array(ref_grad)

            x = torch.from_numpy(np.array(logits)).cuda().type(dtype)
            x.requires_grad_()
            y = torch.from_numpy(np.array(labels)).cuda().long()

            loss = softmax_focal_loss(x, y, gamma, alpha, None, 'mean')
            loss.backward()

            # third positional argument of np.allclose is the relative tol.
            assert np.allclose(loss.data.cpu().numpy(), ref_loss, 1e-2)
            assert np.allclose(x.grad.data.cpu(), expected_grad, 1e-2)

    def _test_sigmoid(self, dtype=torch.float):
        """Compare ``sigmoid_focal_loss`` and its gradient with references."""
        if not torch.cuda.is_available():
            return
        from mmcv.ops import sigmoid_focal_loss
        gamma, alpha = 2.0, 0.25
        for (logits, labels), (ref_loss, ref_grad) in zip(
                inputs, sigmoid_outputs):
            expected_grad = np.array(ref_grad)

            x = torch.from_numpy(np.array(logits)).cuda().type(dtype)
            x.requires_grad_()
            y = torch.from_numpy(np.array(labels)).cuda().long()

            loss = sigmoid_focal_loss(x, y, gamma, alpha, None, 'mean')
            loss.backward()

            assert np.allclose(loss.data.cpu().numpy(), ref_loss, 1e-2)
            assert np.allclose(x.grad.data.cpu(), expected_grad, 1e-2)

    def _test_grad_softmax(self, dtype=torch.float):
        """Numerically check the gradient of ``SoftmaxFocalLoss``."""
        if not torch.cuda.is_available():
            return
        from mmcv.ops import SoftmaxFocalLoss
        gamma, alpha = 2.0, 0.25
        for logits, labels in inputs:
            x = torch.from_numpy(np.array(logits)).cuda().type(dtype)
            x.requires_grad_()
            y = torch.from_numpy(np.array(labels)).cuda().long()

            floss = SoftmaxFocalLoss(gamma, alpha)
            # The check is disabled under parrots
            # (formerly: gradcheck(floss, (x, y), no_grads=[y])).
            if not _USING_PARROTS:
                gradcheck(floss, (x, y), eps=1e-2, atol=1e-2)

    def _test_grad_sigmoid(self, dtype=torch.float):
        """Numerically check the gradient of ``SigmoidFocalLoss``."""
        if not torch.cuda.is_available():
            return
        from mmcv.ops import SigmoidFocalLoss
        gamma, alpha = 2.0, 0.25
        for logits, labels in inputs:
            x = torch.from_numpy(np.array(logits)).cuda().type(dtype)
            x.requires_grad_()
            y = torch.from_numpy(np.array(labels)).cuda().long()

            floss = SigmoidFocalLoss(gamma, alpha)
            # The check is disabled under parrots
            # (formerly: gradcheck(floss, (x, y), no_grads=[y])).
            if not _USING_PARROTS:
                gradcheck(floss, (x, y), eps=1e-2, atol=1e-2)

    # Public pytest entry points: one per (loss, dtype) combination.

    def test_softmax_float(self):
        self._test_softmax(dtype=torch.float)

    def test_softmax_half(self):
        self._test_softmax(dtype=torch.half)

    def test_sigmoid_float(self):
        self._test_sigmoid(dtype=torch.float)

    def test_sigmoid_half(self):
        self._test_sigmoid(dtype=torch.half)

    def test_grad_softmax_float(self):
        self._test_grad_softmax(dtype=torch.float)

    def test_grad_sigmoid_float(self):
        self._test_grad_sigmoid(dtype=torch.float)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_ops/test_furthest_point_sample.py
================================================
import pytest
import torch

from mmcv.ops import furthest_point_sample, furthest_point_sample_with_dist


@pytest.mark.skipif(
    not torch.cuda.is_available(), reason='requires CUDA support')
def test_fps():
    """Check ``furthest_point_sample`` on two hand-written point sets."""
    first_cloud = [
        [-0.2748, 1.0020, -1.1674],
        [0.1015, 1.3952, -1.2681],
        [-0.8070, 2.4137, -0.5845],
        [-1.0001, 2.1982, -0.5859],
        [0.3841, 1.8983, -0.7431],
    ]
    second_cloud = [
        [-1.0696, 3.0758, -0.1899],
        [-0.2559, 3.5521, -0.1402],
        [0.8164, 4.0081, -0.1839],
        [-1.1000, 3.0213, -0.8205],
        [-0.0518, 3.7251, -0.3950],
    ]
    xyz = torch.tensor([first_cloud, second_cloud]).cuda()

    # request three samples per point set
    sampled = furthest_point_sample(xyz, 3)
    reference = torch.tensor([[0, 2, 4], [0, 2, 1]]).cuda()
    assert torch.all(sampled == reference)


@pytest.mark.skipif(
    not torch.cuda.is_available(), reason='requires CUDA support')
def test_fps_with_dist():
    """Check ``furthest_point_sample_with_dist`` on two kinds of input.

    First a pairwise squared-distance matrix built from a small hand-written
    point set, then a precomputed matrix loaded from the ``.npy`` fixtures in
    ``tests/data/for_3d_ops``.
    """
    xyz = torch.tensor([[[-0.2748, 1.0020, -1.1674], [0.1015, 1.3952, -1.2681],
                         [-0.8070, 2.4137,
                          -0.5845], [-1.0001, 2.1982, -0.5859],
                         [0.3841, 1.8983, -0.7431]],
                        [[-1.0696, 3.0758,
                          -0.1899], [-0.2559, 3.5521, -0.1402],
                         [0.8164, 4.0081, -0.1839], [-1.1000, 3.0213, -0.8205],
                         [-0.0518, 3.7251, -0.3950]]]).cuda()

    expected_idx = torch.tensor([[0, 2, 4], [0, 2, 1]]).cuda()
    # pairwise squared distances between all points of each set
    xyz_square_dist = ((xyz.unsqueeze(dim=1) -
                        xyz.unsqueeze(dim=2))**2).sum(-1)
    idx = furthest_point_sample_with_dist(xyz_square_dist, 3)
    assert torch.all(idx == expected_idx)

    import os

    import numpy as np

    # Resolve the fixtures relative to this file (tests/test_ops/...) rather
    # than the current working directory, so the test no longer depends on
    # pytest being launched from the mmcv root.
    tests_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    data_dir = os.path.join(tests_dir, 'data', 'for_3d_ops')
    fps_idx = np.load(os.path.join(data_dir, 'fps_idx.npy'))
    features_for_fps_distance = np.load(
        os.path.join(data_dir, 'features_for_fps_distance.npy'))
    expected_idx = torch.from_numpy(fps_idx).cuda()
    features_for_fps_distance = torch.from_numpy(
        features_for_fps_distance).cuda()

    idx = furthest_point_sample_with_dist(features_for_fps_distance, 16)
    assert torch.all(idx == expected_idx)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_ops/test_fused_bias_leakyrelu.py
================================================
import pytest
import torch

_USING_PARROTS = True
try:
    from parrots.autograd import gradcheck
except ImportError:
    from torch.autograd import gradcheck, gradgradcheck
    _USING_PARROTS = False


class TestFusedBiasLeakyReLU(object):
    """Gradient checks for ``FusedBiasLeakyReLU`` (CUDA only)."""

    @classmethod
    def setup_class(cls):
        """Create the shared CUDA tensors; does nothing without CUDA."""
        if torch.cuda.is_available():
            cls.input_tensor = torch.randn((2, 2, 2, 2),
                                           requires_grad=True).cuda()
            cls.bias = torch.zeros(2, requires_grad=True).cuda()

    @pytest.mark.skipif(not torch.cuda.is_available(), reason='requires cuda')
    def test_gradient(self):
        """First-order numerical gradient check."""
        from mmcv.ops import FusedBiasLeakyReLU

        # parrots and torch name the step / tolerance arguments differently
        if _USING_PARROTS:
            tolerances = dict(delta=1e-4, pt_atol=1e-3)
        else:
            tolerances = dict(eps=1e-4, atol=1e-3)
        layer = FusedBiasLeakyReLU(2).cuda()
        gradcheck(layer, self.input_tensor, **tolerances)

    @pytest.mark.skipif(
        not torch.cuda.is_available() or _USING_PARROTS,
        reason='requires cuda')
    def test_gradgradient(self):
        """Second-order numerical gradient check (torch only)."""
        from mmcv.ops import FusedBiasLeakyReLU

        layer = FusedBiasLeakyReLU(2).cuda()
        gradgradcheck(layer, self.input_tensor, eps=1e-4, atol=1e-3)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_ops/test_gather_points.py
================================================
import pytest
import torch

from mmcv.ops import gather_points


@pytest.mark.skipif(
    not torch.cuda.is_available(), reason='requires CUDA support')
def test_gather_points():
    """Check ``gather_points`` against hand-computed reference values.

    ``features`` has shape (2, 3, 10) and ``idx`` has shape (2, 6); the
    expected output has shape (2, 3, 6).
    """
    batch0 = [
        [
            -1.6095, -0.1029, -0.8876, -1.2447, -2.4031, 0.3708, -1.1586,
            -1.4967, -0.4800, 0.2252
        ],
        [
            1.9138, 3.4979, 1.6854, 1.5631, 3.6776, 3.1154, 2.1705, 2.5221,
            2.0411, 3.1446
        ],
        [
            -1.4173, 0.3073, -1.4339, -1.4340, -1.2770, -0.2867, -1.4162,
            -1.4044, -1.4245, -1.4074
        ],
    ]
    batch1 = [
        [
            0.2160, 0.0842, 0.3661, -0.2749, -0.4909, -0.6066, -0.8773,
            -0.0745, -0.9496, 0.1434
        ],
        [
            1.3644, 1.8087, 1.6855, 1.9563, 1.2746, 1.9662, 0.9566, 1.8778,
            1.1437, 1.3639
        ],
        [
            -0.7172, 0.1692, 0.2241, 0.0721, -0.7540, 0.0462, -0.6227, 0.3223,
            -0.6944, -0.5294
        ],
    ]
    features = torch.tensor([batch0, batch1]).cuda()

    indices = [[0, 1, 4, 0, 0, 0], [0, 5, 6, 0, 0, 0]]
    idx = torch.tensor(indices).int().cuda()

    gathered = gather_points(features, idx)

    reference = torch.tensor([
        [[-1.6095, -0.1029, -2.4031, -1.6095, -1.6095, -1.6095],
         [1.9138, 3.4979, 3.6776, 1.9138, 1.9138, 1.9138],
         [-1.4173, 0.3073, -1.2770, -1.4173, -1.4173, -1.4173]],
        [[0.2160, -0.6066, -0.8773, 0.2160, 0.2160, 0.2160],
         [1.3644, 1.9662, 0.9566, 1.3644, 1.3644, 1.3644],
         [-0.7172, 0.0462, -0.6227, -0.7172, -0.7172, -0.7172]],
    ]).cuda()

    assert torch.allclose(gathered, reference)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_ops/test_group_points.py
================================================
import pytest
import torch

from mmcv.ops import grouping_operation


@pytest.mark.skipif(
    not torch.cuda.is_available(), reason='requires CUDA support')
def test_grouping_points():
    """Check ``grouping_operation`` against hand-computed reference values.

    ``features`` has shape (2, 3, 10) and ``idx`` has shape (2, 6, 3); the
    expected output has shape (2, 3, 6, 3).
    """
    group_indices = [
        [[0, 0, 0], [3, 3, 3], [8, 8, 8], [0, 0, 0], [0, 0, 0], [0, 0, 0]],
        [[0, 0, 0], [6, 6, 6], [9, 9, 9], [0, 0, 0], [0, 0, 0], [0, 0, 0]],
    ]
    idx = torch.tensor(group_indices).int().cuda()

    batch0 = [
        [
            0.5798, -0.7981, -0.9280, -1.3311, 1.3687, 0.9277, -0.4164,
            -1.8274, 0.9268, 0.8414
        ],
        [
            5.4247, 1.5113, 2.3944, 1.4740, 5.0300, 5.1030, 1.9360, 2.1939,
            2.1581, 3.4666
        ],
        [
            -1.6266, -1.0281, -1.0393, -1.6931, -1.3982, -0.5732, -1.0830,
            -1.7561, -1.6786, -1.6967
        ],
    ]
    batch1 = [
        [
            -0.0380, -0.1880, -1.5724, 0.6905, -0.3190, 0.7798, -0.3693,
            -0.9457, -0.2942, -1.8527
        ],
        [
            1.1773, 1.5009, 2.6399, 5.9242, 1.0962, 2.7346, 6.0865, 1.5555,
            4.3303, 2.8229
        ],
        [
            -0.6646, -0.6870, -0.1125, -0.2224, -0.3445, -1.4049, 0.4990,
            -0.7037, -0.9924, 0.0386
        ],
    ]
    features = torch.tensor([batch0, batch1]).cuda()

    grouped = grouping_operation(features, idx)

    reference = torch.tensor([
        [
            [[0.5798, 0.5798, 0.5798],
             [-1.3311, -1.3311, -1.3311],
             [0.9268, 0.9268, 0.9268],
             [0.5798, 0.5798, 0.5798],
             [0.5798, 0.5798, 0.5798],
             [0.5798, 0.5798, 0.5798]],
            [[5.4247, 5.4247, 5.4247],
             [1.4740, 1.4740, 1.4740],
             [2.1581, 2.1581, 2.1581],
             [5.4247, 5.4247, 5.4247],
             [5.4247, 5.4247, 5.4247],
             [5.4247, 5.4247, 5.4247]],
            [[-1.6266, -1.6266, -1.6266],
             [-1.6931, -1.6931, -1.6931],
             [-1.6786, -1.6786, -1.6786],
             [-1.6266, -1.6266, -1.6266],
             [-1.6266, -1.6266, -1.6266],
             [-1.6266, -1.6266, -1.6266]],
        ],
        [
            [[-0.0380, -0.0380, -0.0380],
             [-0.3693, -0.3693, -0.3693],
             [-1.8527, -1.8527, -1.8527],
             [-0.0380, -0.0380, -0.0380],
             [-0.0380, -0.0380, -0.0380],
             [-0.0380, -0.0380, -0.0380]],
            [[1.1773, 1.1773, 1.1773],
             [6.0865, 6.0865, 6.0865],
             [2.8229, 2.8229, 2.8229],
             [1.1773, 1.1773, 1.1773],
             [1.1773, 1.1773, 1.1773],
             [1.1773, 1.1773, 1.1773]],
            [[-0.6646, -0.6646, -0.6646],
             [0.4990, 0.4990, 0.4990],
             [0.0386, 0.0386, 0.0386],
             [-0.6646, -0.6646, -0.6646],
             [-0.6646, -0.6646, -0.6646],
             [-0.6646, -0.6646, -0.6646]],
        ],
    ]).cuda()
    assert torch.allclose(grouped, reference)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_ops/test_info.py
================================================
import torch


class TestInfo(object):
    """Smoke test for the build-information helpers in ``mmcv.ops``."""

    def test_info(self):
        """Both version getters must return something other than ``None``.

        Does nothing when CUDA is unavailable.
        """
        if torch.cuda.is_available():
            from mmcv.ops import (get_compiler_version,
                                  get_compiling_cuda_version)
            compiler_version = get_compiler_version()
            cuda_version = get_compiling_cuda_version()
            assert compiler_version is not None
            assert cuda_version is not None


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_ops/test_iou3d.py
================================================
import numpy as np
import pytest
import torch

from mmcv.ops import boxes_iou_bev, nms_bev, nms_normal_bev


@pytest.mark.skipif(
    not torch.cuda.is_available(), reason='requires CUDA support')
def test_boxes_iou_bev():
    """Compare ``boxes_iou_bev`` with a precomputed 3x3 IoU matrix."""
    first_set = [[1.0, 1.0, 3.0, 4.0, 0.5], [2.0, 2.0, 3.0, 4.0, 0.6],
                 [7.0, 7.0, 8.0, 8.0, 0.4]]
    second_set = [[0.0, 2.0, 2.0, 5.0, 0.3], [2.0, 1.0, 3.0, 3.0, 0.5],
                  [5.0, 5.0, 6.0, 7.0, 0.4]]
    reference = [[0.2621, 0.2948, 0.0000], [0.0549, 0.1587, 0.0000],
                 [0.0000, 0.0000, 0.0000]]

    np_boxes1 = np.asarray(first_set, dtype=np.float32)
    np_boxes2 = np.asarray(second_set, dtype=np.float32)
    np_expect_ious = np.asarray(reference, dtype=np.float32)

    ious = boxes_iou_bev(
        torch.from_numpy(np_boxes1).cuda(),
        torch.from_numpy(np_boxes2).cuda())
    assert np.allclose(ious.cpu().numpy(), np_expect_ious, atol=1e-4)


@pytest.mark.skipif(
    not torch.cuda.is_available(), reason='requires CUDA support')
def test_nms_bev():
    """Check the indices kept by ``nms_bev`` at a threshold of 0.3."""
    box_rows = [[6.0, 3.0, 8.0, 7.0, 2.0], [3.0, 6.0, 9.0, 11.0, 1.0],
                [3.0, 7.0, 10.0, 12.0, 1.0], [1.0, 4.0, 13.0, 7.0, 3.0]]
    score_values = [0.6, 0.9, 0.7, 0.2]
    expected_keep = np.array([1, 0, 3])

    boxes = torch.from_numpy(np.array(box_rows, dtype=np.float32)).cuda()
    scores = torch.from_numpy(np.array(score_values, dtype=np.float32)).cuda()
    kept = nms_bev(boxes, scores, thresh=0.3)

    assert np.allclose(kept.cpu().numpy(), expected_keep)


@pytest.mark.skipif(
    not torch.cuda.is_available(), reason='requires CUDA support')
def test_nms_normal_bev():
    """Check the indices kept by ``nms_normal_bev`` at a threshold of 0.3."""
    box_rows = [[6.0, 3.0, 8.0, 7.0, 2.0], [3.0, 6.0, 9.0, 11.0, 1.0],
                [3.0, 7.0, 10.0, 12.0, 1.0], [1.0, 4.0, 13.0, 7.0, 3.0]]
    score_values = [0.6, 0.9, 0.7, 0.2]
    expected_keep = np.array([1, 0, 3])

    boxes = torch.from_numpy(np.array(box_rows, dtype=np.float32)).cuda()
    scores = torch.from_numpy(np.array(score_values, dtype=np.float32)).cuda()
    kept = nms_normal_bev(boxes, scores, thresh=0.3)

    assert np.allclose(kept.cpu().numpy(), expected_keep)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_ops/test_knn.py
================================================
import pytest
import torch

from mmcv.ops import knn


@pytest.mark.skipif(
    not torch.cuda.is_available(), reason='requires CUDA support')
def test_knn():
    """Compare ``knn`` with a brute-force ``topk`` over squared distances.

    Three calls are checked: the default argument layout, the layout with
    both inputs transposed and the fourth argument set to ``True``, and a
    self-query of ``xyz`` against itself.
    """
    k = 5

    def brute_force(queries, points):
        """Indices of the ``k`` smallest squared distances, ``knn``-shaped."""
        tiled_queries = queries.unsqueeze(2).repeat(1, 1, points.shape[1], 1)
        tiled_points = points.unsqueeze(1).repeat(1, queries.shape[1], 1, 1)
        sq_dist = ((tiled_queries - tiled_points) *
                   (tiled_queries - tiled_points)).sum(-1)
        nearest = sq_dist.topk(k=k, dim=2, largest=False)[1]
        return nearest.transpose(2, 1)

    new_xyz = torch.tensor([
        [[-0.0740, 1.3147, -1.3625],
         [-2.2769, 2.7817, -0.2334],
         [-0.4003, 2.4666, -0.5116],
         [-0.0740, 1.3147, -1.3625],
         [-0.0740, 1.3147, -1.3625]],
        [[-2.0289, 2.4952, -0.1708],
         [-2.0668, 6.0278, -0.4875],
         [0.4066, 1.4211, -0.2947],
         [-2.0289, 2.4952, -0.1708],
         [-2.0289, 2.4952, -0.1708]],
    ]).cuda()

    xyz = torch.tensor([
        [[-0.0740, 1.3147, -1.3625],
         [0.5555, 1.0399, -1.3634],
         [-0.4003, 2.4666, -0.5116],
         [-0.5251, 2.4379, -0.8466],
         [-0.9691, 1.1418, -1.3733],
         [-0.2232, 0.9561, -1.3626],
         [-2.2769, 2.7817, -0.2334],
         [-0.2822, 1.3192, -1.3645],
         [0.1533, 1.5024, -1.0432],
         [0.4917, 1.1529, -1.3496]],
        [[-2.0289, 2.4952, -0.1708],
         [-0.7188, 0.9956, -0.5096],
         [-2.0668, 6.0278, -0.4875],
         [-1.9304, 3.3092, 0.6610],
         [0.0949, 1.4332, 0.3140],
         [-1.2879, 2.0008, -0.7791],
         [-0.7252, 0.9611, -0.6371],
         [0.4066, 1.4211, -0.2947],
         [0.3220, 1.4447, 0.3548],
         [-0.9744, 2.3856, -1.2000]],
    ]).cuda()

    # default layout
    idx = knn(k, xyz, new_xyz)
    expected_idx = brute_force(new_xyz, xyz)
    assert torch.all(idx == expected_idx)

    # transposed layout; same neighbours expected
    xyz_t = xyz.transpose(1, 2).contiguous()
    new_xyz_t = new_xyz.transpose(1, 2).contiguous()
    idx = knn(k, xyz_t, new_xyz_t, True)
    assert torch.all(idx == expected_idx)

    # query the point set against itself
    idx = knn(k, xyz, xyz)
    expected_idx = brute_force(xyz, xyz)
    assert torch.all(idx == expected_idx)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_ops/test_masked_conv2d.py
================================================
import torch


class TestMaskedConv2d(object):
    """Smoke test for the ``MaskedConv2d`` op."""

    def test_masked_conv2d(self):
        """Run a single forward pass on the GPU and check a result comes back.

        Does nothing (and therefore passes) when CUDA is not available.
        """
        if torch.cuda.is_available():
            # Imported lazily so that collecting this file does not need the
            # op when the body is not going to run.
            from mmcv.ops import MaskedConv2d
            feats = torch.randn(
                1, 3, 16, 16, requires_grad=True, device='cuda')
            mask = torch.randn(1, 16, 16, requires_grad=True, device='cuda')
            layer = MaskedConv2d(3, 3, 3).cuda()
            assert layer(feats, mask) is not None


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_ops/test_merge_cells.py
================================================
"""
CommandLine:
    pytest tests/test_ops/test_merge_cells.py
"""
import torch
import torch.nn.functional as F

from mmcv.ops.merge_cells import (BaseMergeCell, ConcatCell, GlobalPoolingCell,
                                  SumCell)


def test_sum_cell():
    """SumCell output size follows ``out_size``.

    When ``out_size`` is omitted the result is expected to have the size of
    the first (32x32) input.
    """
    feat_large = torch.randn([2, 256, 32, 32])
    feat_small = torch.randn([2, 256, 16, 16])
    cell = SumCell(256, 256)

    # (extra keyword arguments, tensor whose size the output must match)
    cases = (
        ({'out_size': feat_large.shape[-2:]}, feat_large),
        ({'out_size': feat_small.shape[-2:]}, feat_small),
        ({}, feat_large),
    )
    for extra_kwargs, reference in cases:
        merged = cell(feat_large, feat_small, **extra_kwargs)
        assert merged.size() == reference.size()


def test_concat_cell():
    """ConcatCell output size follows ``out_size``.

    When ``out_size`` is omitted the result is expected to have the size of
    the first (32x32) input.
    """
    feat_large = torch.randn([2, 256, 32, 32])
    feat_small = torch.randn([2, 256, 16, 16])
    cell = ConcatCell(256, 256)

    # (extra keyword arguments, tensor whose size the output must match)
    cases = (
        ({'out_size': feat_large.shape[-2:]}, feat_large),
        ({'out_size': feat_small.shape[-2:]}, feat_small),
        ({}, feat_large),
    )
    for extra_kwargs, reference in cases:
        merged = cell(feat_large, feat_small, **extra_kwargs)
        assert merged.size() == reference.size()


def test_global_pool_cell():
    """GlobalPoolingCell keeps the input size, with and without its out conv."""
    feat_a = torch.randn([2, 256, 32, 32])
    feat_b = torch.randn([2, 256, 32, 32])

    # Each entry is (positional args, keyword args) for the constructor; the
    # cell is built inside the loop so construction order stays unchanged.
    constructor_args = (
        ((), {'with_out_conv': False}),
        ((256, 256), {}),
    )
    for args, kwargs in constructor_args:
        cell = GlobalPoolingCell(*args, **kwargs)
        result = cell(feat_a, feat_b, out_size=feat_a.shape[-2:])
        assert (result.size() == feat_a.size())


def test_resize_methods():
    """``BaseMergeCell._resize`` must agree with the plain torch functionals.

    Same-size and larger targets are compared against ``F.interpolate`` for
    each upsample mode; a smaller target is compared against
    ``F.max_pool2d``.
    """
    feats = torch.randn([2, 256, 128, 128])

    for mode in ('nearest', 'bilinear'):
        cell = BaseMergeCell(upsample_mode=mode)
        for size in ((128, 128), (256, 256)):
            resized = cell._resize(feats, size)
            reference = F.interpolate(feats, size=size, mode=mode)
            assert torch.equal(resized, reference)

    # Resize to a smaller size.
    small_size = (64, 64)
    resized = BaseMergeCell()._resize(feats, small_size)
    factor = feats.shape[-1] // small_size[-1]
    reference = F.max_pool2d(feats, kernel_size=factor, stride=factor)
    assert (resized == reference).all()


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_ops/test_min_area_polygons.py
================================================
import numpy as np
import pytest
import torch

from mmcv.ops import min_area_polygons

# Input fixture: two rows of 18 values each (presumably nine flattened
# (x, y) points per row -- confirm against the op's documentation).
np_pointsets = np.asarray([
    [
        1.0, 1.0, 2.0, 2.0, 1.0, 2.0, 2.0, 1.0, 1.0,
        3.0, 3.0, 1.0, 2.0, 3.0, 3.0, 2.0, 1.5, 1.5,
    ],
    [
        1.0, 1.0, 8.0, 8.0, 1.0, 2.0, 2.0, 1.0, 1.0,
        3.0, 3.0, 1.0, 2.0, 3.0, 3.0, 2.0, 1.5, 1.5,
    ],
])

# Expected output for the rows above: eight values per row.
expected_polygons = np.asarray([
    [3.0000, 1.0000, 1.0000, 1.0000, 1.0000, 3.0000, 3.0000, 3.0000],
    [8.0, 8.0, 2.3243, 0.0541, 0.0541, 1.6757, 5.7297, 9.6216],
])


@pytest.mark.skipif(
    not torch.cuda.is_available(), reason='requires CUDA support')
def test_min_area_polygons():
    """Compare ``min_area_polygons`` on the GPU with the stored reference."""
    pointsets = torch.from_numpy(np_pointsets).cuda().float()
    polygons = min_area_polygons(pointsets).cpu().numpy()

    assert np.allclose(polygons, expected_polygons, atol=1e-4)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_ops/test_modulated_deform_conv.py
================================================
import os

import numpy
import pytest
import torch

from mmcv.utils import TORCH_VERSION, digit_version

try:
    # If PyTorch version >= 1.6.0 and fp16 is enabled, torch.cuda.amp.autocast
    # would be imported and used; we should test if our modules support it.
    from torch.cuda.amp import autocast
except ImportError:
    pass

cur_dir = os.path.dirname(os.path.abspath(__file__))

# Reference data for the modulated deformable convolution test.

# Input fed to the layer (nested as 1 x 1 x 3 x 3).
input_t = [[[
    [1., 2., 3.],
    [1., 2., 3.],
    [1., 2., 3.],
]]]

# Expected forward output (nested as 1 x 1 x 4 x 4).
output_t = [[[
    [0.5, 1.5, 2.5, 1.5],
    [1.0, 3.0, 5.0, 3.0],
    [1.0, 3.0, 5.0, 3.0],
    [0.5, 1.5, 2.5, 1.5],
]]]

# Expected gradient with respect to the input.
input_grad = [[[
    [2., 2., 2.],
    [2., 2., 2.],
    [2., 2., 2.],
]]]

# Expected gradient of the main convolution weight.
dcn_w_grad = [[[[9., 9.], [9., 9.]]]]

# Expected gradient of the offset-convolution weight (12 entries of
# 1 x 2 x 2).
dcn_offset_w_grad = [
    [[[-7.0, -4.0], [0.0, 0.0]]],
    [[[-9.0, 7.5], [-6.0, 5.0]]],
    [[[-4.0, -7.0], [0.0, 0.0]]],
    [[[-7.5, -9.0], [-5.0, -6.0]]],
    [[[-7.0, -4.0], [-7.0, -4.0]]],
    [[[-6.0, 5.0], [-9.0, 7.5]]],
    [[[-4.0, -7.0], [-4.0, -7.0]]],
    [[[-5.0, -6.0], [-7.5, -9.0]]],
    [[[10.5, 6.0], [7.0, 4.0]]],
    [[[6.0, 10.5], [4.0, 7.0]]],
    [[[7.0, 4.0], [10.5, 6.0]]],
    [[[4.0, 7.0], [6.0, 10.5]]],
]

# Expected gradient of the offset-convolution bias (12 entries).
dcn_offset_b_grad = [
    -3.0, -1.5, -3.0, -1.5,
    -3.0, -1.5, -3.0, -1.5,
    4.5, 4.5, 4.5, 4.5,
]


class TestMdconv(object):
    """Forward/backward checks for ``ModulatedDeformConv2dPack``.

    Results are compared with the module-level reference values
    (``output_t``, ``input_grad``, ``dcn_w_grad``, ``dcn_offset_w_grad`` and
    ``dcn_offset_b_grad``).
    """

    @staticmethod
    def _new_dcn():
        """Build the 1-in/1-out, 2x2-kernel layer shared by every case."""
        # Imported lazily so the module can be collected before the op is
        # needed.
        from mmcv.ops import ModulatedDeformConv2dPack
        return ModulatedDeformConv2dPack(
            1,
            1,
            kernel_size=(2, 2),
            stride=1,
            padding=1,
            deform_groups=1,
            bias=False)

    @staticmethod
    def _assert_matches_reference(dcn, inp, output):
        """Compare the output and all gradients with the reference values.

        Args:
            dcn: Layer on which ``backward`` has already been run.
            inp: Input tensor whose ``.grad`` has been populated.
            output: Forward result of ``dcn(inp)``.
        """

        def as_numpy(tensor):
            return tensor.cpu().detach().numpy()

        # The third positional argument of ``numpy.allclose`` is ``rtol``.
        assert numpy.allclose(as_numpy(output), output_t, 1e-2)
        assert numpy.allclose(as_numpy(inp.grad), input_grad, 1e-2)
        assert numpy.allclose(as_numpy(dcn.weight.grad), dcn_w_grad, 1e-2)
        assert numpy.allclose(
            as_numpy(dcn.conv_offset.weight.grad), dcn_offset_w_grad, 1e-2)
        assert numpy.allclose(
            as_numpy(dcn.conv_offset.bias.grad), dcn_offset_b_grad, 1e-2)

    def _test_mdconv(self, dtype=torch.float, device='cuda'):
        """Run one forward/backward pass with an explicitly typed model.

        Args:
            dtype: Data type used for both the input and the layer.
            device: ``'cuda'`` or ``'cpu'``. The test is skipped when
                ``'cuda'`` is requested but unavailable.
        """
        if not torch.cuda.is_available() and device == 'cuda':
            pytest.skip('test requires GPU')
        input = torch.tensor(input_t, dtype=dtype, device=device)
        input.requires_grad = True

        dcn = self._new_dcn()
        if device == 'cuda':
            dcn.cuda()

        dcn.weight.data.fill_(1.)
        dcn.type(dtype)
        output = dcn(input)
        output.sum().backward()
        self._assert_matches_reference(dcn, input, output)

    def _test_amp_mdconv(self, input_dtype=torch.float):
        """The function to test amp released on pytorch 1.6.0.

        The type of input data might be torch.float or torch.half,
        so we should test mdconv in both cases. With amp, the data
        type of model will NOT be set manually.

        Args:
            input_dtype: torch.float or torch.half.
        """
        if not torch.cuda.is_available():
            # Report a skip instead of returning silently, consistent with
            # ``_test_mdconv``.
            pytest.skip('test requires GPU')
        input = torch.tensor(input_t).cuda().type(input_dtype)
        input.requires_grad = True

        dcn = self._new_dcn().cuda()
        dcn.weight.data.fill_(1.)
        output = dcn(input)
        output.sum().backward()
        self._assert_matches_reference(dcn, input, output)

    def test_mdconv(self):
        """Exercise CPU and GPU in several dtypes, then under autocast."""
        self._test_mdconv(torch.double, device='cpu')
        self._test_mdconv(torch.float, device='cpu')
        self._test_mdconv(torch.double)
        self._test_mdconv(torch.float)
        self._test_mdconv(torch.half)

        # test amp when torch version >= '1.6.0', the type of
        # input data for mdconv might be torch.float or torch.half
        if (TORCH_VERSION != 'parrots'
                and digit_version(TORCH_VERSION) >= digit_version('1.6.0')):
            with autocast(enabled=True):
                self._test_amp_mdconv(torch.float)
                self._test_amp_mdconv(torch.half)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_ops/test_ms_deformable_attn.py
================================================
import pytest
import torch

from mmcv.ops.multi_scale_deform_attn import (
    MultiScaleDeformableAttention, MultiScaleDeformableAttnFunction,
    multi_scale_deformable_attn_pytorch)

# Use the parrots ``gradcheck`` when parrots can be imported; otherwise fall
# back to the PyTorch one. ``_USING_PARROTS`` records which one was chosen so
# callers can pass the arguments each implementation expects.
_USING_PARROTS = True
try:
    from parrots.autograd import gradcheck
except ImportError:
    from torch.autograd import gradcheck
    _USING_PARROTS = False


@pytest.mark.parametrize('device_type', [
    'cpu',
    pytest.param(
        'cuda:0',
        marks=pytest.mark.skipif(
            not torch.cuda.is_available(), reason='requires CUDA support'))
])
def test_multiscale_deformable_attention(device_type):
    """Constructor validation plus one forward pass on ``device_type``."""
    # embed_dims must be divisible by num_heads,
    with pytest.raises(ValueError):
        MultiScaleDeformableAttention(embed_dims=256, num_heads=7)

    device = torch.device(device_type)
    attn = MultiScaleDeformableAttention(
        embed_dims=3, num_levels=2, num_heads=3)
    attn.init_weights()

    num_query, bs, embed_dims = 5, 1, 3
    # Random tensors are drawn in the order query, key, reference points.
    query = torch.rand(num_query, bs, embed_dims).to(device)
    key = torch.rand(num_query, bs, embed_dims).to(device)
    forward_kwargs = dict(
        spatial_shapes=torch.Tensor([[2, 2], [1, 1]]).long().to(device),
        level_start_index=torch.Tensor([0, 4]).long().to(device),
    )
    forward_kwargs['reference_points'] = torch.rand(bs, num_query, 2,
                                                    2).to(device)

    attn.to(device)
    # The key tensor is passed as both key and value.
    attn(query, key, key, **forward_kwargs)


def test_forward_multi_scale_deformable_attn_pytorch():
    """Smoke test: the pure-PyTorch implementation runs on double inputs.

    The result is not checked; the test only verifies that the call
    completes.
    """
    N, M, D = 1, 2, 2
    Lq, L, P = 2, 2, 2
    shapes = torch.as_tensor([(6, 4), (3, 2)], dtype=torch.long)
    # Total number of value positions summed over all levels.
    S = sum((H * W).item() for H, W in shapes)

    torch.manual_seed(3)
    value = torch.rand(N, S, M, D) * 0.01
    sampling_locations = torch.rand(N, Lq, M, L, P, 2)
    attention_weights = torch.rand(N, Lq, M, L, P) + 1e-5
    # Normalise in place by the sum over the last two dimensions.
    weight_total = attention_weights.sum(-1, keepdim=True)
    weight_total = weight_total.sum(-2, keepdim=True)
    attention_weights /= weight_total

    result = multi_scale_deformable_attn_pytorch(
        value.double(), shapes, sampling_locations.double(),
        attention_weights.double())
    result.detach()


@pytest.mark.skipif(
    not torch.cuda.is_available(), reason='requires CUDA support')
def test_forward_equal_with_pytorch_double():
    """CUDA op and pure-PyTorch implementation must agree in float64."""
    N, M, D = 1, 2, 2
    Lq, L, P = 2, 2, 2
    shapes = torch.as_tensor([(6, 4), (3, 2)], dtype=torch.long).cuda()
    # Start offset of each level: zero followed by the running total of the
    # per-level sizes, without the last entry.
    first_offset = shapes.new_zeros((1, ))
    later_offsets = shapes.prod(1).cumsum(0)[:-1]
    level_start_index = torch.cat((first_offset, later_offsets))
    S = sum((H * W).item() for H, W in shapes)

    torch.manual_seed(3)
    value = torch.rand(N, S, M, D).cuda() * 0.01
    sampling_locations = torch.rand(N, Lq, M, L, P, 2).cuda()
    attention_weights = torch.rand(N, Lq, M, L, P).cuda() + 1e-5
    # Normalise in place by the sum over the last two dimensions.
    weight_total = attention_weights.sum(-1, keepdim=True)
    weight_total = weight_total.sum(-2, keepdim=True)
    attention_weights /= weight_total
    im2col_step = 2

    reference = multi_scale_deformable_attn_pytorch(
        value.double(), shapes, sampling_locations.double(),
        attention_weights.double())
    output_pytorch = reference.detach().cpu()

    from_op = MultiScaleDeformableAttnFunction.apply(
        value.double(), shapes, level_start_index, sampling_locations.double(),
        attention_weights.double(), im2col_step)
    output_cuda = from_op.detach().cpu()

    assert torch.allclose(output_cuda, output_pytorch)
    abs_err = (output_cuda - output_pytorch).abs()
    max_abs_err = abs_err.max()
    max_rel_err = (abs_err / output_pytorch.abs()).max()
    assert max_abs_err < 1e-18
    assert max_rel_err < 1e-15


@pytest.mark.skipif(
    not torch.cuda.is_available(), reason='requires CUDA support')
def test_forward_equal_with_pytorch_float():
    """CUDA op and pure-PyTorch implementation must agree in float32."""
    N, M, D = 1, 2, 2
    Lq, L, P = 2, 2, 2
    shapes = torch.as_tensor([(6, 4), (3, 2)], dtype=torch.long).cuda()
    # Start offset of each level: zero followed by the running total of the
    # per-level sizes, without the last entry.
    first_offset = shapes.new_zeros((1, ))
    later_offsets = shapes.prod(1).cumsum(0)[:-1]
    level_start_index = torch.cat((first_offset, later_offsets))
    S = sum((H * W).item() for H, W in shapes)

    torch.manual_seed(3)
    value = torch.rand(N, S, M, D).cuda() * 0.01
    sampling_locations = torch.rand(N, Lq, M, L, P, 2).cuda()
    attention_weights = torch.rand(N, Lq, M, L, P).cuda() + 1e-5
    # Normalise in place by the sum over the last two dimensions.
    weight_total = attention_weights.sum(-1, keepdim=True)
    weight_total = weight_total.sum(-2, keepdim=True)
    attention_weights /= weight_total
    im2col_step = 2

    reference = multi_scale_deformable_attn_pytorch(
        value, shapes, sampling_locations, attention_weights)
    output_pytorch = reference.detach().cpu()

    from_op = MultiScaleDeformableAttnFunction.apply(
        value, shapes, level_start_index, sampling_locations,
        attention_weights, im2col_step)
    output_cuda = from_op.detach().cpu()

    assert torch.allclose(output_cuda, output_pytorch, rtol=1e-2, atol=1e-3)
    abs_err = (output_cuda - output_pytorch).abs()
    max_abs_err = abs_err.max()
    max_rel_err = (abs_err / output_pytorch.abs()).max()
    assert max_abs_err < 1e-9
    assert max_rel_err < 1e-6


@pytest.mark.skipif(
    not torch.cuda.is_available(), reason='requires CUDA support')
@pytest.mark.parametrize('channels', [
    4,
    30,
    32,
    64,
    71,
    1025,
])
def test_gradient_numerical(channels,
                            grad_value=True,
                            grad_sampling_loc=True,
                            grad_attn_weight=True):
    """Run ``gradcheck`` on the CUDA op for several channel counts.

    Args:
        channels: Size of the last dimension of ``value``.
        grad_value: Whether ``value`` requires grad.
        grad_sampling_loc: Whether ``sampling_locations`` requires grad.
        grad_attn_weight: Whether ``attention_weights`` requires grad.
    """
    N, M, _ = 1, 2, 2
    Lq, L, P = 2, 2, 2
    shapes = torch.as_tensor([(3, 2), (2, 1)], dtype=torch.long).cuda()
    first_offset = shapes.new_zeros((1, ))
    later_offsets = shapes.prod(1).cumsum(0)[:-1]
    level_start_index = torch.cat((first_offset, later_offsets))
    S = sum((H * W).item() for H, W in shapes)

    value = torch.rand(N, S, M, channels).cuda() * 0.01
    sampling_locations = torch.rand(N, Lq, M, L, P, 2).cuda()
    attention_weights = torch.rand(N, Lq, M, L, P).cuda() + 1e-5
    # Normalise in place by the sum over the last two dimensions.
    weight_total = attention_weights.sum(-1, keepdim=True)
    weight_total = weight_total.sum(-2, keepdim=True)
    attention_weights /= weight_total
    im2col_step = 2

    func = MultiScaleDeformableAttnFunction.apply

    value.requires_grad = grad_value
    sampling_locations.requires_grad = grad_sampling_loc
    attention_weights.requires_grad = grad_attn_weight

    check_inputs = (value.double(), shapes, level_start_index,
                    sampling_locations.double(), attention_weights.double(),
                    im2col_step)
    # Only the parrots gradcheck is given the ``no_grads`` argument.
    extra_kwargs = {}
    if _USING_PARROTS:
        extra_kwargs['no_grads'] = [shapes, level_start_index]
    assert gradcheck(func, check_inputs, **extra_kwargs)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_ops/test_nms.py
================================================
import numpy as np
import pytest
import torch


class Testnms(object):
    """Tests for ``nms``, ``soft_nms``, ``nms_match`` and ``batched_nms``."""

    @staticmethod
    def _sample_boxes_and_scores():
        """Return fresh CPU tensors for the four-box fixture.

        New arrays are created on every call so tests never share storage.
        """
        np_boxes = np.array([[6.0, 3.0, 8.0, 7.0], [3.0, 6.0, 9.0, 11.0],
                             [3.0, 7.0, 10.0, 12.0], [1.0, 4.0, 13.0, 7.0]],
                            dtype=np.float32)
        np_scores = np.array([0.6, 0.9, 0.7, 0.2], dtype=np.float32)
        return torch.from_numpy(np_boxes), torch.from_numpy(np_scores)

    def test_nms_allclose(self):
        """``nms`` gives the same reference result on CPU and GPU."""
        if not torch.cuda.is_available():
            # Report a skip rather than a silent pass.
            pytest.skip('test requires GPU')
        from mmcv.ops import nms
        np_inds = np.array([1, 0, 3])
        np_dets = np.array([[3.0, 6.0, 9.0, 11.0, 0.9],
                            [6.0, 3.0, 8.0, 7.0, 0.6],
                            [1.0, 4.0, 13.0, 7.0, 0.2]])
        boxes, scores = self._sample_boxes_and_scores()
        for device in ('cpu', 'cuda'):
            dets, inds = nms(
                boxes.to(device),
                scores.to(device),
                iou_threshold=0.3,
                offset=0)
            assert np.allclose(dets.cpu().numpy(), np_dets)
            assert np.allclose(inds.cpu().numpy(), np_inds)

    def test_softnms_allclose(self):
        """``soft_nms`` matches the stored results for every method."""
        if not torch.cuda.is_available():
            # Report a skip rather than a silent pass.
            pytest.skip('test requires GPU')
        from mmcv.ops import soft_nms

        np_output = {
            'linear': {
                'dets':
                np.array(
                    [[3., 6., 9., 11., 0.9], [6., 3., 8., 7., 0.6],
                     [3., 7., 10., 12., 0.29024392], [1., 4., 13., 7., 0.2]],
                    dtype=np.float32),
                'inds':
                np.array([1, 0, 2, 3], dtype=np.int64)
            },
            'gaussian': {
                'dets':
                np.array([[3., 6., 9., 11., 0.9], [6., 3., 8., 7., 0.59630775],
                          [3., 7., 10., 12., 0.35275510],
                          [1., 4., 13., 7., 0.18650459]],
                         dtype=np.float32),
                'inds':
                np.array([1, 0, 2, 3], dtype=np.int64)
            },
            'naive': {
                'dets':
                np.array([[3., 6., 9., 11., 0.9], [6., 3., 8., 7., 0.6],
                          [1., 4., 13., 7., 0.2]],
                         dtype=np.float32),
                'inds':
                np.array([1, 0, 3], dtype=np.int64)
            }
        }

        boxes, scores = self._sample_boxes_and_scores()

        configs = [[0.3, 0.5, 0.01, 'linear'], [0.3, 0.5, 0.01, 'gaussian'],
                   [0.3, 0.5, 0.01, 'naive']]

        # The GPU pass is only run when not on parrots.
        devices = ['cpu']
        if torch.__version__ != 'parrots':
            devices.append('cuda')

        for device in devices:
            dev_boxes = boxes.to(device)
            dev_scores = scores.to(device)
            for iou, sig, mscore, m in configs:
                dets, inds = soft_nms(
                    dev_boxes,
                    dev_scores,
                    iou_threshold=iou,
                    sigma=sig,
                    min_score=mscore,
                    method=m)
                assert np.allclose(dets.cpu().numpy(), np_output[m]['dets'])
                assert np.allclose(inds.cpu().numpy(), np_output[m]['inds'])

    def test_nms_match(self):
        """``nms_match`` groups agree with ``nms`` for ndarray and tensor."""
        if not torch.cuda.is_available():
            # Report a skip rather than a silent pass.
            pytest.skip('test requires GPU')
        from mmcv.ops import nms, nms_match
        iou_thr = 0.6
        # empty input
        empty_dets = np.array([])
        assert len(nms_match(empty_dets, iou_thr)) == 0

        # non empty ndarray input
        np_dets = np.array(
            [[49.1, 32.4, 51.0, 35.9, 0.9], [49.3, 32.9, 51.0, 35.3, 0.9],
             [35.3, 11.5, 39.9, 14.5, 0.4], [35.2, 11.7, 39.7, 15.7, 0.3]],
            dtype=np.float32)
        np_groups = nms_match(np_dets, iou_thr)
        assert isinstance(np_groups[0], np.ndarray)
        assert len(np_groups) == 2
        tensor_dets = torch.from_numpy(np_dets)
        boxes = tensor_dets[:, :4]
        scores = tensor_dets[:, 4]
        nms_keep_inds = nms(boxes.contiguous(), scores.contiguous(),
                            iou_thr)[1]
        # The first index of every group should be one of the boxes that
        # plain nms keeps.
        assert {g[0].item() for g in np_groups} == set(nms_keep_inds.tolist())

        # non empty tensor input
        tensor_dets = torch.from_numpy(np_dets)
        tensor_groups = nms_match(tensor_dets, iou_thr)
        assert isinstance(tensor_groups[0], torch.Tensor)
        for i in range(len(tensor_groups)):
            assert np.equal(tensor_groups[i].numpy(), np_groups[i]).all()

        # input of wrong shape
        wrong_dets = np.zeros((2, 3))
        with pytest.raises(AssertionError):
            nms_match(wrong_dets, iou_thr)

    def test_batched_nms(self):
        """``batched_nms`` gives the same result with and without splitting."""
        import mmcv
        from mmcv.ops import batched_nms

        # NOTE: the path is relative to the working directory.
        results = mmcv.load('./tests/data/batched_nms_data.pkl')

        def run(cfg):
            # Tensors are rebuilt from the loaded arrays on every call.
            return batched_nms(
                torch.from_numpy(results['boxes']),
                torch.from_numpy(results['scores']),
                torch.from_numpy(results['idxs']),
                cfg,
                class_agnostic=False)

        nms_max_num = 100
        nms_cfg = dict(
            type='nms',
            iou_threshold=0.7,
            score_threshold=0.5,
            max_num=nms_max_num)
        boxes, keep = run(nms_cfg)

        nms_cfg.update(split_thr=100)
        seq_boxes, seq_keep = run(nms_cfg)

        assert torch.equal(keep, seq_keep)
        assert torch.equal(boxes, seq_boxes)
        assert torch.equal(keep,
                           torch.from_numpy(results['keep'][:nms_max_num]))

        nms_cfg = dict(type='soft_nms', iou_threshold=0.7)
        boxes, keep = run(nms_cfg)

        nms_cfg.update(split_thr=100)
        seq_boxes, seq_keep = run(nms_cfg)

        assert torch.equal(keep, seq_keep)
        assert torch.equal(boxes, seq_boxes)

        # test skip nms when `nms_cfg` is None
        seq_boxes, seq_keep = run(None)
        assert len(seq_keep) == len(results['boxes'])
        # assert score is descending order
        assert ((seq_boxes[:, -1][1:] - seq_boxes[:, -1][:-1]) < 0).all()


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_ops/test_nms_rotated.py
================================================
import numpy as np
import pytest
import torch


@pytest.mark.skipif(
    not torch.cuda.is_available(),
    reason='GPU is required to test NMSRotated op')
class TestNmsRotated:
    """GPU tests for ``nms_rotated``, with and without per-box labels."""

    @staticmethod
    def _check_cw_and_ccw(boxes, *extra_args):
        """Run ``nms_rotated`` under both angle definitions.

        Args:
            boxes: CUDA tensor with six columns; the first five are passed
                as boxes and the last one as scores. It is modified in
                place (the second-to-last column is negated).
            *extra_args: Positional arguments forwarded after the IoU
                threshold (the labels tensor, when there is one).
        """
        from mmcv.ops import nms_rotated

        want_dets = np.array(
            [[1.0, 4.0, 13.0, 7.0, 0.6], [3.0, 6.0, 9.0, 11.0, 0.6],
             [6.0, 3.0, 8.0, 7.0, 0.5]],
            dtype=np.float32)
        want_keep = np.array([3, 1, 0], dtype=np.int64)

        # test cw angle definition
        dets, keep_inds = nms_rotated(boxes[:, :5], boxes[:, -1], 0.5,
                                      *extra_args)
        assert np.allclose(dets.cpu().numpy()[:, :5], want_dets)
        assert np.allclose(keep_inds.cpu().numpy(), want_keep)

        # test ccw angle definition
        boxes[..., -2] *= -1
        dets, keep_inds = nms_rotated(
            boxes[:, :5], boxes[:, -1], 0.5, *extra_args, clockwise=False)
        # Flip the sign back so the same expectation applies.
        dets[..., -2] *= -1
        assert np.allclose(dets.cpu().numpy()[:, :5], want_dets)
        assert np.allclose(keep_inds.cpu().numpy(), want_keep)

    def test_ml_nms_rotated(self):
        """Variant that also passes a labels tensor."""
        box_rows = np.array(
            [[6.0, 3.0, 8.0, 7.0, 0.5, 0.7], [3.0, 6.0, 9.0, 11.0, 0.6, 0.8],
             [3.0, 7.0, 10.0, 12.0, 0.3, 0.5], [1.0, 4.0, 13.0, 7.0, 0.6, 0.9]
             ],
            dtype=np.float32)
        label_row = np.array([1, 0, 1, 0], dtype=np.float32)

        boxes = torch.from_numpy(box_rows).cuda()
        labels = torch.from_numpy(label_row).cuda()
        self._check_cw_and_ccw(boxes, labels)

    def test_nms_rotated(self):
        """Variant without labels."""
        box_rows = np.array(
            [[6.0, 3.0, 8.0, 7.0, 0.5, 0.7], [3.0, 6.0, 9.0, 11.0, 0.6, 0.8],
             [3.0, 7.0, 10.0, 12.0, 0.3, 0.5], [1.0, 4.0, 13.0, 7.0, 0.6, 0.9]
             ],
            dtype=np.float32)

        boxes = torch.from_numpy(box_rows).cuda()
        self._check_cw_and_ccw(boxes)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_ops/test_onnx.py
================================================
import os
import warnings
from functools import partial

import numpy as np
import onnx
import onnxruntime as rt
import pytest
import torch
import torch.nn as nn
import torch.nn.functional as F
from packaging import version

onnx_file = 'tmp.onnx'


@pytest.fixture(autouse=True)
def run_before_and_after_test():
    """Autouse fixture: delete the temporary ONNX file around every test."""

    def _discard_onnx_file():
        if os.path.exists(onnx_file):
            os.remove(onnx_file)

    # clear onnx_file before test
    _discard_onnx_file()

    yield

    # clear onnx_file after test
    _discard_onnx_file()


class WrapFunction(nn.Module):
    """Adapter that exposes a plain callable as an ``nn.Module``.

    Args:
        wrapped_function: Callable invoked by ``forward`` with whatever
            arguments the module receives.
    """

    def __init__(self, wrapped_function):
        super().__init__()
        self.wrapped_function = wrapped_function

    def forward(self, *args, **kwargs):
        """Forward every argument to the wrapped callable unchanged."""
        target = self.wrapped_function
        return target(*args, **kwargs)


def process_grid_sample(func, input, grid, ort_custom_op_path=''):
    """Export ``func`` to ONNX and compare onnxruntime with PyTorch.

    Args:
        func: Callable taking ``(input, grid)``.
        input: Tensor fed as the ``input`` graph input.
        grid: Tensor fed as the ``grid`` graph input.
        ort_custom_op_path: Optional path of a custom-op library to register
            with onnxruntime. Nothing is registered when it is empty.
    """
    model = WrapFunction(func).eval()

    with torch.no_grad():
        torch.onnx.export(
            model, (input, grid),
            onnx_file,
            export_params=True,
            keep_initializers_as_inputs=True,
            input_names=['input', 'grid'],
            output_names=['output'],
            opset_version=11)

    exported = onnx.load(onnx_file)

    options = rt.SessionOptions()
    if ort_custom_op_path:
        options.register_custom_ops_library(ort_custom_op_path)

    # get onnx output
    # Graph inputs that are not initializers are the ones that must be fed.
    graph_inputs = {node.name for node in exported.graph.input}
    initializers = {node.name for node in exported.graph.initializer}
    net_feed_input = list(graph_inputs - initializers)
    assert (len(net_feed_input) == 2)

    session = rt.InferenceSession(onnx_file, options)
    feeds = {
        'input': input.detach().numpy(),
        'grid': grid.detach().numpy()
    }
    ort_result = session.run(None, feeds)
    pytorch_results = model(input.clone(), grid.clone())
    assert np.allclose(pytorch_results, ort_result, atol=1e-3)


@pytest.mark.parametrize('mode', ['bilinear', 'nearest'])
@pytest.mark.parametrize('padding_mode', ['zeros', 'border', 'reflection'])
@pytest.mark.parametrize('align_corners', [True, False])
def test_grid_sample(mode, padding_mode, align_corners):
    """ONNX export of ``F.grid_sample`` for each mode/padding combination.

    Skipped when the onnxruntime custom-op library has not been built.
    """
    from mmcv.onnx.symbolic import register_extra_symbolics
    opset_version = 11
    register_extra_symbolics(opset_version)

    from mmcv.ops import get_onnxruntime_op_path
    ort_custom_op_path = get_onnxruntime_op_path()
    if not os.path.exists(ort_custom_op_path):
        pytest.skip('custom ops for onnxruntime are not compiled.')

    input = torch.rand(1, 1, 10, 10)
    # Affine matrix with ones on the diagonal and zeros elsewhere, expanded
    # into a 15x15 sampling grid.
    theta = torch.Tensor([[[1, 0, 0], [0, 1, 0]]])
    grid = F.affine_grid(theta, (1, 1, 15, 15), align_corners=align_corners)
    grid = grid.type_as(input)

    sample_kwargs = dict(
        mode=mode, padding_mode=padding_mode, align_corners=align_corners)

    def func(input, grid):
        return F.grid_sample(input, grid, **sample_kwargs)

    return process_grid_sample(func, input, grid, ort_custom_op_path)


@pytest.mark.parametrize('align_corners', [True, False])
def test_bilinear_grid_sample(align_corners):
    """ONNX export of ``bilinear_grid_sample`` (no custom-op library)."""
    from mmcv.ops.point_sample import bilinear_grid_sample

    # only support pytorch >= 1.5.0
    installed = version.parse(torch.__version__)
    if installed < version.parse('1.5.0'):
        pytest.skip('Only support PyTorch >= 1.5.0')

    input = torch.rand(1, 1, 10, 10)
    # Affine matrix with ones on the diagonal and zeros elsewhere, expanded
    # into a 15x15 sampling grid.
    theta = torch.Tensor([[[1, 0, 0], [0, 1, 0]]])
    grid = F.affine_grid(theta, (1, 1, 15, 15), align_corners=align_corners)
    grid = grid.type_as(input)

    def func(input, grid):
        return bilinear_grid_sample(input, grid, align_corners=align_corners)

    return process_grid_sample(func, input, grid)


def test_nms():
    """Scores from the ONNX-exported ``nms`` must match the PyTorch ones."""
    if torch.__version__ == 'parrots':
        pytest.skip('onnx is not supported in parrots directly')
    from mmcv.ops import get_onnxruntime_op_path, nms

    boxes = torch.from_numpy(
        np.array([[6.0, 3.0, 8.0, 7.0], [3.0, 6.0, 9.0, 11.0],
                  [3.0, 7.0, 10.0, 12.0], [1.0, 4.0, 13.0, 7.0]],
                 dtype=np.float32))
    scores = torch.from_numpy(
        np.array([0.6, 0.9, 0.7, 0.2], dtype=np.float32))

    # Bind the options so the exported callable only takes (boxes, scores).
    nms_fn = partial(
        nms, iou_threshold=0.3, offset=0, score_threshold=0, max_num=0)
    pytorch_dets, _ = nms_fn(boxes, scores)
    pytorch_score = pytorch_dets[:, 4]

    model = WrapFunction(nms_fn)
    model.cpu().eval()
    with torch.no_grad():
        torch.onnx.export(
            model, (boxes, scores),
            onnx_file,
            export_params=True,
            keep_initializers_as_inputs=True,
            input_names=['boxes', 'scores'],
            opset_version=11)

    exported = onnx.load(onnx_file)
    ort_custom_op_path = get_onnxruntime_op_path()
    options = rt.SessionOptions()
    # The custom-op library is registered only when it exists on disk.
    if os.path.exists(ort_custom_op_path):
        options.register_custom_ops_library(ort_custom_op_path)

    # get onnx output
    graph_inputs = {node.name for node in exported.graph.input}
    initializers = {node.name for node in exported.graph.initializer}
    net_feed_input = list(graph_inputs - initializers)
    assert (len(net_feed_input) == 2)

    session = rt.InferenceSession(onnx_file, options)
    feeds = {
        'scores': scores.detach().numpy(),
        'boxes': boxes.detach().numpy()
    }
    onnx_dets, _ = session.run(None, feeds)
    onnx_score = onnx_dets[:, 4]
    assert np.allclose(pytorch_score, onnx_score, atol=1e-3)


@pytest.mark.skipif(not torch.cuda.is_available(), reason='test requires GPU')
def test_softnms():
    """Check that ``soft_nms`` exported to ONNX matches the PyTorch op.

    For each of the ``linear``, ``gaussian`` and ``naive`` methods the op is
    evaluated eagerly, exported at opset 11 and executed with onnxruntime
    using the compiled custom-op library. Both the detections and the kept
    indices must agree between the two backends.
    """
    if torch.__version__ == 'parrots':
        pytest.skip('onnx is not supported in parrots directly')
    from mmcv.ops import get_onnxruntime_op_path, soft_nms

    # only support pytorch >= 1.7.0
    if version.parse(torch.__version__) < version.parse('1.7.0'):
        warnings.warn('test_softnms should be ran with pytorch >= 1.7.0')
        return

    # only support onnxruntime >= 1.5.1
    assert version.parse(rt.__version__) >= version.parse(
        '1.5.1'), 'test_softnms should be ran with onnxruntime >= 1.5.1'

    ort_custom_op_path = get_onnxruntime_op_path()
    if not os.path.exists(ort_custom_op_path):
        pytest.skip('softnms for onnxruntime is not compiled.')

    np_boxes = np.array([[6.0, 3.0, 8.0, 7.0], [3.0, 6.0, 9.0, 11.0],
                         [3.0, 7.0, 10.0, 12.0], [1.0, 4.0, 13.0, 7.0]],
                        dtype=np.float32)
    np_scores = np.array([0.6, 0.9, 0.7, 0.2], dtype=np.float32)

    boxes = torch.from_numpy(np_boxes)
    scores = torch.from_numpy(np_scores)

    # (iou_threshold, sigma, min_score, method)
    configs = [[0.3, 0.5, 0.01, 'linear'], [0.3, 0.5, 0.01, 'gaussian'],
               [0.3, 0.5, 0.01, 'naive']]

    session_options = rt.SessionOptions()
    session_options.register_custom_ops_library(ort_custom_op_path)

    for _iou_threshold, _sigma, _min_score, _method in configs:
        # One partial serves as both the eager reference and the exported
        # callable, so the two can never drift apart.
        nms = partial(
            soft_nms,
            iou_threshold=_iou_threshold,
            sigma=_sigma,
            min_score=_min_score,
            method=_method)
        pytorch_dets, pytorch_inds = nms(boxes, scores)

        wrapped_model = WrapFunction(nms)
        wrapped_model.cpu().eval()
        with torch.no_grad():
            torch.onnx.export(
                wrapped_model, (boxes, scores),
                onnx_file,
                export_params=True,
                keep_initializers_as_inputs=True,
                input_names=['boxes', 'scores'],
                opset_version=11)
        onnx_model = onnx.load(onnx_file)

        # get onnx output
        input_all = [node.name for node in onnx_model.graph.input]
        input_initializer = [
            node.name for node in onnx_model.graph.initializer
        ]
        net_feed_input = list(set(input_all) - set(input_initializer))
        assert (len(net_feed_input) == 2)
        sess = rt.InferenceSession(onnx_file, session_options)
        onnx_dets, onnx_inds = sess.run(None, {
            'scores': scores.detach().numpy(),
            'boxes': boxes.detach().numpy()
        })

        assert np.allclose(pytorch_dets, onnx_dets, atol=1e-3)
        # The kept indices must match the PyTorch reference; the previous
        # code compared ``onnx_inds`` with itself, which is always true.
        assert np.allclose(pytorch_inds, onnx_inds, atol=1e-3)


def test_roialign():
    """Check that ``roi_align`` exported to ONNX matches the PyTorch op.

    Three small feature-map / RoI pairs are pushed through the eager op and
    through an opset-11 export executed by onnxruntime. The first ONNX
    output must agree with the eager result within ``atol=1e-3``.
    """
    if torch.__version__ == 'parrots':
        pytest.skip('onnx is not supported in parrots directly')
    try:
        from mmcv.ops import get_onnxruntime_op_path, roi_align
    except (ImportError, ModuleNotFoundError):
        pytest.skip('roi_align op is not successfully compiled')

    custom_op_lib = get_onnxruntime_op_path()

    # roi align config
    pool_h, pool_w = 2, 2
    spatial_scale = 1.0
    sampling_ratio = 2
    # Positional arguments shared by the eager and the exported call.
    align_args = ((pool_w, pool_h), spatial_scale, sampling_ratio, 'avg',
                  True)

    cases = [([[[[1., 2.], [3., 4.]]]], [[0., 0., 0., 1., 1.]]),
             ([[[[1., 2.], [3., 4.]], [[4., 3.],
                                       [2., 1.]]]], [[0., 0., 0., 1., 1.]]),
             ([[[[1., 2., 5., 6.], [3., 4., 7., 8.], [9., 10., 13., 14.],
                 [11., 12., 15., 16.]]]], [[0., 0., 0., 3., 3.]])]

    def roi_align_fn(torch_input, torch_rois):
        return roi_align(torch_input, torch_rois, *align_args)

    for raw_feat, raw_rois in cases:
        feat = torch.from_numpy(np.array(raw_feat, dtype=np.float32))
        rois = torch.from_numpy(np.array(raw_rois, dtype=np.float32))

        # eager reference
        with torch.no_grad():
            torch_out = roi_align(feat, rois, *align_args)

        # export and load onnx model
        exported = WrapFunction(roi_align_fn)
        with torch.no_grad():
            torch.onnx.export(
                exported, (feat, rois),
                onnx_file,
                export_params=True,
                keep_initializers_as_inputs=True,
                input_names=['input', 'rois'],
                opset_version=11)

        graph = onnx.load(onnx_file).graph
        options = rt.SessionOptions()
        if os.path.exists(custom_op_lib):
            options.register_custom_ops_library(custom_op_lib)

        # Graph inputs that are not initializers are the real feed inputs.
        feed_names = {node.name for node in graph.input} - {
            node.name for node in graph.initializer
        }
        assert (len(feed_names) == 2)

        session = rt.InferenceSession(onnx_file, options)
        feed = {
            'input': feat.detach().numpy(),
            'rois': rois.detach().numpy()
        }
        ort_out = session.run(None, feed)[0]

        assert np.allclose(torch_out, ort_out, atol=1e-3)


def test_roialign_rotated():
    """Check that ``roi_align_rotated`` exported to ONNX matches PyTorch.

    Five feature-map / rotated-RoI pairs are evaluated eagerly and through
    an opset-11 export executed by onnxruntime with the compiled custom-op
    library. The first ONNX output must agree with the eager result within
    ``atol=1e-3``. The test is skipped when the op or the custom-op library
    is unavailable.
    """
    if torch.__version__ == 'parrots':
        pytest.skip('onnx is not supported in parrots directly')
    try:
        from mmcv.ops import get_onnxruntime_op_path, roi_align_rotated
    except (ImportError, ModuleNotFoundError):
        # The message previously named a non-existent ``roi_align_aligned``.
        pytest.skip('roi_align_rotated op is not successfully compiled')

    ort_custom_op_path = get_onnxruntime_op_path()
    if not os.path.exists(ort_custom_op_path):
        pytest.skip('custom ops for onnxruntime are not compiled.')

    # The library is known to exist past this point, so it is registered
    # once here instead of being re-checked for every case in the loop.
    session_options = rt.SessionOptions()
    session_options.register_custom_ops_library(ort_custom_op_path)

    # roi align config
    pool_h = 2
    pool_w = 2
    spatial_scale = 1.0
    sampling_ratio = 2

    inputs = [([[[[1., 2.], [3., 4.]]]], [[0., 0.5, 0.5, 1., 1., 0]]),
              ([[[[1., 2.], [3., 4.]]]], [[0., 0.5, 0.5, 1., 1., np.pi / 2]]),
              ([[[[1., 2.], [3., 4.]],
                 [[4., 3.], [2., 1.]]]], [[0., 0.5, 0.5, 1., 1., 0]]),
              ([[[[1., 2., 5., 6.], [3., 4., 7., 8.], [9., 10., 13., 14.],
                  [11., 12., 15., 16.]]]], [[0., 1.5, 1.5, 3., 3., 0]]),
              ([[[[1., 2., 5., 6.], [3., 4., 7., 8.], [9., 10., 13., 14.],
                  [11., 12., 15., 16.]]]], [[0., 1.5, 1.5, 3., 3.,
                                             np.pi / 2]])]

    def wrapped_function(torch_input, torch_rois):
        return roi_align_rotated(torch_input, torch_rois, (pool_w, pool_h),
                                 spatial_scale, sampling_ratio, True, False)

    for case in inputs:
        np_input = np.array(case[0], dtype=np.float32)
        np_rois = np.array(case[1], dtype=np.float32)
        input = torch.from_numpy(np_input)
        rois = torch.from_numpy(np_rois)

        # compute pytorch_output
        with torch.no_grad():
            pytorch_output = wrapped_function(input, rois)

        # export and load onnx model
        wrapped_model = WrapFunction(wrapped_function)
        with torch.no_grad():
            torch.onnx.export(
                wrapped_model, (input, rois),
                onnx_file,
                export_params=True,
                keep_initializers_as_inputs=True,
                input_names=['features', 'rois'],
                opset_version=11)

        onnx_model = onnx.load(onnx_file)

        # compute onnx_output
        input_all = [node.name for node in onnx_model.graph.input]
        input_initializer = [
            node.name for node in onnx_model.graph.initializer
        ]
        net_feed_input = list(set(input_all) - set(input_initializer))
        assert (len(net_feed_input) == 2)
        sess = rt.InferenceSession(onnx_file, session_options)
        onnx_output = sess.run(None, {
            'features': input.detach().numpy(),
            'rois': rois.detach().numpy()
        })
        onnx_output = onnx_output[0]

        # allclose
        assert np.allclose(pytorch_output, onnx_output, atol=1e-3)


@pytest.mark.skipif(not torch.cuda.is_available(), reason='test requires GPU')
def test_roipool():
    """Check that ``roi_pool`` exported to ONNX matches the CUDA op.

    Each case is evaluated on the GPU, exported at opset 11 and executed
    with a plain onnxruntime session (no custom-op library is registered).
    The first ONNX output must agree with the eager result within
    ``atol=1e-3``.
    """
    if torch.__version__ == 'parrots':
        pytest.skip('onnx is not supported in parrots directly')
    from mmcv.ops import roi_pool

    # roi pool config
    pool_h, pool_w = 2, 2
    spatial_scale = 1.0
    out_size = (pool_w, pool_h)

    cases = [([[[[1., 2.], [3., 4.]]]], [[0., 0., 0., 1., 1.]]),
             ([[[[1., 2.], [3., 4.]], [[4., 3.],
                                       [2., 1.]]]], [[0., 0., 0., 1., 1.]]),
             ([[[[1., 2., 5., 6.], [3., 4., 7., 8.], [9., 10., 13., 14.],
                 [11., 12., 15., 16.]]]], [[0., 0., 0., 3., 3.]])]

    def roi_pool_fn(torch_input, torch_rois):
        return roi_pool(torch_input, torch_rois, out_size, spatial_scale)

    for raw_feat, raw_rois in cases:
        feat = torch.from_numpy(np.array(raw_feat, dtype=np.float32)).cuda()
        rois = torch.from_numpy(np.array(raw_rois, dtype=np.float32)).cuda()

        # eager reference, moved back to the host for the comparison
        with torch.no_grad():
            torch_out = roi_pool(feat, rois, out_size, spatial_scale)
            torch_out = torch_out.cpu()

        # export and load onnx model
        exported = WrapFunction(roi_pool_fn)
        with torch.no_grad():
            torch.onnx.export(
                exported, (feat, rois),
                onnx_file,
                export_params=True,
                keep_initializers_as_inputs=True,
                input_names=['input', 'rois'],
                opset_version=11)
        graph = onnx.load(onnx_file).graph

        # Graph inputs that are not initializers are the real feed inputs.
        feed_names = {node.name for node in graph.input} - {
            node.name for node in graph.initializer
        }
        assert (len(feed_names) == 2)

        session = rt.InferenceSession(onnx_file)
        feed = {
            'input': feat.detach().cpu().numpy(),
            'rois': rois.detach().cpu().numpy()
        }
        ort_out = session.run(None, feed)[0]

        assert np.allclose(torch_out, ort_out, atol=1e-3)


def test_interpolate():
    """Check that ``F.interpolate`` exported to ONNX matches PyTorch.

    The extra mmcv symbolics are registered for opset 11, a 2x upsampling
    function is exported and run with onnxruntime, and the single ONNX
    output is compared with the eager result within ``atol=1e-3``.
    """
    from mmcv.onnx.symbolic import register_extra_symbolics
    opset_version = 11
    register_extra_symbolics(opset_version)

    def func(feat, scale_factor=2):
        out = F.interpolate(feat, scale_factor=scale_factor)
        return out

    net = WrapFunction(func)
    net = net.cpu().eval()
    dummy_input = torch.randn(2, 4, 8, 8).cpu()
    torch.onnx.export(
        net,
        dummy_input,
        onnx_file,
        input_names=['input'],
        opset_version=opset_version)
    sess = rt.InferenceSession(onnx_file)
    # ``sess.run`` returns a list of outputs. Take the first one so that
    # two arrays are compared directly, rather than an array against a
    # list that only matches through broadcasting.
    onnx_result = sess.run(None, {'input': dummy_input.detach().numpy()})[0]
    pytorch_result = func(dummy_input).detach().numpy()

    assert pytorch_result.shape == onnx_result.shape
    assert np.allclose(pytorch_result, onnx_result, atol=1e-3)


@pytest.mark.parametrize('mode', ['top', 'bottom', 'left', 'right'])
def test_corner_pool(mode, opset=11):
    """Check that ``CornerPool`` exported to ONNX matches the PyTorch op.

    Args:
        mode (str): Pooling direction passed to ``CornerPool``.
        opset (int): ONNX opset version used for the export.

    The test is skipped when the onnxruntime custom-op library has not been
    compiled. The single ONNX output must agree with the eager result
    within ``atol=1e-5``.
    """
    if torch.__version__ == 'parrots':
        pytest.skip('onnx is not supported in parrots directly')

    from mmcv.ops import get_onnxruntime_op_path
    ort_custom_op_path = get_onnxruntime_op_path()
    if not os.path.exists(ort_custom_op_path):
        pytest.skip('custom ops for onnxruntime are not compiled.')

    from mmcv.ops.corner_pool import CornerPool

    def corner_pool_func(input):
        corner_pool_module = CornerPool(mode)
        return corner_pool_module.corner_pool.apply(input)

    wrapped_model = WrapFunction(corner_pool_func).eval()

    input = torch.rand((2, 3, 9, 12))  # (n,c,h,w)

    with torch.no_grad():
        torch.onnx.export(
            wrapped_model,
            input,
            onnx_file,
            export_params=True,
            keep_initializers_as_inputs=True,
            input_names=['input'],
            output_names=['output'],
            opset_version=opset)

    onnx_model = onnx.load(onnx_file)
    input_all = [node.name for node in onnx_model.graph.input]
    input_initializer = [node.name for node in onnx_model.graph.initializer]
    net_feed_input = list(set(input_all) - set(input_initializer))
    assert (len(net_feed_input) == 1)

    session_options = rt.SessionOptions()
    session_options.register_custom_ops_library(ort_custom_op_path)
    sess = rt.InferenceSession(onnx_file, session_options)
    # ``sess.run`` returns a list of outputs. Take the first one so the
    # comparison is array-to-array rather than relying on broadcasting
    # against a one-element list.
    ort_result = sess.run(None, {'input': input.detach().numpy()})[0]
    with torch.no_grad():
        pytorch_result = wrapped_model(input.clone())

    assert tuple(pytorch_result.shape) == ort_result.shape
    assert np.allclose(pytorch_result, ort_result, atol=1e-5)


@pytest.mark.parametrize('key', ['cummax', 'cummin'])
def test_cummax_cummin(key, opset=11):
    """Check ``torch.cummax`` / ``torch.cummin`` ONNX export against PyTorch.

    Args:
        key (str): ``'cummax'`` or ``'cummin'``, selecting the op under test.
        opset (int): ONNX opset version used for the export.

    Every valid ``dim`` of several input shapes (including zero-element
    tensors) is exported and run with onnxruntime. Values must agree within
    ``atol=1e-5`` and indices must be equal.
    """
    if torch.__version__ == 'parrots':
        pytest.skip('onnx is not supported in parrots directly')

    # `cummax` / `cummin` themselves can be exported to ONNX from pytorch
    # 1.5.0 on, because that is when `torch.cummax` became available.
    # When their outputs feed another module, however, pytorch >= 1.7.0 is
    # required; older versions fail with
    # `RuntimeError: tuple  appears in op that does not forward tuples,
    # unsupported 'kind: prim::PythonOp`.
    if version.parse(torch.__version__) < version.parse('1.7.0'):
        pytest.skip('test_cummax_cummin should be ran with pytorch >= 1.7.0')

    # register custom op `mmcv::cummax` and `mmcv::cummin`
    from mmcv.onnx.symbolic import register_extra_symbolics
    register_extra_symbolics(opset)

    from mmcv.ops import get_onnxruntime_op_path
    custom_op_lib = get_onnxruntime_op_path()
    if not os.path.exists(custom_op_lib):
        pytest.skip('custom ops for onnxruntime are not compiled.')

    samples = [
        # arbitrary shape, e.g. 1-D, 2-D, 3-D, ...
        torch.rand((2, 3, 4, 1, 5)),
        torch.rand((1)),
        torch.rand((2, 0, 1)),  # tensor.numel() is 0
        torch.FloatTensor(),  # empty tensor
    ]

    scan_op = {'cummax': torch.cummax, 'cummin': torch.cummin}[key]

    for sample in samples:
        rank = sample.dim()
        # every dim in the valid range [-rank, rank - 1] is exercised
        for axis in range(-rank, rank):
            exported = WrapFunction(partial(scan_op, dim=axis)).eval()

            with torch.no_grad():
                torch.onnx.export(
                    exported,
                    sample,
                    onnx_file,
                    export_params=True,
                    keep_initializers_as_inputs=True,
                    input_names=['input'],
                    output_names=['output', 'indices'],
                    opset_version=opset)

            graph = onnx.load(onnx_file).graph
            # Graph inputs that are not initializers are the feed inputs.
            feed_names = {node.name for node in graph.input} - {
                node.name for node in graph.initializer
            }
            assert (len(feed_names) == 1)

            options = rt.SessionOptions()
            options.register_custom_ops_library(custom_op_lib)
            session = rt.InferenceSession(onnx_file, options)
            ort_values, ort_indices = session.run(
                None, {'input': sample.detach().numpy()})

            torch_values, torch_indices = exported(sample.clone())
            torch_values = torch_values.detach().numpy()
            torch_indices = torch_indices.detach().numpy()
            assert np.allclose(torch_values, ort_values, atol=1e-5)
            assert np.all(torch_indices == ort_indices)


@pytest.mark.parametrize('shifts_dims_pair', [([-3, 5], [2, 0]), (5, None)])
def test_roll(shifts_dims_pair):
    """Check that ``torch.roll`` exported to ONNX matches PyTorch.

    Args:
        shifts_dims_pair (tuple): ``(shifts, dims)`` forwarded to
            ``torch.roll``.
    """
    opset = 11
    from mmcv.onnx.symbolic import register_extra_symbolics
    register_extra_symbolics(opset)

    # 0, 1, ..., 119 laid out as a (4, 5, 6) float tensor
    data = torch.arange(0, 4 * 5 * 6, dtype=torch.float32).view(4, 5, 6)

    shifts, dims = shifts_dims_pair
    exported = WrapFunction(partial(torch.roll, shifts=shifts,
                                    dims=dims)).eval()

    export_kwargs = dict(
        export_params=True,
        keep_initializers_as_inputs=True,
        input_names=['input'],
        output_names=['output'],
        opset_version=opset)
    with torch.no_grad():
        torch.onnx.export(exported, data, onnx_file, **export_kwargs)

    graph = onnx.load(onnx_file).graph
    # Graph inputs that are not initializers are the real feed inputs.
    feed_names = {node.name for node in graph.input} - {
        node.name for node in graph.initializer
    }
    assert (len(feed_names) == 1)

    session = rt.InferenceSession(onnx_file)
    ort_out = session.run(None, {'input': data.detach().numpy()})[0]

    with torch.no_grad():
        torch_out = exported(data.clone())

    torch.testing.assert_allclose(ort_out, torch_out)


@pytest.mark.skipif(
    torch.__version__ == 'parrots',
    reason='onnx is not supported in parrots directly')
@pytest.mark.skipif(
    not torch.cuda.is_available(),
    reason='modulated_deform_conv2d only supports in GPU')
def test_modulated_deform_conv2d():
    """Check ``ModulatedDeformConv2d`` ONNX export against the CUDA op.

    A plain convolution produces the offsets and mask. The deformable layer
    is then exported once with and once without bias, and each onnxruntime
    result must agree with the eager CUDA result within ``atol=1e-3``.
    """
    try:
        from mmcv.ops import ModulatedDeformConv2d, get_onnxruntime_op_path
    except (ImportError, ModuleNotFoundError):
        pytest.skip('modulated_deform_conv op is not successfully compiled')

    custom_op_lib = get_onnxruntime_op_path()

    # modulated deform conv config
    in_channels, out_channels = 3, 64
    stride, padding, dilation = 1, 0, 1
    groups, deform_groups = 1, 1
    kernel_size = 3

    feat = torch.rand(1, in_channels, 28, 28).cuda()  # (n, c, h, w)

    # The offset branch emits 3 * k * k channels per deform group; they are
    # split below into two offset chunks and one mask chunk.
    conv_offset = nn.Conv2d(
        in_channels=3,
        out_channels=deform_groups * 3 * kernel_size * kernel_size,
        kernel_size=kernel_size,
        stride=stride,
        padding=padding,
        dilation=dilation,
        bias=True)
    conv_offset.cuda()
    first_half, second_half, mask = torch.chunk(conv_offset(feat), 3, dim=1)
    offset = torch.cat((first_half, second_half), dim=1)
    mask = torch.sigmoid(mask)

    layer_args = (in_channels, out_channels, kernel_size, stride, padding,
                  dilation, groups, deform_groups)
    # Built in the same order as before: with bias first, then without.
    layers = [
        ModulatedDeformConv2d(*layer_args, bias=use_bias)
        for use_bias in (True, False)
    ]
    layers = [layer.cuda() for layer in layers]

    for layer in layers:
        # export and load onnx model
        with torch.no_grad():
            torch.onnx.export(
                layer, (feat, offset, mask),
                onnx_file,
                export_params=True,
                keep_initializers_as_inputs=True,
                input_names=['input', 'offset', 'mask'],
                opset_version=11)

        options = rt.SessionOptions()
        if os.path.exists(custom_op_lib):
            options.register_custom_ops_library(custom_op_lib)

        # onnxruntime result
        session = rt.InferenceSession(onnx_file, options)
        feed = {
            'input': feat.cpu().detach().numpy(),
            'offset': offset.cpu().detach().numpy(),
            'mask': mask.cpu().detach().numpy()
        }
        ort_out = session.run(None, feed)[0]

        # eager result
        with torch.no_grad():
            torch_out = layer(feat, offset, mask).cpu()

        assert np.allclose(torch_out, ort_out, atol=1e-3)


@pytest.mark.skipif(
    torch.__version__ == 'parrots',
    reason='onnx is not supported in parrots directly')
def test_deform_conv2d(threshold=1e-3):
    """Check ``DeformConv2d`` ONNX export against the PyTorch op.

    Fixed weights are loaded into both the offset convolution and the
    deformable layer, the layer is exported at opset 11, and the
    onnxruntime result must agree with the eager result within
    ``atol=1e-3``.

    Args:
        threshold (float): Accepted for compatibility; the comparison below
            uses a literal tolerance and does not read this value.
    """
    try:
        from mmcv.ops import DeformConv2d, get_onnxruntime_op_path
    except (ImportError, ModuleNotFoundError):
        pytest.skip('deform_conv op is not successfully compiled')

    custom_op_lib = get_onnxruntime_op_path()
    if not os.path.exists(custom_op_lib):
        pytest.skip('custom ops for onnxruntime are not compiled.')

    # deform conv config
    in_channels, out_channels = 1, 64
    stride, padding, dilation = 1, 0, 1
    groups, deform_groups = 1, 1
    kernel_size = 2

    # hand-written input and weights
    raw_input = [[[[1., 2., 3.], [0., 1., 2.], [3., 5., 2.]]]]
    raw_offset_weight = [[[0.1, 0.4, 0.6, 0.1]], [[0.3, 0.2, 0.1, 0.3]],
                         [[0.5, 0.5, 0.2, 0.8]], [[0.8, 0.3, 0.9, 0.1]],
                         [[0.3, 0.1, 0.2, 0.5]], [[0.3, 0.7, 0.5, 0.3]],
                         [[0.6, 0.2, 0.5, 0.3]], [[0.4, 0.1, 0.8, 0.4]]]
    raw_offset_bias = [0.7, 0.1, 0.8, 0.5, 0.6, 0.5, 0.4, 0.7]
    raw_deform_weight = [[[0.4, 0.2, 0.1, 0.9]]]

    x = torch.tensor(raw_input)
    conv_offset = nn.Conv2d(
        in_channels=in_channels,
        out_channels=deform_groups * 2 * kernel_size * kernel_size,
        kernel_size=kernel_size,
        stride=stride,
        padding=padding,
        dilation=dilation,
        bias=True)

    offset_weight = torch.Tensor(raw_offset_weight).reshape(8, 1, 2, 2)
    offset_bias = torch.Tensor(raw_offset_bias).reshape(8)
    conv_offset.weight.data = torch.nn.Parameter(offset_weight)
    conv_offset.bias.data = torch.nn.Parameter(offset_bias)

    offset = conv_offset(x)

    layer = DeformConv2d(in_channels, out_channels, kernel_size, stride,
                         padding, dilation, groups, deform_groups)

    deform_weight = torch.Tensor(raw_deform_weight).reshape(1, 1, 2, 2)
    layer.weight.data = torch.nn.Parameter(deform_weight)

    with torch.no_grad():
        torch.onnx.export(
            layer, (x, offset),
            onnx_file,
            export_params=True,
            keep_initializers_as_inputs=True,
            input_names=['input', 'offset'],
            opset_version=11)

    options = rt.SessionOptions()
    if os.path.exists(custom_op_lib):
        options.register_custom_ops_library(custom_op_lib)

    # onnxruntime result
    session = rt.InferenceSession(onnx_file, options)
    feed = {
        'input': x.cpu().detach().numpy(),
        'offset': offset.cpu().detach().numpy(),
    }
    ort_out = session.run(None, feed)[0]

    # eager result
    with torch.no_grad():
        torch_out = layer(x, offset).cpu()

    assert np.allclose(torch_out, ort_out, atol=1e-3)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_ops/test_pixel_group.py
================================================
import numpy as np
import torch


def test_pixel_group():
    """Run ``pixel_group`` on a 10x10 fixture with numpy and torch inputs.

    All non-zero fixture entries live in rows 3..6. The same expected
    groups are asserted for the numpy call and for the torch-tensor call.
    """
    from mmcv.ops import pixel_group

    band = slice(3, 7)  # rows 3..6 hold every non-zero entry

    # score map: 0.9 in the band for columns 1..8, zero elsewhere
    score = np.zeros((10, 10), dtype=np.float32)
    score[band, 1:9] = 0.9
    mask = (score > 0.5)

    embedding = np.zeros((10, 10, 8), dtype=np.float32)
    embedding[:, :7] = 0.9
    embedding[:, 7:] = 10.0

    # kernel labels: label 1 on columns 2..4, label 2 on column 8
    kernel_label = np.zeros((10, 10), dtype=np.int32)
    kernel_label[band, 2:5] = 1
    kernel_label[band, 8] = 2

    # kernel contours: outline of the label-1 block (its two interior
    # pixels cleared) plus the whole label-2 column
    kernel_contour = np.zeros((10, 10), dtype=np.uint8)
    kernel_contour[band, 2:5] = 1
    kernel_contour[4:6, 3] = 0
    kernel_contour[band, 8] = 1

    kernel_region_num = 3
    distance_threshold = float(0.8)

    # Each expected group is [score, point count, x0, y0, x1, y1, ...],
    # with the points enumerated row by row.
    expected_1 = [0.8999997973442078, 24.0] + [
        float(v) for y in range(3, 7) for x in range(1, 7) for v in (x, y)
    ]
    expected_2 = [0.9000000357627869, 8.0] + [
        float(v) for y in range(3, 7) for x in (7, 8) for v in (x, y)
    ]

    def check(groups):
        assert np.allclose(groups[0], [0, 0])
        assert np.allclose(groups[1], expected_1)
        assert np.allclose(groups[2], expected_2)

    arrays = (score, mask, embedding, kernel_label, kernel_contour)
    check(pixel_group(*arrays, kernel_region_num, distance_threshold))

    # test torch Tensor
    tensors = [torch.from_numpy(array) for array in arrays]
    check(pixel_group(*tensors, kernel_region_num, distance_threshold))


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_ops/test_points_in_polygons.py
================================================
import numpy as np
import pytest
import torch

from mmcv.ops import points_in_polygons


@pytest.mark.skipif(
    not torch.cuda.is_available(), reason='requires CUDA support')
def test_points_in_polygons():
    """Compare ``points_in_polygons`` on CUDA with a hand-written result.

    Five points are tested against three polygons given as eight numbers
    each; the expected array has one row per point and one column per
    polygon.
    """

    def to_cuda_float(array):
        # numpy array -> CUDA tensor -> float32, as in the original chain
        return torch.from_numpy(array).cuda().float()

    points = to_cuda_float(
        np.array([[300., 300.], [400., 400.], [100., 100], [300, 250],
                  [100, 0]]))
    polygons = to_cuda_float(
        np.array([[200., 200., 400., 400., 500., 200., 400., 100.],
                  [400., 400., 500., 500., 600., 300., 500., 200.],
                  [300., 300., 600., 700., 700., 700., 700., 100.]]))
    expected = to_cuda_float(
        np.array([[0., 0., 0.], [0., 0., 1.], [0., 0., 0.], [1., 0., 0.],
                  [0., 0., 0.]]))

    actual = points_in_polygons(points, polygons)
    # the third positional argument of torch.allclose is rtol
    assert torch.allclose(actual, expected, rtol=1e-3)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_ops/test_psa_mask.py
================================================
import numpy as np
import torch
import torch.nn as nn


class Loss(nn.Module):
    """Toy criterion returning the mean signed difference of two tensors."""

    def __init__(self):
        super().__init__()

    def forward(self, input, target):
        """Return ``mean(input - target)`` over all flattened elements."""
        flat_diff = input.view(-1) - target.view(-1)
        return torch.mean(flat_diff)


class TestPSAMask(object):
    """Regression tests for ``PSAMask`` against stored reference outputs."""

    def _run_psa_mask(self, psa_type, expected_path):
        """Run one ``PSAMask`` mode on CPU, then on CUDA, and compare.

        Args:
            psa_type (str): Mode string passed to ``PSAMask``.
            expected_path (str): Path of the float32 binary file holding
                the reference output.
        """
        from mmcv.ops import PSAMask
        criterion = Loss()

        raw_input = np.fromfile(
            'tests/data/for_psa_mask/psa_input.bin', dtype=np.float32)
        expected = np.fromfile(expected_path, dtype=np.float32)

        raw_input = raw_input.reshape((4, 16, 8, 8))
        expected = expected.reshape((4, 64, 8, 8))
        label = torch.ones((4, 64, 8, 8))

        feats = torch.FloatTensor(raw_input)
        feats.requires_grad = True

        psa_mask = PSAMask(psa_type, (4, 4))

        # First pass on the CPU, second pass after moving everything to CUDA.
        for on_gpu in (False, True):
            if on_gpu:
                psa_mask.cuda()
                feats = feats.cuda()
                label = label.cuda()

            out = psa_mask(feats)
            criterion(out, label).backward()
            # ``.cpu()`` leaves a CPU tensor unchanged, so one conversion
            # path serves both devices.
            out = out.detach().cpu().numpy()
            assert np.allclose(out, expected)
            assert out.shape == expected.shape

    def test_psa_mask_collect(self):
        """Check the 'collect' mode; returns early without CUDA."""
        if not torch.cuda.is_available():
            return
        self._run_psa_mask('collect',
                           'tests/data/for_psa_mask/psa_output_collect.bin')

    def test_psa_mask_distribute(self):
        """Check the 'distribute' mode; returns early without CUDA."""
        if not torch.cuda.is_available():
            return
        self._run_psa_mask(
            'distribute',
            'tests/data/for_psa_mask/psa_output_distribute.bin')


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_ops/test_riroi_align_rotated.py
================================================
import numpy as np
import pytest
import torch
from torch.autograd import gradcheck

from mmcv.ops import RiRoIAlignRotated

# Input feature map shared by both tests; the literal has shape (1, 8, 2, 2).
np_feature = np.array([[[[1, 2], [3, 4]], [[1, 2], [4, 3]], [[4, 3], [2, 1]],
                        [[1, 2], [5, 6]], [[3, 4], [7, 8]], [[9, 10], [13,
                                                                       14]],
                        [[11, 12], [15, 16]], [[1, 1], [2, 2]]]])
# Two RoIs with six values each.
# NOTE(review): presumably (batch_idx, cx, cy, w, h, angle) -- confirm against
# the RiRoIAlignRotated documentation.
np_rois = np.array([[0., 0.5, 0.5, 1., 1., np.pi / 3],
                    [0., 1., 1., 3., 3., np.pi / 2]])
# Reference forward result, shape (2, 8, 2, 2); compared with the op output
# in test_roialign_rotated_allclose.
expect_output = np.array([[[[1.8425, 1.3516], [2.3151, 1.8241]],
                           [[2.4779, 1.7416], [3.2173, 2.5632]],
                           [[2.7149, 2.2638], [2.6540, 2.3673]],
                           [[2.9461, 2.8638], [2.8028, 2.7205]],
                           [[4.1943, 2.7214], [5.6119, 4.1391]],
                           [[7.5276, 6.0547], [8.9453, 7.4724]],
                           [[12.1943, 10.7214], [13.6119, 12.1391]],
                           [[9.5489, 8.4237], [10.5763, 9.4511]]],
                          [[[7.6562, 12.5625], [4.0000, 6.6250]],
                           [[1.0000, 1.3125], [0.5000, 0.6562]],
                           [[1.6562, 1.9375], [1.0000, 1.3125]],
                           [[1.8438, 2.0547], [0.7500, 1.1562]],
                           [[0.8438, 3.0625], [0.2500, 1.1875]],
                           [[2.6562, 2.5625], [1.5000, 1.6250]],
                           [[3.6562, 4.5625], [2.0000, 2.6250]],
                           [[6.6562, 10.5625], [3.5000, 5.6250]]]])

# Reference gradient w.r.t. the input feature map, shape (1, 8, 2, 2);
# compared with x.grad in test_roialign_rotated_allclose after a backward
# pass seeded with ones.
expect_grad = np.array([[[[1.4727, 1.5586], [1.5586, 1.6602]],
                         [[1.4727, 1.5586], [1.5586, 1.6602]],
                         [[1.4727, 1.5586], [1.5586, 1.6602]],
                         [[1.4727, 1.5586], [1.5586, 1.6602]],
                         [[1.4727, 1.5586], [1.5586, 1.6602]],
                         [[1.4727, 1.5586], [1.5586, 1.6602]],
                         [[1.4727, 1.5586], [1.5586, 1.6602]],
                         [[1.4727, 1.5586], [1.5586, 1.6602]]]])

# Arguments handed to the RiRoIAlignRotated constructor by the tests:
# ((pool_h, pool_w), spatial_scale, num_samples, num_orientations, clockwise).
pool_h = 2
pool_w = 2
spatial_scale = 1.0
num_samples = 2
# NOTE(review): sampling_ratio is not referenced by either test in this file
# (num_samples is passed instead).
sampling_ratio = 2
num_orientations = 8
clockwise = False


@pytest.mark.skipif(
    not torch.cuda.is_available(), reason='requires CUDA support')
def test_roialign_rotated_gradcheck():
    """Numerically verify the gradient of ``RiRoIAlignRotated`` on CUDA."""
    cuda_float = dict(dtype=torch.float, device='cuda')
    feature = torch.tensor(np_feature, requires_grad=True, **cuda_float)
    boxes = torch.tensor(np_rois, **cuda_float)

    out_size = (pool_h, pool_w)
    align_op = RiRoIAlignRotated(out_size, spatial_scale, num_samples,
                                 num_orientations, clockwise)
    gradcheck(align_op, (feature, boxes), eps=1e-3, atol=1e-3)


@pytest.mark.skipif(
    not torch.cuda.is_available(), reason='requires CUDA support')
def test_roialign_rotated_allclose():
    """Compare ``RiRoIAlignRotated`` forward and backward results on CUDA
    with the ``expect_output`` / ``expect_grad`` fixtures (atol 1e-3)."""
    cuda_float = dict(dtype=torch.float, device='cuda')
    feature = torch.tensor(np_feature, requires_grad=True, **cuda_float)
    boxes = torch.tensor(np_rois, **cuda_float)

    out_size = (pool_h, pool_w)
    align_op = RiRoIAlignRotated(out_size, spatial_scale, num_samples,
                                 num_orientations, clockwise)
    pooled = align_op(feature, boxes)
    # Seed the backward pass with ones so feature.grad holds d(sum)/d(input).
    pooled.backward(torch.ones_like(pooled))

    actual_output = pooled.data.type(torch.float).cpu().numpy()
    assert np.allclose(actual_output, expect_output, atol=1e-3)
    actual_grad = feature.grad.data.type(torch.float).cpu().numpy()
    assert np.allclose(actual_grad, expect_grad, atol=1e-3)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_ops/test_roi_align.py
================================================
import numpy as np
import pytest
import torch

# Records which backend supplied ``gradcheck``: True when it was imported from
# parrots, False after falling back to torch.autograd.
_USING_PARROTS = True
try:
    from parrots.autograd import gradcheck
except ImportError:
    from torch.autograd import gradcheck
    _USING_PARROTS = False

# Test cases as (feature map, rois) pairs of nested lists; the helpers below
# convert them with np.array / torch.tensor.
# NOTE(review): every roi row has five values, presumably
# (batch_idx, x1, y1, x2, y2) -- confirm against the RoIAlign documentation.
# yapf:disable
inputs = [([[[[1., 2.], [3., 4.]]]],
           [[0., 0., 0., 1., 1.]]),
          ([[[[1., 2.], [3., 4.]],
             [[4., 3.], [2., 1.]]]],
           [[0., 0., 0., 1., 1.]]),
          ([[[[1., 2., 5., 6.], [3., 4., 7., 8.],
              [9., 10., 13., 14.], [11., 12., 15., 16.]]]],
           [[0., 0., 0., 3., 3.]])]
# Expected results, index-aligned with ``inputs``: each entry is
# (forward output, gradient w.r.t. the feature map), as consumed by
# _test_roialign_allclose.
outputs = [([[[[1.0, 1.25], [1.5, 1.75]]]],
            [[[[3.0625, 0.4375], [0.4375, 0.0625]]]]),
           ([[[[1.0, 1.25], [1.5, 1.75]],
              [[4.0, 3.75], [3.5, 3.25]]]],
            [[[[3.0625, 0.4375], [0.4375, 0.0625]],
              [[3.0625, 0.4375], [0.4375, 0.0625]]]]),
           ([[[[1.9375, 4.75], [7.5625, 10.375]]]],
            [[[[0.47265625, 0.42968750, 0.42968750, 0.04296875],
               [0.42968750, 0.39062500, 0.39062500, 0.03906250],
               [0.42968750, 0.39062500, 0.39062500, 0.03906250],
               [0.04296875, 0.03906250, 0.03906250, 0.00390625]]]])]
# yapf:enable

# Module-level RoIAlign settings read by _test_roialign_gradcheck;
# _test_roialign_allclose defines local variables with the same names.
pool_h = 2
pool_w = 2
spatial_scale = 1.0
sampling_ratio = 2


def _test_roialign_gradcheck(device, dtype):
    """Run a numerical gradient check of ``RoIAlign`` on every input case.

    The calling test is skipped when CUDA is requested but unavailable, when
    the op cannot be imported, or when ``dtype`` is ``torch.half``.

    Args:
        device (str): Device on which the tensors are created.
        dtype (torch.dtype): Data type of the feature map and the rois.
    """
    wants_gpu = device == 'cuda'
    if wants_gpu and not torch.cuda.is_available():
        pytest.skip('test requires GPU')
    try:
        from mmcv.ops import RoIAlign
    except ModuleNotFoundError:
        pytest.skip('RoIAlign op is not successfully compiled')
    if dtype is torch.half:
        pytest.skip('grad check does not support fp16')

    # parrots' gradcheck takes different keyword arguments than torch's.
    on_parrots = torch.__version__ == 'parrots'
    out_size = (pool_h, pool_w)
    for feature_data, roi_data in inputs:
        feature = torch.tensor(
            np.array(feature_data),
            dtype=dtype,
            device=device,
            requires_grad=True)
        boxes = torch.tensor(np.array(roi_data), dtype=dtype, device=device)

        align_op = RoIAlign(out_size, spatial_scale, sampling_ratio)

        if on_parrots:
            gradcheck(
                align_op, (feature, boxes),
                no_grads=[boxes],
                delta=1e-5,
                pt_atol=1e-5)
        else:
            gradcheck(align_op, (feature, boxes), eps=1e-5, atol=1e-5)


def _test_roialign_allclose(device, dtype):
    """Compare ``roi_align`` forward and backward results with the fixtures.

    The calling test is skipped when CUDA is requested but unavailable or
    when the op cannot be imported.

    Args:
        device (str): Device on which the tensors are created.
        dtype (torch.dtype): Data type of the feature map and the rois.
    """
    if device == 'cuda' and not torch.cuda.is_available():
        pytest.skip('test requires GPU')
    try:
        from mmcv.ops import roi_align
    except ModuleNotFoundError:
        pytest.skip('test requires compilation')

    # Local op settings: 2x2 output, unit spatial scale, sampling ratio 2.
    out_size = (2, 2)
    scale = 1.0
    ratio = 2

    for (feature_data, roi_data), (want_out, want_grad) in zip(
            inputs, outputs):
        np_output = np.array(want_out)
        np_grad = np.array(want_grad)

        feature = torch.tensor(
            np.array(feature_data),
            dtype=dtype,
            device=device,
            requires_grad=True)
        boxes = torch.tensor(np.array(roi_data), dtype=dtype, device=device)

        pooled = roi_align(feature, boxes, out_size, scale, ratio, 'avg',
                           True)
        # Seed the backward pass with ones to obtain the input gradient.
        pooled.backward(torch.ones_like(pooled))

        # Results are cast to float32 before the numpy comparison.
        actual_out = pooled.data.type(torch.float).cpu().numpy()
        assert np.allclose(actual_out, np_output, atol=1e-3)
        actual_grad = feature.grad.data.type(torch.float).cpu().numpy()
        assert np.allclose(actual_grad, np_grad, atol=1e-3)


@pytest.mark.parametrize('device', ['cuda', 'cpu'])
@pytest.mark.parametrize('dtype', [torch.float, torch.double, torch.half])
def test_roialign(device, dtype):
    """Run the RoIAlign checks for each device / dtype combination.

    The gradient check is executed for ``torch.double`` only; the value
    comparison is executed for every dtype.
    """
    needs_gradcheck = dtype is torch.double
    if needs_gradcheck:
        _test_roialign_gradcheck(device=device, dtype=dtype)
    _test_roialign_allclose(device=device, dtype=dtype)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_ops/test_roi_align_rotated.py
================================================
import numpy as np
import pytest
import torch

# Records which backend supplied ``gradcheck``: True when it was imported from
# parrots, False after falling back to torch.autograd.
_USING_PARROTS = True
try:
    from parrots.autograd import gradcheck
except ImportError:
    from torch.autograd import gradcheck
    _USING_PARROTS = False

# Test cases as (feature map, rois) pairs of nested lists; the helpers below
# convert them with np.array / torch.tensor.
# NOTE(review): every roi row has six values, presumably
# (batch_idx, cx, cy, w, h, angle) -- confirm against the RoIAlignRotated
# documentation.
# yapf:disable
inputs = [([[[[1., 2.], [3., 4.]]]],
           [[0., 0.5, 0.5, 1., 1., 0]]),
          ([[[[1., 2.], [3., 4.]]]],
           [[0., 0.5, 0.5, 1., 1., np.pi / 2]]),
          ([[[[1., 2.], [3., 4.]],
             [[4., 3.], [2., 1.]]]],
           [[0., 0.5, 0.5, 1., 1., 0]]),
          ([[[[1., 2., 5., 6.], [3., 4., 7., 8.],
              [9., 10., 13., 14.], [11., 12., 15., 16.]]]],
           [[0., 1.5, 1.5, 3., 3., 0]]),
          ([[[[1., 2., 5., 6.], [3., 4., 7., 8.],
              [9., 10., 13., 14.], [11., 12., 15., 16.]]]],
           [[0., 1.5, 1.5, 3., 3., np.pi / 2]])]
# Expected results, index-aligned with ``inputs``: each entry is
# (forward output, gradient w.r.t. the feature map), as consumed by
# _test_roialign_rotated_allclose.
outputs = [([[[[1.0, 1.25], [1.5, 1.75]]]],
            [[[[3.0625, 0.4375], [0.4375, 0.0625]]]]),
           ([[[[1.5, 1], [1.75, 1.25]]]],
            [[[[3.0625, 0.4375], [0.4375, 0.0625]]]]),
           ([[[[1.0, 1.25], [1.5, 1.75]],
              [[4.0, 3.75], [3.5, 3.25]]]],
            [[[[3.0625, 0.4375], [0.4375, 0.0625]],
              [[3.0625, 0.4375], [0.4375, 0.0625]]]]),
           ([[[[1.9375, 4.75], [7.5625, 10.375]]]],
            [[[[0.47265625, 0.42968750, 0.42968750, 0.04296875],
               [0.42968750, 0.39062500, 0.39062500, 0.03906250],
               [0.42968750, 0.39062500, 0.39062500, 0.03906250],
               [0.04296875, 0.03906250, 0.03906250, 0.00390625]]]]),
           ([[[[7.5625, 1.9375], [10.375, 4.75]]]],
            [[[[0.47265625, 0.42968750, 0.42968750, 0.04296875],
               [0.42968750, 0.39062500, 0.39062500, 0.03906250],
               [0.42968750, 0.39062500, 0.39062500, 0.03906250],
               [0.04296875, 0.03906250, 0.03906250, 0.00390625]]]])]
# yapf:enable

# Module-level RoIAlignRotated settings read by
# _test_roialign_rotated_gradcheck; _test_roialign_rotated_allclose defines
# local variables with the same names.
pool_h = 2
pool_w = 2
spatial_scale = 1.0
sampling_ratio = 2


def _test_roialign_rotated_gradcheck(device, dtype):
    """Run a numerical gradient check of ``RoIAlignRotated`` on every case.

    The calling test is skipped when CUDA is requested but unavailable, when
    the op cannot be imported, or when ``dtype`` is ``torch.half``.

    Args:
        device (str): Device on which the tensors are created.
        dtype (torch.dtype): Data type of the feature map and the rois.
    """
    wants_gpu = device == 'cuda'
    if wants_gpu and not torch.cuda.is_available():
        pytest.skip('unittest does not support GPU yet.')
    try:
        from mmcv.ops import RoIAlignRotated
    except ModuleNotFoundError:
        pytest.skip('RoIAlignRotated op is not successfully compiled')
    if dtype is torch.half:
        pytest.skip('grad check does not support fp16')

    # parrots' gradcheck takes different keyword arguments than torch's.
    on_parrots = torch.__version__ == 'parrots'
    out_size = (pool_h, pool_w)
    for feature_data, roi_data in inputs:
        feature = torch.tensor(
            np.array(feature_data),
            dtype=dtype,
            device=device,
            requires_grad=True)
        boxes = torch.tensor(np.array(roi_data), dtype=dtype, device=device)

        align_op = RoIAlignRotated(out_size, spatial_scale, sampling_ratio)

        if on_parrots:
            gradcheck(
                align_op, (feature, boxes),
                no_grads=[boxes],
                delta=1e-5,
                pt_atol=1e-5)
        else:
            gradcheck(align_op, (feature, boxes), eps=1e-5, atol=1e-5)


def _test_roialign_rotated_allclose(device, dtype):
    """Compare ``roi_align_rotated`` forward and backward results with the
    fixtures.

    The calling test is skipped when CUDA is requested but unavailable or
    when the op cannot be imported.

    Args:
        device (str): Device on which the tensors are created.
        dtype (torch.dtype): Data type of the feature map and the rois.
    """
    if device == 'cuda' and not torch.cuda.is_available():
        pytest.skip('unittest does not support GPU yet.')
    try:
        from mmcv.ops import roi_align_rotated
    except ModuleNotFoundError:
        pytest.skip('test requires compilation')

    # Local op settings: 2x2 output, unit spatial scale, sampling ratio 2.
    out_size = (2, 2)
    scale = 1.0
    ratio = 2

    for (feature_data, roi_data), (want_out, want_grad) in zip(
            inputs, outputs):
        np_output = np.array(want_out)
        np_grad = np.array(want_grad)

        feature = torch.tensor(
            np.array(feature_data),
            dtype=dtype,
            device=device,
            requires_grad=True)
        boxes = torch.tensor(np.array(roi_data), dtype=dtype, device=device)

        pooled = roi_align_rotated(feature, boxes, out_size, scale, ratio,
                                   True)
        # Seed the backward pass with ones to obtain the input gradient.
        pooled.backward(torch.ones_like(pooled))

        # Results are cast to float32 before the numpy comparison.
        actual_out = pooled.data.type(torch.float).cpu().numpy()
        assert np.allclose(actual_out, np_output, atol=1e-3)
        actual_grad = feature.grad.data.type(torch.float).cpu().numpy()
        assert np.allclose(actual_grad, np_grad, atol=1e-3)


@pytest.mark.parametrize('device', ['cuda', 'cpu'])
@pytest.mark.parametrize('dtype', [torch.float, torch.double, torch.half])
def test_roialign_rotated(device, dtype):
    """Run the RoIAlignRotated checks for each device / dtype combination.

    The gradient check is executed for ``torch.double`` only; the value
    comparison is executed for every dtype.
    """
    needs_gradcheck = dtype is torch.double
    if needs_gradcheck:
        _test_roialign_rotated_gradcheck(device=device, dtype=dtype)
    _test_roialign_rotated_allclose(device=device, dtype=dtype)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_ops/test_roi_pool.py
================================================
import os

import numpy as np
import torch

# Records which backend supplied ``gradcheck``: True when it was imported from
# parrots, False after falling back to torch.autograd. TestRoiPool uses the
# flag to decide whether the gradient check is actually executed.
_USING_PARROTS = True
try:
    from parrots.autograd import gradcheck
except ImportError:
    from torch.autograd import gradcheck

    _USING_PARROTS = False

# Directory containing this test module.
# NOTE(review): not referenced by the tests in this file.
cur_dir = os.path.dirname(os.path.abspath(__file__))

# Test cases as (feature map, rois) pairs of nested lists.
# NOTE(review): every roi row has five values, presumably
# (batch_idx, x1, y1, x2, y2) -- confirm against the RoIPool documentation.
inputs = [([[[[1., 2.], [3., 4.]]]], [[0., 0., 0., 1., 1.]]),
          ([[[[1., 2.], [3., 4.]], [[4., 3.], [2.,
                                               1.]]]], [[0., 0., 0., 1., 1.]]),
          ([[[[1., 2., 5., 6.], [3., 4., 7., 8.], [9., 10., 13., 14.],
              [11., 12., 15., 16.]]]], [[0., 0., 0., 3., 3.]])]
# Expected results, index-aligned with ``inputs``: each entry is
# (forward output, gradient w.r.t. the feature map), as consumed by
# TestRoiPool._test_roipool_allclose.
outputs = [([[[[1., 2.], [3., 4.]]]], [[[[1., 1.], [1., 1.]]]]),
           ([[[[1., 2.], [3., 4.]], [[4., 3.], [2., 1.]]]], [[[[1., 1.],
                                                               [1., 1.]],
                                                              [[1., 1.],
                                                               [1., 1.]]]]),
           ([[[[4., 8.], [12., 16.]]]], [[[[0., 0., 0., 0.], [0., 1., 0., 1.],
                                           [0., 0., 0., 0.], [0., 1., 0.,
                                                              1.]]]])]


class TestRoiPool(object):
    """CUDA-only tests for ``RoIPool`` / ``roi_pool`` from ``mmcv.ops``."""

    def test_roipool_gradcheck(self):
        """Run a numerical gradient check of ``RoIPool`` on every input case.

        When ``gradcheck`` comes from parrots the check itself is not
        executed (it was already disabled); the module is still constructed.
        """
        if not torch.cuda.is_available():
            # Skip explicitly: a bare ``return`` would make pytest report the
            # test as passed even though nothing was verified.
            import pytest
            pytest.skip('requires CUDA support')
        from mmcv.ops import RoIPool
        pool_h = 2
        pool_w = 2
        spatial_scale = 1.0

        for case in inputs:
            np_input = np.array(case[0])
            np_rois = np.array(case[1])

            x = torch.tensor(np_input, device='cuda', requires_grad=True)
            rois = torch.tensor(np_rois, device='cuda')

            froipool = RoIPool((pool_h, pool_w), spatial_scale)

            # Disabled for parrots:
            # gradcheck(froipool, (x, rois), no_grads=[rois])
            if not _USING_PARROTS:
                gradcheck(froipool, (x, rois), eps=1e-2, atol=1e-2)

    def _test_roipool_allclose(self, dtype=torch.float):
        """Compare ``roi_pool`` forward and backward results with fixtures.

        Args:
            dtype (torch.dtype): Data type of the feature map and the rois.
        """
        if not torch.cuda.is_available():
            # Propagates to the calling test, which is then reported as
            # skipped instead of passed.
            import pytest
            pytest.skip('requires CUDA support')
        from mmcv.ops import roi_pool
        pool_h = 2
        pool_w = 2
        spatial_scale = 1.0

        # ``expected`` is kept distinct from the op result below; the loop
        # variable used to be rebound to the op output inside the loop.
        for case, expected in zip(inputs, outputs):
            np_input = np.array(case[0])
            np_rois = np.array(case[1])
            np_output = np.array(expected[0])
            np_grad = np.array(expected[1])

            x = torch.tensor(
                np_input, dtype=dtype, device='cuda', requires_grad=True)
            rois = torch.tensor(np_rois, dtype=dtype, device='cuda')

            output = roi_pool(x, rois, (pool_h, pool_w), spatial_scale)
            # Seed the backward pass with ones to obtain the input gradient.
            output.backward(torch.ones_like(output))
            # The third positional argument of np.allclose is rtol; spell it
            # out so it is not mistaken for an absolute tolerance.
            assert np.allclose(
                output.data.cpu().numpy(), np_output, rtol=1e-3)
            assert np.allclose(x.grad.data.cpu().numpy(), np_grad, rtol=1e-3)

    def test_roipool_allclose(self):
        """Run the value comparison for double, float and half precision."""
        for dtype in (torch.double, torch.float, torch.half):
            self._test_roipool_allclose(dtype)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_ops/test_roiaware_pool3d.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import numpy as np
import pytest
import torch

from mmcv.ops import (RoIAwarePool3d, points_in_boxes_all, points_in_boxes_cpu,
                      points_in_boxes_part)


@pytest.mark.skipif(
    not torch.cuda.is_available(), reason='requires CUDA support')
def test_RoIAwarePool3d():
    """Run ``RoIAwarePool3d`` in 'max' and 'avg' mode on CUDA and check the
    pooled shape and the sum of the pooled features."""
    # One pooling module per mode, created in the order max -> avg.
    pools = {
        mode: RoIAwarePool3d(out_size=4, max_pts_per_voxel=128, mode=mode)
        for mode in ('max', 'avg')
    }

    # boxes (m, 7) with bottom center in lidar coordinate
    box_rows = [[1.0, 2.0, 3.0, 5.0, 4.0, 6.0, -0.3 - np.pi / 2],
                [-10.0, 23.0, 16.0, 20.0, 10.0, 20.0, -0.5 - np.pi / 2]]
    rois = torch.tensor(box_rows, dtype=torch.float32).cuda()

    # points (n, 3) in lidar coordinate
    point_rows = [[1, 2, 3.3], [1.2, 2.5, 3.0], [0.8, 2.1, 3.5],
                  [1.6, 2.6, 3.6], [0.8, 1.2, 3.9], [-9.2, 21.0, 18.2],
                  [3.8, 7.9, 6.3], [4.7, 3.5, -12.2], [3.8, 7.6, -2],
                  [-10.6, -12.9, -20], [-16, -18, 9], [-21.3, -52, -5],
                  [0, 0, 0], [6, 7, 8], [-2, -3, -4]]
    pts = torch.tensor(point_rows, dtype=torch.float32).cuda()
    # The point coordinates double as the per-point features.
    pts_feature = pts.clone()

    pooled_shape = torch.Size([2, 4, 4, 4, 3])
    for mode, expected_sum in (('max', 51.100), ('avg', 49.750)):
        pooled = pools[mode](rois=rois, pts=pts, pts_feature=pts_feature)
        assert pooled.shape == pooled_shape
        assert torch.allclose(pooled.sum(),
                              torch.tensor(expected_sum).cuda(), 1e-3)


@pytest.mark.skipif(
    not torch.cuda.is_available(), reason='requires CUDA support')
def test_points_in_boxes_part():
    """Check ``points_in_boxes_part`` on CUDA for a two-batch case and for a
    single rotated box."""

    def on_gpu(rows, dtype=torch.float32):
        # Build the tensor on the host, then move it to the GPU.
        return torch.tensor(rows, dtype=dtype).cuda()

    # boxes (b, t, 7) with bottom center in lidar coordinate
    boxes = on_gpu([[[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 0.3]],
                    [[-10.0, 23.0, 16.0, 10, 20, 20, 0.5]]])
    # points (b, m, 3) in lidar coordinate
    pts = on_gpu([[[1, 2, 3.3], [1.2, 2.5, 3.0], [0.8, 2.1, 3.5],
                   [1.6, 2.6, 3.6], [0.8, 1.2, 3.9], [-9.2, 21.0, 18.2],
                   [3.8, 7.9, 6.3], [4.7, 3.5, -12.2]],
                  [[3.8, 7.6, -2], [-10.6, -12.9, -20], [-16, -18, 9],
                   [-21.3, -52, -5], [0, 0, 0], [6, 7, 8], [-2, -3, -4],
                   [6, 4, 9]]])

    found = points_in_boxes_part(points=pts, boxes=boxes)
    wanted = on_gpu(
        [[0, 0, 0, 0, 0, -1, -1, -1], [-1, -1, -1, -1, -1, -1, -1, -1]],
        dtype=torch.int32)
    assert found.shape == torch.Size([2, 8])
    assert (found == wanted).all()

    # Second case: a single box rotated by 30 degrees.
    boxes = on_gpu([[[0.0, 0.0, 0.0, 1.0, 20.0, 1.0, 0.523598]]])
    pts = on_gpu([[[4, 6.928, 0], [6.928, 4, 0], [4, -6.928, 0],
                   [6.928, -4, 0], [-4, 6.928, 0], [-6.928, 4, 0],
                   [-4, -6.928, 0], [-6.928, -4, 0]]])
    found = points_in_boxes_part(points=pts, boxes=boxes)
    wanted = on_gpu([[-1, -1, 0, -1, 0, -1, -1, -1]], dtype=torch.int32)
    assert (found == wanted).all()


def test_points_in_boxes_cpu():
    """Check ``points_in_boxes_cpu`` for a two-box case and for a single
    rotated box."""
    # Boxes with bottom center in lidar coordinate; the literal has shape
    # (1, 2, 7).
    box_rows = [[[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 0.3],
                 [-10.0, 23.0, 16.0, 10, 20, 20, 0.5]]]
    boxes = torch.tensor(box_rows, dtype=torch.float32)
    # Points in lidar coordinate; the literal has shape (1, 15, 3).
    point_rows = [[[1, 2, 3.3], [1.2, 2.5, 3.0], [0.8, 2.1, 3.5],
                   [1.6, 2.6, 3.6], [0.8, 1.2, 3.9], [-9.2, 21.0, 18.2],
                   [3.8, 7.9, 6.3], [4.7, 3.5, -12.2], [3.8, 7.6, -2],
                   [-10.6, -12.9, -20], [-16, -18, 9], [-21.3, -52, -5],
                   [0, 0, 0], [6, 7, 8], [-2, -3, -4]]]
    pts = torch.tensor(point_rows, dtype=torch.float32)

    found = points_in_boxes_cpu(points=pts, boxes=boxes)
    # One row per point, one column per box.
    wanted = torch.tensor(
        [[[1, 0], [1, 0], [1, 0], [1, 0], [1, 0], [0, 1], [0, 0], [0, 0],
          [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0]]],
        dtype=torch.int32)
    assert found.shape == torch.Size([1, 15, 2])
    assert (found == wanted).all()

    # Second case: a single box rotated by 30 degrees.
    boxes = torch.tensor([[[0.0, 0.0, 0.0, 1.0, 20.0, 1.0, 0.523598]]],
                         dtype=torch.float32)
    point_rows = [[[4, 6.928, 0], [6.928, 4, 0], [4, -6.928, 0],
                   [6.928, -4, 0], [-4, 6.928, 0], [-6.928, 4, 0],
                   [-4, -6.928, 0], [-6.928, -4, 0]]]
    pts = torch.tensor(point_rows, dtype=torch.float32)
    found = points_in_boxes_cpu(points=pts, boxes=boxes)
    wanted = torch.tensor(
        [[[0], [0], [1], [0], [1], [0], [0], [0]]], dtype=torch.int32)
    assert (found == wanted).all()


@pytest.mark.skipif(
    not torch.cuda.is_available(), reason='requires CUDA support')
def test_points_in_boxes_all():
    """Check ``points_in_boxes_all`` on CUDA for a two-box case."""
    # Boxes with bottom center in lidar coordinate; the literal has shape
    # (1, 2, 7).
    box_rows = [[[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 0.3],
                 [-10.0, 23.0, 16.0, 10, 20, 20, 0.5]]]
    boxes = torch.tensor(box_rows, dtype=torch.float32).cuda()
    # Points in lidar coordinate; the literal has shape (1, 15, 3).
    point_rows = [[[1, 2, 3.3], [1.2, 2.5, 3.0], [0.8, 2.1, 3.5],
                   [1.6, 2.6, 3.6], [0.8, 1.2, 3.9], [-9.2, 21.0, 18.2],
                   [3.8, 7.9, 6.3], [4.7, 3.5, -12.2], [3.8, 7.6, -2],
                   [-10.6, -12.9, -20], [-16, -18, 9], [-21.3, -52, -5],
                   [0, 0, 0], [6, 7, 8], [-2, -3, -4]]]
    pts = torch.tensor(point_rows, dtype=torch.float32).cuda()

    found = points_in_boxes_all(points=pts, boxes=boxes)
    # One row per point, one column per box.
    wanted_rows = [[[1, 0], [1, 0], [1, 0], [1, 0], [1, 0], [0, 1], [0, 0],
                    [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0],
                    [0, 0]]]
    wanted = torch.tensor(wanted_rows, dtype=torch.int32).cuda()
    assert found.shape == torch.Size([1, 15, 2])
    assert (found == wanted).all()


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_ops/test_roipoint_pool3d.py
================================================
import pytest
import torch

from mmcv.ops import RoIPointPool3d


@pytest.mark.skipif(
    not torch.cuda.is_available(), reason='requires CUDA support')
def test_gather_points():
    """Check ``RoIPointPool3d`` with four sampled points per RoI on CUDA."""
    point_rows = [[1, 2, 3.3], [1.2, 2.5, 3.0], [0.8, 2.1, 3.5],
                  [1.6, 2.6, 3.6], [0.8, 1.2, 3.9], [-9.2, 21.0, 18.2],
                  [3.8, 7.9, 6.3], [4.7, 3.5, -12.2], [3.8, 7.6, -2],
                  [-10.6, -12.9, -20], [-16, -18, 9], [-21.3, -52, -5],
                  [0, 0, 0], [6, 7, 8], [-2, -3, -4]]
    # A leading batch axis is added before moving the data to the GPU.
    feats = torch.tensor(point_rows, dtype=torch.float32).unsqueeze(0).cuda()
    # The same values serve as point coordinates and as point features.
    points = feats.clone()
    box_rows = [[[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 0.3],
                 [-10.0, 23.0, 16.0, 10, 20, 20, 0.5]]]
    rois = torch.tensor(box_rows, dtype=torch.float32).cuda()

    pool = RoIPointPool3d(num_sampled_points=4)
    roi_feat, empty_flag = pool(feats, points, rois)

    # Every expected row holds the same three values twice.
    first_roi = [[1, 2, 3.3, 1, 2, 3.3],
                 [1.2, 2.5, 3, 1.2, 2.5, 3],
                 [0.8, 2.1, 3.5, 0.8, 2.1, 3.5],
                 [1.6, 2.6, 3.6, 1.6, 2.6, 3.6]]
    second_roi = [[-9.2, 21, 18.2, -9.2, 21, 18.2],
                  [-9.2, 21, 18.2, -9.2, 21, 18.2],
                  [-9.2, 21, 18.2, -9.2, 21, 18.2],
                  [-9.2, 21, 18.2, -9.2, 21, 18.2]]
    expected_roi_feat = torch.tensor([[first_roi, second_roi]]).cuda()
    expected_empty_flag = torch.tensor([[0, 0]]).int().cuda()

    assert torch.allclose(roi_feat, expected_roi_feat)
    assert torch.allclose(empty_flag, expected_empty_flag)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_ops/test_rotated_feature_align.py
================================================
import pytest
import torch

from mmcv.ops import rotated_feature_align


@pytest.mark.skipif(
    not torch.cuda.is_available(), reason='requires CUDA support')
def test_rotated_feature_align():
    """Check ``rotated_feature_align`` forward and backward results on CUDA.

    The op is called with ``spatial_scale=1 / 8`` and ``points=1``; its output
    and the gradient accumulated on ``feature`` are compared with hard-coded
    reference tensors using a relative tolerance of 1e-2.
    """
    # Input features; the literal has shape (2, 3, 4, 4).
    feature = torch.tensor([[[[1.2924, -0.2172, -0.5222, 0.1172],
                              [0.9144, 1.2248, 1.3115, -0.9690],
                              [-0.8949, -1.1797, -0.9093, -0.3961],
                              [-0.4586, 0.5062, -0.7947, -0.7397]],
                             [[-1.0943, -0.7495, 1.3461, -1.1652],
                              [0.2034, 0.6763, -1.2357, 0.5231],
                              [-1.0062, 1.2592, 1.4225, -0.3951],
                              [-0.1242, -1.6240, 0.1932, 2.7181]],
                             [[-1.6271, -1.0276, 0.0578, -0.2997],
                              [-0.9684, -1.6946, -1.3188, -1.1938],
                              [-1.6744, -0.8917, -0.6556,
                               1.0073], [-0.1205, 0.3671, -0.3731, -0.5347]]],
                            [[[0.7035, 0.2089, -0.1774, 3.4670],
                              [-0.8505, -0.9278, 1.4714, 0.1644],
                              [0.0898, 0.3531, -0.4007, 0.1927],
                              [1.2569, -0.2636, -0.5223, 0.0616]],
                             [[0.1760, -0.7639, -0.4600, -1.3260],
                              [-0.9921, -0.2970, -0.8955, 1.0508],
                              [1.3515, -0.1641, 1.9679, 1.1986],
                              [-0.3616, 0.6287, 0.4933, 0.3360]],
                             [[-0.5860, 0.2124, -0.8700, 2.4200],
                              [-0.0551, -1.5103, -1.6779, 0.8399],
                              [0.8431, 1.2414, -1.1243, -0.3887],
                              [-2.1254, 0.6047, -0.3515, 0.7254]]]],
                           device='cuda',
                           requires_grad=True)

    # Boxes; the literal has shape (2, 4, 4, 5), i.e. five values per entry.
    # NOTE(review): presumably one (x, y, w, h, angle) box per feature-map
    # location -- confirm against the rotated_feature_align documentation.
    bbox = torch.tensor(
        [[[[1.3080e+01, 1.2688e+01, 1.1214e+01, 9.3944e+01, -9.1905e-01],
           [3.8104e+01, 1.0134e+01, 1.4659e+02, 9.0306e+01, -9.8211e-01],
           [-5.3213e+01, 4.9508e+01, 5.1513e+01, 3.2055e+01, -3.1954e-01],
           [2.6974e+01, 2.5248e+01, 5.4495e+01, 3.1083e+00, -6.2127e-01]],
          [[-1.5604e+01, -5.1908e+01, 2.3998e+02, 1.5008e+01, -1.2546e+00],
           [3.1354e+01, -7.3635e+00, 6.7879e+01, 3.5081e+01, -3.3851e-01],
           [-5.3292e+00, 9.1946e+00, 1.2834e+01, 1.0485e+01, -1.3039e+00],
           [-2.3925e+01, 3.6623e+01, 3.9875e+01, 7.2009e+01, -6.5934e-01]],
          [[7.2114e+01, -2.3781e+01, 2.9106e+01, 8.4501e+01, -1.1340e+00],
           [2.6258e+01, -7.7034e+00, 1.7629e+02, 1.0615e+02, -1.2156e+00],
           [3.8057e+01, 4.6016e+01, 1.2965e+01, 6.9384e+00, -1.0855e+00],
           [2.4428e+01, -1.6189e+01, 2.0572e+02, 3.1622e+01, -1.5719e-01]],
          [[3.8226e+00, 2.9608e+01, 1.4457e+01, 6.8179e+01, -9.1997e-01],
           [2.5003e+01, -4.2490e+01, 9.6007e+01, 4.9086e+01, -1.4786e+00],
           [8.5983e+01, 5.4980e+01, 7.8080e+01, 1.0003e+02, -1.0926e+00],
           [9.9065e+00, 4.1457e+01, 5.9799e+00, 1.7973e+01, -5.6313e-01]]],
         [[[-1.8244e+01, 4.6309e+00, 5.3010e+01, 2.4310e+01, -7.0345e-01],
           [1.9419e+01, 3.6704e+01, 5.2390e+01, 5.4133e+01, -3.7730e-01],
           [5.6387e+01, 2.3752e+01, 9.0441e+00, 1.7792e+01, -1.5583e+00],
           [3.6303e+01, 1.6396e+01, 2.0283e+01, 1.9148e+01, -8.3419e-01]],
          [[3.2169e+01, 3.0521e+01, 2.6283e+01, 1.9680e+02, -3.0454e-01],
           [2.5788e+01, -3.2189e+01, 8.8882e+01, 1.0207e+02, -1.5328e+00],
           [8.4676e+00, -1.6668e+01, 2.4657e+01, 1.1275e+02, -4.0388e-01],
           [-1.0799e+01, 6.0422e+00, 9.5807e+00, 3.3677e+01, -3.5438e-01]],
          [[6.9363e+01, 1.0850e+01, 2.5968e+01, 2.2311e+01, -1.6408e-01],
           [2.8140e+00, 4.6843e+00, 3.1289e+00, 2.1480e+01, -6.7583e-01],
           [2.6661e+01, 4.5290e+01, 6.1679e+00, 3.0005e+01, -8.9806e-01],
           [5.0871e+00, 1.3234e+01, 9.2087e+01, 4.9622e+01, -2.8020e-01]],
          [[-1.2643e+01, 2.5176e+01, 5.0488e+01, 5.4246e+01, -4.4840e-01],
           [-3.4521e+01, 9.8435e-01, 5.2413e+01, 9.7996e+00, -8.4218e-01],
           [4.9829e+01, -1.0808e+01, 2.9848e+01, 7.3579e+01, -6.2672e-01],
           [8.0446e+01, 2.8064e+01, 4.5273e+01, 5.3809e+01, -1.2359e+00]]]],
        device='cuda',
        requires_grad=True)

    # Reference forward result; same shape as ``feature``.
    expected_output = torch.tensor([[[[1.1095, -0.2172, -0.5222, -0.6225],
                                      [0.9144, 0.7662, 1.0487, -0.9690],
                                      [-0.8949, -1.6384, -0.9093, -0.3961],
                                      [-0.8604, 0.5062, -0.7947, -0.7397]],
                                     [[-0.3961, -0.7495, 1.3461, 1.5528],
                                      [0.2034, 0.5522, -1.6722, 0.5231],
                                      [-1.0062, 1.1350, 1.4225, -0.3951],
                                      [-0.4826, -1.6240, 0.1932, 2.7181]],
                                     [[-2.6436, -1.0276, 0.0578, -0.8344],
                                      [-0.9684, -1.8151, -2.1843, -1.1938],
                                      [-1.6744, -1.0121, -0.6556, 1.0073],
                                      [-0.8474, 0.3671, -0.3731, -0.5347]]],
                                    [[[0.7035, 0.2089, -0.1774, 3.4670],
                                      [-0.8505, -0.9278, 1.4714, 0.1644],
                                      [0.0898, 0.3064, -0.4007, 0.5849],
                                      [1.2569, -0.2636, -0.5223, 0.0616]],
                                     [[0.1760, -0.7639, -0.4600, -1.3260],
                                      [-0.9921, -0.2970, -0.8955, 1.0508],
                                      [1.3515, -0.6125, 1.9679, 0.5550],
                                      [-0.3616, 0.6287, 0.4933, 0.3360]],
                                     [[-0.5860, 0.2124, -0.8700, 2.4200],
                                      [-0.0551, -1.5103, -1.6779, 0.8399],
                                      [0.8431, 0.8455, -1.1243, -1.5994],
                                      [-2.1254, 0.6047, -0.3515,
                                       0.7254]]]]).cuda()

    # Reference gradient w.r.t. ``feature`` for a backward pass seeded with
    # ones; the three channel blocks of each sample are identical.
    expected_grad = torch.tensor([[[[1.0000, 1.8507, 1.1493, 1.5222],
                                    [1.0000, 1.1511, 1.2139, 1.4778],
                                    [1.0000, 1.2629, 1.3721, 1.0000],
                                    [3.0000, 1.0000, 1.0000, 2.0000]],
                                   [[1.0000, 1.8507, 1.1493, 1.5222],
                                    [1.0000, 1.1511, 1.2139, 1.4778],
                                    [1.0000, 1.2629, 1.3721, 1.0000],
                                    [3.0000, 1.0000, 1.0000, 2.0000]],
                                   [[1.0000, 1.8507, 1.1493, 1.5222],
                                    [1.0000, 1.1511, 1.2139, 1.4778],
                                    [1.0000, 1.2629, 1.3721, 1.0000],
                                    [3.0000, 1.0000, 1.0000, 2.0000]]],
                                  [[[1.2687, 1.5055, 1.2382, 1.0000],
                                    [1.1458, 1.4258, 1.4160, 1.0000],
                                    [1.0000, 1.0000, 1.0000, 1.0000],
                                    [1.0000, 1.0000, 1.0000, 1.0000]],
                                   [[1.2687, 1.5055, 1.2382, 1.0000],
                                    [1.1458, 1.4258, 1.4160, 1.0000],
                                    [1.0000, 1.0000, 1.0000, 1.0000],
                                    [1.0000, 1.0000, 1.0000, 1.0000]],
                                   [[1.2687, 1.5055, 1.2382, 1.0000],
                                    [1.1458, 1.4258, 1.4160, 1.0000],
                                    [1.0000, 1.0000, 1.0000, 1.0000],
                                    [1.0000, 1.0000, 1.0000,
                                     1.0000]]]]).cuda()

    output = rotated_feature_align(
        feature, bbox, spatial_scale=1 / 8, points=1)
    # Seed the backward pass with ones so feature.grad can be compared with
    # expected_grad.
    output.backward(torch.ones_like(output))
    # The third positional argument of torch.allclose is rtol.
    assert torch.allclose(output, expected_output, 1e-2)
    assert torch.allclose(feature.grad, expected_grad, 1e-2)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_ops/test_saconv.py
================================================
import torch
import torch.nn as nn

from mmcv.ops import SAConv2d


def test_sacconv():
    """Check that ``SAConv2d`` yields the same output shape as ``nn.Conv2d``.

    Four configurations are compared against an equally configured plain
    convolution: the default 3x3 case, a dilated kernel, the deformable
    variant (run on the GPU when one is available) and grouped convolution.
    Only output shapes are asserted, never values.
    """
    basic_cfg = dict(kernel_size=3, padding=1)

    # plain 3x3 convolution
    inp = torch.rand(1, 3, 256, 256)
    sac_shape = SAConv2d(3, 5, **basic_cfg)(inp).shape
    ref_shape = nn.Conv2d(3, 5, **basic_cfg)(inp).shape
    assert sac_shape == ref_shape

    # dilation >= 2
    dilated_cfg = dict(kernel_size=3, padding=2, dilation=2)
    dilated_shape = SAConv2d(3, 5, **dilated_cfg)(inp).shape
    ref_shape = nn.Conv2d(3, 5, **dilated_cfg)(inp).shape
    assert dilated_shape == ref_shape

    # deformable variant: a CPU layer is always built first and is replaced
    # by a GPU instance (with a fresh GPU input) when CUDA is present
    deform_layer = SAConv2d(3, 5, use_deform=True, **basic_cfg)
    if torch.cuda.is_available():
        inp = torch.rand(1, 3, 256, 256).cuda()
        deform_layer = SAConv2d(3, 5, use_deform=True, **basic_cfg).cuda()
        deform_shape = deform_layer(inp).cuda().shape
        ref_shape = nn.Conv2d(3, 5, **basic_cfg).cuda()(inp).shape
    else:
        deform_shape = deform_layer(inp).shape
        ref_shape = nn.Conv2d(3, 5, **basic_cfg)(inp).shape
    assert deform_shape == ref_shape

    # groups >= 2 (needs a channel count divisible by the group count)
    grouped_cfg = dict(kernel_size=3, padding=1, groups=2)
    inp = torch.rand(1, 4, 256, 256)
    grouped_shape = SAConv2d(4, 4, **grouped_cfg)(inp).shape
    ref_shape = nn.Conv2d(4, 4, **grouped_cfg)(inp).shape
    assert grouped_shape == ref_shape


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_ops/test_scatter_points.py
================================================
import pytest
import torch
from torch.autograd import gradcheck

from mmcv.ops import DynamicScatter


@pytest.mark.skipif(
    not torch.cuda.is_available(), reason='requires CUDA support')
def test_dynamic_scatter():
    """Exercise ``DynamicScatter`` in mean and max reduction modes on CUDA.

    Covers, in order: an empty input, an input whose coordinates are all
    ``-1`` (gradients w.r.t. the features must be zero), a comparison
    against a brute-force per-voxel reference, and a numerical gradient
    check.
    """

    def _order_by_coor(voxel_coors):
        # Fold the three integer coordinates into a single sortable key so
        # the op's output can be aligned with the sorted reference.
        key = (voxel_coors[:, 0] * 400 + voxel_coors[:, 1] * 20 +
               voxel_coors[:, 2])
        return key.argsort()

    num_points = 200000
    feats = torch.rand(
        size=(num_points, 3), dtype=torch.float32, device='cuda') * 100 - 50
    coors = torch.randint(
        low=-1,
        high=20,
        size=(num_points, 3),
        dtype=torch.int32,
        device='cuda')

    scatter_mean = DynamicScatter([0.32, 0.32, 6],
                                  [-74.88, -74.88, -2, 74.88, 74.88, 4], True)
    scatter_max = DynamicScatter([0.32, 0.32, 6],
                                 [-74.88, -74.88, -2, 74.88, 74.88, 4], False)

    # --- empty input: outputs keep the (empty) input shapes ---
    no_feats = torch.empty(size=(0, 3), dtype=torch.float32, device='cuda')
    no_coors = torch.empty(size=(0, 3), dtype=torch.int32, device='cuda')
    no_feats.requires_grad_()

    mean_feats, mean_coors = scatter_mean(no_feats, no_coors)
    mean_feats.sum().backward()
    max_feats, max_coors = scatter_max(no_feats, no_coors)
    max_feats.sum().backward()

    assert mean_feats.shape == no_feats.shape
    assert max_feats.shape == no_feats.shape
    assert mean_coors.shape == no_coors.shape
    assert max_coors.shape == no_coors.shape

    # --- empty reduced output: every coordinate is -1 ---
    dropped_feats = torch.rand(
        size=(num_points, 3), dtype=torch.float32, device='cuda') * 100 - 50
    dropped_coors = torch.randint(
        low=-1,
        high=0,
        size=(num_points, 3),
        dtype=torch.int32,
        device='cuda')
    dropped_feats.requires_grad_()

    for scatter in (scatter_mean, scatter_max):
        reduced_feats, _ = scatter(dropped_feats, dropped_coors)
        reduced_feats.sum().backward()
        assert (dropped_feats.grad == 0).all()

    # --- non-empty input: compare against a brute-force reference ---
    expected_coors = coors.unique(dim=0, sorted=True)
    non_negative = expected_coors.min(dim=-1).values >= 0
    expected_coors = expected_coors[non_negative]
    per_voxel_feats = [
        feats[(coors == voxel_coor).all(dim=-1)]
        for voxel_coor in expected_coors
    ]
    expected_mean = torch.stack(
        [voxel_feats.mean(dim=0) for voxel_feats in per_voxel_feats])
    expected_max = torch.stack(
        [voxel_feats.max(dim=0).values for voxel_feats in per_voxel_feats])

    got_mean, got_mean_coors = scatter_mean(feats, coors)
    mean_order = _order_by_coor(got_mean_coors)
    got_mean = got_mean[mean_order]
    got_mean_coors = got_mean_coors[mean_order]

    got_max, got_max_coors = scatter_max(feats, coors)
    max_order = _order_by_coor(got_max_coors)
    got_max = got_max[max_order]
    got_max_coors = got_max_coors[max_order]

    assert (got_mean_coors == expected_coors).all()
    assert torch.allclose(got_mean, expected_mean, atol=1e-2, rtol=1e-5)
    assert (got_max_coors == expected_coors).all()
    assert torch.allclose(got_max, expected_max, atol=1e-2, rtol=1e-5)

    # --- numerical gradient check on a small problem ---
    feats = torch.rand(
        size=(100, 4), dtype=torch.float32, device='cuda') * 100 - 50
    coors = torch.randint(
        low=-1, high=3, size=(100, 3), dtype=torch.int32, device='cuda')
    feats.requires_grad_()
    gradcheck(scatter_mean, (feats, coors), eps=1e-2, atol=1e-2, rtol=1e-5)
    gradcheck(scatter_max, (feats, coors), eps=1e-2, atol=1e-2, rtol=1e-5)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_ops/test_syncbn.py
================================================
import os
import platform

import numpy as np
import pytest
import torch
import torch.distributed as dist
import torch.nn as nn

if platform.system() == 'Windows':
    import regex as re
else:
    import re


class TestSyncBN(object):
    """Compare mmcv's ``SyncBatchNorm`` against ``nn.BatchNorm2d``.

    The checks only do real work when launched under SLURM with exactly four
    tasks; otherwise each helper prints a hint and returns.
    """

    def dist_init(self):
        """Initialise ``torch.distributed`` (NCCL) from SLURM variables.

        Reads ``SLURM_PROCID``, ``SLURM_NTASKS``, ``SLURM_LOCALID`` and
        ``SLURM_NODELIST`` from the environment, exports ``MASTER_ADDR``,
        ``MASTER_PORT``, ``WORLD_SIZE`` and ``RANK``, creates the default
        process group and selects the local CUDA device.
        """
        rank = int(os.environ['SLURM_PROCID'])
        world_size = int(os.environ['SLURM_NTASKS'])
        local_rank = int(os.environ['SLURM_LOCALID'])
        node_list = str(os.environ['SLURM_NODELIST'])

        # The 2nd to 5th digit runs in the node list are joined into a
        # dotted address.
        # NOTE(review): presumably node names embed the host IPv4 address
        # after one leading number - confirm for the target cluster.
        node_parts = re.findall('[0-9]+', node_list)
        os.environ['MASTER_ADDR'] = (f'{node_parts[1]}.{node_parts[2]}' +
                                     f'.{node_parts[3]}.{node_parts[4]}')
        os.environ['MASTER_PORT'] = '12341'
        os.environ['WORLD_SIZE'] = str(world_size)
        os.environ['RANK'] = str(rank)

        dist.init_process_group('nccl')
        torch.cuda.set_device(local_rank)

    def _test_syncbn_train(self, size=1, half=False):
        """Check forward/backward parity in training mode.

        Args:
            size (int): Number of ranks per synchronisation group; must be
                1, 2 or 4.
            half (bool): If True, the input and upstream gradient are
                converted to fp16 before use.
        """

        # NOTE(review): this returns silently, so pytest reports a pass even
        # though nothing was checked when SLURM with 4 tasks is absent.
        if 'SLURM_NTASKS' not in os.environ or int(
                os.environ['SLURM_NTASKS']) != 4:
            print('must run with slurm has 4 processes!\n'
                  'srun -p test --gres=gpu:4 -n4')
            return
        else:
            print('Running syncbn test')
        from mmcv.ops import SyncBatchNorm

        assert size in (1, 2, 4)
        if not dist.is_initialized():
            self.dist_init()
        rank = dist.get_rank()

        torch.manual_seed(9)
        torch.cuda.manual_seed(9)

        # 16 samples in total; each rank later consumes its own slice of 4.
        self.x = torch.rand(16, 3, 2, 3).cuda()
        self.y_bp = torch.rand(16, 3, 2, 3).cuda()

        if half:
            self.x = self.x.half()
            self.y_bp = self.y_bp.half()
        # Overwrite every rank's tensors with rank 0's values.
        dist.broadcast(self.x, src=0)
        dist.broadcast(self.y_bp, src=0)

        torch.cuda.synchronize()
        # Each rank creates all groups of the chosen size, then picks the
        # one it belongs to.
        if size == 1:
            groups = [None, None, None, None]
            groups[0] = dist.new_group([0])
            groups[1] = dist.new_group([1])
            groups[2] = dist.new_group([2])
            groups[3] = dist.new_group([3])
            group = groups[rank]
        elif size == 2:
            groups = [None, None, None, None]
            groups[0] = groups[1] = dist.new_group([0, 1])
            groups[2] = groups[3] = dist.new_group([2, 3])
            group = groups[rank]
        elif size == 4:
            group = dist.group.WORLD
        syncbn = SyncBatchNorm(3, group=group).cuda()
        syncbn.weight.data[0] = 0.2
        syncbn.weight.data[1] = 0.5
        syncbn.weight.data[2] = 0.7
        syncbn.train()

        # Reference layer with the same per-channel weights.
        bn = nn.BatchNorm2d(3).cuda()
        bn.weight.data[0] = 0.2
        bn.weight.data[1] = 0.5
        bn.weight.data[2] = 0.7
        bn.train()

        # SyncBN path: this rank's 4 samples only.
        sx = self.x[rank * 4:rank * 4 + 4]
        sx.requires_grad_()
        sy = syncbn(sx)
        sy.backward(self.y_bp[rank * 4:rank * 4 + 4])

        smean = syncbn.running_mean
        svar = syncbn.running_var
        sx_grad = sx.grad
        sw_grad = syncbn.weight.grad
        sb_grad = syncbn.bias.grad

        # Reference path: plain BN over all samples of this rank's group
        # (4, 8 or 16 samples).
        if size == 1:
            x = self.x[rank * 4:rank * 4 + 4]
            y_bp = self.y_bp[rank * 4:rank * 4 + 4]
        elif size == 2:
            x = self.x[rank // 2 * 8:rank // 2 * 8 + 8]
            y_bp = self.y_bp[rank // 2 * 8:rank // 2 * 8 + 8]
        elif size == 4:
            x = self.x
            y_bp = self.y_bp
        x.requires_grad_()
        y = bn(x)
        y.backward(y_bp)

        # Cut the reference output down to this rank's 4 samples.
        if size == 2:
            y = y[rank % 2 * 4:rank % 2 * 4 + 4]
        elif size == 4:
            y = y[rank * 4:rank * 4 + 4]

        mean = bn.running_mean
        var = bn.running_var
        # Reference parameter grads are divided by the group size before the
        # comparison.
        # NOTE(review): presumably SyncBatchNorm averages parameter grads
        # across the group - confirm against its implementation.
        if size == 1:
            x_grad = x.grad
            w_grad = bn.weight.grad
            b_grad = bn.bias.grad
        elif size == 2:
            x_grad = x.grad[rank % 2 * 4:rank % 2 * 4 + 4]
            w_grad = bn.weight.grad / 2
            b_grad = bn.bias.grad / 2
        elif size == 4:
            x_grad = x.grad[rank * 4:rank * 4 + 4]
            w_grad = bn.weight.grad / 4
            b_grad = bn.bias.grad / 4

        # The third positional argument of np.allclose is rtol.
        assert np.allclose(mean.data.cpu().numpy(),
                           smean.data.cpu().numpy(), 1e-3)
        assert np.allclose(var.data.cpu().numpy(),
                           svar.data.cpu().numpy(), 1e-3)
        assert np.allclose(y.data.cpu().numpy(), sy.data.cpu().numpy(), 1e-3)
        assert np.allclose(w_grad.data.cpu().numpy(),
                           sw_grad.data.cpu().numpy(), 1e-3)
        assert np.allclose(b_grad.data.cpu().numpy(),
                           sb_grad.data.cpu().numpy(), 1e-3)
        assert np.allclose(x_grad.data.cpu().numpy(),
                           sx_grad.data.cpu().numpy(), 1e-2)

    def _test_syncbn_empty_train(self, size=1, half=False):
        """Same parity check as ``_test_syncbn_train`` for a zero-size batch.

        The input has zero samples and ``SyncBatchNorm`` is built with
        ``stats_mode='N'``. Finally verifies that an unsupported
        ``stats_mode`` raises ``AssertionError``.

        Args:
            size (int): Number of ranks per synchronisation group; must be
                1, 2 or 4.
            half (bool): If True, the input and upstream gradient are
                converted to fp16 before use.
        """

        # NOTE(review): this returns silently, so pytest reports a pass even
        # though nothing was checked when SLURM with 4 tasks is absent.
        if 'SLURM_NTASKS' not in os.environ or int(
                os.environ['SLURM_NTASKS']) != 4:
            print('must run with slurm has 4 processes!\n'
                  'srun -p test --gres=gpu:4 -n4')
            return
        else:
            print('Running syncbn test')
        from mmcv.ops import SyncBatchNorm

        assert size in (1, 2, 4)
        if not dist.is_initialized():
            self.dist_init()
        rank = dist.get_rank()

        torch.manual_seed(9)
        torch.cuda.manual_seed(9)

        # Zero samples: every slice taken below is empty as well.
        self.x = torch.rand(0, 3, 2, 3).cuda()
        self.y_bp = torch.rand(0, 3, 2, 3).cuda()

        if half:
            self.x = self.x.half()
            self.y_bp = self.y_bp.half()
        dist.broadcast(self.x, src=0)
        dist.broadcast(self.y_bp, src=0)

        torch.cuda.synchronize()
        # Each rank creates all groups of the chosen size, then picks the
        # one it belongs to.
        if size == 1:
            groups = [None, None, None, None]
            groups[0] = dist.new_group([0])
            groups[1] = dist.new_group([1])
            groups[2] = dist.new_group([2])
            groups[3] = dist.new_group([3])
            group = groups[rank]
        elif size == 2:
            groups = [None, None, None, None]
            groups[0] = groups[1] = dist.new_group([0, 1])
            groups[2] = groups[3] = dist.new_group([2, 3])
            group = groups[rank]
        elif size == 4:
            group = dist.group.WORLD

        syncbn = SyncBatchNorm(3, group=group, stats_mode='N').cuda()
        syncbn.weight.data[0] = 0.2
        syncbn.weight.data[1] = 0.5
        syncbn.weight.data[2] = 0.7
        syncbn.train()

        # Reference layer with the same per-channel weights.
        bn = nn.BatchNorm2d(3).cuda()
        bn.weight.data[0] = 0.2
        bn.weight.data[1] = 0.5
        bn.weight.data[2] = 0.7
        bn.train()

        # SyncBN path on this rank's (empty) slice.
        sx = self.x[rank * 4:rank * 4 + 4]
        sx.requires_grad_()
        sy = syncbn(sx)
        sy.backward(self.y_bp[rank * 4:rank * 4 + 4])
        smean = syncbn.running_mean
        svar = syncbn.running_var
        sx_grad = sx.grad
        sw_grad = syncbn.weight.grad
        sb_grad = syncbn.bias.grad

        # Reference path with plain BN.
        if size == 1:
            x = self.x[rank * 4:rank * 4 + 4]
            y_bp = self.y_bp[rank * 4:rank * 4 + 4]
        elif size == 2:
            x = self.x[rank // 2 * 8:rank // 2 * 8 + 8]
            y_bp = self.y_bp[rank // 2 * 8:rank // 2 * 8 + 8]
        elif size == 4:
            x = self.x
            y_bp = self.y_bp
        x.requires_grad_()
        y = bn(x)
        y.backward(y_bp)

        if size == 2:
            y = y[rank % 2 * 4:rank % 2 * 4 + 4]
        elif size == 4:
            y = y[rank * 4:rank * 4 + 4]

        mean = bn.running_mean
        var = bn.running_var
        # Reference parameter grads are divided by the group size before the
        # comparison.
        if size == 1:
            x_grad = x.grad
            w_grad = bn.weight.grad
            b_grad = bn.bias.grad
        elif size == 2:
            x_grad = x.grad[rank % 2 * 4:rank % 2 * 4 + 4]
            w_grad = bn.weight.grad / 2
            b_grad = bn.bias.grad / 2
        elif size == 4:
            x_grad = x.grad[rank * 4:rank * 4 + 4]
            w_grad = bn.weight.grad / 4
            b_grad = bn.bias.grad / 4

        # The third positional argument of np.allclose is rtol.
        assert np.allclose(mean.data.cpu().numpy(),
                           smean.data.cpu().numpy(), 1e-3)
        assert np.allclose(var.data.cpu().numpy(),
                           svar.data.cpu().numpy(), 1e-3)
        assert np.allclose(y.data.cpu().numpy(), sy.data.cpu().numpy(), 1e-3)
        assert np.allclose(w_grad.data.cpu().numpy(),
                           sw_grad.data.cpu().numpy(), 1e-3)
        assert np.allclose(b_grad.data.cpu().numpy(),
                           sb_grad.data.cpu().numpy(), 1e-3)
        assert np.allclose(x_grad.data.cpu().numpy(),
                           sx_grad.data.cpu().numpy(), 1e-2)

        # 'stats_mode' only allows 'default' and 'N'
        with pytest.raises(AssertionError):
            SyncBatchNorm(3, group=group, stats_mode='X')

    # --- pytest entry points: one per (group size, precision) pair ---

    def test_syncbn_1(self):
        self._test_syncbn_train(size=1)

    def test_syncbn_2(self):
        self._test_syncbn_train(size=2)

    def test_syncbn_4(self):
        self._test_syncbn_train(size=4)

    def test_syncbn_1_half(self):
        self._test_syncbn_train(size=1, half=True)

    def test_syncbn_2_half(self):
        self._test_syncbn_train(size=2, half=True)

    def test_syncbn_4_half(self):
        self._test_syncbn_train(size=4, half=True)

    def test_syncbn_empty_1(self):
        self._test_syncbn_empty_train(size=1)

    def test_syncbn_empty_2(self):
        self._test_syncbn_empty_train(size=2)

    def test_syncbn_empty_4(self):
        self._test_syncbn_empty_train(size=4)

    def test_syncbn_empty_1_half(self):
        self._test_syncbn_empty_train(size=1, half=True)

    def test_syncbn_empty_2_half(self):
        self._test_syncbn_empty_train(size=2, half=True)

    def test_syncbn_empty_4_half(self):
        self._test_syncbn_empty_train(size=4, half=True)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_ops/test_tensorrt.py
================================================
import os
from functools import partial
from typing import Callable

import numpy as np
import onnx
import pytest
import torch
import torch.nn as nn
import torch.nn.functional as F

# Skip the whole module when the mmcv TensorRT bindings cannot be imported.
try:
    from mmcv.tensorrt import (TRTWrapper, is_tensorrt_plugin_loaded, onnx2trt,
                               save_trt_engine)
except ImportError:
    pytest.skip(
        'TensorRT should be installed from source.', allow_module_level=True)

# The tests below move models and tensors to the GPU via `.cuda()`.
if not torch.cuda.is_available():
    pytest.skip(
        'CUDA is required for this test module', allow_module_level=True)

# Skip as well when mmcv reports that its TensorRT plugins are not loaded.
if not is_tensorrt_plugin_loaded():
    pytest.skip(
        'Test requires to complie TensorRT plugins in mmcv',
        allow_module_level=True)


class WrapFunction(nn.Module):
    """Adapter that exposes an arbitrary callable as an ``nn.Module``.

    Calling the module forwards all positional and keyword arguments to the
    stored callable and returns its result unchanged, which lets plain
    functions be handed to APIs that expect a module (e.g. ONNX export).
    """

    def __init__(self, wrapped_function):
        super().__init__()
        self.wrapped_function = wrapped_function

    def forward(self, *args, **kwargs):
        target = self.wrapped_function
        return target(*args, **kwargs)


# Scratch artifacts (relative to the current working directory) written and
# removed again by the tests below: the exported ONNX graph and the
# serialized TensorRT engine.
onnx_file = 'tmp.onnx'
trt_file = 'tmp.engine'


def test_roialign():
    """Check that ``RoIAlign`` gives the same result in PyTorch and TensorRT.

    For each input case the module is exported to ONNX, converted into a
    TensorRT engine and executed; the TensorRT output must match the PyTorch
    output. The temporary ONNX/engine files are removed even when a step
    fails, so a broken run does not leave stale artifacts behind.
    """
    try:
        from mmcv.ops import RoIAlign
    except (ImportError, ModuleNotFoundError):
        pytest.skip('test requires compilation')

    # trt config
    fp16_mode = False
    max_workspace_size = 1 << 30

    # roi align config
    pool_h = 2
    pool_w = 2
    spatial_scale = 1.0
    sampling_ratio = 2

    # each case is (feature map, rois)
    inputs = [([[[[1., 2.], [3., 4.]]]], [[0., 0., 0., 1., 1.]]),
              ([[[[1., 2.], [3., 4.]], [[4., 3.],
                                        [2., 1.]]]], [[0., 0., 0., 1., 1.]]),
              ([[[[1., 2., 5., 6.], [3., 4., 7., 8.], [9., 10., 13., 14.],
                  [11., 12., 15., 16.]]]], [[0., 0., 0., 3., 3.]])]

    wrapped_model = RoIAlign((pool_w, pool_h), spatial_scale, sampling_ratio,
                             'avg', True).cuda()
    for case in inputs:
        np_input = np.array(case[0], dtype=np.float32)
        np_rois = np.array(case[1], dtype=np.float32)
        input = torch.from_numpy(np_input).cuda()
        rois = torch.from_numpy(np_rois).cuda()

        try:
            with torch.no_grad():
                torch.onnx.export(
                    wrapped_model, (input, rois),
                    onnx_file,
                    export_params=True,
                    keep_initializers_as_inputs=True,
                    input_names=['input', 'rois'],
                    output_names=['roi_feat'],
                    opset_version=11)
            onnx_model = onnx.load(onnx_file)

            # create trt engine and wrapper; min/opt/max shapes are identical
            opt_shape_dict = {
                'input': [list(input.shape),
                          list(input.shape),
                          list(input.shape)],
                'rois': [list(rois.shape),
                         list(rois.shape),
                         list(rois.shape)]
            }
            trt_engine = onnx2trt(
                onnx_model,
                opt_shape_dict,
                fp16_mode=fp16_mode,
                max_workspace_size=max_workspace_size)
            save_trt_engine(trt_engine, trt_file)
            trt_model = TRTWrapper(trt_file, ['input', 'rois'], ['roi_feat'])

            with torch.no_grad():
                trt_outputs = trt_model({'input': input, 'rois': rois})
                trt_roi_feat = trt_outputs['roi_feat']

            # compute pytorch_output
            with torch.no_grad():
                pytorch_roi_feat = wrapped_model(input, rois)
        finally:
            # always drop the scratch files, also when export/build failed
            for tmp_path in (onnx_file, trt_file):
                if os.path.exists(tmp_path):
                    os.remove(tmp_path)

        # allclose
        assert torch.allclose(pytorch_roi_feat, trt_roi_feat)


def test_nms():
    """Check that ``nms`` gives the same result in PyTorch and TensorRT.

    ``ONNX_BACKEND`` is set to ``'MMCVTensorRT'`` for the duration of the
    test. Both the environment variable and the temporary ONNX/engine files
    are cleaned up in a ``finally`` block so that a failure in this test
    cannot leak state into the tests that run afterwards.
    """
    try:
        import mmcv
        from mmcv.ops import nms
    except (ImportError, ModuleNotFoundError):
        pytest.skip('test requires compilation')
    os.environ['ONNX_BACKEND'] = 'MMCVTensorRT'
    try:
        # trt config
        fp16_mode = False
        max_workspace_size = 1 << 30
        data = mmcv.load('./tests/data/batched_nms_data.pkl')
        boxes = torch.from_numpy(data['boxes']).cuda()
        scores = torch.from_numpy(data['scores']).cuda()
        nms = partial(
            nms, iou_threshold=0.7, offset=0, score_threshold=0.1, max_num=100)
        wrapped_model = WrapFunction(nms)
        wrapped_model.cpu().eval()
        with torch.no_grad():
            torch.onnx.export(
                wrapped_model, (boxes.detach().cpu(), scores.detach().cpu()),
                onnx_file,
                export_params=True,
                keep_initializers_as_inputs=True,
                input_names=['boxes', 'scores'],
                output_names=['dets', 'inds'],
                opset_version=11)
        onnx_model = onnx.load(onnx_file)

        # create trt engine and wrapper; min/opt/max shapes are identical
        opt_shape_dict = {
            'boxes': [list(boxes.shape),
                      list(boxes.shape),
                      list(boxes.shape)],
            'scores': [list(scores.shape),
                       list(scores.shape),
                       list(scores.shape)]
        }
        trt_engine = onnx2trt(
            onnx_model,
            opt_shape_dict,
            fp16_mode=fp16_mode,
            max_workspace_size=max_workspace_size)
        save_trt_engine(trt_engine, trt_file)
        trt_model = TRTWrapper(trt_file, ['boxes', 'scores'], ['dets', 'inds'])

        with torch.no_grad():
            trt_outputs = trt_model({'boxes': boxes, 'scores': scores})
            trt_dets = trt_outputs['dets']
            trt_inds = trt_outputs['inds']
            trt_inds = trt_inds.long()

        # compute pytorch_output
        with torch.no_grad():
            pytorch_outputs = wrapped_model(boxes, scores)
            pytorch_dets, pytorch_inds = pytorch_outputs
    finally:
        # always drop the scratch files and the backend override
        for tmp_path in (onnx_file, trt_file):
            if os.path.exists(tmp_path):
                os.remove(tmp_path)
        os.environ.pop('ONNX_BACKEND', None)

    # allclose: only the first `num_boxes` TensorRT rows are compared
    num_boxes = pytorch_dets.shape[0]
    trt_dets = trt_dets[:num_boxes, ...]
    trt_inds = trt_inds[:num_boxes]
    trt_scores = trt_dets[:, 4]
    pytorch_scores = pytorch_dets[:, 4]
    assert torch.allclose(pytorch_scores, trt_scores, atol=1e-3)
    assert torch.equal(pytorch_inds, trt_inds)


def test_batched_nms():
    """Check that ``batched_nms`` matches between PyTorch and TensorRT.

    ``ONNX_BACKEND`` is set to ``'MMCVTensorRT'`` for the duration of the
    test. Both the environment variable and the temporary ONNX/engine files
    are cleaned up in a ``finally`` block so that a failure in this test
    cannot leak state into the tests that run afterwards.
    """
    try:
        import mmcv
        from mmcv.ops import batched_nms
    except (ImportError, ModuleNotFoundError):
        pytest.skip('test requires compilation')

    # trt config
    os.environ['ONNX_BACKEND'] = 'MMCVTensorRT'
    try:
        fp16_mode = False
        max_workspace_size = 1 << 30
        data = mmcv.load('./tests/data/batched_nms_data.pkl')
        nms_cfg = dict(type='nms', iou_threshold=0.7, score_threshold=0.1)
        boxes = torch.from_numpy(data['boxes']).cuda()
        scores = torch.from_numpy(data['scores']).cuda()
        idxs = torch.from_numpy(data['idxs']).cuda()
        class_agnostic = False

        nms = partial(
            batched_nms, nms_cfg=nms_cfg, class_agnostic=class_agnostic)
        wrapped_model = WrapFunction(nms)
        wrapped_model.cpu().eval()
        input_data = (boxes.detach().cpu(), scores.detach().cpu(),
                      idxs.detach().cpu())
        input_names = ['boxes', 'scores', 'idxs']
        output_names = ['dets', 'inds']
        with torch.no_grad():
            torch.onnx.export(
                wrapped_model,
                input_data,
                onnx_file,
                export_params=True,
                keep_initializers_as_inputs=True,
                input_names=input_names,
                output_names=output_names,
                opset_version=11)
        onnx_model = onnx.load(onnx_file)
        # create trt engine and wrapper; min/opt/max shapes are identical
        opt_shape_dict = {
            'boxes': [list(boxes.shape),
                      list(boxes.shape),
                      list(boxes.shape)],
            'scores': [list(scores.shape),
                       list(scores.shape),
                       list(scores.shape)],
            'idxs': [list(idxs.shape),
                     list(idxs.shape),
                     list(idxs.shape)]
        }
        trt_engine = onnx2trt(
            onnx_model,
            opt_shape_dict,
            fp16_mode=fp16_mode,
            max_workspace_size=max_workspace_size)
        save_trt_engine(trt_engine, trt_file)
        trt_model = TRTWrapper(trt_file, input_names, output_names)

        with torch.no_grad():
            trt_outputs = trt_model({
                'boxes': boxes,
                'scores': scores,
                'idxs': idxs
            })
            trt_dets = trt_outputs['dets']
            trt_inds = trt_outputs['inds']
            trt_inds = trt_inds.long()

        # compute pytorch_output
        with torch.no_grad():
            pytorch_outputs = wrapped_model(boxes, scores, idxs)
            pytorch_dets, pytorch_inds = pytorch_outputs
    finally:
        # always drop the scratch files and the backend override
        for tmp_path in (onnx_file, trt_file):
            if os.path.exists(tmp_path):
                os.remove(tmp_path)
        os.environ.pop('ONNX_BACKEND', None)

    # allclose: only the first `num_boxes` TensorRT rows are compared
    num_boxes = pytorch_dets.shape[0]
    trt_dets = trt_dets[:num_boxes, ...]
    trt_inds = trt_inds[:num_boxes]
    trt_scores = trt_dets[:, 4]
    pytorch_scores = pytorch_dets[:, 4]

    assert torch.allclose(pytorch_scores, trt_scores)
    assert torch.equal(pytorch_inds, trt_inds)


def test_scatternd():
    """Check in-place slice updates after ONNX export and TensorRT conversion.

    The wrapped function modifies two slices of its input in place; the
    TensorRT result must match the PyTorch result. The temporary ONNX/engine
    files are removed even when a step fails.
    """

    def func(data):
        # in-place slice arithmetic on the argument itself; callers pass
        # clones so the original tensor stays untouched
        data[:, :-2] += 1
        data[:2, :] -= 1
        return data

    data = torch.zeros(4, 4).cuda()
    wrapped_model = WrapFunction(func).eval().cuda()

    input_names = ['input']
    output_names = ['output']

    try:
        with torch.no_grad():
            torch.onnx.export(
                wrapped_model, (data.clone(), ),
                onnx_file,
                export_params=True,
                keep_initializers_as_inputs=True,
                input_names=input_names,
                output_names=output_names,
                opset_version=11)

        onnx_model = onnx.load(onnx_file)

        # create trt engine and wrapper; min/opt/max shapes are identical
        opt_shape_dict = {
            'input': [list(data.shape),
                      list(data.shape),
                      list(data.shape)],
        }
        # trt config
        fp16_mode = False
        max_workspace_size = 1 << 30

        trt_engine = onnx2trt(
            onnx_model,
            opt_shape_dict,
            fp16_mode=fp16_mode,
            max_workspace_size=max_workspace_size)

        save_trt_engine(trt_engine, trt_file)
        trt_model = TRTWrapper(trt_file, input_names, output_names)

        with torch.no_grad():
            trt_outputs = trt_model({'input': data.clone()})
            trt_results = trt_outputs['output']

        # compute pytorch_output
        with torch.no_grad():
            pytorch_results = wrapped_model(data.clone())
    finally:
        # always drop the scratch files, also when export/build failed
        for tmp_path in (onnx_file, trt_file):
            if os.path.exists(tmp_path):
                os.remove(tmp_path)

    # allclose
    assert torch.allclose(pytorch_results, trt_results)


def test_deform_conv():
    """Check that ``DeformConv2dPack`` matches between PyTorch and TensorRT.

    The layer is given fixed offset and kernel weights, exported to ONNX,
    converted into a TensorRT engine and executed; both outputs must agree.
    The temporary ONNX/engine files are removed even when a step fails.
    """
    try:
        from mmcv.ops import DeformConv2dPack
    except (ImportError, ModuleNotFoundError):
        pytest.skip('test requires compilation')

    input = [[[[1., 2., 3.], [0., 1., 2.], [3., 5., 2.]]]]
    offset_weight = [[[0.1, 0.4, 0.6, 0.1]], [[0.3, 0.2, 0.1, 0.3]],
                     [[0.5, 0.5, 0.2, 0.8]], [[0.8, 0.3, 0.9, 0.1]],
                     [[0.3, 0.1, 0.2, 0.5]], [[0.3, 0.7, 0.5, 0.3]],
                     [[0.6, 0.2, 0.5, 0.3]], [[0.4, 0.1, 0.8, 0.4]]]
    offset_bias = [0.7, 0.1, 0.8, 0.5, 0.6, 0.5, 0.4, 0.7]
    deform_weight = [[[0.4, 0.2, 0.1, 0.9]]]

    c_in = 1
    c_out = 1
    x = torch.Tensor(input).cuda()
    x.requires_grad = True
    model = DeformConv2dPack(c_in, c_out, 2, stride=1, padding=0)
    # overwrite the parameters with the fixed values above
    model.conv_offset.weight.data = torch.nn.Parameter(
        torch.Tensor(offset_weight).reshape(8, 1, 2, 2))
    model.conv_offset.bias.data = torch.nn.Parameter(
        torch.Tensor(offset_bias).reshape(8))
    model.weight.data = torch.nn.Parameter(
        torch.Tensor(deform_weight).reshape(1, 1, 2, 2))
    model.cuda().eval()

    input_names = ['input']
    output_names = ['output']

    try:
        with torch.no_grad():
            torch.onnx.export(
                model, (x.clone(), ),
                onnx_file,
                export_params=True,
                keep_initializers_as_inputs=True,
                input_names=input_names,
                output_names=output_names,
                opset_version=11)

        onnx_model = onnx.load(onnx_file)

        # create trt engine and wrapper; min/opt/max shapes are identical
        opt_shape_dict = {
            'input': [list(x.shape), list(x.shape),
                      list(x.shape)],
        }
        # trt config
        fp16_mode = False
        max_workspace_size = 1 << 30

        trt_engine = onnx2trt(
            onnx_model,
            opt_shape_dict,
            fp16_mode=fp16_mode,
            max_workspace_size=max_workspace_size)

        save_trt_engine(trt_engine, trt_file)
        trt_model = TRTWrapper(trt_file, input_names, output_names)

        with torch.no_grad():
            trt_outputs = trt_model({'input': x.clone()})
            trt_results = trt_outputs['output']

        # compute pytorch_output
        with torch.no_grad():
            pytorch_results = model(x.clone())
    finally:
        # always drop the scratch files, also when export/build failed
        for tmp_path in (onnx_file, trt_file):
            if os.path.exists(tmp_path):
                os.remove(tmp_path)

    # allclose
    assert torch.allclose(pytorch_results, trt_results)


@pytest.mark.parametrize('with_bias', [True, False])
def test_modulated_deform_conv(with_bias):
    """Check ``ModulatedDeformConv2dPack`` parity between PyTorch and TensorRT.

    Args:
        with_bias (bool): Whether the layer is built with a bias term.

    The temporary ONNX/engine files are removed even when a step fails.
    """
    try:
        from mmcv.ops import ModulatedDeformConv2dPack
    except (ImportError, ModuleNotFoundError):
        pytest.skip('test requires compilation')

    input = [[[[1., 2., 3.], [0., 1., 2.], [3., 5., 2.]]]]

    x = torch.Tensor(input).cuda()
    model = ModulatedDeformConv2dPack(
        1,
        1,
        kernel_size=(2, 2),
        stride=1,
        padding=1,
        deform_groups=1,
        bias=with_bias)
    model.weight.data.fill_(1.)
    model.type(torch.float32)
    model = model.cuda().eval()

    input_names = ['input']
    output_names = ['output']

    try:
        with torch.no_grad():
            torch.onnx.export(
                model, (x.clone(), ),
                onnx_file,
                export_params=True,
                keep_initializers_as_inputs=True,
                input_names=input_names,
                output_names=output_names,
                opset_version=11)

        onnx_model = onnx.load(onnx_file)

        # create trt engine and wrapper; min/opt/max shapes are identical
        opt_shape_dict = {
            'input': [list(x.shape), list(x.shape),
                      list(x.shape)],
        }
        # trt config
        fp16_mode = False
        max_workspace_size = 1 << 30

        trt_engine = onnx2trt(
            onnx_model,
            opt_shape_dict,
            fp16_mode=fp16_mode,
            max_workspace_size=max_workspace_size)

        save_trt_engine(trt_engine, trt_file)
        trt_model = TRTWrapper(trt_file, input_names, output_names)

        with torch.no_grad():
            trt_outputs = trt_model({'input': x.clone()})
            trt_results = trt_outputs['output']

        # compute pytorch_output
        with torch.no_grad():
            pytorch_results = model(x.clone())
    finally:
        # always drop the scratch files, also when export/build failed
        for tmp_path in (onnx_file, trt_file):
            if os.path.exists(tmp_path):
                os.remove(tmp_path)

    # allclose
    torch.testing.assert_allclose(pytorch_results, trt_results)


@pytest.mark.parametrize('mode', ['bilinear', 'nearest'])
@pytest.mark.parametrize('padding_mode', ['zeros', 'border', 'reflection'])
@pytest.mark.parametrize('align_corners', [True, False])
def test_grid_sample(mode, padding_mode, align_corners):
    """Check ``F.grid_sample`` parity between PyTorch and TensorRT.

    Args:
        mode (str): Interpolation mode passed to ``F.grid_sample``.
        padding_mode (str): Padding mode passed to ``F.grid_sample``.
        align_corners (bool): ``align_corners`` flag passed to
            ``F.grid_sample``.

    The temporary ONNX/engine files are removed even when a step fails.
    """
    from mmcv.onnx.symbolic import register_extra_symbolics

    register_extra_symbolics(11)

    input = torch.rand(1, 1, 10, 10).cuda()
    # identity affine transform, sampled on a 15x15 output grid
    grid = torch.Tensor([[[1, 0, 0], [0, 1, 0]]])
    grid = F.affine_grid(grid, (1, 1, 15, 15)).type_as(input).cuda()

    def func(input, grid):
        return F.grid_sample(
            input,
            grid,
            mode=mode,
            padding_mode=padding_mode,
            align_corners=align_corners)

    wrapped_model = WrapFunction(func).eval().cuda()

    input_names = ['input', 'grid']
    output_names = ['output']

    try:
        with torch.no_grad():
            torch.onnx.export(
                wrapped_model, (input.clone(), grid.clone()),
                onnx_file,
                export_params=True,
                keep_initializers_as_inputs=True,
                input_names=input_names,
                output_names=output_names,
                opset_version=11)

        onnx_model = onnx.load(onnx_file)

        # create trt engine and wrapper; min/opt/max shapes are identical
        opt_shape_dict = {
            'input': [list(input.shape),
                      list(input.shape),
                      list(input.shape)],
            'grid': [list(grid.shape),
                     list(grid.shape),
                     list(grid.shape)],
        }
        # trt config
        fp16_mode = False
        max_workspace_size = 1 << 30

        trt_engine = onnx2trt(
            onnx_model,
            opt_shape_dict,
            fp16_mode=fp16_mode,
            max_workspace_size=max_workspace_size)

        save_trt_engine(trt_engine, trt_file)
        trt_model = TRTWrapper(trt_file, input_names, output_names)

        with torch.no_grad():
            trt_outputs = trt_model({
                'input': input.clone(),
                'grid': grid.clone()
            })
            trt_results = trt_outputs['output']

        # compute pytorch_output
        with torch.no_grad():
            pytorch_results = wrapped_model(input.clone(), grid.clone())
    finally:
        # always drop the scratch files, also when export/build failed
        for tmp_path in (onnx_file, trt_file):
            if os.path.exists(tmp_path):
                os.remove(tmp_path)

    # allclose
    assert torch.allclose(pytorch_results, trt_results)


@pytest.mark.parametrize('func', [torch.cummax, torch.cummin])
def test_cummin_cummax(func: Callable):
    """Compare TensorRT and PyTorch results of ``torch.cummax``/``cummin``.

    For every sample tensor and every valid ``dim``, the wrapped function is
    exported to ONNX, converted to a TensorRT engine, executed, and both the
    values and the indices are compared with the eager PyTorch results.

    Args:
        func (Callable): Either ``torch.cummax`` or ``torch.cummin``.
    """
    # Version note kept from the original author: exporting `cummax` or
    # `cummin` on its own works as long as pytorch >= 1.5.0, but when the
    # outputs feed other modules pytorch >= 1.7.0 is expected. Otherwise an
    # error like the following appears:
    # `RuntimeError: tuple  appears in op that does not forward tuples,
    # unsupported 'kind: prim::PythonOp`.
    from packaging import version
    torch_too_old = version.parse(torch.__version__) < version.parse('1.7.0')
    if torch_too_old:
        pytest.skip('test_cummax_cummin should be ran with pytorch >= 1.7.0')

    opset = 11
    # register custom op `mmcv::cummax` and `mmcv::cummin`
    from mmcv.onnx.symbolic import register_extra_symbolics
    register_extra_symbolics(opset)

    samples = [
        # arbitrary shape, e.g. 1-D, 2-D, 3-D, ...
        torch.rand((2, 3, 4, 1, 5)).cuda(),
        torch.rand((1)).cuda()
    ]

    input_names = ['input']
    output_names = ['output', 'indices']

    # trt config (identical for every sample / dim combination)
    fp16_mode = False
    max_workspace_size = 1 << 30

    for sample in samples:
        rank = sample.dim()
        # every value in [-rank, rank - 1] is a valid `dim`
        for dim in range(-rank, rank):
            bound_func = partial(func, dim=dim)
            wrapped_model = WrapFunction(bound_func).eval().cuda()

            with torch.no_grad():
                torch.onnx.export(
                    wrapped_model,
                    sample,
                    onnx_file,
                    export_params=True,
                    keep_initializers_as_inputs=False,
                    input_names=input_names,
                    output_names=output_names,
                    opset_version=opset)

            onnx_model = onnx.load(onnx_file)

            # the same static shape is given three times
            opt_shape_dict = {
                'input': [list(sample.shape) for _ in range(3)]
            }
            trt_engine = onnx2trt(
                onnx_model,
                opt_shape_dict,
                fp16_mode=fp16_mode,
                max_workspace_size=max_workspace_size)

            # the ONNX file is no longer needed once the engine exists
            if os.path.exists(onnx_file):
                os.remove(onnx_file)

            # round-trip the engine through disk, then drop the file
            save_trt_engine(trt_engine, trt_file)
            trt_model = TRTWrapper(trt_file)
            if os.path.exists(trt_file):
                os.remove(trt_file)

            # TensorRT result
            with torch.no_grad():
                trt_results = trt_model({'input': sample.contiguous().clone()})
                trt_output = trt_results['output']
                trt_indices = trt_results['indices']

            # PyTorch reference result
            with torch.no_grad():
                pytorch_output, pytorch_indices = wrapped_model(
                    sample.clone())[:2]

            torch.testing.assert_allclose(trt_output, pytorch_output)
            torch.testing.assert_allclose(trt_indices, pytorch_indices)


@pytest.mark.parametrize('dynamic_export', [True, False])
@pytest.mark.parametrize('fp16_mode', [True, False])
def test_instance_norm(dynamic_export, fp16_mode):
    """Check ``nn.InstanceNorm2d`` gives matching PyTorch/TensorRT outputs.

    The module is exported to ONNX (opset 11), converted with ``onnx2trt``,
    saved and reloaded through ``TRTWrapper``, and the TensorRT output is
    compared with the eager PyTorch output using ``torch.allclose``.

    Args:
        dynamic_export (bool): When True, axes 0, 2 and 3 of the input and
            output are exported as dynamic, and the third shape passed to
            ``onnx2trt`` is doubled along those axes.
        fp16_mode (bool): Forwarded unchanged to ``onnx2trt``.
    """
    n, c, h, w = 2, 3, 10, 10
    data = torch.randn(n, c, h, w).cuda()
    norm = nn.InstanceNorm2d(c, affine=True)

    wrapped_model = WrapFunction(norm).eval().cuda()

    input_names = ['input']
    output_names = ['output']
    dynamic_axes = None
    if dynamic_export:
        dynamic_axes = {
            'input': {
                0: 'n',
                2: 'h',
                3: 'w',
            },
            'output': {
                0: 'n',
                2: 'h',
                3: 'w',
            },
        }

    # Everything that creates or consumes the temporary ONNX / engine files
    # lives inside the `try`, so the files are removed even when export,
    # conversion or inference raises (previously they leaked on failure).
    try:
        with torch.no_grad():
            torch.onnx.export(
                wrapped_model, (data.clone(), ),
                onnx_file,
                export_params=True,
                keep_initializers_as_inputs=True,
                input_names=input_names,
                output_names=output_names,
                dynamic_axes=dynamic_axes,
                opset_version=11)

        onnx_model = onnx.load(onnx_file)

        # create trt engine and wrapper
        # NOTE(review): the three shapes are presumably [min, opt, max] for
        # the TensorRT optimization profile -- confirm against `onnx2trt`.
        if dynamic_export:
            opt_shape_dict = {
                'input':
                [list(data.shape),
                 list(data.shape), [2 * n, c, 2 * h, 2 * w]],
            }
        else:
            opt_shape_dict = {
                'input': [list(data.shape),
                          list(data.shape),
                          list(data.shape)],
            }
        # trt config
        max_workspace_size = 1 << 30

        trt_engine = onnx2trt(
            onnx_model,
            opt_shape_dict,
            fp16_mode=fp16_mode,
            max_workspace_size=max_workspace_size)

        save_trt_engine(trt_engine, trt_file)
        trt_model = TRTWrapper(trt_file, input_names, output_names)

        with torch.no_grad():
            trt_outputs = trt_model({'input': data.clone()})
            trt_results = trt_outputs['output']

        # compute pytorch_output
        with torch.no_grad():
            pytorch_results = wrapped_model(data.clone())
    finally:
        for tmp_file in (onnx_file, trt_file):
            if os.path.exists(tmp_file):
                os.remove(tmp_file)

    # allclose
    assert torch.allclose(pytorch_results, trt_results)


@pytest.mark.parametrize('mode', ['top', 'bottom', 'left', 'right'])
def test_corner_pool(mode):
    """Check ``CornerPool`` gives matching PyTorch/TensorRT outputs.

    For each sample input the wrapped op is exported to ONNX, converted with
    ``onnx2trt``, executed through ``TRTWrapper`` and compared with the
    PyTorch result (``atol=1e-5``). The test is skipped when ``CornerPool``
    cannot be imported.

    Args:
        mode (str): Pooling direction passed to ``CornerPool``; one of
            ``'top'``, ``'bottom'``, ``'left'``, ``'right'``.
    """
    try:
        from mmcv.ops import CornerPool
    except (ImportError, ModuleNotFoundError):
        pytest.skip('test requires compilation')

    opset = 11
    # register custom op `mmcv::MMCVCornerPool`
    from mmcv.onnx.symbolic import register_extra_symbolics
    register_extra_symbolics(opset)

    # trt config
    fp16_mode = False
    max_workspace_size = 1 << 30

    inputs = [
        # (n, c, h, w)
        torch.rand((2, 3, 5, 5)),
        torch.rand((1, 2, 4, 6)),
        torch.rand((2, 1, 3, 2)),
    ]

    class CornerPoolWrapper(CornerPool):

        def __init__(self, mode):
            super(CornerPoolWrapper, self).__init__(mode)

        def forward(self, x):
            # Call `corner_pool` directly instead of going through
            # `torch.cummax`, so the test takes the same path on every
            # torch version.
            return self.corner_pool.apply(x)

    wrapped_model = CornerPoolWrapper(mode).cuda()
    for input in inputs:
        input = input.cuda()

        # Keep every step that touches the temporary ONNX / engine files
        # inside the `try` so they are removed even if a step raises
        # (previously a failure left them behind for later tests).
        try:
            with torch.no_grad():
                torch.onnx.export(
                    wrapped_model, (input, ),
                    onnx_file,
                    export_params=True,
                    keep_initializers_as_inputs=True,
                    input_names=['input'],
                    output_names=['output'],
                    opset_version=opset)
            onnx_model = onnx.load(onnx_file)

            # create trt engine and wrapper
            opt_shape_dict = {
                'input': [list(input.shape),
                          list(input.shape),
                          list(input.shape)],
            }
            trt_engine = onnx2trt(
                onnx_model,
                opt_shape_dict,
                fp16_mode=fp16_mode,
                max_workspace_size=max_workspace_size)
            save_trt_engine(trt_engine, trt_file)
            trt_model = TRTWrapper(trt_file, ['input'], ['output'])

            with torch.no_grad():
                trt_outputs = trt_model({'input': input})
                trt_pool_feat = trt_outputs['output']

            # compute pytorch_output
            with torch.no_grad():
                pytorch_pool_feat = wrapped_model(input)
        finally:
            for tmp_file in (onnx_file, trt_file):
                if os.path.exists(tmp_file):
                    os.remove(tmp_file)

        # allclose
        assert torch.allclose(pytorch_pool_feat, trt_pool_feat, atol=1e-5)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_ops/test_tensorrt_preprocess.py
================================================
import os
from functools import wraps

import onnx
import torch

from mmcv.ops import nms
from mmcv.tensorrt.preprocess import preprocess_onnx


def remove_tmp_file(func):
    """Decorator that hands ``func`` a temporary ONNX path and cleans it up.

    The wrapped function is always called with ``onnx_file='tmp.onnx'``
    (overriding any ``onnx_file`` keyword the caller supplied). Once the
    call finishes -- normally or by raising -- the file is deleted if it
    exists.

    Args:
        func (Callable): Function accepting an ``onnx_file`` keyword.

    Returns:
        Callable: The wrapping function, returning whatever ``func`` returns.
    """
    tmp_path = 'tmp.onnx'

    @wraps(func)
    def wrapper(*args, **kwargs):
        kwargs['onnx_file'] = tmp_path
        try:
            return func(*args, **kwargs)
        finally:
            # runs on both success and failure
            if os.path.exists(tmp_path):
                os.remove(tmp_path)

    return wrapper


@remove_tmp_file
def export_nms_module_to_onnx(module, onnx_file):
    """Instantiate ``module``, export it to ONNX and return the loaded model.

    ``onnx_file`` is supplied by the ``remove_tmp_file`` decorator, which
    also deletes the file once this function returns.

    Args:
        module: Zero-argument callable building the model to export; the
            tests in this file pass ``torch.nn.Module`` subclasses.
        onnx_file (str): Path the ONNX graph is written to and read from.

    Returns:
        The object returned by ``onnx.load(onnx_file)``.
    """
    torch_model = module()
    torch_model.eval()

    # dummy inputs: 100 boxes with 4 values each, and 100 scores
    boxes = torch.rand([100, 4], dtype=torch.float32)
    scores = torch.rand([100], dtype=torch.float32)

    torch.onnx.export(
        torch_model, (boxes, scores),
        onnx_file,
        opset_version=11,
        input_names=['boxes', 'scores'],
        output_names=['output'])

    return onnx.load(onnx_file)


def test_can_handle_nms_with_constant_maxnum():
    """NMS exported with a constant ``max_num`` keeps 5 attributes.

    Exports a module calling ``nms(..., max_num=10)``, runs
    ``preprocess_onnx`` on it and checks every node whose name contains
    ``'NonMaxSuppression'``.
    """

    class ModuleNMS(torch.nn.Module):

        def forward(self, boxes, scores):
            return nms(boxes, scores, iou_threshold=0.4, max_num=10)

    processed = preprocess_onnx(export_nms_module_to_onnx(ModuleNMS))
    nms_nodes = [
        node for node in processed.graph.node
        if 'NonMaxSuppression' in node.name
    ]
    for node in nms_nodes:
        assert len(node.attribute) == 5, 'The NMS must have 5 attributes.'


def test_can_handle_nms_with_undefined_maxnum():
    """NMS exported without ``max_num`` still gets a positive box limit.

    Exports a module calling ``nms`` without ``max_num``, runs
    ``preprocess_onnx`` on it and, for every node whose name contains
    ``'NonMaxSuppression'``, checks that it has 5 attributes and that the
    integer value of the attribute at index 2 is positive.
    """

    class ModuleNMS(torch.nn.Module):

        def forward(self, boxes, scores):
            return nms(boxes, scores, iou_threshold=0.4)

    processed = preprocess_onnx(export_nms_module_to_onnx(ModuleNMS))
    for node in processed.graph.node:
        if 'NonMaxSuppression' not in node.name:
            continue
        assert len(node.attribute) == 5, (
            'The NMS must have 5 attributes.')
        assert node.attribute[2].i > 0, (
            'The max_output_boxes_per_class is not defined correctly.')


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_ops/test_three_interpolate.py
================================================
import pytest
import torch

from mmcv.ops import three_interpolate


@pytest.mark.skipif(
    not torch.cuda.is_available(), reason='requires CUDA support')
def test_three_interpolate():
    """Compare ``three_interpolate`` on CUDA with precomputed values.

    Inputs are a (2, 5, 6) feature tensor, a (2, 6, 3) int index tensor and
    a (2, 6, 3) weight tensor; the result must match the stored (2, 5, 6)
    reference with a relative tolerance of 1e-4.
    """
    features = torch.tensor([
        [[2.4350, 4.7516, 4.4995, 2.4350, 2.4350, 2.4350],
         [3.1236, 2.6278, 3.0447, 3.1236, 3.1236, 3.1236],
         [2.6732, 2.8677, 2.6436, 2.6732, 2.6732, 2.6732],
         [0.0124, 7.0150, 7.0199, 0.0124, 0.0124, 0.0124],
         [0.3207, 0.0000, 0.3411, 0.3207, 0.3207, 0.3207]],
        [[0.0000, 0.9544, 2.4532, 0.0000, 0.0000, 0.0000],
         [0.5346, 1.9176, 1.4715, 0.5346, 0.5346, 0.5346],
         [0.0000, 0.2744, 2.0842, 0.0000, 0.0000, 0.0000],
         [0.3414, 1.5063, 1.6209, 0.3414, 0.3414, 0.3414],
         [0.5814, 0.0103, 0.0000, 0.5814, 0.5814, 0.5814]],
    ]).cuda()

    idx = torch.tensor([
        [[0, 1, 2], [2, 3, 4], [2, 3, 4], [0, 1, 2], [0, 1, 2], [0, 1, 3]],
        [[0, 2, 3], [1, 3, 4], [2, 1, 4], [0, 2, 4], [0, 2, 4], [0, 1, 2]],
    ]).int().cuda()

    # rows weighting the three entries equally are spelled out once
    equal_thirds = [3.3333e-01, 3.3333e-01, 3.3333e-01]
    weight = torch.tensor([
        [
            equal_thirds,
            [1.0000e+00, 5.8155e-08, 2.2373e-08],
            [1.0000e+00, 1.7737e-08, 1.7356e-08],
            equal_thirds,
            equal_thirds,
            equal_thirds,
        ],
        [
            equal_thirds,
            [1.0000e+00, 1.3651e-08, 7.7312e-09],
            [1.0000e+00, 1.7148e-08, 1.4070e-08],
            equal_thirds,
            equal_thirds,
            equal_thirds,
        ],
    ]).cuda()

    expected_output = torch.tensor([
        [
            [3.8953e+00, 4.4995e+00, 4.4995e+00,
             3.8953e+00, 3.8953e+00, 3.2072e+00],
            [2.9320e+00, 3.0447e+00, 3.0447e+00,
             2.9320e+00, 2.9320e+00, 2.9583e+00],
            [2.7281e+00, 2.6436e+00, 2.6436e+00,
             2.7281e+00, 2.7281e+00, 2.7380e+00],
            [4.6824e+00, 7.0199e+00, 7.0199e+00,
             4.6824e+00, 4.6824e+00, 2.3466e+00],
            [2.2060e-01, 3.4110e-01, 3.4110e-01,
             2.2060e-01, 2.2060e-01, 2.1380e-01],
        ],
        [
            [8.1773e-01, 9.5440e-01, 2.4532e+00,
             8.1773e-01, 8.1773e-01, 1.1359e+00],
            [8.4689e-01, 1.9176e+00, 1.4715e+00,
             8.4689e-01, 8.4689e-01, 1.3079e+00],
            [6.9473e-01, 2.7440e-01, 2.0842e+00,
             6.9473e-01, 6.9473e-01, 7.8619e-01],
            [7.6789e-01, 1.5063e+00, 1.6209e+00,
             7.6789e-01, 7.6789e-01, 1.1562e+00],
            [3.8760e-01, 1.0300e-02, 8.3569e-09,
             3.8760e-01, 3.8760e-01, 1.9723e-01],
        ],
    ]).cuda()

    output = three_interpolate(features, idx, weight)

    # third positional argument of torch.allclose is `rtol`
    assert torch.allclose(output, expected_output, rtol=1e-4)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_ops/test_three_nn.py
================================================
import pytest
import torch

from mmcv.ops import three_nn


@pytest.mark.skipif(
    not torch.cuda.is_available(), reason='requires CUDA support')
def test_three_nn():
    """Compare ``three_nn`` on CUDA with precomputed distances and indices.

    ``known`` is a (2, 5, 3) tensor and ``unknown`` a (2, 10, 3) tensor.
    ``three_nn(unknown, known)`` must return distances matching the stored
    (2, 10, 3) reference with a relative tolerance of 1e-4, and indices
    exactly equal to the stored (2, 10, 3) reference.
    """
    known = torch.tensor([
        [[-1.8373, 3.5605, -0.7867],
         [0.7615, 2.9420, 0.2314],
         [-0.6503, 3.6637, -1.0622],
         [-1.8373, 3.5605, -0.7867],
         [-1.8373, 3.5605, -0.7867]],
        [[-1.3399, 1.9991, -0.3698],
         [-0.0799, 0.9698, -0.8457],
         [0.0858, 2.4721, -0.1928],
         [-1.3399, 1.9991, -0.3698],
         [-1.3399, 1.9991, -0.3698]],
    ]).cuda()

    unknown = torch.tensor([
        [[-1.8373, 3.5605, -0.7867],
         [0.7615, 2.9420, 0.2314],
         [-0.6503, 3.6637, -1.0622],
         [-1.5237, 2.3976, -0.8097],
         [-0.0722, 3.4017, -0.2880],
         [0.5198, 3.0661, -0.4605],
         [-2.0185, 3.5019, -0.3236],
         [0.5098, 3.1020, 0.5799],
         [-1.6137, 3.8443, -0.5269],
         [0.7341, 2.9626, -0.3189]],
        [[-1.3399, 1.9991, -0.3698],
         [-0.0799, 0.9698, -0.8457],
         [0.0858, 2.4721, -0.1928],
         [-0.9022, 1.6560, -1.3090],
         [0.1156, 1.6901, -0.4366],
         [-0.6477, 2.3576, -0.1563],
         [-0.8482, 1.1466, -1.2704],
         [-0.8753, 2.0845, -0.3460],
         [-0.5621, 1.4233, -1.2858],
         [-0.5883, 1.3114, -1.2899]],
    ]).cuda()

    expected_dist = torch.tensor([
        [[0.0000, 0.0000, 0.0000],
         [0.0000, 2.0463, 2.8588],
         [0.0000, 1.2229, 1.2229],
         [1.2047, 1.2047, 1.2047],
         [1.0011, 1.0845, 1.8411],
         [0.7433, 1.4451, 2.4304],
         [0.5007, 0.5007, 0.5007],
         [0.4587, 2.0875, 2.7544],
         [0.4450, 0.4450, 0.4450],
         [0.5514, 1.7206, 2.6811]],
        [[0.0000, 0.0000, 0.0000],
         [0.0000, 1.6464, 1.6952],
         [0.0000, 1.5125, 1.5125],
         [1.0915, 1.0915, 1.0915],
         [0.8197, 0.8511, 1.4894],
         [0.7433, 0.8082, 0.8082],
         [0.8955, 1.3340, 1.3340],
         [0.4730, 0.4730, 0.4730],
         [0.7949, 1.3325, 1.3325],
         [0.7566, 1.3727, 1.3727]],
    ]).cuda()

    expected_idx = torch.tensor([
        [[0, 3, 4], [1, 2, 0], [2, 0, 3], [0, 3, 4], [2, 1, 0],
         [1, 2, 0], [0, 3, 4], [1, 2, 0], [0, 3, 4], [1, 2, 0]],
        [[0, 3, 4], [1, 2, 0], [2, 0, 3], [0, 3, 4], [2, 1, 0],
         [2, 0, 3], [1, 0, 3], [0, 3, 4], [1, 0, 3], [1, 0, 3]],
    ]).cuda()

    nn_dist, nn_idx = three_nn(unknown, known)

    # third positional argument of torch.allclose is `rtol`
    assert torch.allclose(nn_dist, expected_dist, rtol=1e-4)
    assert torch.all(nn_idx == expected_idx)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_ops/test_tin_shift.py
================================================
import os

import numpy as np
import pytest
import torch

_USING_PARROTS = True
try:
    from parrots.autograd import gradcheck
except ImportError:
    from torch.autograd import gradcheck

    _USING_PARROTS = False

# Absolute path of the directory that contains this test module.
cur_dir = os.path.dirname(os.path.abspath(__file__))

inputs = ([[[[0.88572276, 0.46422583], [0.97408265, 0.59547687],
             [0.030812204, 0.96236038], [0.75418317, 0.44058233],
             [0.33279222, 0.00084149837], [0.7069388, 0.23255438],
             [0.13547045, 0.81549376], [0.40174931, 0.36317211]],
            [[0.57444429, 0.15905505], [0.39897251, 0.25790238],
             [0.93282568, 0.18451685], [0.92526674, 0.18283755],
             [0.31664443, 0.59323865], [0.1957739, 0.42505842],
             [0.081158757, 0.81340349], [0.43456328, 0.30195212]],
            [[0.8198145, 0.05990988], [0.98062474, 0.34803438],
             [0.10412294, 0.37183142], [0.15021622, 0.038857818],
             [0.40985721, 0.42253625], [0.71150124, 0.59778064],
             [0.83851069, 0.15194464], [0.097513378, 0.74820143]],
            [[0.80680406, 0.49327564], [0.17821097, 0.12980539],
             [0.50657678, 0.14446253], [0.04178369, 0.53071898],
             [0.84983683, 0.3826949], [0.32193625, 0.91275406],
             [0.75628334, 0.52934098], [0.27994192, 0.3053292]]],
           [[[0.082397044, 0.4210068], [0.23563534, 0.7938987],
             [0.63669145, 0.69397897], [0.8844561, 0.97854084],
             [0.79027033, 0.60640401], [0.63528901, 0.72172403],
             [0.0097346902, 0.70800996], [0.87891227, 0.13674974]],
            [[0.74329448, 0.0243572], [0.82178867, 0.85750699],
             [0.7568835, 0.73146772], [0.5031184, 0.30479157],
             [0.28713053, 0.47414285], [0.4682079, 0.067471564],
             [0.48368263, 0.14590704], [0.25397325, 0.19946373]],
            [[0.4291026, 0.068739474], [0.7159555, 0.79903615],
             [0.76412082, 0.85348046], [0.081224024, 0.82264912],
             [0.97173303, 0.24291694], [0.48957139, 0.43488795],
             [0.67382395, 0.21889746], [0.36712623, 0.67127824]],
            [[0.12054044, 0.18096751], [0.86675781, 0.54755616],
             [0.68208277, 0.15164375], [0.79991871, 0.80811197],
             [0.85256428, 0.68253738], [0.185983, 0.95642138],
             [0.48102546, 0.28009653], [0.35726011, 0.58168036]]]])

# Two shift configurations, each a 2x4 nested list of integer offsets.
# `_test_tinshift_allclose` zips them entry by entry with `outputs` and
# `grads`, so the i-th shift pairs with the i-th expected output/gradient.
shifts = [([[1, 0, 1, -2], [-2, 1, -1, 1]]), ([[2, 1, 2, -1], [-1, 2, 0, 2]])]

outputs = [([[[[0.0, 0.0], [0.0, 0.0], [0.030812, 0.96236], [0.75418, 0.44058],
               [0.0, 0.0], [0.0, 0.0], [0.83851, 0.15194], [0.097513, 0.7482]],
              [[0.88572, 0.46423], [0.97408, 0.59548], [0.93283, 0.18452],
               [0.92527, 0.18284], [0.33279, 0.0008415], [0.70694, 0.23255],
               [0.75628, 0.52934], [0.27994, 0.30533]],
              [[0.57444, 0.15906], [0.39897, 0.2579], [0.10412, 0.37183],
               [0.15022, 0.038858], [0.31664, 0.59324], [0.19577, 0.42506],
               [0.0, 0.0], [0.0, 0.0]],
              [[0.81981, 0.05991], [0.98062, 0.34803], [0.50658, 0.14446],
               [0.041784, 0.53072], [0.40986, 0.42254], [0.7115, 0.59778],
               [0.0, 0.0], [0.0, 0.0]]],
             [[[0.4291, 0.068739], [0.71596, 0.79904], [0.0, 0.0], [0.0, 0.0],
               [0.28713, 0.47414], [0.46821, 0.067472], [0.0, 0.0], [0.0,
                                                                     0.0]],
              [[0.12054, 0.18097], [0.86676, 0.54756], [0.63669, 0.69398],
               [0.88446, 0.97854], [0.97173, 0.24292], [0.48957, 0.43489],
               [0.0097347, 0.70801], [0.87891, 0.13675]],
              [[0.0, 0.0], [0.0, 0.0], [0.75688, 0.73147], [0.50312, 0.30479],
               [0.85256, 0.68254], [0.18598, 0.95642], [0.48368, 0.14591],
               [0.25397, 0.19946]],
              [[0.0, 0.0], [0.0, 0.0], [0.76412, 0.85348], [0.081224, 0.82265],
               [0.0, 0.0], [0.0, 0.0], [0.67382, 0.2189], [0.36713,
                                                           0.67128]]]]),
           ([[[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0],
               [0.0, 0.0], [0.081159, 0.8134], [0.43456, 0.30195]],
              [[0.0, 0.0], [0.0, 0.0], [0.030812, 0.96236], [0.75418, 0.44058],
               [0.0, 0.0], [0.0, 0.0], [0.83851, 0.15194], [0.097513, 0.7482]],
              [[0.88572, 0.46423], [0.97408, 0.59548], [0.93283, 0.18452],
               [0.92527, 0.18284], [0.33279, 0.0008415], [0.70694, 0.23255],
               [0.75628, 0.52934], [0.27994, 0.30533]],
              [[0.57444, 0.15906], [0.39897, 0.2579], [0.10412, 0.37183],
               [0.15022, 0.038858], [0.31664, 0.59324], [0.19577, 0.42506],
               [0.0, 0.0], [0.0, 0.0]]],
             [[[0.74329, 0.024357], [0.82179, 0.85751], [0.0, 0.0], [0.0, 0.0],
               [0.79027, 0.6064], [0.63529, 0.72172], [0.0, 0.0], [0.0, 0.0]],
              [[0.4291, 0.068739], [0.71596, 0.79904], [0.0, 0.0], [0.0, 0.0],
               [0.28713, 0.47414], [0.46821, 0.067472], [0.0, 0.0], [0.0,
                                                                     0.0]],
              [[0.12054, 0.18097], [0.86676, 0.54756], [0.63669, 0.69398],
               [0.88446, 0.97854], [0.97173, 0.24292], [0.48957, 0.43489],
               [0.0097347, 0.70801], [0.87891, 0.13675]],
              [[0.0, 0.0], [0.0, 0.0], [0.75688, 0.73147], [0.50312, 0.30479],
               [0.85256, 0.68254], [0.18598, 0.95642], [0.48368, 0.14591],
               [0.25397, 0.19946]]]])]

grads = [
    [[[[0., 0.], [0., 0.], [1., 1.], [1., 1.], [0., 0.], [0., 0.], [1., 1.],
       [1., 1.]],
      [[1., 1.], [1., 1.], [1., 1.], [1., 1.], [1., 1.], [1., 1.], [1., 1.],
       [1., 1.]],
      [[1., 1.], [1., 1.], [1., 1.], [1., 1.], [1., 1.], [1., 1.], [0., 0.],
       [0., 0.]],
      [[1., 1.], [1., 1.], [1., 1.], [1., 1.], [1., 1.], [1., 1.], [0., 0.],
       [0., 0.]]],
     [[[1., 1.], [1., 1.], [0., 0.], [0., 0.], [1., 1.], [1., 1.], [0., 0.],
       [0., 0.]],
      [[1., 1.], [1., 1.], [1., 1.], [1., 1.], [1., 1.], [1., 1.], [1., 1.],
       [1., 1.]],
      [[0., 0.], [0., 0.], [1., 1.], [1., 1.], [1., 1.], [1., 1.], [1., 1.],
       [1., 1.]],
      [[0., 0.], [0., 0.], [1., 1.], [1., 1.], [0., 0.], [0., 0.], [1., 1.],
       [1., 1.]]]],
    [[[[0., 0.], [0., 0.], [0., 0.], [0., 0.], [0., 0.], [0., 0.], [1., 1.],
       [1., 1.]],
      [[0., 0.], [0., 0.], [1., 1.], [1., 1.], [0., 0.], [0., 0.], [1., 1.],
       [1., 1.]],
      [[1., 1.], [1., 1.], [1., 1.], [1., 1.], [1., 1.], [1., 1.], [1., 1.],
       [1., 1.]],
      [[1., 1.], [1., 1.], [1., 1.], [1., 1.], [1., 1.], [1., 1.], [0., 0.],
       [0., 0.]]],
     [[[1., 1.], [1., 1.], [0., 0.], [0., 0.], [1., 1.], [1., 1.], [0., 0.],
       [0., 0.]],
      [[1., 1.], [1., 1.], [0., 0.], [0., 0.], [1., 1.], [1., 1.], [0., 0.],
       [0., 0.]],
      [[1., 1.], [1., 1.], [1., 1.], [1., 1.], [1., 1.], [1., 1.], [1., 1.],
       [1., 1.]],
      [[0., 0.], [0., 0.], [1., 1.], [1., 1.], [1., 1.], [1., 1.], [1., 1.],
       [1., 1.]]]]
]


def _test_tinshift_gradcheck(dtype):
    """Run ``gradcheck`` on ``tin_shift`` for every entry of ``shifts``.

    Skips the calling test when ``tin_shift`` cannot be imported or when
    ``dtype`` is ``torch.half``.

    Args:
        dtype (torch.dtype): Dtype of the CUDA input tensor.
    """
    try:
        from mmcv.ops import tin_shift
    except ModuleNotFoundError:
        pytest.skip('TINShift op is not successfully compiled')

    if dtype == torch.half:
        pytest.skip('"add_cpu/sub_cpu" not implemented for Half')

    # the parrots gradcheck is called with its defaults; the torch one gets
    # explicit tolerances
    if torch.__version__ == 'parrots':
        check_kwargs = {}
    else:
        check_kwargs = dict(atol=1, rtol=0.1)

    for shift_values in shifts:
        x = torch.tensor(
            np.array(inputs), dtype=dtype, device='cuda', requires_grad=True)
        shift_tensor = torch.tensor(
            np.array(shift_values), device='cuda').int()
        gradcheck(tin_shift, (x, shift_tensor), **check_kwargs)


def _test_tinshift_allclose(dtype):
    """Check ``tin_shift`` forward and backward against stored references.

    For each (shift, expected output, expected gradient) triple taken from
    the module-level ``shifts``, ``outputs`` and ``grads``, the op is run on
    CUDA, back-propagated with an all-ones gradient, and both the output and
    the input gradient are compared with a relative tolerance of 1e-3.
    Skips the calling test when ``tin_shift`` cannot be imported.

    Args:
        dtype (torch.dtype): Dtype of the CUDA input tensor.
    """
    try:
        from mmcv.ops import tin_shift
    except ModuleNotFoundError:
        pytest.skip('TINShift op is not successfully compiled')

    for shift_values, ref_output, ref_grad in zip(shifts, outputs, grads):
        expected_output = np.array(ref_output)
        expected_grad = np.array(ref_grad)

        x = torch.tensor(
            np.array(inputs), dtype=dtype, device='cuda', requires_grad=True)
        shift_tensor = torch.tensor(
            np.array(shift_values), device='cuda').int()

        result = tin_shift(x, shift_tensor)
        result.backward(torch.ones_like(result))

        # cast to float32 before leaving torch so every dtype compares alike
        actual_output = result.data.type(torch.float).cpu().numpy()
        actual_grad = x.grad.data.type(torch.float).cpu().numpy()

        # third positional argument of np.allclose is `rtol`
        assert np.allclose(actual_output, expected_output, rtol=1e-3)
        assert np.allclose(actual_grad, expected_grad, rtol=1e-3)


def _test_tinshift_assert(dtype):
    """Check ``tin_shift`` raises ``ValueError`` for mismatched shapes.

    Two (2, 3, 4, 2) inputs are paired with shifts of shape (2, 3) and
    (2, 5). Skips the calling test when ``tin_shift`` cannot be imported.

    Args:
        dtype (torch.dtype): Accepted for symmetry with the sibling helpers;
            not used by this check.
    """
    try:
        from mmcv.ops import tin_shift
    except ModuleNotFoundError:
        pytest.skip('TINShift op is not successfully compiled')

    bad_inputs = (torch.rand(2, 3, 4, 2), torch.rand(2, 3, 4, 2))
    bad_shifts = (torch.rand(2, 3), torch.rand(2, 5))

    for x, shift in zip(bad_inputs, bad_shifts):
        # A ValueError should be raised if ops get inputs with wrong shapes.
        with pytest.raises(ValueError):
            tin_shift(x.cuda(), shift.cuda())


@pytest.mark.skipif(
    not torch.cuda.is_available(), reason='requires CUDA support')
@pytest.mark.parametrize('dtype', [torch.float, torch.double, torch.half])
def test_tinshift(dtype):
    """Run the value, gradcheck and shape-validation checks for ``dtype``.

    The helpers run in this fixed order; a ``pytest.skip`` raised inside one
    of them ends the test, so later helpers do not run.
    """
    checks = (
        _test_tinshift_allclose,
        _test_tinshift_gradcheck,
        _test_tinshift_assert,
    )
    for check in checks:
        check(dtype=dtype)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_ops/test_upfirdn2d.py
================================================
import pytest
import torch

_USING_PARROTS = True
try:
    from parrots.autograd import gradcheck
except ImportError:
    from torch.autograd import gradcheck, gradgradcheck
    _USING_PARROTS = False


class TestUpFirDn2d(object):
    """Unit test for UpFirDn2d.

    Only the basic upsampling case is covered here. More general tests are
    left to the unit tests of the UpFirDnUpsample and UpFirDnDownSample
    modules.
    """

    @classmethod
    def setup_class(cls):
        """Build the shared kernel, padding and random input once per class."""
        taps = torch.tensor([1., 3., 3., 1.])
        # outer product of the 1-D taps, normalised to sum to one
        kernel_2d = taps[:, None] * taps[None, :]
        cls.kernel = kernel_2d / kernel_2d.sum()
        cls.factor = 2
        total_pad = cls.kernel.shape[0] - cls.factor
        cls.pad = ((total_pad + 1) // 2 + cls.factor - 1, total_pad // 2)

        cls.input_tensor = torch.randn((2, 3, 4, 4), requires_grad=True)

    @pytest.mark.skipif(not torch.cuda.is_available(), reason='requires cuda')
    def test_upfirdn2d(self):
        """Run the gradient checks on ``upfirdn2d`` with CUDA tensors."""
        from mmcv.ops import upfirdn2d

        def fresh_args():
            # every check receives its own freshly moved CUDA tensors
            return (self.input_tensor.cuda(),
                    self.kernel.type_as(self.input_tensor).cuda(),
                    self.factor, 1, self.pad)

        if _USING_PARROTS:
            # the parrots gradcheck takes `delta` / `pt_atol` keywords
            gradcheck(upfirdn2d, fresh_args(), delta=1e-4, pt_atol=1e-3)
            return

        gradcheck(upfirdn2d, fresh_args(), eps=1e-4, atol=1e-3)
        gradgradcheck(upfirdn2d, fresh_args(), eps=1e-4, atol=1e-3)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_ops/test_voxelization.py
================================================
import numpy as np
import pytest
import torch

from mmcv.ops import Voxelization


def _get_voxel_points_indices(points, coors, voxel):
    result_form = np.equal(coors, voxel)
    return result_form[:, 0] & result_form[:, 1] & result_form[:, 2]


@pytest.mark.parametrize('device_type', [
    'cpu',
    pytest.param(
        'cuda:0',
        marks=pytest.mark.skipif(
            not torch.cuda.is_available(), reason='requires CUDA support'))
])
def test_voxelization(device_type):
    """Compare ``Voxelization`` results with a stored reference fixture.

    Two layers are built from the same voxel size and point-cloud range: one
    with ``max_num_points=1000`` ("hard") and one with ``max_num_points=-1``
    ("dynamic"). The hard outputs must equal the fixture exactly; the
    dynamic output is cross-checked per reference voxel.

    Args:
        device_type (str): Device string the points are moved to.
    """
    voxel_size = [0.5, 0.5, 0.5]
    point_cloud_range = [0, -40, -3, 70.4, 40, 1]

    reference = np.load(
        'tests/data/for_3d_ops/test_voxel.npy', allow_pickle=True).item()
    expected_coors = reference['coors']
    expected_voxels = reference['voxels']
    expected_num_points_per_voxel = reference['num_points_per_voxel']

    dynamic_voxelization = Voxelization(voxel_size, point_cloud_range, -1)
    hard_voxelization = Voxelization(voxel_size, point_cloud_range, 1000)

    device = torch.device(device_type)
    points = torch.tensor(reference['points']).contiguous().to(device)

    # test hard_voxelization on cpu/gpu
    hard_results = hard_voxelization.forward(points)
    coors, voxels, num_points_per_voxel = (
        item.cpu().detach().numpy() for item in hard_results)
    assert np.all(coors == expected_coors)
    assert np.all(voxels == expected_voxels)
    assert np.all(num_points_per_voxel == expected_num_points_per_voxel)

    # test dynamic_voxelization on cpu/gpu
    coors = dynamic_voxelization.forward(points).cpu().detach().numpy()
    points = points.cpu().detach().numpy()
    # NOTE(review): below, the fixture's 'voxels' entries are matched against
    # `coors` and its 'coors' entries are compared with point rows, so the
    # key names look swapped relative to their use -- confirm with the data.
    for i, reference_voxel in enumerate(expected_voxels):
        indices = _get_voxel_points_indices(points, coors, reference_voxel)
        selected = points[indices]
        num_points_current_voxel = selected.shape[0]
        assert num_points_current_voxel > 0
        assert np.all(
            selected == expected_coors[i][:num_points_current_voxel])
        assert num_points_current_voxel == expected_num_points_per_voxel[i]


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_parallel.py
================================================
from unittest.mock import MagicMock, patch

import pytest
import torch
import torch.nn as nn
from torch.nn.parallel import DataParallel, DistributedDataParallel

from mmcv.parallel import (MODULE_WRAPPERS, MMDataParallel,
                           MMDistributedDataParallel, is_module_wrapper)
from mmcv.parallel._functions import Scatter, get_input_device, scatter
from mmcv.parallel.distributed_deprecated import \
    MMDistributedDataParallel as DeprecatedMMDDP


def mock(*args, **kwargs):
    """No-op stand-in: accept any arguments and return ``None``."""
    return None


@patch('torch.distributed._broadcast_coalesced', mock)
@patch('torch.distributed.broadcast', mock)
@patch('torch.nn.parallel.DistributedDataParallel._ddp_init_helper', mock)
def test_is_module_wrapper():
    """Check ``is_module_wrapper`` on plain, wrapped and registered modules.

    Several distributed helpers are replaced by the no-op ``mock`` so that the
    DistributedDataParallel wrappers can be built with a ``MagicMock`` process
    group. A plain module must not be reported as a wrapper, while every
    parallel wrapper and a class registered in ``MODULE_WRAPPERS`` must be.
    """

    class Model(nn.Module):

        def __init__(self):
            super().__init__()
            self.conv = nn.Conv2d(2, 2, 1)

        def forward(self, x):
            return self.conv(x)

    # _verify_model_across_ranks is added in torch1.9.0 so we should check
    # whether _verify_model_across_ranks is the member of torch.distributed
    # before mocking
    verify_attr = '_verify_model_across_ranks'
    has_verify = hasattr(torch.distributed, verify_attr)
    if has_verify:
        original_verify = getattr(torch.distributed, verify_attr)
        setattr(torch.distributed, verify_attr, mock)

    try:
        model = Model()
        assert not is_module_wrapper(model)

        dp = DataParallel(model)
        assert is_module_wrapper(dp)

        mmdp = MMDataParallel(model)
        assert is_module_wrapper(mmdp)

        ddp = DistributedDataParallel(model, process_group=MagicMock())
        assert is_module_wrapper(ddp)

        mmddp = MMDistributedDataParallel(model, process_group=MagicMock())
        assert is_module_wrapper(mmddp)

        deprecated_mmddp = DeprecatedMMDDP(model)
        assert is_module_wrapper(deprecated_mmddp)

        # test module wrapper registry
        @MODULE_WRAPPERS.register_module()
        class ModuleWrapper(object):

            def __init__(self, module):
                self.module = module

            def forward(self, *args, **kwargs):
                return self.module(*args, **kwargs)

        module_wrapper = ModuleWrapper(model)
        assert is_module_wrapper(module_wrapper)
    finally:
        # Put the real function back so the mock does not leak into tests
        # that run after this one.
        if has_verify:
            setattr(torch.distributed, verify_attr, original_verify)


def test_get_input_device():
    """Check ``get_input_device`` for CPU input, GPU input and bad input."""
    shapes = ([1, 3, 3, 3], [1, 4, 4, 4])

    # CPU tensors, alone or in a list, are reported as device -1
    cpu_tensor = torch.zeros(shapes[0])
    assert get_input_device(cpu_tensor) == -1
    cpu_tensors = [torch.zeros(shape) for shape in shapes]
    assert get_input_device(cpu_tensors) == -1

    # GPU tensors are reported with the index of their device
    if torch.cuda.is_available():
        gpu_tensor = torch.zeros(shapes[0]).cuda()
        assert get_input_device(gpu_tensor) == 0
        gpu_tensors = [torch.zeros(shape).cuda() for shape in shapes]
        assert get_input_device(gpu_tensors) == 0

    # anything that is neither a tensor nor a list of tensors is rejected
    with pytest.raises(Exception):
        get_input_device(5)


def test_scatter():
    """Compare the output of ``scatter`` with its input on CPU and GPU."""
    shapes = ([1, 3, 3, 3], [1, 4, 4, 4])

    # devices=[-1] targets the CPU: the output must match the input
    single = torch.zeros(shapes[0])
    scattered = scatter(input=single, devices=[-1])
    assert torch.allclose(single, scattered)

    batch = [torch.zeros(shape) for shape in shapes]
    scattered_batch = scatter(input=batch, devices=[-1])
    for source, result in zip(batch, scattered_batch):
        assert torch.allclose(source, result)

    # devices=[0] targets the first GPU: the output must match the input
    # once the input has been moved to the GPU as well
    if torch.cuda.is_available():
        single = torch.zeros(shapes[0])
        scattered = scatter(input=single, devices=[0])
        assert torch.allclose(single.cuda(), scattered)

        batch = [torch.zeros(shape) for shape in shapes]
        scattered_batch = scatter(input=batch, devices=[0])
        for source, result in zip(batch, scattered_batch):
            assert torch.allclose(source.cuda(), result)

    # anything that is neither a tensor nor a list of tensors is rejected
    with pytest.raises(Exception):
        scatter(5, [-1])


def test_Scatter():
    """Check ``Scatter.forward`` for CPU and, when available, GPU targets.

    In every case the result must be a tuple whose entries match the
    corresponding input tensors.
    """
    # if the device is CPU, just return the input
    target_gpus = [-1]
    input = torch.zeros([1, 3, 3, 3])
    outputs = Scatter.forward(target_gpus, input)
    assert isinstance(outputs, tuple)
    assert torch.allclose(input, outputs[0])

    target_gpus = [-1]
    inputs = [torch.zeros([1, 3, 3, 3]), torch.zeros([1, 4, 4, 4])]
    outputs = Scatter.forward(target_gpus, inputs)
    assert isinstance(outputs, tuple)
    for input, output in zip(inputs, outputs):
        assert torch.allclose(input, output)

    # if the device is GPU, copy the input from CPU to GPU
    if torch.cuda.is_available():
        target_gpus = [0]
        input = torch.zeros([1, 3, 3, 3])
        outputs = Scatter.forward(target_gpus, input)
        assert isinstance(outputs, tuple)
        assert torch.allclose(input.cuda(), outputs[0])

        target_gpus = [0]
        inputs = [torch.zeros([1, 3, 3, 3]), torch.zeros([1, 4, 4, 4])]
        outputs = Scatter.forward(target_gpus, inputs)
        assert isinstance(outputs, tuple)
        for input, output in zip(inputs, outputs):
            # Compare the whole tensor, as the CPU branch does. Indexing
            # ``output[0]`` would drop the leading dimension and the check
            # would only pass because of broadcasting.
            assert torch.allclose(input.cuda(), output)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_runner/test_basemodule.py
================================================
import tempfile

import pytest
import torch
from torch import nn

import mmcv
from mmcv.cnn.utils.weight_init import update_init_info
from mmcv.runner import BaseModule, ModuleDict, ModuleList, Sequential
from mmcv.utils import Registry, build_from_cfg

# Registries used by this test module: the Foo* building blocks are registered
# in ``COMPONENTS`` and the top-level ``FooModel`` in ``FOOMODELS``.
COMPONENTS = Registry('component')
FOOMODELS = Registry('model')


@COMPONENTS.register_module()
class FooConv1d(BaseModule):
    """Registered test component holding one ``nn.Conv1d(4, 1, 4)`` layer."""

    def __init__(self, init_cfg=None):
        super().__init__(init_cfg)
        self.conv1d = nn.Conv1d(in_channels=4, out_channels=1, kernel_size=4)

    def forward(self, x):
        """Run ``x`` through the 1-D convolution."""
        out = self.conv1d(x)
        return out


@COMPONENTS.register_module()
class FooConv2d(BaseModule):
    """Registered test component holding one ``nn.Conv2d(3, 1, 3)`` layer."""

    def __init__(self, init_cfg=None):
        super().__init__(init_cfg)
        self.conv2d = nn.Conv2d(in_channels=3, out_channels=1, kernel_size=3)

    def forward(self, x):
        """Run ``x`` through the 2-D convolution."""
        out = self.conv2d(x)
        return out


@COMPONENTS.register_module()
class FooLinear(BaseModule):
    """Registered test component holding one ``nn.Linear(3, 4)`` layer."""

    def __init__(self, init_cfg=None):
        super().__init__(init_cfg)
        self.linear = nn.Linear(in_features=3, out_features=4)

    def forward(self, x):
        """Run ``x`` through the linear layer."""
        out = self.linear(x)
        return out


@COMPONENTS.register_module()
class FooLinearConv1d(BaseModule):
    """Test component composed of an optional linear and conv1d child.

    Each child is built from its config through ``COMPONENTS`` and is only
    attached when a config is given.
    """

    def __init__(self, linear=None, conv1d=None, init_cfg=None):
        super().__init__(init_cfg)
        for attr_name, cfg in (('linear', linear), ('conv1d', conv1d)):
            if cfg is not None:
                setattr(self, attr_name, build_from_cfg(cfg, COMPONENTS))

    def forward(self, x):
        """Apply the linear child first, then the conv1d child."""
        return self.conv1d(self.linear(x))


@FOOMODELS.register_module()
class FooModel(BaseModule):
    """Top-level test model with up to four components and a ``reg`` layer.

    Every ``componentN`` config that is not ``None`` is built through
    ``COMPONENTS`` and attached under the same name.
    """

    def __init__(self,
                 component1=None,
                 component2=None,
                 component3=None,
                 component4=None,
                 init_cfg=None) -> None:
        super().__init__(init_cfg)
        component_cfgs = (
            ('component1', component1),
            ('component2', component2),
            ('component3', component3),
            ('component4', component4),
        )
        for attr_name, cfg in component_cfgs:
            if cfg is None:
                continue
            setattr(self, attr_name, build_from_cfg(cfg, COMPONENTS))

        # ``reg`` is a plain nn.Linear rather than a BaseModule; it can be
        # initialized with the "override" key.
        self.reg = nn.Linear(3, 4)


def test_initilization_info_logger():
    """Check the initialization info logged by ``init_weights``.

    A logger named ``'init_logger'`` is created with a log file; after
    ``init_weights`` the model must no longer carry ``_params_init_info`` and
    the log file must exist and contain the expected initializer names. The
    second half repeats the check for nested modules whose ``init_weights``
    is called several times.
    """

    import torch.nn as nn
    from mmcv.utils.logging import get_logger
    import os

    # Conv2d subclass that overrides ``init_weights`` to fill with ones
    class OverloadInitConv(nn.Conv2d, BaseModule):

        def init_weights(self):
            for p in self.parameters():
                with torch.no_grad():
                    p.fill_(1)

    class CheckLoggerModel(BaseModule):

        def __init__(self, init_cfg=None):
            super(CheckLoggerModel, self).__init__(init_cfg)
            self.conv1 = nn.Conv2d(1, 1, 1, 1)
            self.conv2 = OverloadInitConv(1, 1, 1, 1)
            self.conv3 = nn.Conv2d(1, 1, 1, 1)
            self.fc1 = nn.Linear(1, 1)

    init_cfg = [
        dict(
            type='Normal',
            layer='Conv2d',
            std=0.01,
            override=dict(
                type='Normal', name='conv3', std=0.01, bias_prob=0.01)),
        dict(type='Constant', layer='Linear', val=0., bias=1.)
    ]

    model = CheckLoggerModel(init_cfg=init_cfg)

    train_log = '20210720_132454.log'
    # NOTE(review): the directory created by ``mkdtemp`` is never removed in
    # this function -- confirm whether cleanup is wanted.
    workdir = tempfile.mkdtemp()
    log_file = os.path.join(workdir, train_log)
    # create a logger
    get_logger('init_logger', log_file=log_file)
    assert not hasattr(model, '_params_init_info')
    model.init_weights()
    # assert `_params_init_info` would be deleted after `init_weights`
    assert not hasattr(model, '_params_init_info')
    # assert initialization information has been dumped
    assert os.path.exists(log_file)

    lines = mmcv.list_from_file(log_file)

    # check initialization information is right
    # (the initializer name is looked up on the line after the parameter name)
    for i, line in enumerate(lines):
        if 'conv1.weight' in line:
            assert 'NormalInit' in lines[i + 1]
        if 'conv2.weight' in line:
            assert 'OverloadInitConv' in lines[i + 1]
        if 'fc1.weight' in line:
            assert 'ConstantInit' in lines[i + 1]

    # test corner case

    # Conv2d subclass with an extra Linear child named ``conv1``
    class OverloadInitConvFc(nn.Conv2d, BaseModule):

        def __init__(self, *args, **kwargs):
            super(OverloadInitConvFc, self).__init__(*args, **kwargs)
            self.conv1 = nn.Linear(1, 1)

        def init_weights(self):
            for p in self.parameters():
                with torch.no_grad():
                    p.fill_(1)

    # redefines CheckLoggerModel, this time with OverloadInitConvFc as conv2
    class CheckLoggerModel(BaseModule):

        def __init__(self, init_cfg=None):
            super(CheckLoggerModel, self).__init__(init_cfg)
            self.conv1 = nn.Conv2d(1, 1, 1, 1)
            self.conv2 = OverloadInitConvFc(1, 1, 1, 1)
            self.conv3 = nn.Conv2d(1, 1, 1, 1)
            self.fc1 = nn.Linear(1, 1)

    class TopLevelModule(BaseModule):

        def __init__(self, init_cfg=None, checklog_init_cfg=None):
            super(TopLevelModule, self).__init__(init_cfg)
            self.module1 = CheckLoggerModel(checklog_init_cfg)
            self.module2 = OverloadInitConvFc(1, 1, 1, 1)

    checklog_init_cfg = [
        dict(
            type='Normal',
            layer='Conv2d',
            std=0.01,
            override=dict(
                type='Normal', name='conv3', std=0.01, bias_prob=0.01)),
        dict(type='Constant', layer='Linear', val=0., bias=1.)
    ]

    top_level_init_cfg = [
        dict(
            type='Normal',
            layer='Conv2d',
            std=0.01,
            override=dict(
                type='Normal', name='module2', std=0.01, bias_prob=0.01))
    ]

    model = TopLevelModule(
        init_cfg=top_level_init_cfg, checklog_init_cfg=checklog_init_cfg)

    # call init_weights on the children before and after the parent
    model.module1.init_weights()
    model.module2.init_weights()
    model.init_weights()
    model.module1.init_weights()
    model.module2.init_weights()

    assert not hasattr(model, '_params_init_info')
    model.init_weights()
    # assert `_params_init_info` would be deleted after `init_weights`
    assert not hasattr(model, '_params_init_info')
    # assert initialization information has been dumped
    assert os.path.exists(log_file)

    lines = mmcv.list_from_file(log_file)
    # check initialization information is right
    for i, line in enumerate(lines):
        if 'TopLevelModule' in line and 'init_cfg' not in line:
            # have been set init_flag
            assert 'the same' in line


def test_update_init_info():
    """Check ``update_init_info`` on changed and on newly added parameters.

    After every parameter has been filled with ones, the stored records must
    carry the new ``init_info`` and a mean value of 1. Replacing a parameter
    with a new one must make ``update_init_info`` raise ``AssertionError``.
    """

    class DummyModel(BaseModule):

        def __init__(self, init_cfg=None):
            super().__init__(init_cfg)
            self.conv1 = nn.Conv2d(1, 1, 1, 1)
            self.conv3 = nn.Conv2d(1, 1, 1, 1)
            self.fc1 = nn.Linear(1, 1)

    from collections import defaultdict

    model = DummyModel()
    model._params_init_info = defaultdict(dict)
    # seed one record per parameter with its current mean value
    for _, param in model.named_parameters():
        record = model._params_init_info[param]
        record['init_info'] = 'init'
        record['tmp_mean_value'] = param.data.mean()

    with torch.no_grad():
        for param in model.parameters():
            param.fill_(1)

    update_init_info(model, init_info='fill_1')

    for record in model._params_init_info.values():
        assert record['init_info'] == 'fill_1'
        assert record['tmp_mean_value'] == 1

    # a parameter that has no record yet must trigger the assertion
    replacement_bias = nn.Parameter(torch.ones_like(model.conv1.bias))
    model.conv1.bias = replacement_bias
    with pytest.raises(AssertionError):
        update_init_info(model, init_info=' ')


def test_model_weight_init():
    """Top-level ``init_cfg`` is applied to every layer of the model.

    The config sets constants per layer type on ``FooModel`` only:

        Linear: weight=1, bias=2
        Conv1d: weight=3, bias=4
        Conv2d: weight=5, bias=6

    Expected parameters after ``init_weights``:

        component1 (FooConv1d)          weight=3, bias=4
        component2 (FooConv2d)          weight=5, bias=6
        component3 (FooLinear)          weight=1, bias=2
        component4 (FooLinearConv1d)
            linear (FooLinear)          weight=1, bias=2
            conv1d (FooConv1d)          weight=3, bias=4
        reg (nn.Linear)                 weight=1, bias=2
    """
    layer_init_cfgs = [
        dict(type='Constant', val=1, bias=2, layer='Linear'),
        dict(type='Constant', val=3, bias=4, layer='Conv1d'),
        dict(type='Constant', val=5, bias=6, layer='Conv2d')
    ]
    nested_component = dict(
        type='FooLinearConv1d',
        linear=dict(type='FooLinear'),
        conv1d=dict(type='FooConv1d'))
    model_cfg = dict(
        type='FooModel',
        init_cfg=layer_init_cfgs,
        component1=dict(type='FooConv1d'),
        component2=dict(type='FooConv2d'),
        component3=dict(type='FooLinear'),
        component4=nested_component)

    model = build_from_cfg(model_cfg, FOOMODELS)
    model.init_weights()

    expected_constants = [
        (model.component1.conv1d.weight, 3.0),
        (model.component1.conv1d.bias, 4.0),
        (model.component2.conv2d.weight, 5.0),
        (model.component2.conv2d.bias, 6.0),
        (model.component3.linear.weight, 1.0),
        (model.component3.linear.bias, 2.0),
        (model.component4.linear.linear.weight, 1.0),
        (model.component4.linear.linear.bias, 2.0),
        (model.component4.conv1d.conv1d.weight, 3.0),
        (model.component4.conv1d.conv1d.bias, 4.0),
        (model.reg.weight, 1.0),
        (model.reg.bias, 2.0),
    ]
    for param, value in expected_constants:
        assert torch.equal(param, torch.full(param.shape, value))


def test_nest_components_weight_init():
    """A component's own ``init_cfg`` and ``override`` beat the parent's.

    Config:

        model (FooModel)   Linear: weight=1, bias=2 (override reg: 13 / 14)
                           Conv1d: weight=3, bias=4
                           Conv2d: weight=5, bias=6
        component1 (FooConv1d)   own init_cfg, Conv1d: weight=7, bias=8
        component2 (FooConv2d)   own init_cfg, Conv2d: weight=9, bias=10
        component3 (FooLinear)   no init_cfg
        component4 (FooLinearConv1d) and its children: no init_cfg

    Expected parameters after ``init_weights``:

        component1               weight=7, bias=8
        component2               weight=9, bias=10
        component3               weight=1, bias=2
        component4.linear        weight=1, bias=2
        component4.conv1d        weight=3, bias=4
        reg (nn.Linear)          weight=13, bias=14
    """
    reg_override = dict(type='Constant', name='reg', val=13, bias=14)
    top_level_init = [
        dict(
            type='Constant',
            val=1,
            bias=2,
            layer='Linear',
            override=reg_override),
        dict(type='Constant', val=3, bias=4, layer='Conv1d'),
        dict(type='Constant', val=5, bias=6, layer='Conv2d'),
    ]
    model_cfg = dict(
        type='FooModel',
        init_cfg=top_level_init,
        component1=dict(
            type='FooConv1d',
            init_cfg=dict(type='Constant', layer='Conv1d', val=7, bias=8)),
        component2=dict(
            type='FooConv2d',
            init_cfg=dict(type='Constant', layer='Conv2d', val=9, bias=10)),
        component3=dict(type='FooLinear'),
        component4=dict(
            type='FooLinearConv1d',
            linear=dict(type='FooLinear'),
            conv1d=dict(type='FooConv1d')))

    model = build_from_cfg(model_cfg, FOOMODELS)
    model.init_weights()

    expected_constants = [
        (model.component1.conv1d.weight, 7.0),
        (model.component1.conv1d.bias, 8.0),
        (model.component2.conv2d.weight, 9.0),
        (model.component2.conv2d.bias, 10.0),
        (model.component3.linear.weight, 1.0),
        (model.component3.linear.bias, 2.0),
        (model.component4.linear.linear.weight, 1.0),
        (model.component4.linear.linear.bias, 2.0),
        (model.component4.conv1d.conv1d.weight, 3.0),
        (model.component4.conv1d.conv1d.bias, 4.0),
        (model.reg.weight, 13.0),
        (model.reg.bias, 14.0),
    ]
    for param, value in expected_constants:
        assert torch.equal(param, torch.full(param.shape, value))


def test_without_layer_weight_init():
    """An ``init_cfg`` lacking the ``layer`` key does nothing.

    ``component1`` is configured with ``dict(type='Constant', val=7, bias=8)``
    and no ``layer``; its Conv1d is therefore expected to hold the values of
    the top-level config (3 / 4), like every other layer in the model.
    """
    top_level_init = [
        dict(type='Constant', val=1, bias=2, layer='Linear'),
        dict(type='Constant', val=3, bias=4, layer='Conv1d'),
        dict(type='Constant', val=5, bias=6, layer='Conv2d')
    ]
    model_cfg = dict(
        type='FooModel',
        init_cfg=top_level_init,
        component1=dict(
            type='FooConv1d', init_cfg=dict(type='Constant', val=7, bias=8)),
        component2=dict(type='FooConv2d'),
        component3=dict(type='FooLinear'))
    model = build_from_cfg(model_cfg, FOOMODELS)
    model.init_weights()

    expected_constants = [
        # component1: its layer-less init_cfg is ignored
        (model.component1.conv1d.weight, 3.0),
        (model.component1.conv1d.bias, 4.0),
        # remaining layers follow the top-level config
        (model.component2.conv2d.weight, 5.0),
        (model.component2.conv2d.bias, 6.0),
        (model.component3.linear.weight, 1.0),
        (model.component3.linear.bias, 2.0),
        (model.reg.weight, 1.0),
        (model.reg.bias, 2.0),
    ]
    for param, value in expected_constants:
        assert torch.equal(param, torch.full(param.shape, value))


def test_override_weight_init():
    """Check the ``override`` key of ``init_cfg``.

    First case: without ``layer``, only the overridden ``reg`` module gets
    the constants. Second case: the values given inside ``override`` win
    over the ones of the enclosing config.
    """

    def filled_like(param, value):
        # tensor of the parameter's shape holding ``value`` everywhere
        return torch.full(param.shape, value)

    # only initialize 'override'
    model_cfg = dict(
        type='FooModel',
        init_cfg=[
            dict(type='Constant', val=10, bias=20, override=dict(name='reg'))
        ],
        component1=dict(type='FooConv1d'),
        component3=dict(type='FooLinear'))
    model = build_from_cfg(model_cfg, FOOMODELS)
    model.init_weights()
    assert torch.equal(model.reg.weight, filled_like(model.reg.weight, 10.0))
    assert torch.equal(model.reg.bias, filled_like(model.reg.bias, 20.0))
    # do not initialize others
    untouched = [
        (model.component1.conv1d.weight, 10.0),
        (model.component1.conv1d.bias, 20.0),
        (model.component3.linear.weight, 10.0),
        (model.component3.linear.bias, 20.0),
    ]
    for param, value in untouched:
        assert not torch.equal(param, filled_like(param, value))

    # 'override' has higher priority
    reg_override = dict(name='reg', type='Constant', val=30, bias=40)
    model_cfg = dict(
        type='FooModel',
        init_cfg=[
            dict(type='Constant', val=1, bias=2, override=reg_override)
        ],
        component1=dict(type='FooConv1d'),
        component2=dict(type='FooConv2d'),
        component3=dict(type='FooLinear'))
    model = build_from_cfg(model_cfg, FOOMODELS)
    model.init_weights()

    assert torch.equal(model.reg.weight, filled_like(model.reg.weight, 30.0))
    assert torch.equal(model.reg.bias, filled_like(model.reg.bias, 40.0))


def test_sequential_model_weight_init():
    """Layers inside ``Sequential`` keep the values of their own init_cfg.

    The same expectations hold with and without an ``init_cfg`` on the
    ``Sequential`` container itself.
    """
    seq_model_cfg = [
        dict(
            type='FooConv1d',
            init_cfg=dict(type='Constant', layer='Conv1d', val=0., bias=1.)),
        dict(
            type='FooConv2d',
            init_cfg=dict(type='Constant', layer='Conv2d', val=2., bias=3.)),
    ]

    def check_inner_values(container):
        expected_constants = [
            (container[0].conv1d.weight, 0.),
            (container[0].conv1d.bias, 1.),
            (container[1].conv2d.weight, 2.),
            (container[1].conv2d.bias, 3.),
        ]
        for param, value in expected_constants:
            assert torch.equal(param, torch.full(param.shape, value))

    # container without its own init_cfg
    layers = [build_from_cfg(cfg, COMPONENTS) for cfg in seq_model_cfg]
    seq_model = Sequential(*layers)
    seq_model.init_weights()
    check_inner_values(seq_model)

    # inner init_cfg has higher priority
    layers = [build_from_cfg(cfg, COMPONENTS) for cfg in seq_model_cfg]
    outer_init_cfg = dict(
        type='Constant', layer=['Conv1d', 'Conv2d'], val=4., bias=5.)
    seq_model = Sequential(*layers, init_cfg=outer_init_cfg)
    seq_model.init_weights()
    check_inner_values(seq_model)


def test_modulelist_weight_init():
    """Layers inside ``ModuleList`` keep the values of their own init_cfg.

    The same expectations hold with and without an ``init_cfg`` on the
    ``ModuleList`` container itself.
    """
    models_cfg = [
        dict(
            type='FooConv1d',
            init_cfg=dict(type='Constant', layer='Conv1d', val=0., bias=1.)),
        dict(
            type='FooConv2d',
            init_cfg=dict(type='Constant', layer='Conv2d', val=2., bias=3.)),
    ]

    def check_inner_values(container):
        expected_constants = [
            (container[0].conv1d.weight, 0.),
            (container[0].conv1d.bias, 1.),
            (container[1].conv2d.weight, 2.),
            (container[1].conv2d.bias, 3.),
        ]
        for param, value in expected_constants:
            assert torch.equal(param, torch.full(param.shape, value))

    # container without its own init_cfg
    layers = [build_from_cfg(cfg, COMPONENTS) for cfg in models_cfg]
    modellist = ModuleList(layers)
    modellist.init_weights()
    check_inner_values(modellist)

    # inner init_cfg has higher priority
    layers = [build_from_cfg(cfg, COMPONENTS) for cfg in models_cfg]
    outer_init_cfg = dict(
        type='Constant', layer=['Conv1d', 'Conv2d'], val=4., bias=5.)
    modellist = ModuleList(layers, init_cfg=outer_init_cfg)
    modellist.init_weights()
    check_inner_values(modellist)


def test_moduledict_weight_init():
    """Layers inside ``ModuleDict`` keep the values of their own init_cfg.

    The same expectations hold with and without an ``init_cfg`` on the
    ``ModuleDict`` container itself.
    """
    models_cfg = dict(
        foo_conv_1d=dict(
            type='FooConv1d',
            init_cfg=dict(type='Constant', layer='Conv1d', val=0., bias=1.)),
        foo_conv_2d=dict(
            type='FooConv2d',
            init_cfg=dict(type='Constant', layer='Conv2d', val=2., bias=3.)),
    )

    def build_layers():
        built = {}
        for name, cfg in models_cfg.items():
            built[name] = build_from_cfg(cfg, COMPONENTS)
        return built

    def check_inner_values(container):
        expected_constants = [
            (container['foo_conv_1d'].conv1d.weight, 0.),
            (container['foo_conv_1d'].conv1d.bias, 1.),
            (container['foo_conv_2d'].conv2d.weight, 2.),
            (container['foo_conv_2d'].conv2d.bias, 3.),
        ]
        for param, value in expected_constants:
            assert torch.equal(param, torch.full(param.shape, value))

    # container without its own init_cfg
    modeldict = ModuleDict(build_layers())
    modeldict.init_weights()
    check_inner_values(modeldict)

    # inner init_cfg has higher priority
    outer_init_cfg = dict(
        type='Constant', layer=['Conv1d', 'Conv2d'], val=4., bias=5.)
    modeldict = ModuleDict(build_layers(), init_cfg=outer_init_cfg)
    modeldict.init_weights()
    check_inner_values(modeldict)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_runner/test_checkpoint.py
================================================
import sys
from collections import OrderedDict
from tempfile import TemporaryDirectory
from unittest.mock import MagicMock, patch

import pytest
import torch
import torch.nn as nn
import torch.optim as optim
from torch.nn.parallel import DataParallel

from mmcv.fileio.file_client import PetrelBackend
from mmcv.parallel.registry import MODULE_WRAPPERS
from mmcv.runner.checkpoint import (_load_checkpoint_with_prefix,
                                    get_state_dict, load_checkpoint,
                                    load_from_local, load_from_pavi,
                                    save_checkpoint)

# Register MagicMock stand-ins under the ``petrel_client`` module names so
# that imports of those names resolve to the mocks.
sys.modules['petrel_client'] = MagicMock()
sys.modules['petrel_client.client'] = MagicMock()


@MODULE_WRAPPERS.register_module()
class DDPWrapper:
    """Minimal registered wrapper that only stores the wrapped module."""

    def __init__(self, module):
        # kept under ``module``, the attribute the tests read back
        self.module = module


class Block(nn.Module):
    """Small module made of a 1x1 ``Conv2d`` followed by a ``BatchNorm2d``."""

    def __init__(self):
        super(Block, self).__init__()
        self.conv = nn.Conv2d(in_channels=3, out_channels=3, kernel_size=1)
        self.norm = nn.BatchNorm2d(num_features=3)


class Model(nn.Module):
    """Test model holding one ``Block`` and one extra 1x1 ``Conv2d``."""

    def __init__(self):
        super(Model, self).__init__()
        self.block = Block()
        self.conv = nn.Conv2d(in_channels=3, out_channels=3, kernel_size=1)


class Mockpavimodel:
    """Stand-in for a pavi model object: a name and a no-op ``download``."""

    def __init__(self, name='fakename'):
        self.name = name

    def download(self, file):
        """Accept a target ``file`` and do nothing."""
        return None


def assert_tensor_equal(tensor_a, tensor_b):
    """Assert that ``tensor_a`` and ``tensor_b`` are equal element-wise."""
    elementwise_match = tensor_a.eq(tensor_b)
    assert elementwise_match.all()


def test_get_state_dict():
    """Check ``get_state_dict`` on plain, wrapped and inner-wrapped models.

    In all three situations the result must be an ``OrderedDict`` with the
    same un-prefixed keys, and each entry must equal the tensor found on the
    underlying module.
    """
    is_parrots = torch.__version__ == 'parrots'
    state_dict_keys = {
        'block.conv.weight', 'block.conv.bias', 'block.norm.weight',
        'block.norm.bias', 'block.norm.running_mean',
        'block.norm.running_var', 'conv.weight', 'conv.bias'
    }
    if not is_parrots:
        # this key is only expected when torch is not 'parrots'
        state_dict_keys.add('block.norm.num_batches_tracked')

    def check_state_dict(state_dict, block, conv):
        # ``block`` / ``conv`` are the modules that own the real tensors
        assert isinstance(state_dict, OrderedDict)
        assert set(state_dict.keys()) == state_dict_keys
        assert_tensor_equal(state_dict['block.conv.weight'],
                            block.conv.weight)
        assert_tensor_equal(state_dict['block.conv.bias'], block.conv.bias)
        assert_tensor_equal(state_dict['block.norm.weight'],
                            block.norm.weight)
        assert_tensor_equal(state_dict['block.norm.bias'], block.norm.bias)
        assert_tensor_equal(state_dict['block.norm.running_mean'],
                            block.norm.running_mean)
        assert_tensor_equal(state_dict['block.norm.running_var'],
                            block.norm.running_var)
        if not is_parrots:
            assert_tensor_equal(state_dict['block.norm.num_batches_tracked'],
                                block.norm.num_batches_tracked)
        assert_tensor_equal(state_dict['conv.weight'], conv.weight)
        assert_tensor_equal(state_dict['conv.bias'], conv.bias)

    # plain model
    model = Model()
    check_state_dict(get_state_dict(model), model.block, model.conv)

    # model behind a registered wrapper
    wrapped_model = DDPWrapper(model)
    check_state_dict(
        get_state_dict(wrapped_model), wrapped_model.module.block,
        wrapped_model.module.conv)

    # wrapped inner module
    inner_modules = wrapped_model.module._modules
    for name in list(inner_modules.keys()):
        inner_modules[name] = DataParallel(inner_modules[name])
    check_state_dict(
        get_state_dict(wrapped_model), wrapped_model.module.block.module,
        wrapped_model.module.conv.module)


def test_load_pavimodel_dist():
    """Check ``load_from_pavi`` against a mocked ``pavi`` package.

    ``pavi`` and ``pavi.modelcloud`` are replaced by ``MagicMock`` objects for
    the duration of the test and the previous ``sys.modules`` entries are put
    back afterwards, so the stubs cannot leak into other tests.
    """
    mocked_names = ('pavi', 'pavi.modelcloud')
    missing = object()
    # Remember what was registered before (if anything) so it can be restored.
    saved = {name: sys.modules.get(name, missing) for name in mocked_names}
    try:
        sys.modules['pavi'] = MagicMock()
        sys.modules['pavi.modelcloud'] = MagicMock()
        pavimodel = Mockpavimodel()
        import pavi
        pavi.modelcloud.get = MagicMock(return_value=pavimodel)
        with pytest.raises(AssertionError):
            # test pavi prefix
            _ = load_from_pavi('MyPaviFolder/checkpoint.pth')

        with pytest.raises(FileNotFoundError):
            # there is no such checkpoint for us to load
            _ = load_from_pavi('pavi://checkpoint.pth')
    finally:
        for name, module in saved.items():
            if module is missing:
                sys.modules.pop(name, None)
            else:
                sys.modules[name] = module


def test_load_checkpoint_with_prefix():
    """Check that ``_load_checkpoint_with_prefix`` extracts one sub-module.

    A small model is saved, the ``conv2d`` entries are loaded back through the
    prefix loader and compared with the live sub-module. A prefix that does
    not occur in the checkpoint must raise ``AssertionError``.
    """
    import os

    class FooModule(nn.Module):

        def __init__(self):
            super().__init__()
            self.linear = nn.Linear(1, 2)
            self.conv2d = nn.Conv2d(3, 1, 3)
            self.conv2d_2 = nn.Conv2d(3, 2, 3)

    model = FooModule()
    nn.init.constant_(model.linear.weight, 1)
    nn.init.constant_(model.linear.bias, 2)
    nn.init.constant_(model.conv2d.weight, 3)
    nn.init.constant_(model.conv2d.bias, 4)
    nn.init.constant_(model.conv2d_2.weight, 5)
    nn.init.constant_(model.conv2d_2.bias, 6)

    with TemporaryDirectory() as tmp_dir:
        # Write inside the temporary directory; a bare 'model.pth' would land
        # in the current working directory and never be cleaned up.
        checkpoint_path = os.path.join(tmp_dir, 'model.pth')
        torch.save(model.state_dict(), checkpoint_path)
        prefix = 'conv2d'
        state_dict = _load_checkpoint_with_prefix(prefix, checkpoint_path)
        assert torch.equal(model.conv2d.state_dict()['weight'],
                           state_dict['weight'])
        assert torch.equal(model.conv2d.state_dict()['bias'],
                           state_dict['bias'])

        # test whether prefix is in pretrained model
        with pytest.raises(AssertionError):
            prefix = 'back'
            _load_checkpoint_with_prefix(prefix, checkpoint_path)


def test_load_checkpoint():
    """Check the ``revise_keys`` option of ``load_checkpoint``.

    Two directions are exercised: adding a ``backbone.`` prefix when loading a
    plain model checkpoint into a wrapper, and stripping that prefix when
    loading the wrapper checkpoint into the plain model.
    """
    import os
    import re
    import tempfile

    class PrefixModel(nn.Module):

        def __init__(self):
            super().__init__()
            self.backbone = Model()

    pmodel = PrefixModel()
    model = Model()

    # A private temporary directory avoids clashing with other tests that use
    # 'checkpoint.pth' in the shared temp dir and is removed even on failure.
    with tempfile.TemporaryDirectory() as tmp_dir:
        checkpoint_path = os.path.join(tmp_dir, 'checkpoint.pth')

        # add prefix
        torch.save(model.state_dict(), checkpoint_path)
        state_dict = load_checkpoint(
            pmodel, checkpoint_path, revise_keys=[(r'^', 'backbone.')])
        for key in pmodel.backbone.state_dict().keys():
            assert torch.equal(pmodel.backbone.state_dict()[key],
                               state_dict[key])

        # strip prefix
        torch.save(pmodel.state_dict(), checkpoint_path)
        state_dict = load_checkpoint(
            model, checkpoint_path, revise_keys=[(r'^backbone\.', '')])

        for key in state_dict.keys():
            key_stripped = re.sub(r'^backbone\.', '', key)
            assert torch.equal(model.state_dict()[key_stripped],
                               state_dict[key])


def test_load_checkpoint_metadata():
    """Check that version metadata reaches ``_load_from_state_dict``.

    ``ModelV2`` renames ``conv1``/``conv2`` of ``ModelV1`` to
    ``conv0``/``conv1`` and converts old keys when the checkpoint version is
    missing or lower than 2. Loading a V1 checkpoint must therefore fill the
    renamed layers, and loading a V2 checkpoint must restore the original V2
    weights unchanged.
    """
    import os

    import tempfile

    from mmcv.runner import load_checkpoint, save_checkpoint

    class ModelV1(nn.Module):

        def __init__(self):
            super().__init__()
            self.block = Block()
            self.conv1 = nn.Conv2d(3, 3, 1)
            self.conv2 = nn.Conv2d(3, 3, 1)
            nn.init.normal_(self.conv1.weight)
            nn.init.normal_(self.conv2.weight)

    class ModelV2(nn.Module):
        _version = 2

        def __init__(self):
            super().__init__()
            self.block = Block()
            self.conv0 = nn.Conv2d(3, 3, 1)
            self.conv1 = nn.Conv2d(3, 3, 1)
            nn.init.normal_(self.conv0.weight)
            nn.init.normal_(self.conv1.weight)

        def _load_from_state_dict(self, state_dict, prefix, local_metadata,
                                  *args, **kwargs):
            """load checkpoints."""

            # Names of some parameters have been changed.
            version = local_metadata.get('version', None)
            if version is None or version < 2:
                # Iterate over a snapshot of the keys because the dict is
                # modified inside the loop.
                state_dict_keys = list(state_dict.keys())
                convert_map = {'conv1': 'conv0', 'conv2': 'conv1'}
                for k in state_dict_keys:
                    for ori_str, new_str in convert_map.items():
                        if k.startswith(prefix + ori_str):
                            new_key = k.replace(ori_str, new_str)
                            state_dict[new_key] = state_dict[k]
                            del state_dict[k]

            super()._load_from_state_dict(state_dict, prefix, local_metadata,
                                          *args, **kwargs)

    # Snapshot the reference weights with ``clone()``. A plain ``detach()``
    # aliases the live parameter, so a snapshot taken that way would follow
    # any in-place update made while loading and the comparisons below would
    # compare a tensor with itself.
    model_v1 = ModelV1()
    model_v1_conv0_weight = model_v1.conv1.weight.detach().clone()
    model_v1_conv1_weight = model_v1.conv2.weight.detach().clone()
    model_v2 = ModelV2()
    model_v2_conv0_weight = model_v2.conv0.weight.detach().clone()
    model_v2_conv1_weight = model_v2.conv1.weight.detach().clone()

    # Keep both checkpoints in a private directory that is always removed.
    with tempfile.TemporaryDirectory() as tmp_dir:
        ckpt_v1_path = os.path.join(tmp_dir, 'checkpoint_v1.pth')
        ckpt_v2_path = os.path.join(tmp_dir, 'checkpoint_v2.pth')

        # Save checkpoint
        save_checkpoint(model_v1, ckpt_v1_path)
        save_checkpoint(model_v2, ckpt_v2_path)

        # test load v1 model
        load_checkpoint(model_v2, ckpt_v1_path)
        assert torch.allclose(model_v2.conv0.weight, model_v1_conv0_weight)
        assert torch.allclose(model_v2.conv1.weight, model_v1_conv1_weight)

        # test load v2 model
        load_checkpoint(model_v2, ckpt_v2_path)
        assert torch.allclose(model_v2.conv0.weight, model_v2_conv0_weight)
        assert torch.allclose(model_v2.conv1.weight, model_v2_conv1_weight)


def test_load_classes_name():
    """Check that ``CLASSES`` is stored in the checkpoint ``meta``.

    The key must be absent when the model has no ``CLASSES`` attribute and
    present, with the same value, once the attribute is set. Both a plain
    model and one wrapped in ``DDPWrapper`` are covered.
    """
    import os

    import tempfile

    from mmcv.runner import load_checkpoint, save_checkpoint

    # The private directory is removed even when an assertion fails and does
    # not collide with other tests that write 'checkpoint.pth'.
    with tempfile.TemporaryDirectory() as tmp_dir:
        checkpoint_path = os.path.join(tmp_dir, 'checkpoint.pth')
        model = Model()
        save_checkpoint(model, checkpoint_path)
        checkpoint = load_checkpoint(model, checkpoint_path)
        assert 'meta' in checkpoint and 'CLASSES' not in checkpoint['meta']

        model.CLASSES = ('class1', 'class2')
        save_checkpoint(model, checkpoint_path)
        checkpoint = load_checkpoint(model, checkpoint_path)
        assert 'meta' in checkpoint and 'CLASSES' in checkpoint['meta']
        assert checkpoint['meta']['CLASSES'] == ('class1', 'class2')

        model = Model()
        wrapped_model = DDPWrapper(model)
        save_checkpoint(wrapped_model, checkpoint_path)
        checkpoint = load_checkpoint(wrapped_model, checkpoint_path)
        assert 'meta' in checkpoint and 'CLASSES' not in checkpoint['meta']

        wrapped_model.module.CLASSES = ('class1', 'class2')
        save_checkpoint(wrapped_model, checkpoint_path)
        checkpoint = load_checkpoint(wrapped_model, checkpoint_path)
        assert 'meta' in checkpoint and 'CLASSES' in checkpoint['meta']
        assert checkpoint['meta']['CLASSES'] == ('class1', 'class2')


def test_checkpoint_loader():
    """Check loader dispatch and scheme registration of ``CheckpointLoader``.

    Covers: loading a checkpoint saved on disk, choosing the loader function
    from the filename prefix, registering a new scheme, rejecting a duplicate
    registration with ``KeyError`` unless ``force=True`` is given, and
    preferring the longer of two overlapping prefixes.
    """
    import os

    import tempfile

    from mmcv.runner import CheckpointLoader, _load_checkpoint, save_checkpoint

    # Use a private directory so the file is removed even on failure and does
    # not collide with other tests that write 'checkpoint.pth'.
    with tempfile.TemporaryDirectory() as tmp_dir:
        checkpoint_path = os.path.join(tmp_dir, 'checkpoint.pth')
        model = Model()
        save_checkpoint(model, checkpoint_path)
        checkpoint = _load_checkpoint(checkpoint_path)
        assert 'meta' in checkpoint and 'CLASSES' not in checkpoint['meta']

    # Each filename is kept next to the loader expected for it so the two
    # cannot get out of step.
    expected_loaders = [
        ('http://xx.xx/xx.pth', 'load_from_http'),
        ('https://xx.xx/xx.pth', 'load_from_http'),
        ('modelzoo://xx.xx/xx.pth', 'load_from_torchvision'),
        ('torchvision://xx.xx/xx.pth', 'load_from_torchvision'),
        ('open-mmlab://xx.xx/xx.pth', 'load_from_openmmlab'),
        ('openmmlab://xx.xx/xx.pth', 'load_from_openmmlab'),
        ('mmcls://xx.xx/xx.pth', 'load_from_mmcls'),
        ('pavi://xx.xx/xx.pth', 'load_from_pavi'),
        ('s3://xx.xx/xx.pth', 'load_from_ceph'),
        ('ss3://xx.xx/xx.pth', 'load_from_local'),
        (' s3://xx.xx/xx.pth', 'load_from_local'),
        ('open-mmlab:s3://xx.xx/xx.pth', 'load_from_ceph'),
        ('openmmlab:s3://xx.xx/xx.pth', 'load_from_ceph'),
        ('openmmlabs3://xx.xx/xx.pth', 'load_from_local'),
        (':s3://xx.xx/xx.path', 'load_from_local'),
    ]

    for filename, fn_name in expected_loaders:
        loader = CheckpointLoader._get_checkpoint_loader(filename)
        assert loader.__name__ == fn_name

    # NOTE(review): the schemes registered below are not unregistered at the
    # end of the test; confirm whether CheckpointLoader keeps them globally.
    @CheckpointLoader.register_scheme(prefixes='ftp://')
    def load_from_ftp(filename, map_location):
        return dict(filename=filename)

    # test register_loader
    filename = 'ftp://xx.xx/xx.pth'
    loader = CheckpointLoader._get_checkpoint_loader(filename)
    assert loader.__name__ == 'load_from_ftp'

    def load_from_ftp1(filename, map_location):
        return dict(filename=filename)

    # test duplicate registered error
    with pytest.raises(KeyError):
        CheckpointLoader.register_scheme('ftp://', load_from_ftp1)

    # test force param
    CheckpointLoader.register_scheme('ftp://', load_from_ftp1, force=True)
    checkpoint = CheckpointLoader.load_checkpoint(filename)
    assert checkpoint['filename'] == filename

    # test print function name
    loader = CheckpointLoader._get_checkpoint_loader(filename)
    assert loader.__name__ == 'load_from_ftp1'

    # test sort
    @CheckpointLoader.register_scheme(prefixes='a/b')
    def load_from_ab(filename, map_location):
        return dict(filename=filename)

    @CheckpointLoader.register_scheme(prefixes='a/b/c')
    def load_from_abc(filename, map_location):
        return dict(filename=filename)

    filename = 'a/b/c/d'
    loader = CheckpointLoader._get_checkpoint_loader(filename)
    assert loader.__name__ == 'load_from_abc'


def test_save_checkpoint(tmp_path):
    """Exercise ``save_checkpoint`` for local files and the petrel backend.

    Args:
        tmp_path: pytest fixture giving a per-test temporary directory.
    """
    model = Model()
    optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9)

    # meta is not a dict
    with pytest.raises(TypeError):
        save_checkpoint(model, '/path/of/your/filename', meta='invalid type')

    # 1. save to disk: (file name, extra positional args, extra keyword args)
    disk_cases = (
        ('checkpoint1.pth', (), {}),
        ('checkpoint2.pth', (optimizer, ), {}),
        ('checkpoint3.pth', (), {'meta': {'test': 'test'}}),
        ('checkpoint4.pth', (), {'file_client_args': {'backend': 'disk'}}),
    )
    for basename, extra_args, extra_kwargs in disk_cases:
        target = str(tmp_path / basename)
        save_checkpoint(model, target, *extra_args, **extra_kwargs)

    # 2. save to petrel oss; ``put`` is patched so nothing is uploaded
    with patch.object(PetrelBackend, 'put') as put_by_prefix:
        save_checkpoint(model, 's3://path/of/your/checkpoint1.pth')
    put_by_prefix.assert_called()

    with patch.object(PetrelBackend, 'put') as put_by_backend:
        save_checkpoint(
            model,
            's3://path//of/your/checkpoint2.pth',
            file_client_args={'backend': 'petrel'})
    put_by_backend.assert_called()


def test_load_from_local():
    """Check that ``load_from_local`` accepts a path starting with ``~``.

    The checkpoint has to live in the user's home directory for this, so it is
    removed in a ``finally`` clause to avoid leaving a stray file behind when
    loading or the assertion fails.
    """
    import os
    basename = 'dummy_checkpoint_used_to_test_load_from_local.pth'
    checkpoint_path = os.path.join(os.path.expanduser('~'), basename)
    model = Model()
    try:
        save_checkpoint(model, checkpoint_path)
        checkpoint = load_from_local('~/' + basename, map_location=None)
        assert_tensor_equal(checkpoint['state_dict']['block.conv.weight'],
                            model.block.conv.weight)
    finally:
        # The file may be missing if saving itself failed.
        if os.path.exists(checkpoint_path):
            os.remove(checkpoint_path)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_runner/test_dist_utils.py
================================================
import os
from unittest.mock import patch

import pytest

from mmcv.runner import init_dist


@patch('torch.cuda.device_count', return_value=1)
@patch('torch.cuda.set_device')
@patch('torch.distributed.init_process_group')
@patch('subprocess.getoutput', return_value='127.0.0.1')
def test_init_dist(mock_getoutput, mock_dist_init, mock_set_device,
                   mock_device_count):
    """Check ``init_dist`` with the slurm launcher and port selection.

    The CUDA, ``torch.distributed`` and ``subprocess`` entry points are
    patched by the decorators. All environment changes happen inside
    ``patch.dict(os.environ, ...)`` so the process environment is restored
    afterwards, and any pre-existing ``MASTER_PORT``/``MASTER_ADDR`` is
    removed first so the default-value assertions do not depend on the host.
    """
    with pytest.raises(ValueError):
        # launcher must be one of {'pytorch', 'mpi', 'slurm'}
        init_dist('invaliad_launcher')

    slurm_env = {
        'SLURM_PROCID': '0',
        'SLURM_NTASKS': '1',
        'SLURM_NODELIST': '[0]',  # the correct form has not been checked
    }

    def check_common_state():
        # Expectations shared by every slurm initialisation below.
        assert os.environ['MASTER_ADDR'] == '127.0.0.1'
        assert os.environ['WORLD_SIZE'] == '1'
        assert os.environ['RANK'] == '0'
        mock_set_device.assert_called_with(0)
        mock_getoutput.assert_called_with(
            'scontrol show hostname [0] | head -n1')
        mock_dist_init.assert_called_with(backend='nccl')

    # test initialize with slurm launcher
    with patch.dict(os.environ, slurm_env):
        # Removed entries are restored by ``patch.dict`` on exit.
        os.environ.pop('MASTER_PORT', None)
        os.environ.pop('MASTER_ADDR', None)

        init_dist('slurm')
        # no port is specified, use default port 29500
        assert os.environ['MASTER_PORT'] == '29500'
        check_common_state()

        init_dist('slurm', port=29505)
        # port is specified with argument 'port'
        assert os.environ['MASTER_PORT'] == '29505'
        check_common_state()

        init_dist('slurm')
        # port is specified by environment variable 'MASTER_PORT'
        assert os.environ['MASTER_PORT'] == '29505'
        check_common_state()


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_runner/test_eval_hook.py
================================================
import json
import os.path as osp
import sys
import tempfile
import unittest.mock as mock
from collections import OrderedDict
from unittest.mock import MagicMock, patch

import pytest
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset

from mmcv.fileio.file_client import PetrelBackend
from mmcv.runner import DistEvalHook as BaseDistEvalHook
from mmcv.runner import EpochBasedRunner
from mmcv.runner import EvalHook as BaseEvalHook
from mmcv.runner import IterBasedRunner
from mmcv.utils import get_logger, scandir

# Register MagicMock stand-ins for ``petrel_client`` and its ``client``
# submodule at import time. Presumably this lets ``PetrelBackend`` be used in
# the tests below without the real package installed -- TODO confirm.
sys.modules['petrel_client'] = MagicMock()
sys.modules['petrel_client.client'] = MagicMock()


class ExampleDataset(Dataset):
    """One-sample dataset whose ``evaluate`` is an autospec mock.

    Every item is ``{'x': tensor([1])}``. ``index`` and ``eval_result`` are
    not read by this class itself; subclasses use them to script results.
    """

    def __init__(self):
        self.eval_result = [1, 4, 3, 7, 2, -3, 4, 6]
        self.index = 0

    def __getitem__(self, idx):
        # The requested index is ignored; the sample is always the same.
        return {'x': torch.tensor([1])}

    def __len__(self):
        return 1

    @mock.create_autospec
    def evaluate(self, results, logger=None):
        pass


class EvalDataset(ExampleDataset):
    """Dataset whose ``evaluate`` replays ``eval_result`` one value per call."""

    def evaluate(self, results, logger=None):
        """Return the next scripted metrics and advance ``index`` by one.

        The same value is reported as ``acc``, ``score`` and ``loss_top``;
        ``index`` is the position used for this call.
        """
        position = self.index
        value = self.eval_result[position]
        metrics = OrderedDict(
            acc=value, index=position, score=value, loss_top=value)
        self.index = position + 1
        return metrics


class Model(nn.Module):
    """Minimal module with one scalar parameter, used to drive the runners."""

    def __init__(self):
        super().__init__()
        self.param = nn.Parameter(torch.tensor([1.0]))

    def forward(self, x, **kwargs):
        """Scale ``x`` by the parameter; extra keyword arguments are ignored."""
        scaled = self.param * x
        return scaled

    def train_step(self, data_batch, optimizer, **kwargs):
        """Return ``{'loss': ...}`` with the summed output for ``data_batch['x']``."""
        prediction = self(data_batch['x'])
        return dict(loss=torch.sum(prediction))

    def val_step(self, data_batch, optimizer, **kwargs):
        """Same computation as ``train_step``."""
        prediction = self(data_batch['x'])
        return dict(loss=torch.sum(prediction))


def _build_epoch_runner():
    """Create an ``EpochBasedRunner`` around a fresh ``Model``.

    The work directory is a new ``tempfile.mkdtemp()`` directory and the
    logger is the one named ``'demo'``.
    """
    return EpochBasedRunner(
        model=Model(),
        work_dir=tempfile.mkdtemp(),
        logger=get_logger('demo'))


def _build_iter_runner():
    """Create an ``IterBasedRunner`` around a fresh ``Model``.

    The work directory is a new ``tempfile.mkdtemp()`` directory and the
    logger is the one named ``'demo'``.
    """
    return IterBasedRunner(
        model=Model(),
        work_dir=tempfile.mkdtemp(),
        logger=get_logger('demo'))


class EvalHook(BaseEvalHook):
    """``BaseEvalHook`` variant with test-specific default key lists."""

    # Class-level overrides; presumably these are the defaults the base hook
    # consults to infer the comparison rule -- verify against BaseEvalHook.
    _default_greater_keys = ['acc', 'top']
    _default_less_keys = ['loss', 'loss_top']

    def __init__(self, *args, **kwargs):
        # Plain pass-through to the base constructor.
        BaseEvalHook.__init__(self, *args, **kwargs)


class DistEvalHook(BaseDistEvalHook):
    """``BaseDistEvalHook`` variant carrying test-specific key lists."""

    # NOTE(review): the sibling ``EvalHook`` test class overrides
    # ``_default_greater_keys``/``_default_less_keys`` instead of these names;
    # confirm which attributes the base hook actually reads.
    greater_keys = ['acc', 'top']
    less_keys = ['loss', 'loss_top']

    def __init__(self, *args, **kwargs):
        # Plain pass-through to the base constructor.
        BaseDistEvalHook.__init__(self, *args, **kwargs)


def test_eval_hook():
    """End-to-end checks of ``EvalHook`` with an ``EpochBasedRunner``.

    Sections, in order: constructor argument validation; the warning emitted
    when evaluation returns an empty dict; running without ``save_best``;
    best-checkpoint selection for ``'auto'``, ``'acc'``, ``'loss_top'`` and
    explicit ``rule`` values; resuming from a best checkpoint; custom
    ``greater_keys``/``less_keys``; and writing to an ``out_dir`` through a
    patched ``PetrelBackend``.

    The expected epochs follow from ``EvalDataset.eval_result``
    (``[1, 4, 3, 7, 2, -3, 4, 6]``): the largest value, 7, is returned by the
    4th evaluation and the smallest, -3, by the 6th.
    """
    with pytest.raises(AssertionError):
        # `save_best` should be a str
        test_dataset = Model()
        data_loader = DataLoader(test_dataset)
        EvalHook(data_loader, save_best=True)

    with pytest.raises(TypeError):
        # dataloader must be a pytorch DataLoader
        test_dataset = Model()
        data_loader = [DataLoader(test_dataset)]
        EvalHook(data_loader)

    with pytest.raises(ValueError):
        # key_indicator must be valid when rule_map is None
        test_dataset = ExampleDataset()
        data_loader = DataLoader(test_dataset)
        EvalHook(data_loader, save_best='unsupport')

    with pytest.raises(KeyError):
        # rule must be in keys of rule_map
        test_dataset = ExampleDataset()
        data_loader = DataLoader(test_dataset)
        EvalHook(data_loader, save_best='auto', rule='unsupport')

    # if eval_res is an empty dict, print a warning information
    with pytest.warns(UserWarning) as record_warnings:

        class _EvalDataset(ExampleDataset):

            def evaluate(self, results, logger=None):
                return {}

        test_dataset = _EvalDataset()
        data_loader = DataLoader(test_dataset)
        eval_hook = EvalHook(data_loader, save_best='auto')
        runner = _build_epoch_runner()
        runner.register_hook(eval_hook)
        runner.run([data_loader], [('train', 1)], 1)
    # Since there will be many warnings thrown, we just need to check if the
    # expected warning is among them
    expected_message = ('Since `eval_res` is an empty dict, the behavior to '
                        'save the best checkpoint will be skipped in this '
                        'evaluation.')
    for warning in record_warnings:
        if str(warning.message) == expected_message:
            break
    else:
        # The loop finished without finding the expected warning.
        assert False

    # save_best=None: evaluation runs but no best-checkpoint bookkeeping
    test_dataset = ExampleDataset()
    loader = DataLoader(test_dataset)
    model = Model()
    data_loader = DataLoader(test_dataset)
    eval_hook = EvalHook(data_loader, save_best=None)

    with tempfile.TemporaryDirectory() as tmpdir:

        # total_epochs = 1
        logger = get_logger('test_eval')
        runner = EpochBasedRunner(model=model, work_dir=tmpdir, logger=logger)
        runner.register_hook(eval_hook)
        runner.run([loader], [('train', 1)], 1)
        test_dataset.evaluate.assert_called_with(
            test_dataset, [torch.tensor([1])], logger=runner.logger)
        assert runner.meta is None or 'best_score' not in runner.meta[
            'hook_msgs']
        assert runner.meta is None or 'best_ckpt' not in runner.meta[
            'hook_msgs']

    # when `save_best` is set to 'auto', first metric will be used.
    loader = DataLoader(EvalDataset())
    model = Model()
    data_loader = DataLoader(EvalDataset())
    eval_hook = EvalHook(data_loader, interval=1, save_best='auto')

    with tempfile.TemporaryDirectory() as tmpdir:
        logger = get_logger('test_eval')
        runner = EpochBasedRunner(model=model, work_dir=tmpdir, logger=logger)
        runner.register_checkpoint_hook(dict(interval=1))
        runner.register_hook(eval_hook)
        runner.run([loader], [('train', 1)], 8)

        ckpt_path = osp.join(tmpdir, 'best_acc_epoch_4.pth')

        assert runner.meta['hook_msgs']['best_ckpt'] == ckpt_path
        assert osp.exists(ckpt_path)
        assert runner.meta['hook_msgs']['best_score'] == 7

    # total_epochs = 8, return the best acc and corresponding epoch
    loader = DataLoader(EvalDataset())
    model = Model()
    data_loader = DataLoader(EvalDataset())
    eval_hook = EvalHook(data_loader, interval=1, save_best='acc')

    with tempfile.TemporaryDirectory() as tmpdir:
        logger = get_logger('test_eval')
        runner = EpochBasedRunner(model=model, work_dir=tmpdir, logger=logger)
        runner.register_checkpoint_hook(dict(interval=1))
        runner.register_hook(eval_hook)
        runner.run([loader], [('train', 1)], 8)

        ckpt_path = osp.join(tmpdir, 'best_acc_epoch_4.pth')

        assert runner.meta['hook_msgs']['best_ckpt'] == ckpt_path
        assert osp.exists(ckpt_path)
        assert runner.meta['hook_msgs']['best_score'] == 7

    # total_epochs = 8, return the best loss_top and corresponding epoch
    loader = DataLoader(EvalDataset())
    model = Model()
    data_loader = DataLoader(EvalDataset())
    eval_hook = EvalHook(data_loader, interval=1, save_best='loss_top')

    with tempfile.TemporaryDirectory() as tmpdir:
        logger = get_logger('test_eval')
        runner = EpochBasedRunner(model=model, work_dir=tmpdir, logger=logger)
        runner.register_checkpoint_hook(dict(interval=1))
        runner.register_hook(eval_hook)
        runner.run([loader], [('train', 1)], 8)

        ckpt_path = osp.join(tmpdir, 'best_loss_top_epoch_6.pth')

        assert runner.meta['hook_msgs']['best_ckpt'] == ckpt_path
        assert osp.exists(ckpt_path)
        assert runner.meta['hook_msgs']['best_score'] == -3

    # total_epochs = 8, return the best score and corresponding epoch
    # (`model` and `loader` from the previous section are reused from here on)
    data_loader = DataLoader(EvalDataset())
    eval_hook = EvalHook(
        data_loader, interval=1, save_best='score', rule='greater')
    with tempfile.TemporaryDirectory() as tmpdir:
        logger = get_logger('test_eval')
        runner = EpochBasedRunner(model=model, work_dir=tmpdir, logger=logger)
        runner.register_checkpoint_hook(dict(interval=1))
        runner.register_hook(eval_hook)
        runner.run([loader], [('train', 1)], 8)

        ckpt_path = osp.join(tmpdir, 'best_score_epoch_4.pth')

        assert runner.meta['hook_msgs']['best_ckpt'] == ckpt_path
        assert osp.exists(ckpt_path)
        assert runner.meta['hook_msgs']['best_score'] == 7

    # total_epochs = 8, return the best score using less compare func
    # and indicate corresponding epoch
    data_loader = DataLoader(EvalDataset())
    eval_hook = EvalHook(data_loader, save_best='acc', rule='less')
    with tempfile.TemporaryDirectory() as tmpdir:
        logger = get_logger('test_eval')
        runner = EpochBasedRunner(model=model, work_dir=tmpdir, logger=logger)
        runner.register_checkpoint_hook(dict(interval=1))
        runner.register_hook(eval_hook)
        runner.run([loader], [('train', 1)], 8)

        ckpt_path = osp.join(tmpdir, 'best_acc_epoch_6.pth')

        assert runner.meta['hook_msgs']['best_ckpt'] == ckpt_path
        assert osp.exists(ckpt_path)
        assert runner.meta['hook_msgs']['best_score'] == -3

    # Test the EvalHook when resume happened
    data_loader = DataLoader(EvalDataset())
    eval_hook = EvalHook(data_loader, save_best='acc')
    with tempfile.TemporaryDirectory() as tmpdir:
        logger = get_logger('test_eval')
        runner = EpochBasedRunner(model=model, work_dir=tmpdir, logger=logger)
        runner.register_checkpoint_hook(dict(interval=1))
        runner.register_hook(eval_hook)
        runner.run([loader], [('train', 1)], 2)

        old_ckpt_path = osp.join(tmpdir, 'best_acc_epoch_2.pth')

        assert runner.meta['hook_msgs']['best_ckpt'] == old_ckpt_path
        assert osp.exists(old_ckpt_path)
        assert runner.meta['hook_msgs']['best_score'] == 4

        # Build a second runner in the same work dir and resume from the best
        # checkpoint. The new hook reuses `data_loader`, so its EvalDataset
        # continues from the position reached by the first run.
        resume_from = old_ckpt_path
        loader = DataLoader(ExampleDataset())
        eval_hook = EvalHook(data_loader, save_best='acc')
        runner = EpochBasedRunner(model=model, work_dir=tmpdir, logger=logger)
        runner.register_checkpoint_hook(dict(interval=1))
        runner.register_hook(eval_hook)

        runner.resume(resume_from)
        assert runner.meta['hook_msgs']['best_ckpt'] == old_ckpt_path
        assert osp.exists(old_ckpt_path)
        assert runner.meta['hook_msgs']['best_score'] == 4

        runner.run([loader], [('train', 1)], 8)

        ckpt_path = osp.join(tmpdir, 'best_acc_epoch_4.pth')

        assert runner.meta['hook_msgs']['best_ckpt'] == ckpt_path
        assert osp.exists(ckpt_path)
        assert runner.meta['hook_msgs']['best_score'] == 7
        # the superseded best checkpoint must have been deleted
        assert not osp.exists(old_ckpt_path)

    # test EvalHook with custom test_fn and greater/less keys
    loader = DataLoader(EvalDataset())
    model = Model()
    data_loader = DataLoader(EvalDataset())

    eval_hook = EvalHook(
        data_loader,
        save_best='acc',
        test_fn=mock.MagicMock(return_value={}),
        greater_keys=[],
        less_keys=['acc'])

    with tempfile.TemporaryDirectory() as tmpdir:
        logger = get_logger('test_eval')
        runner = EpochBasedRunner(model=model, work_dir=tmpdir, logger=logger)
        runner.register_checkpoint_hook(dict(interval=1))
        runner.register_hook(eval_hook)
        runner.run([loader], [('train', 1)], 8)

        ckpt_path = osp.join(tmpdir, 'best_acc_epoch_6.pth')

        assert runner.meta['hook_msgs']['best_ckpt'] == ckpt_path
        assert osp.exists(ckpt_path)
        assert runner.meta['hook_msgs']['best_score'] == -3

    # test EvalHook with specified `out_dir`
    loader = DataLoader(EvalDataset())
    model = Model()
    data_loader = DataLoader(EvalDataset())
    out_dir = 's3://user/data'
    eval_hook = EvalHook(
        data_loader, interval=1, save_best='auto', out_dir=out_dir)

    # PetrelBackend I/O methods are patched, so only call counts are checked.
    with patch.object(PetrelBackend, 'put') as mock_put, \
         patch.object(PetrelBackend, 'remove') as mock_remove, \
         patch.object(PetrelBackend, 'isfile') as mock_isfile, \
         tempfile.TemporaryDirectory() as tmpdir:
        logger = get_logger('test_eval')
        runner = EpochBasedRunner(model=model, work_dir=tmpdir, logger=logger)
        runner.register_checkpoint_hook(dict(interval=1))
        runner.register_hook(eval_hook)
        runner.run([loader], [('train', 1)], 8)

        basename = osp.basename(runner.work_dir.rstrip(osp.sep))
        ckpt_path = f'{out_dir}/{basename}/best_acc_epoch_4.pth'

        assert runner.meta['hook_msgs']['best_ckpt'] == ckpt_path
        assert runner.meta['hook_msgs']['best_score'] == 7

    assert mock_put.call_count == 3
    assert mock_remove.call_count == 2
    assert mock_isfile.call_count == 2


@patch('mmcv.engine.single_gpu_test', MagicMock)
@patch('mmcv.engine.multi_gpu_test', MagicMock)
@pytest.mark.parametrize('EvalHookParam', [EvalHook, DistEvalHook])
@pytest.mark.parametrize('_build_demo_runner,by_epoch',
                         [(_build_epoch_runner, True),
                          (_build_iter_runner, False)])
def test_start_param(EvalHookParam, _build_demo_runner, by_epoch):
    """Check how ``start`` and ``interval`` decide when evaluation happens.

    Each scenario builds a fresh runner, attaches a hook whose ``evaluate``
    is replaced by a ``MagicMock``, runs the runner and compares the number
    of ``evaluate`` calls with the expected count.
    """
    # create dummy data
    dataloader = DataLoader(EvalDataset())

    def count_evaluations(max_steps, resume_at=None, **hook_kwargs):
        # Run one scenario and report how often ``evaluate`` was called.
        demo_runner = _build_demo_runner()
        hook = EvalHookParam(dataloader, by_epoch=by_epoch, **hook_kwargs)
        hook.evaluate = MagicMock()
        demo_runner.register_hook(hook)
        if resume_at is not None:
            # Pretend the runner was resumed at the given epoch/iteration.
            if by_epoch:
                demo_runner._epoch = resume_at
            else:
                demo_runner._iter = resume_at
        demo_runner.run([dataloader], [('train', 1)], max_steps)
        return hook.evaluate.call_count

    # 0.1. dataloader is not a DataLoader object
    with pytest.raises(TypeError):
        EvalHookParam(dataloader=MagicMock(), interval=-1)

    # 0.2. negative interval
    with pytest.raises(ValueError):
        EvalHookParam(dataloader, interval=-1)

    # 0.3. negative start
    with pytest.raises(ValueError):
        EvalHookParam(dataloader, start=-1)

    # 1. start=None, interval=1: perform evaluation after each epoch.
    assert count_evaluations(2, interval=1) == 2  # after epoch 1 & 2

    # 2. start=1, interval=1: perform evaluation after each epoch.
    assert count_evaluations(2, start=1, interval=1) == 2  # after epoch 1 & 2

    # 3. start=None, interval=2: perform evaluation after epoch 2, 4, 6, etc
    assert count_evaluations(2, interval=2) == 1  # after epoch 2

    # 4. start=1, interval=2: perform evaluation after epoch 1, 3, 5, etc
    assert count_evaluations(3, start=1, interval=2) == 2  # after epoch 1 & 3

    # 5. start=0, interval=1: perform evaluation after each epoch and
    #    before epoch 1.
    # before epoch1 and after e1 & e2
    assert count_evaluations(2, start=0) == 3

    # 6. resuming from epoch i, start = x (x<=i), interval =1: perform
    #    evaluation after each epoch and before the first epoch.
    # before & after epoch 3
    assert count_evaluations(3, resume_at=2, start=1) == 2

    # 7. resuming from epoch i, start = i+1/None, interval =1: perform
    #    evaluation after each epoch.
    # after epoch 2 & 3
    assert count_evaluations(3, resume_at=1, start=2) == 2


@pytest.mark.parametrize('runner,by_epoch,eval_hook_priority',
                         [(EpochBasedRunner, True, 'NORMAL'),
                          (EpochBasedRunner, True, 'LOW'),
                          (IterBasedRunner, False, 'LOW')])
def test_logger(runner, by_epoch, eval_hook_priority):
    """Check that train and val records are logged separately.

    Args:
        runner: runner class to instantiate (from the parametrization).
        by_epoch: passed to both the eval hook and the text logger hook.
        eval_hook_priority: priority used when registering the eval hook.

    After one training step the JSON log must contain a ``train`` record
    that has a ``time`` field followed by a ``val`` record that does not.
    """
    loader = DataLoader(EvalDataset())
    model = Model()
    data_loader = DataLoader(EvalDataset())
    eval_hook = EvalHook(
        data_loader, interval=1, by_epoch=by_epoch, save_best='acc')

    with tempfile.TemporaryDirectory() as tmpdir:
        logger = get_logger('test_logger')
        optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
        # Build the runner from the parametrized class. Hard-coding
        # EpochBasedRunner here would shadow the argument and the
        # IterBasedRunner case would never be exercised.
        demo_runner = runner(
            model=model, optimizer=optimizer, work_dir=tmpdir, logger=logger)
        demo_runner.register_logger_hooks(
            dict(
                interval=1,
                hooks=[dict(type='TextLoggerHook', by_epoch=by_epoch)]))
        demo_runner.register_timer_hook(dict(type='IterTimerHook'))
        demo_runner.register_hook(eval_hook, priority=eval_hook_priority)
        demo_runner.run([loader], [('train', 1)], 1)

        path = osp.join(tmpdir, next(scandir(tmpdir, '.json')))
        with open(path) as fr:
            fr.readline()  # skip the first line which is `hook_msg`
            train_log = json.loads(fr.readline())
            assert train_log['mode'] == 'train' and 'time' in train_log
            val_log = json.loads(fr.readline())
            assert val_log['mode'] == 'val' and 'time' not in val_log


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_runner/test_fp16.py
================================================
import numpy as np
import pytest
import torch
import torch.nn as nn

from mmcv.runner.fp16_utils import auto_fp16, cast_tensor_type, force_fp32


def test_cast_tensor_type():
    """Check ``cast_tensor_type`` on tensors, containers and other objects."""
    # A float tensor whose dtype matches ``src_type`` is converted.
    for target in (torch.int32, torch.half):
        converted = cast_tensor_type(
            torch.FloatTensor([5.]), torch.float32, target)
        assert isinstance(converted, torch.Tensor)
        assert converted.dtype == target

    # skip the conversion when the type of input is not the same as src_type
    int_tensor = torch.IntTensor([5])
    untouched = cast_tensor_type(int_tensor, torch.float, torch.half)
    assert isinstance(untouched, torch.Tensor)
    assert untouched.dtype == int_tensor.dtype

    # Non-tensor inputs keep their Python type:
    # (value, src_type, dst_type, expected result class)
    passthrough_cases = (
        ('tensor', str, str, str),
        (np.array([5.]), np.ndarray, np.ndarray, np.ndarray),
        (5, None, None, int),
    )
    for value, src, dst, expected_cls in passthrough_cases:
        assert isinstance(cast_tensor_type(value, src, dst), expected_cls)

    # Tensors stored in a dict are converted, and a dict is returned.
    mapping = dict(
        tensor_a=torch.FloatTensor([1.]), tensor_b=torch.FloatTensor([2.]))
    cast_mapping = cast_tensor_type(mapping, torch.float32, torch.int32)
    assert isinstance(cast_mapping, dict)
    for key in ('tensor_a', 'tensor_b'):
        assert cast_mapping[key].dtype == torch.int32

    # Tensors stored in a list are converted, and a list is returned.
    sequence = [torch.FloatTensor([1.]), torch.FloatTensor([2.])]
    cast_sequence = cast_tensor_type(sequence, torch.float32, torch.int32)
    assert isinstance(cast_sequence, list)
    for index in (0, 1):
        assert cast_sequence[index].dtype == torch.int32


def test_auto_fp16():
    """Check ``auto_fp16`` input/output casting on ``nn.Module.forward``.

    Every scenario runs in three phases: with ``fp16_enabled`` unset the
    tensors must keep their dtypes, after setting ``fp16_enabled = True``
    the selected ones must be cast, and the same result must hold on CUDA
    when a device is available.
    """
    fp32, fp16 = torch.float32, torch.half

    def ones(dtype):
        return torch.ones(1, dtype=dtype)

    def positional(module, *tensors):
        return module(*tensors)

    def y_z_as_keywords(module, x, y, z):
        return module(x, y=y, z=z)

    def check(module, invoke, tensors, disabled, enabled):
        """Run the three phases and compare the returned dtypes."""

        def dtypes(device_tensors):
            return [out.dtype for out in invoke(module, *device_tensors)]

        assert dtypes(tensors) == disabled
        module.fp16_enabled = True
        assert dtypes(tensors) == enabled
        if torch.cuda.is_available():
            module.cuda()
            assert dtypes([t.cuda() for t in tensors]) == enabled

    with pytest.raises(TypeError):
        # ExampleObject is not a subclass of nn.Module

        class ExampleObject(object):

            @auto_fp16()
            def __call__(self, x):
                return x

        ExampleObject()(ones(fp32))

    # apply to all input args
    class CastAll(nn.Module):

        @auto_fp16()
        def forward(self, x, y):
            return x, y

    check(
        CastAll(),
        positional, [ones(fp32), ones(fp32)],
        disabled=[fp32, fp32],
        enabled=[fp16, fp16])

    # apply to specified input args
    class CastOnlyX(nn.Module):

        @auto_fp16(apply_to=('x', ))
        def forward(self, x, y):
            return x, y

    check(
        CastOnlyX(),
        positional, [ones(fp32), ones(fp32)],
        disabled=[fp32, fp32],
        enabled=[fp16, fp32])

    # apply to optional input args
    class CastXAndOptionalY(nn.Module):

        @auto_fp16(apply_to=('x', 'y'))
        def forward(self, x, y=None, z=None):
            return x, y, z

    check(
        CastXAndOptionalY(),
        y_z_as_keywords, [ones(fp32), ones(fp32), ones(fp32)],
        disabled=[fp32, fp32, fp32],
        enabled=[fp16, fp16, fp32])

    # out_fp32=True
    class CastThenRestore(nn.Module):

        @auto_fp16(apply_to=('x', 'y'), out_fp32=True)
        def forward(self, x, y=None, z=None):
            return x, y, z

    check(
        CastThenRestore(),
        y_z_as_keywords, [ones(fp16), ones(fp32), ones(fp32)],
        disabled=[fp16, fp32, fp32],
        enabled=[fp32, fp32, fp32])


def test_force_fp32():
    """Check ``force_fp32`` input/output casting on ``nn.Module.forward``.

    Every scenario runs in three phases: with ``fp16_enabled`` unset the
    tensors must keep their dtypes, after setting ``fp16_enabled = True``
    the selected ones must be cast, and the same result must hold on CUDA
    when a device is available.
    """
    fp32, fp16 = torch.float32, torch.half

    def ones(dtype):
        return torch.ones(1, dtype=dtype)

    def positional(module, *tensors):
        return module(*tensors)

    def y_z_as_keywords(module, x, y, z):
        return module(x, y=y, z=z)

    def check(module, invoke, tensors, disabled, enabled):
        """Run the three phases and compare the returned dtypes."""

        def dtypes(device_tensors):
            return [out.dtype for out in invoke(module, *device_tensors)]

        assert dtypes(tensors) == disabled
        module.fp16_enabled = True
        assert dtypes(tensors) == enabled
        if torch.cuda.is_available():
            module.cuda()
            assert dtypes([t.cuda() for t in tensors]) == enabled

    with pytest.raises(TypeError):
        # ExampleObject is not a subclass of nn.Module

        class ExampleObject(object):

            @force_fp32()
            def __call__(self, x):
                return x

        ExampleObject()(ones(fp32))

    # apply to all input args
    class CastAll(nn.Module):

        @force_fp32()
        def forward(self, x, y):
            return x, y

    check(
        CastAll(),
        positional, [ones(fp16), ones(fp16)],
        disabled=[fp16, fp16],
        enabled=[fp32, fp32])

    # apply to specified input args
    class CastOnlyX(nn.Module):

        @force_fp32(apply_to=('x', ))
        def forward(self, x, y):
            return x, y

    check(
        CastOnlyX(),
        positional, [ones(fp16), ones(fp16)],
        disabled=[fp16, fp16],
        enabled=[fp32, fp16])

    # apply to optional input args
    class CastXAndOptionalY(nn.Module):

        @force_fp32(apply_to=('x', 'y'))
        def forward(self, x, y=None, z=None):
            return x, y, z

    check(
        CastXAndOptionalY(),
        y_z_as_keywords, [ones(fp16), ones(fp16), ones(fp16)],
        disabled=[fp16, fp16, fp16],
        enabled=[fp32, fp32, fp16])

    # out_fp16=True
    class CastThenRestore(nn.Module):

        @force_fp32(apply_to=('x', 'y'), out_fp16=True)
        def forward(self, x, y=None, z=None):
            return x, y, z

    check(
        CastThenRestore(),
        y_z_as_keywords, [ones(fp32), ones(fp16), ones(fp16)],
        disabled=[fp32, fp16, fp16],
        enabled=[fp16, fp16, fp16])


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_runner/test_hooks.py
================================================
"""Tests the hooks with runners.

CommandLine:
    pytest tests/test_runner/test_hooks.py
    xdoctest tests/test_runner/test_hooks.py zero
"""
import logging
import os.path as osp
import platform
import random
import re
import shutil
import sys
import tempfile
from unittest.mock import MagicMock, Mock, call, patch

import pytest
import torch
import torch.nn as nn
from torch.nn.init import constant_
from torch.utils.data import DataLoader

from mmcv.fileio.file_client import PetrelBackend
from mmcv.runner import (CheckpointHook, DvcliveLoggerHook, EMAHook,
                         Fp16OptimizerHook,
                         GradientCumulativeFp16OptimizerHook,
                         GradientCumulativeOptimizerHook, IterTimerHook,
                         MlflowLoggerHook, NeptuneLoggerHook, OptimizerHook,
                         PaviLoggerHook, WandbLoggerHook, build_runner)
from mmcv.runner.fp16_utils import auto_fp16
from mmcv.runner.hooks.hook import HOOKS, Hook
from mmcv.runner.hooks.lr_updater import (CosineRestartLrUpdaterHook,
                                          CyclicLrUpdaterHook,
                                          FlatCosineAnnealingLrUpdaterHook,
                                          OneCycleLrUpdaterHook,
                                          StepLrUpdaterHook)
from mmcv.utils import TORCH_VERSION

# Replace the ``petrel_client`` package and its ``client`` submodule with
# mocks at import time. Presumably this lets the ``PetrelBackend``-based
# tests below run without the real package installed -- TODO confirm.
sys.modules['petrel_client'] = MagicMock()
sys.modules['petrel_client.client'] = MagicMock()


def test_optimizerhook():
    """Check that ``detect_anomalous_params`` logs the unused parameters.

    The model owns three convolutions but ``conv3`` never takes part in
    ``forward``; depending on which output is used as the loss, ``conv2``
    is unused as well.
    """

    def make_conv(in_channels, out_channels):
        return nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=3,
            stride=1,
            padding=1,
            dilation=1)

    class Model(nn.Module):

        def __init__(self):
            super().__init__()
            self.conv1 = make_conv(1, 2)
            self.conv2 = make_conv(2, 2)
            self.conv3 = make_conv(1, 2)

        def forward(self, x):
            first = self.conv1(x)
            return first, self.conv2(first)

    class DummyLogger():
        """Collects every logged message into one string."""

        def __init__(self):
            self.msg = ''

        def log(self, msg=None, **kwargs):
            self.msg += msg

    model = Model()
    x = torch.rand(1, 1, 3, 3)

    dummy_runner = Mock()
    dummy_runner.optimizer.zero_grad = Mock(return_value=None)
    dummy_runner.optimizer.step = Mock(return_value=None)
    dummy_runner.model = model
    dummy_runner.outputs = {'num_samples': 0}
    dummy_runner.logger = DummyLogger()

    optimizer_hook = OptimizerHook(
        dict(max_norm=2), detect_anomalous_params=True)

    def report_for(loss):
        """Run one hook step with ``loss`` and return what was logged."""
        dummy_runner.outputs['loss'] = loss
        dummy_runner.logger.msg = ''
        optimizer_hook.after_train_iter(dummy_runner)
        return dummy_runner.logger.msg

    def param_names(*layers):
        return [
            f'{layer}.{kind}' for layer in layers
            for kind in ('weight', 'bias')
        ]

    # With x1.sum() as the root of the graph, the parameters of conv2 and
    # conv3 are not part of the computation and must be reported.
    report = report_for(model(x)[0].sum())
    for name in param_names('conv2', 'conv3'):
        assert name in report
    for name in param_names('conv1'):
        assert name not in report

    # With x2.sum() as the root, only the parameters of conv3 are unused.
    report = report_for(model(x)[1].sum())
    for name in param_names('conv3'):
        assert name in report
    for name in param_names('conv2', 'conv1'):
        assert name not in report


def test_checkpoint_hook(tmp_path):
    """xdoctest -m tests/test_runner/test_hooks.py test_checkpoint_hook.

    Runs ``CheckpointHook`` with an epoch based and an iteration based
    runner, each once against the local work dir and once against a
    patched ``PetrelBackend`` with an ``s3://`` output directory.
    """
    loader = DataLoader(torch.ones((5, 2)))

    def check_local(runner, by_epoch, filename):
        """The last checkpoint must be ``filename`` inside the work dir."""
        runner.meta = dict()
        runner.register_hook(CheckpointHook(interval=1, by_epoch=by_epoch))
        runner.run([loader], [('train', 1)])
        expected = osp.join(runner.work_dir, filename)
        assert runner.meta['hook_msgs']['last_ckpt'] == expected
        shutil.rmtree(runner.work_dir)

    def check_petrel(runner, by_epoch, filename):
        """The last checkpoint must be ``filename`` below ``out_dir``."""
        runner.meta = dict()
        out_dir = 's3://user/data'
        with patch.object(PetrelBackend, 'put') as mock_put, \
                patch.object(PetrelBackend, 'remove') as mock_remove, \
                patch.object(PetrelBackend, 'isfile') as mock_isfile:
            runner.register_hook(
                CheckpointHook(
                    interval=1,
                    out_dir=out_dir,
                    by_epoch=by_epoch,
                    max_keep_ckpts=2))
            runner.run([loader], [('train', 1)])
            basename = osp.basename(runner.work_dir.rstrip(osp.sep))
            expected = '/'.join([out_dir, basename, filename])
            assert runner.meta['hook_msgs']['last_ckpt'] == expected
        # every patched backend method must have been used during the run
        for mocked in (mock_put, mock_remove, mock_isfile):
            mocked.assert_called()
        shutil.rmtree(runner.work_dir)

    # test epoch based runner
    check_local(
        _build_demo_runner('EpochBasedRunner', max_epochs=1), True,
        'epoch_1.pth')

    # test petrel oss when the type of runner is `EpochBasedRunner`
    check_petrel(
        _build_demo_runner('EpochBasedRunner', max_epochs=4), True,
        'epoch_4.pth')

    # test iter based runner
    check_local(
        _build_demo_runner('IterBasedRunner', max_iters=1, max_epochs=None),
        False, 'iter_1.pth')

    # test petrel oss when the type of runner is `IterBasedRunner`
    check_petrel(
        _build_demo_runner('IterBasedRunner', max_iters=4, max_epochs=None),
        False, 'iter_4.pth')


def test_ema_hook():
    """Check EMAHook buffers are checkpointed and restored via resume_from.

    xdoctest -m tests/test_runner/test_hooks.py test_ema_hook
    """

    class DemoModel(nn.Module):
        # Single 1x1 convolution whose weight and bias start at zero.

        def __init__(self):
            super().__init__()
            self.conv = nn.Conv2d(
                in_channels=1,
                out_channels=2,
                kernel_size=1,
                padding=1,
                bias=True)
            self._init_weight()

        def _init_weight(self):
            # Start from all-zero parameters so the sums asserted below are
            # exact.
            constant_(self.conv.weight, 0)
            constant_(self.conv.bias, 0)

        def forward(self, x):
            return self.conv(x).sum()

        def train_step(self, x, optimizer, **kwargs):
            return dict(loss=self(x))

        def val_step(self, x, optimizer, **kwargs):
            return dict(loss=self(x))

    # First run: train/val for the runner's default schedule with an EMA
    # hook and a checkpoint hook, then inspect ``epoch_1.pth``.
    loader = DataLoader(torch.ones((1, 1, 1, 1)))
    runner = _build_demo_runner()
    demo_model = DemoModel()
    runner.model = demo_model
    emahook = EMAHook(momentum=0.1, interval=2, warm_up=100, resume_from=None)
    checkpointhook = CheckpointHook(interval=1, by_epoch=True)
    runner.register_hook(emahook, priority='HIGHEST')
    runner.register_hook(checkpointhook)
    runner.run([loader, loader], [('train', 1), ('val', 1)])
    checkpoint = torch.load(f'{runner.work_dir}/epoch_1.pth')
    contain_ema_buffer = False
    for name, value in checkpoint['state_dict'].items():
        if 'ema' in name:
            contain_ema_buffer = True
            assert value.sum() == 0
            # Overwrite the EMA entry in place so the resumed run below
            # starts from a known, non-zero EMA state.
            value.fill_(1)
        else:
            assert value.sum() == 0
    # The checkpoint must contain at least one EMA entry.
    assert contain_ema_buffer
    # Persist the modified state dict over the original checkpoint.
    torch.save(checkpoint, f'{runner.work_dir}/epoch_1.pth')
    work_dir = runner.work_dir
    # Second run: a fresh runner reuses the same model object and resumes
    # the EMA state from the modified checkpoint.
    resume_ema_hook = EMAHook(
        momentum=0.5, warm_up=0, resume_from=f'{work_dir}/epoch_1.pth')
    runner = _build_demo_runner(max_epochs=2)
    runner.model = demo_model
    runner.register_hook(resume_ema_hook, priority='HIGHEST')
    checkpointhook = CheckpointHook(interval=1, by_epoch=True)
    runner.register_hook(checkpointhook)
    runner.run([loader, loader], [('train', 1), ('val', 1)])
    checkpoint = torch.load(f'{runner.work_dir}/epoch_2.pth')
    contain_ema_buffer = False
    for name, value in checkpoint['state_dict'].items():
        if 'ema' in name:
            contain_ema_buffer = True
            # NOTE(review): the expected sums (2 for EMA entries, 1 for the
            # others) depend on EMAHook internals not visible here.
            assert value.sum() == 2
        else:
            assert value.sum() == 1
    assert contain_ema_buffer
    # Remove the work dirs of both runs.
    shutil.rmtree(runner.work_dir)
    shutil.rmtree(work_dir)


def test_custom_hook():
    """Check how custom hooks are registered and ordered by priority."""

    @HOOKS.register_module()
    class ToyHook(Hook):
        """Hook that only carries an ``info`` tag for identification."""

        def __init__(self, info, *args, **kwargs):
            super().__init__()
            self.info = info

    def new_runner():
        return _build_demo_runner_without_hook(
            'EpochBasedRunner', max_epochs=1)

    def hook_infos(target):
        return [hook.info for hook in target.hooks]

    runner = new_runner()
    # test if custom_hooks is None
    runner.register_custom_hooks(None)
    assert len(runner.hooks) == 0
    # test if custom_hooks is dict list
    runner.register_custom_hooks([
        dict(type='ToyHook', priority=51, info=51),
        dict(type='ToyHook', priority=49, info=49)
    ])
    assert hook_infos(runner) == [49, 51]
    # test if custom_hooks is object and without priority
    runner.register_custom_hooks(ToyHook(info='default'))
    assert len(runner.hooks) == 3
    assert runner.hooks[1].info == 'default'
    shutil.rmtree(runner.work_dir)

    runner = new_runner()
    # test custom_hooks with string priority setting
    priority_ranks = [
        'HIGHEST', 'VERY_HIGH', 'HIGH', 'ABOVE_NORMAL', 'NORMAL',
        'BELOW_NORMAL', 'LOW', 'VERY_LOW', 'LOWEST'
    ]
    shuffled_ranks = list(priority_ranks)
    random.shuffle(shuffled_ranks)
    runner.register_custom_hooks(
        [dict(type='ToyHook', priority=rank, info=rank)
         for rank in shuffled_ranks])
    # whatever the registration order, hooks end up sorted by priority
    assert hook_infos(runner) == priority_ranks
    shutil.rmtree(runner.work_dir)

    runner = new_runner()
    # test register_training_hooks order
    custom_hooks_cfg = [
        dict(type='ToyHook', priority=1, info='custom 1'),
        dict(type='ToyHook', priority='NORMAL', info='custom normal'),
        dict(type='ToyHook', priority=89, info='custom 89')
    ]
    log_config = dict(interval=1, hooks=[dict(type='ToyHook', info='log')])
    runner.register_training_hooks(
        lr_config=ToyHook('lr'),
        optimizer_config=ToyHook('optimizer'),
        checkpoint_config=ToyHook('checkpoint'),
        log_config=log_config,
        momentum_config=ToyHook('momentum'),
        timer_config=ToyHook('timer'),
        custom_hooks_config=custom_hooks_cfg)
    # If custom hooks have same priority with default hooks, custom hooks
    # will be triggered after default hooks.
    expected_order = [
        'custom 1', 'lr', 'momentum', 'optimizer', 'checkpoint',
        'custom normal', 'timer', 'custom 89', 'log'
    ]
    assert hook_infos(runner) == expected_order
    shutil.rmtree(runner.work_dir)


def test_pavi_hook():
    """Check the scalars and snapshot that ``PaviLoggerHook`` hands over.

    ``pavi`` is replaced by a mock, so only the calls made on the hook's
    writer are inspected.
    """
    sys.modules['pavi'] = MagicMock()

    batches = DataLoader(torch.ones((5, 2)))
    runner = _build_demo_runner()
    runner.meta = {'config_dict': {'lr': 0.02, 'gpu_ids': range(1)}}
    hook = PaviLoggerHook(add_graph=False, add_last_ckpt=True)
    runner.register_hook(hook)
    runner.run([batches, batches], [('train', 1), ('val', 1)])
    shutil.rmtree(runner.work_dir)

    assert hasattr(hook, 'writer')
    expected_scalars = {'learning_rate': 0.02, 'momentum': 0.95}
    hook.writer.add_scalars.assert_called_with('val', expected_scalars, 1)

    # in Windows environment, the latest checkpoint is copied from epoch_1.pth
    on_windows = platform.system() == 'Windows'
    checkpoint_name = 'latest.pth' if on_windows else 'epoch_1.pth'
    hook.writer.add_snapshot_file.assert_called_with(
        tag=runner.work_dir.split('/')[-1],
        snapshot_file_path=osp.join(runner.work_dir, checkpoint_name),
        iteration=1)


def test_sync_buffers_hook():
    """Smoke test: a train/val run with ``SyncBuffersHook`` must not fail."""
    batches = DataLoader(torch.ones((5, 2)))
    workflow = [('train', 1), ('val', 1)]

    runner = _build_demo_runner()
    runner.register_hook_from_cfg({'type': 'SyncBuffersHook'})
    runner.run([batches, batches], workflow)

    # clean up the work dir of the demo runner
    shutil.rmtree(runner.work_dir)


@pytest.mark.parametrize('multi_optimizers, max_iters, gamma, cyclic_times',
                         [(True, 8, 1, 1), (False, 8, 0.5, 2)])
def test_momentum_runner_hook(multi_optimizers, max_iters, gamma,
                              cyclic_times):
    """Check the momentum/LR values logged under momentum updater hooks.

    xdoctest -m tests/test_runner/test_hooks.py test_momentum_runner_hook

    Four schedules are run one after another, each with a fresh demo runner
    and a ``PaviLoggerHook`` backed by a mocked ``pavi`` module: a cyclic
    momentum + cyclic LR schedule, then ``StepMomentumUpdaterHook`` with
    constant, linear and exponential warmup. After each run the scalars
    passed to the mocked writer are compared with hard-coded values.

    Args:
        multi_optimizers (bool): Forwarded to ``_build_demo_runner``; also
            selects which set of expected calls is checked.
        max_iters (int): NOTE(review): parametrized but never read in this
            test body -- confirm whether it should be forwarded to
            ``_build_demo_runner``.
        gamma (float): ``gamma`` of the cyclic momentum hook.
        cyclic_times (int): ``cyclic_times`` of the cyclic momentum hook.
    """
    # Mock ``pavi`` so PaviLoggerHook can be used without the real package.
    sys.modules['pavi'] = MagicMock()
    loader = DataLoader(torch.ones((10, 2)))
    runner = _build_demo_runner(multi_optimizers=multi_optimizers)

    # add momentum scheduler
    hook_cfg = dict(
        type='CyclicMomentumUpdaterHook',
        by_epoch=False,
        target_ratio=(0.85 / 0.95, 1),
        cyclic_times=cyclic_times,
        step_ratio_up=0.4,
        gamma=gamma)
    runner.register_hook_from_cfg(hook_cfg)

    # add LR scheduler
    hook_cfg = dict(
        type='CyclicLrUpdaterHook',
        by_epoch=False,
        target_ratio=(10, 1),
        cyclic_times=1,
        step_ratio_up=0.4)
    runner.register_hook_from_cfg(hook_cfg)
    runner.register_hook_from_cfg(dict(type='IterTimerHook'))

    # add pavi hook
    hook = PaviLoggerHook(interval=1, add_graph=False, add_last_ckpt=True)
    runner.register_hook(hook)
    runner.run([loader], [('train', 1)])
    shutil.rmtree(runner.work_dir)

    # TODO: use a more elegant way to check values
    assert hasattr(hook, 'writer')
    # Each expected call is (tag, scalars, iteration).
    if multi_optimizers:
        calls = [
            call(
                'train', {
                    'learning_rate/model1': 0.01999999999999999,
                    'learning_rate/model2': 0.009999999999999995,
                    'momentum/model1': 0.95,
                    'momentum/model2': 0.9,
                }, 1),
            call(
                'train', {
                    'learning_rate/model1': 0.2,
                    'learning_rate/model2': 0.1,
                    'momentum/model1': 0.85,
                    'momentum/model2': 0.8052631578947369,
                }, 5),
            call(
                'train', {
                    'learning_rate/model1': 0.155,
                    'learning_rate/model2': 0.0775,
                    'momentum/model1': 0.875,
                    'momentum/model2': 0.8289473684210527,
                }, 7)
        ]
    else:
        calls = [
            call('train', {
                'learning_rate': 0.01999999999999999,
                'momentum': 0.95
            }, 1),
            call('train', {
                'learning_rate': 0.11,
                'momentum': 0.85
            }, 3),
            call('train', {
                'learning_rate': 0.1879422863405995,
                'momentum': 0.95
            }, 6),
            call('train', {
                'learning_rate': 0.11000000000000001,
                'momentum': 0.9
            }, 8),
        ]
    # Only a sample of iterations is checked, in any order.
    hook.writer.add_scalars.assert_has_calls(calls, any_order=True)

    # test constant momentum warmup
    sys.modules['pavi'] = MagicMock()
    runner = _build_demo_runner(multi_optimizers=multi_optimizers)

    # add momentum scheduler
    hook_cfg = dict(
        type='StepMomentumUpdaterHook',
        by_epoch=False,
        warmup='constant',
        warmup_iters=5,
        warmup_ratio=0.5,
        step=[10],
    )
    runner.register_hook_from_cfg(hook_cfg)
    runner.register_hook_from_cfg(dict(type='IterTimerHook'))

    hook = PaviLoggerHook(interval=1, add_graph=False, add_last_ckpt=True)
    runner.register_hook(hook)
    runner.run([loader], [('train', 1)])
    shutil.rmtree(runner.work_dir)

    assert hasattr(hook, 'writer')
    # The warmup values 1.9 / 1.8 equal 0.95 / 0.5 and 0.9 / 0.5 --
    # presumably base momentum divided by ``warmup_ratio``; verify against
    # the hook implementation.
    if multi_optimizers:
        calls = [
            call(
                'train', {
                    'learning_rate/model1': 0.02,
                    'learning_rate/model2': 0.01,
                    'momentum/model1': 1.9,
                    'momentum/model2': 1.8,
                }, 1),
            call(
                'train', {
                    'learning_rate/model1': 0.02,
                    'learning_rate/model2': 0.01,
                    'momentum/model1': 1.9,
                    'momentum/model2': 1.8,
                }, 5),
            call(
                'train', {
                    'learning_rate/model1': 0.02,
                    'learning_rate/model2': 0.01,
                    'momentum/model1': 0.95,
                    'momentum/model2': 0.9,
                }, 10),
        ]
    else:
        calls = [
            call('train', {
                'learning_rate': 0.02,
                'momentum': 1.9
            }, 1),
            call('train', {
                'learning_rate': 0.02,
                'momentum': 1.9
            }, 5),
            call('train', {
                'learning_rate': 0.02,
                'momentum': 0.95
            }, 10),
        ]

    hook.writer.add_scalars.assert_has_calls(calls, any_order=True)

    # test linear momentum warmup
    sys.modules['pavi'] = MagicMock()
    runner = _build_demo_runner(multi_optimizers=multi_optimizers)

    # add momentum scheduler
    hook_cfg = dict(
        type='StepMomentumUpdaterHook',
        by_epoch=False,
        warmup='linear',
        warmup_iters=5,
        warmup_ratio=0.5,
        step=[10],
    )
    runner.register_hook_from_cfg(hook_cfg)
    runner.register_hook_from_cfg(dict(type='IterTimerHook'))

    hook = PaviLoggerHook(interval=1, add_graph=False, add_last_ckpt=True)
    runner.register_hook(hook)
    runner.run([loader], [('train', 1)])
    shutil.rmtree(runner.work_dir)

    assert hasattr(hook, 'writer')
    # Checked at the first iteration, mid-warmup (3) and after warmup (10).
    if multi_optimizers:
        calls = [
            call(
                'train', {
                    'learning_rate/model1': 0.02,
                    'learning_rate/model2': 0.01,
                    'momentum/model1': 1.9,
                    'momentum/model2': 1.8,
                }, 1),
            call(
                'train', {
                    'learning_rate/model1': 0.02,
                    'learning_rate/model2': 0.01,
                    'momentum/model1': 1.3571428571428572,
                    'momentum/model2': 1.2857142857142858,
                }, 3),
            call(
                'train', {
                    'learning_rate/model1': 0.02,
                    'learning_rate/model2': 0.01,
                    'momentum/model1': 0.95,
                    'momentum/model2': 0.9,
                }, 10),
        ]
    else:
        calls = [
            call('train', {
                'learning_rate': 0.02,
                'momentum': 1.9
            }, 1),
            call('train', {
                'learning_rate': 0.02,
                'momentum': 1.3571428571428572
            }, 3),
            call('train', {
                'learning_rate': 0.02,
                'momentum': 0.95
            }, 10),
        ]

    hook.writer.add_scalars.assert_has_calls(calls, any_order=True)

    # test exponential momentum warmup
    sys.modules['pavi'] = MagicMock()
    runner = _build_demo_runner(multi_optimizers=multi_optimizers)

    # add momentum scheduler
    hook_cfg = dict(
        type='StepMomentumUpdaterHook',
        by_epoch=False,
        warmup='exp',
        warmup_iters=5,
        warmup_ratio=0.5,
        step=[10],
    )
    runner.register_hook_from_cfg(hook_cfg)
    runner.register_hook_from_cfg(dict(type='IterTimerHook'))

    hook = PaviLoggerHook(interval=1, add_graph=False, add_last_ckpt=True)
    runner.register_hook(hook)
    runner.run([loader], [('train', 1)])
    shutil.rmtree(runner.work_dir)

    assert hasattr(hook, 'writer')
    # Checked at the first iteration, mid-warmup (3) and after warmup (10).
    if multi_optimizers:
        calls = [
            call(
                'train', {
                    'learning_rate/model1': 0.02,
                    'learning_rate/model2': 0.01,
                    'momentum/model1': 1.9,
                    'momentum/model2': 1.8,
                }, 1),
            call(
                'train', {
                    'learning_rate/model1': 0.02,
                    'learning_rate/model2': 0.01,
                    'momentum/model1': 1.4399307381848783,
                    'momentum/model2': 1.3641449098593583,
                }, 3),
            call(
                'train', {
                    'learning_rate/model1': 0.02,
                    'learning_rate/model2': 0.01,
                    'momentum/model1': 0.95,
                    'momentum/model2': 0.9,
                }, 10),
        ]
    else:
        calls = [
            call('train', {
                'learning_rate': 0.02,
                'momentum': 1.9
            }, 1),
            call('train', {
                'learning_rate': 0.02,
                'momentum': 1.4399307381848783
            }, 3),
            call('train', {
                'learning_rate': 0.02,
                'momentum': 0.95
            }, 10),
        ]

    hook.writer.add_scalars.assert_has_calls(calls, any_order=True)


@pytest.mark.parametrize('multi_optimizers', (True, False))
def test_cosine_runner_hook(multi_optimizers):
    """Check scalars logged with cosine-annealing LR and momentum hooks.

    Can be run with:
    xdoctest -m tests/test_hooks.py test_cosine_runner_hook

    Args:
        multi_optimizers (bool): Forwarded to ``_build_demo_runner``; selects
            which set of expected tags/values is compared below.
    """
    # pavi is swapped for a mock so the writer calls can be inspected
    sys.modules['pavi'] = MagicMock()
    data_loader = DataLoader(torch.ones((10, 2)))
    runner = _build_demo_runner(multi_optimizers=multi_optimizers)

    # momentum scheduler, registered from a config dict
    momentum_cfg = dict(
        type='CosineAnnealingMomentumUpdaterHook',
        min_momentum_ratio=0.99 / 0.95,
        by_epoch=False,
        warmup_iters=2,
        warmup_ratio=0.9 / 0.95)
    runner.register_hook_from_cfg(momentum_cfg)

    # LR scheduler, registered from a config dict
    lr_cfg = dict(
        type='CosineAnnealingLrUpdaterHook',
        by_epoch=False,
        min_lr_ratio=0,
        warmup_iters=2,
        warmup_ratio=0.9)
    runner.register_hook_from_cfg(lr_cfg)

    # timer hook goes through both registration entry points
    runner.register_hook_from_cfg(dict(type='IterTimerHook'))
    runner.register_hook(IterTimerHook())

    # pavi logger hook
    pavi_hook = PaviLoggerHook(
        interval=1, add_graph=False, add_last_ckpt=True)
    runner.register_hook(pavi_hook)
    runner.run([data_loader], [('train', 1)])
    shutil.rmtree(runner.work_dir)

    # TODO: use a more elegant way to check values
    assert hasattr(pavi_hook, 'writer')

    # each row is (step, value for every tag in `tags`, in order)
    if multi_optimizers:
        tags = ('learning_rate/model1', 'learning_rate/model2',
                'momentum/model1', 'momentum/model2')
        rows = [
            (1, 0.02, 0.01, 0.95, 0.9),
            (6, 0.01, 0.005, 0.97, 0.9189473684210527),
            (10, 0.0004894348370484647, 0.00024471741852423234,
             0.9890211303259032, 0.9369673866245399),
        ]
    else:
        tags = ('learning_rate', 'momentum')
        rows = [
            (1, 0.02, 0.95),
            (6, 0.01, 0.97),
            (10, 0.0004894348370484647, 0.9890211303259032),
        ]
    calls = [
        call('train', dict(zip(tags, values)), step)
        for step, *values in rows
    ]
    pavi_hook.writer.add_scalars.assert_has_calls(calls, any_order=True)


@pytest.mark.parametrize('multi_optimizers, by_epoch', [(False, False),
                                                        (True, False),
                                                        (False, True),
                                                        (True, True)])
def test_flat_cosine_runner_hook(multi_optimizers, by_epoch):
    """Check scalars logged with FlatCosineAnnealingLrUpdaterHook.

    Can be run with:
    xdoctest -m tests/test_hooks.py test_flat_cosine_runner_hook

    Args:
        multi_optimizers (bool): Forwarded to ``_build_demo_runner``.
        by_epoch (bool): Passed to the LR hook; also selects the number of
            epochs, the warmup length and the steps that are checked.
    """
    sys.modules['pavi'] = MagicMock()
    data_loader = DataLoader(torch.ones((10, 2)))
    num_epochs = 10 if by_epoch else 1
    runner = _build_demo_runner(
        multi_optimizers=multi_optimizers, max_epochs=num_epochs)

    # a negative start_percent must be rejected by the constructor
    with pytest.raises(ValueError):
        FlatCosineAnnealingLrUpdaterHook(start_percent=-0.1, min_lr_ratio=0)

    # LR scheduler, registered from a config dict
    lr_cfg = dict(
        type='FlatCosineAnnealingLrUpdaterHook',
        by_epoch=by_epoch,
        min_lr_ratio=0,
        warmup='linear',
        warmup_iters=10 if by_epoch else 2,
        warmup_ratio=0.9,
        start_percent=0.5)
    runner.register_hook_from_cfg(lr_cfg)

    # timer hook goes through both registration entry points
    runner.register_hook_from_cfg(dict(type='IterTimerHook'))
    runner.register_hook(IterTimerHook())

    # pavi logger hook
    pavi_hook = PaviLoggerHook(
        interval=1, add_graph=False, add_last_ckpt=True)
    runner.register_hook(pavi_hook)
    runner.run([data_loader], [('train', 1)])
    shutil.rmtree(runner.work_dir)

    # TODO: use a more elegant way to check values
    assert hasattr(pavi_hook, 'writer')

    # The expected LR values are the same in both modes; only the steps at
    # which they are logged differ. Momentum stays at its initial value.
    steps = (1, 11, 61, 100) if by_epoch else (1, 6, 7, 10)
    if multi_optimizers:
        lr_pairs = [
            (0.018000000000000002, 0.009000000000000001),
            (0.02, 0.01),
            (0.018090169943749474, 0.009045084971874737),
            (0.0019098300562505265, 0.0009549150281252633),
        ]
        calls = [
            call(
                'train', {
                    'learning_rate/model1': lr_model1,
                    'learning_rate/model2': lr_model2,
                    'momentum/model1': 0.95,
                    'momentum/model2': 0.9,
                }, step)
            for step, (lr_model1, lr_model2) in zip(steps, lr_pairs)
        ]
    else:
        lr_values = [
            0.018000000000000002,
            0.02,
            0.018090169943749474,
            0.0019098300562505265,
        ]
        calls = [
            call('train', {
                'learning_rate': lr,
                'momentum': 0.95
            }, step) for step, lr in zip(steps, lr_values)
        ]
    pavi_hook.writer.add_scalars.assert_has_calls(calls, any_order=True)


@pytest.mark.parametrize('multi_optimizers, max_iters', [(True, 10), (True, 2),
                                                         (False, 10),
                                                         (False, 2)])
def test_one_cycle_runner_hook(multi_optimizers, max_iters):
    """Test OneCycleLrUpdaterHook and OneCycleMomentumUpdaterHook.

    The test has two parts:

    1. Constructor validation, followed by an epoch-based run whose logged
       learning-rate/momentum scalars are compared with fixed values.
    2. An ``IterBasedRunner`` run with ``total_steps=5``: for
       ``max_iters == 10`` the run must raise ``ValueError``; otherwise the
       final LR is compared with ``torch.optim.lr_scheduler.OneCycleLR``.

    Args:
        multi_optimizers (bool): Forwarded to ``_build_demo_runner`` in the
            first part.
        max_iters (int): Iteration budget of the second runner.
    """
    with pytest.raises(AssertionError):
        # by_epoch should be False
        OneCycleLrUpdaterHook(max_lr=0.1, by_epoch=True)

    with pytest.raises(ValueError):
        # expected float between 0 and 1
        OneCycleLrUpdaterHook(max_lr=0.1, pct_start=-0.1)

    with pytest.raises(ValueError):
        # anneal_strategy should be either 'cos' or 'linear'
        OneCycleLrUpdaterHook(max_lr=0.1, anneal_strategy='sin')

    sys.modules['pavi'] = MagicMock()
    loader = DataLoader(torch.ones((10, 2)))
    runner = _build_demo_runner(multi_optimizers=multi_optimizers)

    # add momentum scheduler
    hook_cfg = dict(
        type='OneCycleMomentumUpdaterHook',
        base_momentum=0.85,
        max_momentum=0.95,
        pct_start=0.5,
        anneal_strategy='cos',
        three_phase=False)
    runner.register_hook_from_cfg(hook_cfg)

    # add LR scheduler
    hook_cfg = dict(
        type='OneCycleLrUpdaterHook',
        max_lr=0.01,
        pct_start=0.5,
        anneal_strategy='cos',
        div_factor=25,
        final_div_factor=1e4,
        three_phase=False)
    runner.register_hook_from_cfg(hook_cfg)
    runner.register_hook_from_cfg(dict(type='IterTimerHook'))
    runner.register_hook(IterTimerHook())
    # add pavi hook
    hook = PaviLoggerHook(interval=1, add_graph=False, add_last_ckpt=True)
    runner.register_hook(hook)
    runner.run([loader], [('train', 1)])
    shutil.rmtree(runner.work_dir)

    # TODO: use a more elegant way to check values
    assert hasattr(hook, 'writer')
    if multi_optimizers:
        calls = [
            call(
                'train', {
                    'learning_rate/model1': 0.0003999999999999993,
                    'learning_rate/model2': 0.0003999999999999993,
                    'momentum/model1': 0.95,
                    'momentum/model2': 0.95,
                }, 1),
            call(
                'train', {
                    'learning_rate/model1': 0.00904508879153485,
                    'learning_rate/model2': 0.00904508879153485,
                    'momentum/model1': 0.8595491502812526,
                    'momentum/model2': 0.8595491502812526,
                }, 6),
            call(
                'train', {
                    'learning_rate/model1': 4e-08,
                    'learning_rate/model2': 4e-08,
                    'momentum/model1': 0.95,
                    'momentum/model2': 0.95,
                }, 10)
        ]
    else:
        calls = [
            call('train', {
                'learning_rate': 0.0003999999999999993,
                'momentum': 0.95
            }, 1),
            call(
                'train', {
                    'learning_rate': 0.00904508879153485,
                    'momentum': 0.8595491502812526
                }, 6),
            call('train', {
                'learning_rate': 4e-08,
                'momentum': 0.95
            }, 10)
        ]
    hook.writer.add_scalars.assert_has_calls(calls, any_order=True)

    # Test OneCycleLrUpdaterHook
    sys.modules['pavi'] = MagicMock()
    loader = DataLoader(torch.ones((10, 2)))
    runner = _build_demo_runner(
        runner_type='IterBasedRunner', max_epochs=None, max_iters=max_iters)

    args = dict(
        max_lr=0.01,
        total_steps=5,
        pct_start=0.5,
        anneal_strategy='linear',
        div_factor=25,
        final_div_factor=1e4,
    )
    hook = OneCycleLrUpdaterHook(**args)
    runner.register_hook(hook)
    try:
        if max_iters == 10:
            # test total_steps < max_iters
            with pytest.raises(ValueError):
                runner.run([loader], [('train', 1)])
        else:
            # test total_steps > max_iters
            runner.run([loader], [('train', 1)])
            lr_last = runner.current_lr()
            # reference schedule computed by torch with the same arguments
            t = torch.tensor([0.0], requires_grad=True)
            optim = torch.optim.SGD([t], lr=0.01)
            lr_scheduler = torch.optim.lr_scheduler.OneCycleLR(optim, **args)
            lr_target = []
            for _ in range(max_iters):
                optim.step()
                lr_target.append(optim.param_groups[0]['lr'])
                lr_scheduler.step()
            assert lr_target[-1] == lr_last[0]
    finally:
        # the second runner owns its own temporary work_dir; remove it even
        # when the checks above fail so no directory is left behind
        shutil.rmtree(runner.work_dir)


@pytest.mark.parametrize('multi_optimizers', (True, False))
def test_cosine_restart_lr_update_hook(multi_optimizers):
    """Test CosineRestartLrUpdaterHook.

    Covers constructor validation, a run whose periods are too short for the
    10-sample loader (expected to raise ``ValueError``), and a regular run
    whose logged scalars are compared with fixed values.

    Args:
        multi_optimizers (bool): Forwarded to ``_build_demo_runner`` for the
            regular run.
    """
    with pytest.raises(AssertionError):
        # `min_lr` and `min_lr_ratio` must not both be given
        CosineRestartLrUpdaterHook(
            by_epoch=False,
            periods=[2, 10],
            restart_weights=[0.5, 0.5],
            min_lr=0.1,
            min_lr_ratio=0)

    with pytest.raises(AssertionError):
        # periods and restart_weights should have the same length
        CosineRestartLrUpdaterHook(
            by_epoch=False,
            periods=[2, 10],
            restart_weights=[0.5],
            min_lr_ratio=0)

    # the last cumulative_periods 7 (out of [5, 7]) should >= 10
    sys.modules['pavi'] = MagicMock()
    loader = DataLoader(torch.ones((10, 2)))
    runner = _build_demo_runner()

    # add cosine restart LR scheduler
    hook = CosineRestartLrUpdaterHook(
        by_epoch=False,
        periods=[5, 2],  # cumulative_periods [5, 7 (5 + 2)]
        restart_weights=[0.5, 0.5],
        min_lr=0.0001)
    runner.register_hook(hook)
    runner.register_hook(IterTimerHook())

    # add pavi hook
    hook = PaviLoggerHook(interval=1, add_graph=False, add_last_ckpt=True)
    runner.register_hook(hook)
    try:
        # only the run itself is expected to raise; keeping the setup outside
        # the context manager prevents an unrelated ValueError from passing
        with pytest.raises(ValueError):
            runner.run([loader], [('train', 1)])
    finally:
        # must live outside `pytest.raises`, otherwise it is never reached
        # once `runner.run` raises and the temp dir is leaked
        shutil.rmtree(runner.work_dir)

    sys.modules['pavi'] = MagicMock()
    loader = DataLoader(torch.ones((10, 2)))
    runner = _build_demo_runner(multi_optimizers=multi_optimizers)

    # add cosine restart LR scheduler
    hook = CosineRestartLrUpdaterHook(
        by_epoch=False,
        periods=[5, 5],
        restart_weights=[0.5, 0.5],
        min_lr_ratio=0)
    runner.register_hook(hook)
    runner.register_hook(IterTimerHook())

    # add pavi hook
    hook = PaviLoggerHook(interval=1, add_graph=False, add_last_ckpt=True)
    runner.register_hook(hook)
    runner.run([loader], [('train', 1)])
    shutil.rmtree(runner.work_dir)

    # TODO: use a more elegant way to check values
    assert hasattr(hook, 'writer')
    if multi_optimizers:
        calls = [
            call(
                'train', {
                    'learning_rate/model1': 0.01,
                    'learning_rate/model2': 0.005,
                    'momentum/model1': 0.95,
                    'momentum/model2': 0.9,
                }, 1),
            call(
                'train', {
                    'learning_rate/model1': 0.01,
                    'learning_rate/model2': 0.005,
                    'momentum/model1': 0.95,
                    'momentum/model2': 0.9,
                }, 6),
            call(
                'train', {
                    'learning_rate/model1': 0.0009549150281252633,
                    'learning_rate/model2': 0.00047745751406263163,
                    'momentum/model1': 0.95,
                    'momentum/model2': 0.9,
                }, 10)
        ]
    else:
        calls = [
            call('train', {
                'learning_rate': 0.01,
                'momentum': 0.95
            }, 1),
            call('train', {
                'learning_rate': 0.01,
                'momentum': 0.95
            }, 6),
            call('train', {
                'learning_rate': 0.0009549150281252633,
                'momentum': 0.95
            }, 10)
        ]
    hook.writer.add_scalars.assert_has_calls(calls, any_order=True)


@pytest.mark.parametrize('multi_optimizers', (True, False))
def test_step_runner_hook(multi_optimizers):
    """Test StepLrUpdaterHook.

    After the constructor checks, two runs are performed (both together with
    a StepMomentumUpdaterHook): one with an integer ``step`` and lower bounds
    on LR/momentum, one with a list-valued ``step``. The scalars written by
    the pavi logger are compared with fixed values in each case.

    Args:
        multi_optimizers (bool): Forwarded to ``_build_demo_runner``; selects
            which set of expected tags/values is compared.
    """

    def expected_calls(tags, rows):
        # each row is (step, value for every tag in `tags`, in order)
        return [
            call('train', dict(zip(tags, values)), step)
            for step, *values in rows
        ]

    if multi_optimizers:
        tags = ('learning_rate/model1', 'learning_rate/model2',
                'momentum/model1', 'momentum/model2')
    else:
        tags = ('learning_rate', 'momentum')

    # `step` should be specified
    with pytest.raises(TypeError):
        StepLrUpdaterHook()
    # if `step` is int, should be positive
    with pytest.raises(AssertionError):
        StepLrUpdaterHook(-10)
    # if `step` is list of int, should all be positive
    with pytest.raises(AssertionError):
        StepLrUpdaterHook([10, 16, -20])

    # ---- StepLrUpdaterHook with int `step` value ----
    sys.modules['pavi'] = MagicMock()
    data_loader = DataLoader(torch.ones((30, 2)))
    runner = _build_demo_runner(multi_optimizers=multi_optimizers)

    # momentum scheduler, registered from a config dict
    momentum_cfg = dict(
        type='StepMomentumUpdaterHook',
        by_epoch=False,
        step=5,
        gamma=0.5,
        min_momentum=0.05)
    runner.register_hook_from_cfg(momentum_cfg)

    # step LR scheduler, registered as an instance
    lr_hook = StepLrUpdaterHook(by_epoch=False, step=5, gamma=0.5, min_lr=1e-3)
    runner.register_hook(lr_hook)
    runner.register_hook(IterTimerHook())

    # pavi logger hook
    pavi_hook = PaviLoggerHook(
        interval=1, add_graph=False, add_last_ckpt=True)
    runner.register_hook(pavi_hook)
    runner.run([data_loader], [('train', 1)])
    shutil.rmtree(runner.work_dir)

    # TODO: use a more elegant way to check values
    assert hasattr(pavi_hook, 'writer')
    if multi_optimizers:
        rows = [
            (1, 0.02, 0.01, 0.95, 0.9),
            (6, 0.01, 0.005, 0.475, 0.45),
            (16, 0.0025, 0.00125, 0.11875, 0.1125),
            (21, 0.00125, 0.001, 0.059375, 0.05625),
            (26, 0.001, 0.001, 0.05, 0.05),
            (30, 0.001, 0.001, 0.05, 0.05),
        ]
    else:
        rows = [
            (1, 0.02, 0.95),
            (6, 0.01, 0.475),
            (16, 0.0025, 0.11875),
            (21, 0.00125, 0.059375),
            (26, 0.001, 0.05),
            (30, 0.001, 0.05),
        ]
    pavi_hook.writer.add_scalars.assert_has_calls(
        expected_calls(tags, rows), any_order=True)

    # ---- StepLrUpdaterHook with list[int] `step` value ----
    sys.modules['pavi'] = MagicMock()
    data_loader = DataLoader(torch.ones((10, 2)))
    runner = _build_demo_runner(multi_optimizers=multi_optimizers)

    # momentum scheduler, registered from a config dict
    momentum_cfg = dict(
        type='StepMomentumUpdaterHook',
        by_epoch=False,
        step=[4, 6, 8],
        gamma=0.1)
    runner.register_hook_from_cfg(momentum_cfg)

    # step LR scheduler, registered as an instance
    lr_hook = StepLrUpdaterHook(by_epoch=False, step=[4, 6, 8], gamma=0.1)
    runner.register_hook(lr_hook)
    runner.register_hook(IterTimerHook())

    # pavi logger hook
    pavi_hook = PaviLoggerHook(
        interval=1, add_graph=False, add_last_ckpt=True)
    runner.register_hook(pavi_hook)
    runner.run([data_loader], [('train', 1)])
    shutil.rmtree(runner.work_dir)

    # TODO: use a more elegant way to check values
    assert hasattr(pavi_hook, 'writer')
    if multi_optimizers:
        rows = [
            (1, 0.02, 0.01, 0.95, 0.9),
            (5, 0.002, 0.001, 9.5e-2, 9.000000000000001e-2),
            (7, 2.0000000000000004e-4, 1.0000000000000002e-4,
             9.500000000000001e-3, 9.000000000000003e-3),
            (9, 2.0000000000000005e-05, 1.0000000000000003e-05,
             9.500000000000002e-4, 9.000000000000002e-4),
        ]
    else:
        rows = [
            (1, 0.02, 0.95),
            (5, 0.002, 0.095),
            (7, 2.0000000000000004e-4, 9.500000000000001e-3),
            (9, 2.0000000000000005e-05, 9.500000000000002e-4),
        ]
    pavi_hook.writer.add_scalars.assert_has_calls(
        expected_calls(tags, rows), any_order=True)


@pytest.mark.parametrize('multi_optimizers, max_iters, gamma, cyclic_times',
                         [(True, 8, 1, 1), (False, 8, 0.5, 2)])
def test_cyclic_lr_update_hook(multi_optimizers, max_iters, gamma,
                               cyclic_times):
    """Test CyclicLrUpdateHook.

    Invalid constructor arguments are checked first; then an
    ``IterBasedRunner`` is run with a linear cyclic schedule and the scalars
    written by the pavi logger are compared with fixed values.

    Args:
        multi_optimizers (bool): Forwarded to ``_build_demo_runner``.
        max_iters (int): Iteration budget of the runner.
        gamma (float): ``gamma`` argument of the cyclic LR hook.
        cyclic_times (int): ``cyclic_times`` argument of the cyclic LR hook.
    """
    # by_epoch should be False
    with pytest.raises(AssertionError):
        CyclicLrUpdaterHook(by_epoch=True)

    # target_ratio must be either float or tuple/list of two floats
    with pytest.raises(AssertionError):
        CyclicLrUpdaterHook(by_epoch=False, target_ratio=(10.0, 0.1, 0.2))

    # step_ratio_up must be in range [0,1)
    with pytest.raises(AssertionError):
        CyclicLrUpdaterHook(by_epoch=False, step_ratio_up=1.4)

    # anneal_strategy must be one of "cos" or "linear"
    with pytest.raises(ValueError):
        CyclicLrUpdaterHook(by_epoch=False, anneal_strategy='sin')

    # gamma must be in range (0, 1]
    with pytest.raises(AssertionError):
        CyclicLrUpdaterHook(by_epoch=False, gamma=0)

    sys.modules['pavi'] = MagicMock()
    data_loader = DataLoader(torch.ones((10, 2)))
    runner = _build_demo_runner(
        runner_type='IterBasedRunner',
        max_epochs=None,
        max_iters=max_iters,
        multi_optimizers=multi_optimizers)

    # cyclic LR scheduler, registered as an instance
    cyclic_hook = CyclicLrUpdaterHook(
        by_epoch=False,
        target_ratio=(10.0, 1.0),
        cyclic_times=cyclic_times,
        step_ratio_up=0.5,
        anneal_strategy='linear',
        gamma=gamma)
    runner.register_hook(cyclic_hook)

    # timer hook goes through both registration entry points
    runner.register_hook_from_cfg(dict(type='IterTimerHook'))
    runner.register_hook(IterTimerHook())

    # pavi logger hook
    pavi_hook = PaviLoggerHook(
        interval=1, add_graph=False, add_last_ckpt=True)
    runner.register_hook(pavi_hook)
    runner.run([data_loader], [('train', 1)])
    shutil.rmtree(runner.work_dir)

    assert hasattr(pavi_hook, 'writer')

    # each row is (step, value for every tag in `tags`, in order)
    if multi_optimizers:
        tags = ('learning_rate/model1', 'learning_rate/model2',
                'momentum/model1', 'momentum/model2')
        rows = [
            (1, 0.02, 0.01, 0.95, 0.9),
            (4, 0.155, 0.0775, 0.95, 0.9),
            (6, 0.155, 0.0775, 0.95, 0.9),
        ]
    else:
        tags = ('learning_rate', 'momentum')
        rows = [
            (1, 0.02, 0.95),
            (4, 0.11, 0.95),
            (6, 0.065, 0.95),
            (7, 0.11, 0.95),
        ]
    calls = [
        call('train', dict(zip(tags, values)), step)
        for step, *values in rows
    ]
    pavi_hook.writer.add_scalars.assert_has_calls(calls, any_order=True)


@pytest.mark.parametrize('log_model', (True, False))
def test_mlflow_hook(log_model):
    """Check the calls MlflowLoggerHook makes on a mocked mlflow module.

    Args:
        log_model (bool): Passed to the hook; decides whether
            ``mlflow_pytorch.log_model`` is expected to have been called.
    """
    # both modules are replaced by mocks before the hook is constructed
    for module_name in ('mlflow', 'mlflow.pytorch'):
        sys.modules[module_name] = MagicMock()

    runner = _build_demo_runner()
    data_loader = DataLoader(torch.ones((5, 2)))

    mlflow_hook = MlflowLoggerHook(exp_name='test', log_model=log_model)
    runner.register_hook(mlflow_hook)
    runner.run([data_loader] * 2, [('train', 1), ('val', 1)])
    shutil.rmtree(runner.work_dir)

    mlflow_hook.mlflow.set_experiment.assert_called_with('test')
    expected_metrics = {'learning_rate': 0.02, 'momentum': 0.95}
    mlflow_hook.mlflow.log_metrics.assert_called_with(
        expected_metrics, step=6)

    if not log_model:
        assert not mlflow_hook.mlflow_pytorch.log_model.called
    else:
        mlflow_hook.mlflow_pytorch.log_model.assert_called_with(
            runner.model,
            'models',
            pip_requirements=[f'torch=={TORCH_VERSION}'])


def test_wandb_hook():
    """Check the calls WandbLoggerHook makes on a mocked wandb module."""
    sys.modules['wandb'] = MagicMock()
    runner = _build_demo_runner()
    wandb_hook = WandbLoggerHook(log_artifact=True)
    data_loader = DataLoader(torch.ones((5, 2)))

    runner.register_hook(wandb_hook)
    runner.run([data_loader] * 2, [('train', 1), ('val', 1)])

    shutil.rmtree(runner.work_dir)

    mocked_wandb = wandb_hook.wandb
    mocked_wandb.init.assert_called_with()
    expected_scalars = {'learning_rate': 0.02, 'momentum': 0.95}
    mocked_wandb.log.assert_called_with(
        expected_scalars, step=6, commit=True)
    mocked_wandb.log_artifact.assert_called()
    mocked_wandb.join.assert_called_with()


def test_neptune_hook():
    """Check the calls NeptuneLoggerHook makes on a mocked neptune module."""
    # both module names are replaced by mocks before the hook is constructed
    for module_name in ('neptune', 'neptune.new'):
        sys.modules[module_name] = MagicMock()
    runner = _build_demo_runner()
    neptune_hook = NeptuneLoggerHook()

    data_loader = DataLoader(torch.ones((5, 2)))

    runner.register_hook(neptune_hook)
    runner.run([data_loader] * 2, [('train', 1), ('val', 1)])
    shutil.rmtree(runner.work_dir)

    neptune_hook.neptune.init.assert_called_with()
    momentum_series = neptune_hook.run['momentum']
    momentum_series.log.assert_called_with(0.95, step=6)
    neptune_hook.run.stop.assert_called_with()


def test_dvclive_hook():
    """Check the calls DvcliveLoggerHook makes on a mocked dvclive module."""
    sys.modules['dvclive'] = MagicMock()
    runner = _build_demo_runner()

    dvclive_hook = DvcliveLoggerHook()
    # grab the hook's dvclive handle before the run; asserted on afterwards
    mocked_dvclive = dvclive_hook.dvclive
    data_loader = DataLoader(torch.ones((5, 2)))

    runner.register_hook(dvclive_hook)
    runner.run([data_loader] * 2, [('train', 1), ('val', 1)])
    shutil.rmtree(runner.work_dir)

    mocked_dvclive.set_step.assert_called_with(6)
    mocked_dvclive.log.assert_called_with('momentum', 0.95)


def test_dvclive_hook_model_file(tmp_path):
    """Check that DvcliveLoggerHook leaves a file at ``model_file``.

    Args:
        tmp_path: pytest fixture; not used by the test body (the model file
            is placed inside the runner's own ``work_dir``). Kept so the
            test signature stays unchanged.
    """
    sys.modules['dvclive'] = MagicMock()
    runner = _build_demo_runner()

    try:
        model_file = osp.join(runner.work_dir, 'model.pth')
        hook = DvcliveLoggerHook(model_file=model_file)
        runner.register_hook(hook)

        # a single loader is enough; the former duplicate assignment was
        # immediately overwritten and had no effect
        loader = DataLoader(torch.ones((5, 2)))

        runner.run([loader, loader], [('train', 1), ('val', 1)])

        assert osp.exists(model_file)
    finally:
        # remove the temp work_dir even when the run or the assertion fails
        shutil.rmtree(runner.work_dir)


def _build_demo_runner_without_hook(runner_type='EpochBasedRunner',
                                    max_epochs=1,
                                    max_iters=None,
                                    multi_optimizers=False):
    """Build a runner around a tiny model without registering any hook.

    Args:
        runner_type (str): Value used as ``type`` in the runner config.
        max_epochs (int | None): Forwarded to the runner.
        max_iters (int | None): Forwarded to the runner.
        multi_optimizers (bool): If True, the optimizer is a dict with one
            SGD optimizer per sub-module (keys ``model1`` and ``model2``);
            otherwise a single SGD optimizer over all parameters.

    Returns:
        The object returned by ``build_runner``; its ``work_dir`` is a fresh
        directory from ``tempfile.mkdtemp()`` that the caller should remove.
    """

    class Model(nn.Module):

        def __init__(self):
            super().__init__()
            self.linear = nn.Linear(2, 1)
            # not used in forward(); provides parameters for the second
            # optimizer in the multi-optimizer setup
            self.conv = nn.Conv2d(3, 3, 3)

        def forward(self, x):
            return self.linear(x)

        def train_step(self, x, optimizer, **kwargs):
            return {'loss': self(x)}

        def val_step(self, x, optimizer, **kwargs):
            return {'loss': self(x)}

    model = Model()

    if not multi_optimizers:
        optimizer = torch.optim.SGD(model.parameters(), lr=0.02, momentum=0.95)
    else:
        linear_optim = torch.optim.SGD(
            model.linear.parameters(), lr=0.02, momentum=0.95)
        conv_optim = torch.optim.SGD(
            model.conv.parameters(), lr=0.01, momentum=0.9)
        optimizer = {'model1': linear_optim, 'model2': conv_optim}

    runner_kwargs = dict(
        model=model,
        work_dir=tempfile.mkdtemp(),
        optimizer=optimizer,
        logger=logging.getLogger(),
        max_epochs=max_epochs,
        max_iters=max_iters)
    return build_runner(dict(type=runner_type), default_args=runner_kwargs)


def _build_demo_runner(runner_type='EpochBasedRunner',
                       max_epochs=1,
                       max_iters=None,
                       multi_optimizers=False):
    """Build the demo runner and register checkpoint and text-logger hooks.

    All arguments are forwarded unchanged to
    ``_build_demo_runner_without_hook``; the checkpoint hook config and the
    logger config both use ``interval=1``.

    Returns:
        The runner with the two hook configurations registered.
    """
    runner = _build_demo_runner_without_hook(
        runner_type=runner_type,
        max_epochs=max_epochs,
        max_iters=max_iters,
        multi_optimizers=multi_optimizers)

    # checkpoint hook first, then the logger hooks
    runner.register_checkpoint_hook({'interval': 1})
    runner.register_logger_hooks({
        'interval': 1,
        'hooks': [{
            'type': 'TextLoggerHook'
        }],
    })
    return runner


def test_runner_with_revise_keys():
    """Check ``runner.load_checkpoint`` with ``revise_keys``.

    Two directions are exercised: adding a ``backbone.`` prefix to the keys
    of a plain model's checkpoint, and stripping that prefix from a prefixed
    model's checkpoint.

    The checkpoint file lives inside the runner's temporary ``work_dir``
    (unique per runner) instead of a fixed name in the shared temp directory,
    and the whole directory is removed in a ``finally`` block so nothing is
    left behind when an assertion fails.
    """
    import os

    class Model(nn.Module):

        def __init__(self):
            super().__init__()
            self.conv = nn.Conv2d(3, 3, 1)

    class PrefixModel(nn.Module):

        def __init__(self):
            super().__init__()
            self.backbone = Model()

    pmodel = PrefixModel()
    model = Model()
    runner = _build_demo_runner(runner_type='EpochBasedRunner')
    checkpoint_path = os.path.join(runner.work_dir, 'checkpoint.pth')

    try:
        # add prefix
        torch.save(model.state_dict(), checkpoint_path)
        runner.model = pmodel
        state_dict = runner.load_checkpoint(
            checkpoint_path, revise_keys=[(r'^', 'backbone.')])
        for key in pmodel.backbone.state_dict().keys():
            assert torch.equal(pmodel.backbone.state_dict()[key],
                               state_dict[key])
        # strip prefix
        torch.save(pmodel.state_dict(), checkpoint_path)
        runner.model = model
        state_dict = runner.load_checkpoint(
            checkpoint_path, revise_keys=[(r'^backbone\.', '')])
        for key in state_dict.keys():
            key_stripped = re.sub(r'^backbone\.', '', key)
            assert torch.equal(model.state_dict()[key_stripped],
                               state_dict[key])
    finally:
        # removes the checkpoint together with the runner's work_dir
        shutil.rmtree(runner.work_dir)


def test_get_triggered_stages():
    """Check which stages ``Hook.get_triggered_stages`` reports.

    ``ToyHook`` overrides one plain stage (``before_run``) and one method that
    is expected to map onto several stages (``after_epoch``).
    """

    class ToyHook(Hook):
        # test normal stage
        # (declared with ``self``/``runner`` so the override stays callable
        # if a runner ever invokes it)
        def before_run(self, runner):
            pass

        # test the method mapped to multi stages.
        def after_epoch(self, runner):
            pass

    hook = ToyHook()
    # stages output have order, so here is list instead of set.
    expected_stages = ['before_run', 'after_train_epoch', 'after_val_epoch']
    assert hook.get_triggered_stages() == expected_stages


def test_gradient_cumulative_optimizer_hook():
    """Compare ``GradientCumulativeOptimizerHook`` with ``OptimizerHook``.

    Training with batch size 1 and ``cumulative_iters=3`` must end with the
    same ``fc`` weights as training on the same data with batch size 3 and a
    plain ``OptimizerHook``. This is checked for the epoch based and the iter
    based runner. Invalid ``cumulative_iters`` values and
    ``has_batch_norm`` are covered as well.
    """

    class ToyModel(nn.Module):
        """Linear layer initialised to all ones, optionally followed by BN."""

        def __init__(self, with_norm=False):
            super().__init__()
            self.fp16_enabled = False
            self.fc = nn.Linear(3, 2)
            for tensor in (self.fc.weight, self.fc.bias):
                nn.init.constant_(tensor, 1.)
            self.with_norm = with_norm
            if with_norm:
                self.norm = nn.BatchNorm1d(2)

        def forward(self, x):
            out = self.fc(x)
            return self.norm(out) if self.with_norm else out

        def train_step(self, x, optimizer, **kwargs):
            loss = self(x).mean()
            return dict(loss=loss, num_samples=x.shape[0])

        def val_step(self, x, optimizer, **kwargs):
            loss = self(x).mean()
            return dict(loss=loss, num_samples=x.shape[0])

    def build_toy_runner(config=dict(type='EpochBasedRunner', max_epochs=3)):
        # every runner gets its own model, SGD optimizer and work dir
        toy_model = ToyModel()
        default_args = dict(
            model=toy_model,
            work_dir=tempfile.mkdtemp(),
            optimizer=torch.optim.SGD(toy_model.parameters(), lr=0.02),
            logger=logging.getLogger(),
            meta=dict())
        return build_runner(config, default_args=default_args)

    def check_and_cleanup(cumulative_runner, reference_runner):
        cumulative_fc = cumulative_runner.model.fc
        reference_fc = reference_runner.model.fc
        # test optimizer works well
        assert (cumulative_fc.weight < 1).all()
        assert (cumulative_fc.bias < 1).all()
        # test optimizer with cumulative_iters gets the same results
        assert torch.allclose(cumulative_fc.weight, reference_fc.weight)
        assert torch.allclose(cumulative_fc.bias, reference_fc.bias)
        shutil.rmtree(cumulative_runner.work_dir)
        shutil.rmtree(reference_runner.work_dir)

    grad_clip_cfg = dict(max_norm=0.2)

    # cumulative_iters only accepts int
    with pytest.raises(AssertionError):
        GradientCumulativeOptimizerHook(cumulative_iters='str')

    # cumulative_iters only accepts positive number
    with pytest.raises(AssertionError):
        GradientCumulativeOptimizerHook(cumulative_iters=-1)

    # ---- epoch based runner ----
    samples = torch.rand((6, 3))

    # optimize with cumulative_iters
    single_loader = DataLoader(samples, batch_size=1)
    cumulative_runner = build_toy_runner()
    cumulative_runner.register_hook(
        GradientCumulativeOptimizerHook(
            grad_clip=dict(grad_clip_cfg), cumulative_iters=3))
    cumulative_runner.run([single_loader], [('train', 1)])

    # optimize without cumulative_iters
    batched_loader = DataLoader(samples, batch_size=3)
    reference_runner = build_toy_runner()
    reference_runner.register_hook(
        OptimizerHook(grad_clip=dict(grad_clip_cfg)))
    reference_runner.run([batched_loader], [('train', 1)])

    check_and_cleanup(cumulative_runner, reference_runner)

    # ---- iter based runner ----
    samples = torch.rand((8, 3))

    # optimize with cumulative_iters
    single_loader = DataLoader(samples, batch_size=1)
    cumulative_runner = build_toy_runner(
        dict(type='IterBasedRunner', max_iters=8))
    cumulative_runner.register_hook(
        GradientCumulativeOptimizerHook(
            grad_clip=dict(grad_clip_cfg), cumulative_iters=3))
    cumulative_runner.run([single_loader], [('train', 1)])

    # optimize without cumulative_iters: two full batches of 3 followed by
    # the remaining 2 samples
    divisible_loader = DataLoader(samples[:6], batch_size=3)
    remainder_loader = DataLoader(samples[6:], batch_size=2)
    reference_runner = build_toy_runner(
        dict(type='IterBasedRunner', max_iters=3))
    reference_runner.register_hook(
        OptimizerHook(grad_clip=dict(grad_clip_cfg)))
    reference_workflow = [('train', 2), ('train', 1)]
    reference_runner.run([divisible_loader, remainder_loader],
                         reference_workflow)

    check_and_cleanup(cumulative_runner, reference_runner)

    # test has_batch_norm
    norm_model = ToyModel(with_norm=True)
    norm_hook = GradientCumulativeOptimizerHook(
        grad_clip=dict(grad_clip_cfg), cumulative_iters=3)
    assert norm_hook.has_batch_norm(norm_model)


@pytest.mark.skipif(
    not torch.cuda.is_available(), reason='requires CUDA support')
def test_gradient_cumulative_fp16_optimizer_hook():
    """Compare the cumulative fp16 optimizer hook with ``Fp16OptimizerHook``.

    Training with batch size 1 and ``cumulative_iters=3`` must end with the
    same ``fc`` weights as training on the same data with batch size 3 and a
    plain ``Fp16OptimizerHook``. This is checked for the epoch based and the
    iter based runner. Model and data are moved to CUDA.
    """

    class ToyModel(nn.Module):
        """Single linear layer whose weight and bias start at one."""

        def __init__(self):
            super().__init__()
            self.fp16_enabled = False
            self.fc = nn.Linear(3, 2)
            for tensor in (self.fc.weight, self.fc.bias):
                nn.init.constant_(tensor, 1.)

        @auto_fp16(apply_to=('x', ))
        def forward(self, x):
            return self.fc(x)

        def train_step(self, x, optimizer, **kwargs):
            loss = self(x).mean()
            return dict(loss=loss, num_samples=x.shape[0])

        def val_step(self, x, optimizer, **kwargs):
            loss = self(x).mean()
            return dict(loss=loss, num_samples=x.shape[0])

    def build_toy_runner(config=dict(type='EpochBasedRunner', max_epochs=3)):
        # every runner gets its own CUDA model, SGD optimizer and work dir
        toy_model = ToyModel().cuda()
        default_args = dict(
            model=toy_model,
            work_dir=tempfile.mkdtemp(),
            optimizer=torch.optim.SGD(toy_model.parameters(), lr=0.02),
            logger=logging.getLogger(),
            meta=dict())
        return build_runner(config, default_args=default_args)

    def check_and_cleanup(cumulative_runner, reference_runner):
        cumulative_fc = cumulative_runner.model.fc
        reference_fc = reference_runner.model.fc
        # test optimizer works well
        assert (cumulative_fc.weight < 1).all()
        assert (cumulative_fc.bias < 1).all()
        # test optimizer with cumulative_iters gets the same results
        assert torch.allclose(cumulative_fc.weight, reference_fc.weight)
        assert torch.allclose(cumulative_fc.bias, reference_fc.bias)
        shutil.rmtree(cumulative_runner.work_dir)
        shutil.rmtree(reference_runner.work_dir)

    grad_clip_cfg = dict(max_norm=0.2)

    # ---- epoch based runner ----
    samples = torch.rand((6, 3)).cuda()

    # optimize with cumulative_iters
    single_loader = DataLoader(samples, batch_size=1)
    cumulative_runner = build_toy_runner()
    cumulative_runner.register_hook(
        GradientCumulativeFp16OptimizerHook(
            grad_clip=dict(grad_clip_cfg), cumulative_iters=3))
    cumulative_runner.run([single_loader], [('train', 1)])

    # optimize without cumulative_iters
    batched_loader = DataLoader(samples, batch_size=3)
    reference_runner = build_toy_runner()
    reference_runner.register_hook(
        Fp16OptimizerHook(grad_clip=dict(grad_clip_cfg)))
    reference_runner.run([batched_loader], [('train', 1)])

    check_and_cleanup(cumulative_runner, reference_runner)

    # ---- iter based runner ----
    samples = torch.rand((8, 3)).cuda()

    # optimize with cumulative_iters
    single_loader = DataLoader(samples, batch_size=1)
    cumulative_runner = build_toy_runner(
        dict(type='IterBasedRunner', max_iters=8))
    cumulative_runner.register_hook(
        GradientCumulativeFp16OptimizerHook(
            grad_clip=dict(grad_clip_cfg), cumulative_iters=3))
    cumulative_runner.run([single_loader], [('train', 1)])

    # optimize without cumulative_iters: two full batches of 3 followed by
    # the remaining 2 samples
    divisible_loader = DataLoader(samples[:6], batch_size=3)
    remainder_loader = DataLoader(samples[6:], batch_size=2)
    reference_runner = build_toy_runner(
        dict(type='IterBasedRunner', max_iters=3))
    reference_runner.register_hook(
        Fp16OptimizerHook(grad_clip=dict(grad_clip_cfg)))
    reference_workflow = [('train', 2), ('train', 1)]
    reference_runner.run([divisible_loader, remainder_loader],
                         reference_workflow)

    check_and_cleanup(cumulative_runner, reference_runner)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_runner/test_optimizer.py
================================================
import sys
import warnings
from unittest.mock import MagicMock

import pytest
import torch
import torch.nn as nn

from mmcv.runner import OPTIMIZER_BUILDERS, DefaultOptimizerConstructor
from mmcv.runner.optimizer import build_optimizer, build_optimizer_constructor
from mmcv.runner.optimizer.builder import TORCH_OPTIMIZERS
from mmcv.utils.ext_loader import check_ops_exist

# Result of ``check_ops_exist()``; the example models below only add a
# deformable-conv layer, and the helpers only expect its parameters, when
# this flag is true.
OPS_AVAILABLE = check_ops_exist()
if not OPS_AVAILABLE:
    # Register a stand-in ``mmcv.ops`` module in ``sys.modules`` whose
    # ``DeformConv2d`` / ``ModulatedDeformConv2d`` attributes are plain
    # ``dict``, so that importing those names from ``mmcv.ops`` still
    # resolves (presumably needed by the optimizer constructor - confirm).
    sys.modules['mmcv.ops'] = MagicMock(
        DeformConv2d=dict, ModulatedDeformConv2d=dict)


class SubModel(nn.Module):
    """Small sub-module used as a child of the example models.

    Holds a grouped 1x1 convolution (2 -> 2 channels, ``groups=2``), a
    ``GroupNorm`` layer and a stand-alone scalar parameter. ``forward`` is
    the identity; only the parameter layout matters to the tests.
    """

    def __init__(self):
        super(SubModel, self).__init__()
        self.conv1 = nn.Conv2d(
            in_channels=2, out_channels=2, kernel_size=1, groups=2)
        self.gn = nn.GroupNorm(num_groups=2, num_channels=2)
        self.param1 = nn.Parameter(torch.ones(1))

    def forward(self, x):
        # no computation is performed on the input
        return x


class ExampleModel(nn.Module):
    """Toy model whose parameter layout the optimizer tests rely on.

    Contains a scalar parameter, a bias-free 1x1 conv, a 1x1 conv with bias,
    a ``BatchNorm2d`` layer and a ``SubModel``. When ``OPS_AVAILABLE`` is
    true, a ``DeformConv2dPack`` layer named ``dcn`` is added as well.
    ``forward`` is the identity.
    """

    def __init__(self):
        super(ExampleModel, self).__init__()
        self.param1 = nn.Parameter(torch.ones(1))
        self.conv1 = nn.Conv2d(
            in_channels=3, out_channels=4, kernel_size=1, bias=False)
        self.conv2 = nn.Conv2d(in_channels=4, out_channels=2, kernel_size=1)
        self.bn = nn.BatchNorm2d(2)
        self.sub = SubModel()
        if not OPS_AVAILABLE:
            return
        # imported lazily: only done when the ops are reported available
        from mmcv.ops import DeformConv2dPack
        self.dcn = DeformConv2dPack(3, 4, kernel_size=3, deformable_groups=1)

    def forward(self, x):
        # no computation is performed on the input
        return x


class ExampleDuplicateModel(nn.Module):
    """Variant of the example model in which one conv layer is shared.

    ``conv1``, ``conv2`` and ``conv3`` are ``nn.Sequential`` wrappers, and
    the first entry of ``conv3`` is replaced by the very same module object
    as ``conv1[0]``, so its parameters are reachable under two names. When
    ``OPS_AVAILABLE`` is true a ``DeformConv2dPack`` layer named ``dcn`` is
    added. ``forward`` is the identity.
    """

    def __init__(self):
        super(ExampleDuplicateModel, self).__init__()
        self.param1 = nn.Parameter(torch.ones(1))
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=4, kernel_size=1,
                      bias=False))
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=4, out_channels=2, kernel_size=1))
        self.bn = nn.BatchNorm2d(2)
        self.sub = SubModel()
        self.conv3 = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=4, kernel_size=1,
                      bias=False))
        # share the module (and therefore its weight) with conv1
        self.conv3[0] = self.conv1[0]
        if not OPS_AVAILABLE:
            return
        # imported lazily: only done when the ops are reported available
        from mmcv.ops import DeformConv2dPack
        self.dcn = DeformConv2dPack(3, 4, kernel_size=3, deformable_groups=1)

    def forward(self, x):
        # no computation is performed on the input
        return x


class PseudoDataParallel(nn.Module):
    """Wrapper that exposes an ``ExampleModel`` under the ``module`` attribute.

    The tests use it in place of a real data-parallel wrapper; the wrapped
    model's parameters therefore appear with a ``module.`` name prefix.
    ``forward`` is the identity.
    """

    def __init__(self):
        super(PseudoDataParallel, self).__init__()
        self.module = ExampleModel()

    def forward(self, x):
        # no computation is performed on the input
        return x


# Shared SGD hyper-parameters: the tests below put them into their optimizer
# configs and the ``check_*`` helpers compare the built optimizer against
# them.
base_lr = 0.01
base_wd = 0.0001
momentum = 0.9


def check_default_optimizer(optimizer, model, prefix=''):
    """Assert that ``optimizer`` is the plain SGD built without paramwise cfg.

    The optimizer must use the module-level ``base_lr``, ``momentum`` and
    ``base_wd`` as defaults, and its first parameter group must hold exactly
    the example model's parameters in the expected order.

    Args:
        optimizer: Optimizer to inspect.
        model: Model whose ``named_parameters()`` are compared against.
        prefix (str): Prefix prepended to every expected parameter name,
            e.g. ``'module.'`` for wrapped models.
    """
    assert isinstance(optimizer, torch.optim.SGD)
    defaults = optimizer.defaults
    assert defaults['lr'] == base_lr
    assert defaults['momentum'] == momentum
    assert defaults['weight_decay'] == base_wd

    grouped_params = optimizer.param_groups[0]['params']

    expected_names = [
        'param1', 'conv1.weight', 'conv2.weight', 'conv2.bias',
        'bn.weight', 'bn.bias', 'sub.param1', 'sub.conv1.weight',
        'sub.conv1.bias', 'sub.gn.weight', 'sub.gn.bias'
    ]
    if OPS_AVAILABLE:
        # the deformable conv layer only exists when the ops are available
        expected_names += [
            'dcn.weight', 'dcn.conv_offset.weight', 'dcn.conv_offset.bias'
        ]

    named_params = dict(model.named_parameters())
    assert len(grouped_params) == len(expected_names)
    for actual, name in zip(grouped_params, expected_names):
        assert torch.equal(actual, named_params[prefix + name])


def check_sgd_optimizer(optimizer,
                        model,
                        prefix='',
                        bias_lr_mult=1,
                        bias_decay_mult=1,
                        norm_decay_mult=1,
                        dwconv_decay_mult=1,
                        dcn_offset_lr_mult=1,
                        bypass_duplicate=False):
    """Assert that ``optimizer`` is an SGD with one group per parameter.

    Every parameter of ``model`` must sit in its own parameter group, in
    ``model.parameters()`` order, and each group's ``lr`` / ``weight_decay``
    must equal the module-level ``base_lr`` / ``base_wd`` scaled by the
    multiplier that applies to that parameter.

    Args:
        optimizer: Optimizer to inspect.
        model: Model whose parameters are compared against the groups.
        prefix (str): Accepted so callers can pass it; not used here.
        bias_lr_mult: LR multiplier expected on conv biases.
        bias_decay_mult: Weight-decay multiplier expected on ``conv2.bias``.
        norm_decay_mult: Weight-decay multiplier expected on BN/GN params.
        dwconv_decay_mult: Weight-decay multiplier expected on the grouped
            ``sub.conv1`` parameters.
        dcn_offset_lr_mult: LR multiplier expected on ``dcn.conv_offset``.
        bypass_duplicate: Accepted so a whole ``paramwise_cfg`` can be passed
            with ``**``; not used here.
    """
    param_groups = optimizer.param_groups
    assert isinstance(optimizer, torch.optim.SGD)
    assert optimizer.defaults['lr'] == base_lr
    assert optimizer.defaults['momentum'] == momentum
    assert optimizer.defaults['weight_decay'] == base_wd
    model_parameters = list(model.parameters())
    assert len(param_groups) == len(model_parameters)
    for i, param in enumerate(model_parameters):
        param_group = param_groups[i]
        assert torch.equal(param_group['params'][0], param)
        assert param_group['momentum'] == momentum

    # (parameter name, expected lr, expected weight decay), listed in the
    # order the parameter groups are expected to appear
    expected = [
        ('param1', base_lr, base_wd),
        ('conv1.weight', base_lr, base_wd),
        ('conv2.weight', base_lr, base_wd),
        ('conv2.bias', base_lr * bias_lr_mult, base_wd * bias_decay_mult),
        ('bn.weight', base_lr, base_wd * norm_decay_mult),
        ('bn.bias', base_lr, base_wd * norm_decay_mult),
        ('sub.param1', base_lr, base_wd),
        ('sub.conv1.weight', base_lr, base_wd * dwconv_decay_mult),
        ('sub.conv1.bias', base_lr * bias_lr_mult,
         base_wd * dwconv_decay_mult),
        ('sub.gn.weight', base_lr, base_wd * norm_decay_mult),
        ('sub.gn.bias', base_lr, base_wd * norm_decay_mult),
    ]

    # The example models add the ``dcn`` layer iff OPS_AVAILABLE, so that
    # flag (not CUDA availability) decides whether groups 11-13 exist.
    if OPS_AVAILABLE:
        expected += [
            ('dcn.weight', base_lr, base_wd),
            ('dcn.conv_offset.weight', base_lr * dcn_offset_lr_mult,
             base_wd),
            ('dcn.conv_offset.bias', base_lr * dcn_offset_lr_mult, base_wd),
        ]

    # zip() would silently drop missing groups, so check the count first
    assert len(param_groups) >= len(expected)
    for group, (name, lr, weight_decay) in zip(param_groups, expected):
        assert group['lr'] == lr, f'{name} lr'
        assert group['weight_decay'] == weight_decay, f'{name} weight_decay'


def test_default_optimizer_constructor():
    """Exercise ``DefaultOptimizerConstructor`` on the example models.

    Covered, in order: invalid arguments, default construction (plain,
    pseudo data parallel, ``DataParallel``), empty ``paramwise_cfg`` (with and
    without gradients), full ``paramwise_cfg`` (with weight decay, without
    weight decay, wrapped models, frozen parameters), duplicated parameters
    with and without ``bypass_duplicate``, and ``custom_keys``.
    """
    model = ExampleModel()

    with pytest.raises(TypeError):
        # optimizer_cfg must be a dict
        optimizer_cfg = []
        optim_constructor = DefaultOptimizerConstructor(optimizer_cfg)
        optim_constructor(model)

    with pytest.raises(TypeError):
        # paramwise_cfg must be a dict or None
        optimizer_cfg = dict(lr=0.0001)
        paramwise_cfg = ['error']
        optim_constructor = DefaultOptimizerConstructor(
            optimizer_cfg, paramwise_cfg)
        optim_constructor(model)

    with pytest.raises(ValueError):
        # bias_decay_mult/norm_decay_mult is specified but weight_decay is None
        optimizer_cfg = dict(lr=0.0001, weight_decay=None)
        paramwise_cfg = dict(bias_decay_mult=1, norm_decay_mult=1)
        optim_constructor = DefaultOptimizerConstructor(
            optimizer_cfg, paramwise_cfg)
        optim_constructor(model)

    # basic config with ExampleModel
    optimizer_cfg = dict(
        type='SGD', lr=base_lr, weight_decay=base_wd, momentum=momentum)
    optim_constructor = DefaultOptimizerConstructor(optimizer_cfg)
    optimizer = optim_constructor(model)
    check_default_optimizer(optimizer, model)

    # basic config with pseudo data parallel
    model = PseudoDataParallel()
    optimizer_cfg = dict(
        type='SGD', lr=base_lr, weight_decay=base_wd, momentum=momentum)
    optim_constructor = DefaultOptimizerConstructor(optimizer_cfg)
    optimizer = optim_constructor(model)
    check_default_optimizer(optimizer, model, prefix='module.')

    # basic config with DataParallel
    if torch.cuda.is_available():
        model = torch.nn.DataParallel(ExampleModel())
        optimizer_cfg = dict(
            type='SGD', lr=base_lr, weight_decay=base_wd, momentum=momentum)
        optim_constructor = DefaultOptimizerConstructor(optimizer_cfg)
        optimizer = optim_constructor(model)
        check_default_optimizer(optimizer, model, prefix='module.')

    # Empty paramwise_cfg with ExampleModel
    model = ExampleModel()
    optimizer_cfg = dict(
        type='SGD', lr=base_lr, weight_decay=base_wd, momentum=momentum)
    paramwise_cfg = dict()
    optim_constructor = DefaultOptimizerConstructor(optimizer_cfg,
                                                    paramwise_cfg)
    optimizer = optim_constructor(model)
    check_default_optimizer(optimizer, model)

    # Empty paramwise_cfg with ExampleModel and no grad
    model = ExampleModel()
    for param in model.parameters():
        param.requires_grad = False
    optimizer_cfg = dict(
        type='SGD', lr=base_lr, weight_decay=base_wd, momentum=momentum)
    paramwise_cfg = dict()
    # pass the empty paramwise_cfg so this case tests what its title says
    optim_constructor = DefaultOptimizerConstructor(optimizer_cfg,
                                                    paramwise_cfg)
    optimizer = optim_constructor(model)
    check_default_optimizer(optimizer, model)

    # paramwise_cfg with ExampleModel
    model = ExampleModel()
    optimizer_cfg = dict(
        type='SGD', lr=base_lr, weight_decay=base_wd, momentum=momentum)
    paramwise_cfg = dict(
        bias_lr_mult=2,
        bias_decay_mult=0.5,
        norm_decay_mult=0,
        dwconv_decay_mult=0.1,
        dcn_offset_lr_mult=0.1)
    optim_constructor = DefaultOptimizerConstructor(optimizer_cfg,
                                                    paramwise_cfg)
    optimizer = optim_constructor(model)
    check_sgd_optimizer(optimizer, model, **paramwise_cfg)

    # paramwise_cfg with ExampleModel, weight decay is None
    model = ExampleModel()
    optimizer_cfg = dict(type='Rprop', lr=base_lr)
    paramwise_cfg = dict(bias_lr_mult=2)
    optim_constructor = DefaultOptimizerConstructor(optimizer_cfg,
                                                    paramwise_cfg)
    optimizer = optim_constructor(model)

    param_groups = optimizer.param_groups
    assert isinstance(optimizer, torch.optim.Rprop)
    assert optimizer.defaults['lr'] == base_lr
    model_parameters = list(model.parameters())
    assert len(param_groups) == len(model_parameters)
    for i, param in enumerate(model_parameters):
        param_group = param_groups[i]
        assert torch.equal(param_group['params'][0], param)
    # param1
    assert param_groups[0]['lr'] == base_lr
    # conv1.weight
    assert param_groups[1]['lr'] == base_lr
    # conv2.weight
    assert param_groups[2]['lr'] == base_lr
    # conv2.bias
    assert param_groups[3]['lr'] == base_lr * paramwise_cfg['bias_lr_mult']
    # bn.weight
    assert param_groups[4]['lr'] == base_lr
    # bn.bias
    assert param_groups[5]['lr'] == base_lr
    # sub.param1
    assert param_groups[6]['lr'] == base_lr
    # sub.conv1.weight
    assert param_groups[7]['lr'] == base_lr
    # sub.conv1.bias
    assert param_groups[8]['lr'] == base_lr * paramwise_cfg['bias_lr_mult']
    # sub.gn.weight
    assert param_groups[9]['lr'] == base_lr
    # sub.gn.bias
    assert param_groups[10]['lr'] == base_lr

    if OPS_AVAILABLE:
        # dcn.weight
        assert param_groups[11]['lr'] == base_lr
        # dcn.conv_offset.weight
        assert param_groups[12]['lr'] == base_lr
        # dcn.conv_offset.bias
        assert param_groups[13]['lr'] == base_lr

    # paramwise_cfg with pseudo data parallel
    model = PseudoDataParallel()
    optimizer_cfg = dict(
        type='SGD', lr=base_lr, weight_decay=base_wd, momentum=momentum)
    paramwise_cfg = dict(
        bias_lr_mult=2,
        bias_decay_mult=0.5,
        norm_decay_mult=0,
        dwconv_decay_mult=0.1,
        dcn_offset_lr_mult=0.1)
    optim_constructor = DefaultOptimizerConstructor(optimizer_cfg,
                                                    paramwise_cfg)
    optimizer = optim_constructor(model)
    check_sgd_optimizer(optimizer, model, prefix='module.', **paramwise_cfg)

    # paramwise_cfg with DataParallel
    if torch.cuda.is_available():
        model = torch.nn.DataParallel(ExampleModel())
        optimizer_cfg = dict(
            type='SGD', lr=base_lr, weight_decay=base_wd, momentum=momentum)
        paramwise_cfg = dict(
            bias_lr_mult=2,
            bias_decay_mult=0.5,
            norm_decay_mult=0,
            dwconv_decay_mult=0.1,
            dcn_offset_lr_mult=0.1)
        optim_constructor = DefaultOptimizerConstructor(
            optimizer_cfg, paramwise_cfg)
        optimizer = optim_constructor(model)
        check_sgd_optimizer(
            optimizer, model, prefix='module.', **paramwise_cfg)

    # paramwise_cfg with ExampleModel and no grad
    # (a fresh ExampleModel and explicit configs, so this case does not depend
    # on whichever wrapped model the previous section happened to build)
    model = ExampleModel()
    for param in model.parameters():
        param.requires_grad = False
    optimizer_cfg = dict(
        type='SGD', lr=base_lr, weight_decay=base_wd, momentum=momentum)
    paramwise_cfg = dict(
        bias_lr_mult=2,
        bias_decay_mult=0.5,
        norm_decay_mult=0,
        dwconv_decay_mult=0.1,
        dcn_offset_lr_mult=0.1)
    optim_constructor = DefaultOptimizerConstructor(optimizer_cfg,
                                                    paramwise_cfg)
    optimizer = optim_constructor(model)
    param_groups = optimizer.param_groups
    assert isinstance(optimizer, torch.optim.SGD)
    assert optimizer.defaults['lr'] == base_lr
    assert optimizer.defaults['momentum'] == momentum
    assert optimizer.defaults['weight_decay'] == base_wd
    for i, (name, param) in enumerate(model.named_parameters()):
        param_group = param_groups[i]
        assert torch.equal(param_group['params'][0], param)
        assert param_group['momentum'] == momentum
        assert param_group['lr'] == base_lr
        assert param_group['weight_decay'] == base_wd

    # paramwise_cfg with bypass_duplicate option
    model = ExampleDuplicateModel()
    optimizer_cfg = dict(
        type='SGD', lr=base_lr, weight_decay=base_wd, momentum=momentum)
    paramwise_cfg = dict(
        bias_lr_mult=2,
        bias_decay_mult=0.5,
        norm_decay_mult=0,
        dwconv_decay_mult=0.1)
    with pytest.raises(ValueError) as excinfo:
        optim_constructor = DefaultOptimizerConstructor(
            optimizer_cfg, paramwise_cfg)
        optim_constructor(model)
    # Checked after the ``with`` block: statements following the raising call
    # inside the block would never run.
    assert 'some parameters appear in more than one parameter ' \
           'group' in str(excinfo.value)

    paramwise_cfg = dict(
        bias_lr_mult=2,
        bias_decay_mult=0.5,
        norm_decay_mult=0,
        dwconv_decay_mult=0.1,
        dcn_offset_lr_mult=0.1,
        bypass_duplicate=True)
    optim_constructor = DefaultOptimizerConstructor(optimizer_cfg,
                                                    paramwise_cfg)
    with warnings.catch_warnings(record=True) as w:
        # the filter has to be in place before the warning is emitted
        warnings.simplefilter('always')
        optimizer = optim_constructor(model)
        assert len(w) == 1
        assert str(w[0].message) == 'conv3.0 is duplicate. It is skipped ' \
                                    'since bypass_duplicate=True'
    model_parameters = list(model.parameters())
    num_params = 14 if OPS_AVAILABLE else 11
    assert len(optimizer.param_groups) == len(model_parameters) == num_params
    check_sgd_optimizer(optimizer, model, **paramwise_cfg)

    # test DefaultOptimizerConstructor with custom_keys and ExampleModel
    model = ExampleModel()
    optimizer_cfg = dict(
        type='SGD', lr=base_lr, weight_decay=base_wd, momentum=momentum)
    paramwise_cfg = dict(
        custom_keys={
            'param1': dict(lr_mult=10),
            'sub': dict(lr_mult=0.1, decay_mult=0),
            'sub.gn': dict(lr_mult=0.01),
            'non_exist_key': dict(lr_mult=0.0)
        },
        norm_decay_mult=0.5)

    with pytest.raises(TypeError):
        # custom_keys should be a dict
        paramwise_cfg_ = dict(custom_keys=[0.1, 0.0001])
        optim_constructor = DefaultOptimizerConstructor(
            optimizer_cfg, paramwise_cfg_)
        optimizer = optim_constructor(model)

    with pytest.raises(ValueError):
        # if 'decay_mult' is specified in custom_keys, weight_decay should be
        # specified
        optimizer_cfg_ = dict(type='SGD', lr=0.01)
        paramwise_cfg_ = dict(custom_keys={'.backbone': dict(decay_mult=0.5)})
        optim_constructor = DefaultOptimizerConstructor(
            optimizer_cfg_, paramwise_cfg_)
        optimizer = optim_constructor(model)

    optim_constructor = DefaultOptimizerConstructor(optimizer_cfg,
                                                    paramwise_cfg)
    optimizer = optim_constructor(model)
    # check optimizer type and default config
    assert isinstance(optimizer, torch.optim.SGD)
    assert optimizer.defaults['lr'] == base_lr
    assert optimizer.defaults['momentum'] == momentum
    assert optimizer.defaults['weight_decay'] == base_wd

    # check params groups
    param_groups = optimizer.param_groups

    groups = []
    group_settings = []
    # group 1, matches of 'param1'
    # 'param1' is the longest match for 'sub.param1'
    groups.append(['param1', 'sub.param1'])
    group_settings.append({
        'lr': base_lr * 10,
        'momentum': momentum,
        'weight_decay': base_wd,
    })
    # group 2, matches of 'sub.gn'
    groups.append(['sub.gn.weight', 'sub.gn.bias'])
    group_settings.append({
        'lr': base_lr * 0.01,
        'momentum': momentum,
        'weight_decay': base_wd,
    })
    # group 3, matches of 'sub'
    groups.append(['sub.conv1.weight', 'sub.conv1.bias'])
    group_settings.append({
        'lr': base_lr * 0.1,
        'momentum': momentum,
        'weight_decay': 0,
    })
    # group 4, bn is configured by 'norm_decay_mult'
    groups.append(['bn.weight', 'bn.bias'])
    group_settings.append({
        'lr': base_lr,
        'momentum': momentum,
        'weight_decay': base_wd * 0.5,
    })
    # group 5, default group
    groups.append(['conv1.weight', 'conv2.weight', 'conv2.bias'])
    group_settings.append({
        'lr': base_lr,
        'momentum': momentum,
        'weight_decay': base_wd
    })

    num_params = 14 if OPS_AVAILABLE else 11
    assert len(param_groups) == num_params
    for i, (name, param) in enumerate(model.named_parameters()):
        assert torch.equal(param_groups[i]['params'][0], param)
        for group, settings in zip(groups, group_settings):
            if name in group:
                for setting in settings:
                    assert param_groups[i][setting] == settings[
                        setting], f'{name} {setting}'

    # test DefaultOptimizerConstructor with custom_keys and ExampleModel 2
    model = ExampleModel()
    optimizer_cfg = dict(type='SGD', lr=base_lr, momentum=momentum)
    paramwise_cfg = dict(custom_keys={'param1': dict(lr_mult=10)})

    optim_constructor = DefaultOptimizerConstructor(optimizer_cfg,
                                                    paramwise_cfg)
    optimizer = optim_constructor(model)
    # check optimizer type and default config
    assert isinstance(optimizer, torch.optim.SGD)
    assert optimizer.defaults['lr'] == base_lr
    assert optimizer.defaults['momentum'] == momentum
    assert optimizer.defaults['weight_decay'] == 0

    # check params groups
    param_groups = optimizer.param_groups

    groups = []
    group_settings = []
    # group 1, matches of 'param1'
    groups.append(['param1', 'sub.param1'])
    group_settings.append({
        'lr': base_lr * 10,
        'momentum': momentum,
        'weight_decay': 0,
    })
    # group 2, default group
    groups.append([
        'sub.conv1.weight', 'sub.conv1.bias', 'sub.gn.weight', 'sub.gn.bias',
        'conv1.weight', 'conv2.weight', 'conv2.bias', 'bn.weight', 'bn.bias'
    ])
    group_settings.append({
        'lr': base_lr,
        'momentum': momentum,
        'weight_decay': 0
    })

    num_params = 14 if OPS_AVAILABLE else 11
    assert len(param_groups) == num_params
    for i, (name, param) in enumerate(model.named_parameters()):
        assert torch.equal(param_groups[i]['params'][0], param)
        for group, settings in zip(groups, group_settings):
            if name in group:
                for setting in settings:
                    assert param_groups[i][setting] == settings[
                        setting], f'{name} {setting}'


def test_torch_optimizers():
    """Check that the listed torch optimizer names are all registered.

    ``TORCH_OPTIMIZERS`` may contain more names; only the ones below are
    required to be present.
    """
    required = {
        'ASGD', 'Adadelta', 'Adagrad', 'Adam', 'AdamW', 'Adamax', 'LBFGS',
        'Optimizer', 'RMSprop', 'Rprop', 'SGD', 'SparseAdam'
    }
    registered = set(TORCH_OPTIMIZERS)
    assert required <= registered


def test_build_optimizer_constructor():
    """Check ``build_optimizer_constructor`` for built-in and custom types.

    First the registered ``DefaultOptimizerConstructor`` is built from a
    config and its optimizer is verified with ``check_sgd_optimizer``. Then a
    custom constructor is registered in ``OPTIMIZER_BUILDERS`` and built the
    same way; it scales the learning rate of parameters whose name starts
    with ``conv1``.
    """
    model = ExampleModel()
    optimizer_cfg = dict(
        type='SGD', lr=base_lr, weight_decay=base_wd, momentum=momentum)
    paramwise_cfg = {
        'bias_lr_mult': 2,
        'bias_decay_mult': 0.5,
        'norm_decay_mult': 0,
        'dwconv_decay_mult': 0.1,
        'dcn_offset_lr_mult': 0.1,
    }
    default_constructor = build_optimizer_constructor(
        dict(
            type='DefaultOptimizerConstructor',
            optimizer_cfg=optimizer_cfg,
            paramwise_cfg=paramwise_cfg))
    optimizer = default_constructor(model)
    check_sgd_optimizer(optimizer, model, **paramwise_cfg)

    from mmcv.runner import OPTIMIZERS
    from mmcv.utils import build_from_cfg

    @OPTIMIZER_BUILDERS.register_module()
    class MyOptimizerConstructor(DefaultOptimizerConstructor):
        """Constructor giving every parameter its own group."""

        def __call__(self, model):
            # unwrap models that expose the real network as ``module``
            target = model.module if hasattr(model, 'module') else model

            conv1_lr_mult = self.paramwise_cfg.get('conv1_lr_mult', 1.)

            params = []
            for name, param in target.named_parameters():
                group = {'params': [param]}
                if name.startswith('conv1') and param.requires_grad:
                    group['lr'] = self.base_lr * conv1_lr_mult
                params.append(group)
            # writes into the ``optimizer_cfg`` dict of the enclosing test
            optimizer_cfg['params'] = params

            return build_from_cfg(optimizer_cfg, OPTIMIZERS)

    paramwise_cfg = {'conv1_lr_mult': 5}
    custom_constructor = build_optimizer_constructor(
        dict(
            type='MyOptimizerConstructor',
            optimizer_cfg=optimizer_cfg,
            paramwise_cfg=paramwise_cfg))
    optimizer = custom_constructor(model)

    built_groups = optimizer.param_groups
    assert isinstance(optimizer, torch.optim.SGD)
    defaults = optimizer.defaults
    assert defaults['lr'] == base_lr
    assert defaults['momentum'] == momentum
    assert defaults['weight_decay'] == base_wd
    for index, param in enumerate(model.parameters()):
        group = built_groups[index]
        assert torch.equal(group['params'][0], param)
        assert group['momentum'] == momentum
    # conv1.weight
    conv1_weight_group = built_groups[1]
    assert conv1_weight_group['lr'] == base_lr * paramwise_cfg[
        'conv1_lr_mult']
    assert conv1_weight_group['weight_decay'] == base_wd


def test_build_optimizer():
    """Exercise ``build_optimizer`` with a plain SGD config and with one that
    additionally carries a ``paramwise_cfg`` entry."""
    # Case 1: plain SGD settings, validated by the default checker.
    net = ExampleModel()
    plain_cfg = {
        'type': 'SGD',
        'lr': base_lr,
        'weight_decay': base_wd,
        'momentum': momentum,
    }
    plain_optimizer = build_optimizer(net, plain_cfg)
    check_default_optimizer(plain_optimizer, net)

    # Case 2: same settings plus per-parameter multipliers; the multipliers
    # are forwarded to the SGD checker as keyword arguments.
    net = ExampleModel()
    tuned_cfg = {
        'type': 'SGD',
        'lr': base_lr,
        'weight_decay': base_wd,
        'momentum': momentum,
        'paramwise_cfg': {
            'bias_lr_mult': 2,
            'bias_decay_mult': 0.5,
            'norm_decay_mult': 0,
            'dwconv_decay_mult': 0.1,
            'dcn_offset_lr_mult': 0.1,
        },
    }
    tuned_optimizer = build_optimizer(net, tuned_cfg)
    check_sgd_optimizer(tuned_optimizer, net, **tuned_cfg['paramwise_cfg'])


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_runner/test_runner.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import logging
import os
import os.path as osp
import platform
import random
import string
import tempfile

import pytest
import torch
import torch.nn as nn

from mmcv.parallel import MMDataParallel
from mmcv.runner import (RUNNERS, EpochBasedRunner, IterBasedRunner,
                         build_runner)
from mmcv.runner.hooks import IterTimerHook


class OldStyleModel(nn.Module):
    """Minimal module holding a single 1x1 convolution.

    It defines neither ``train_step`` nor ``val_step``.
    """

    def __init__(self):
        super(OldStyleModel, self).__init__()
        # 3 input channels -> 3 output channels, kernel size 1
        self.conv = nn.Conv2d(in_channels=3, out_channels=3, kernel_size=1)


class Model(OldStyleModel):
    """``OldStyleModel`` extended with no-op ``train_step``/``val_step``."""

    def train_step(self):
        """Do nothing and return ``None``."""
        return None

    def val_step(self):
        """Do nothing and return ``None``."""
        return None


def test_build_runner():
    """Build epoch- and iteration-based runners from config dicts.

    Checks that ``max_epochs``/``max_iters`` from the config end up on the
    runner and that giving both to ``build_runner`` raises ``ValueError``.
    """
    dir_name = ''.join(
        [random.choice(string.ascii_letters) for _ in range(10)])

    # Constructing a runner creates ``work_dir`` (see the ``osp.isdir`` checks
    # in ``test_epoch_based_runner``). Rooting it inside a TemporaryDirectory
    # guarantees the directory is removed again, even if an assertion fails.
    with tempfile.TemporaryDirectory() as temp_root:
        default_args = dict(
            model=Model(),
            work_dir=osp.join(temp_root, dir_name),
            logger=logging.getLogger())
        cfg = dict(type='EpochBasedRunner', max_epochs=1)
        runner = build_runner(cfg, default_args=default_args)
        assert runner._max_epochs == 1
        cfg = dict(type='IterBasedRunner', max_iters=1)
        runner = build_runner(cfg, default_args=default_args)
        assert runner._max_iters == 1

        # max_epochs and max_iters are mutually exclusive
        with pytest.raises(ValueError, match='Only one of'):
            cfg = dict(type='IterBasedRunner', max_epochs=1, max_iters=1)
            runner = build_runner(cfg, default_args=default_args)


@pytest.mark.parametrize('runner_class', RUNNERS.module_dict.values())
def test_epoch_based_runner(runner_class):
    """Check the constructor-argument validation of every registered runner.

    Args:
        runner_class: a runner class taken from ``RUNNERS.module_dict``.
    """

    with pytest.warns(DeprecationWarning):
        # batch_processor is deprecated
        model = OldStyleModel()

        def batch_processor():
            pass

        _ = runner_class(model, batch_processor, logger=logging.getLogger())

    with pytest.raises(TypeError):
        # batch_processor must be callable
        model = OldStyleModel()
        _ = runner_class(model, batch_processor=0, logger=logging.getLogger())

    with pytest.raises(TypeError):
        # optimizer must be a optimizer or a dict of optimizers
        model = Model()
        optimizer = 'NotAOptimizer'
        _ = runner_class(
            model, optimizer=optimizer, logger=logging.getLogger())

    # Build a valid Adam *outside* the ``raises`` block: a bare
    # ``torch.optim.Adam()`` raises TypeError on its own (missing ``params``),
    # which would satisfy ``pytest.raises`` without the runner ever being
    # called. With a real optimizer the TypeError can only come from the
    # runner rejecting the ``'NotAOptimizer'`` entry.
    model = Model()
    valid_optimizer = torch.optim.Adam(model.parameters())
    with pytest.raises(TypeError):
        # optimizer must be a optimizer or a dict of optimizers
        optimizers = dict(optim1=valid_optimizer, optim2='NotAOptimizer')
        _ = runner_class(
            model, optimizer=optimizers, logger=logging.getLogger())

    with pytest.raises(TypeError):
        # logger must be a logging.Logger
        model = Model()
        _ = runner_class(model, logger=None)

    with pytest.raises(TypeError):
        # meta must be a dict or None
        model = Model()
        _ = runner_class(model, logger=logging.getLogger(), meta=['list'])

    with pytest.raises(AssertionError):
        # model must implement the method train_step()
        model = OldStyleModel()
        _ = runner_class(model, logger=logging.getLogger())

    with pytest.raises(TypeError):
        # work_dir must be a str or None
        model = Model()
        _ = runner_class(model, work_dir=1, logger=logging.getLogger())

    with pytest.raises(RuntimeError):
        # batch_processor and train_step() cannot be both set

        def batch_processor():
            pass

        model = Model()
        _ = runner_class(model, batch_processor, logger=logging.getLogger())

    # test work_dir: it is created on first use and tolerated when it exists
    model = Model()
    temp_root = tempfile.gettempdir()
    dir_name = ''.join(
        [random.choice(string.ascii_letters) for _ in range(10)])
    work_dir = osp.join(temp_root, dir_name)
    try:
        _ = runner_class(model, work_dir=work_dir, logger=logging.getLogger())
        assert osp.isdir(work_dir)
        _ = runner_class(model, work_dir=work_dir, logger=logging.getLogger())
        assert osp.isdir(work_dir)
    finally:
        # Remove only the leaf directory. ``os.removedirs`` would also prune
        # every ancestor that becomes empty, i.e. it could delete the system
        # temp directory itself when nothing else lives in it.
        if osp.isdir(work_dir):
            os.rmdir(work_dir)


@pytest.mark.parametrize('runner_class', RUNNERS.module_dict.values())
def test_runner_with_parallel(runner_class):
    """Construct runners around models wrapped in ``MMDataParallel``.

    A wrapped ``OldStyleModel`` is combined with a batch processor, a wrapped
    ``Model`` is used on its own, and combining a wrapped ``Model`` with a
    batch processor must raise ``RuntimeError``.
    """
    log = logging.getLogger()

    def batch_processor():
        return None

    wrapped_old = MMDataParallel(OldStyleModel())
    runner_class(wrapped_old, batch_processor, logger=log)

    wrapped_new = MMDataParallel(Model())
    runner_class(wrapped_new, logger=log)

    # batch_processor and train_step() cannot be both set
    with pytest.raises(RuntimeError):
        wrapped_both = MMDataParallel(Model())
        runner_class(wrapped_both, batch_processor, logger=log)


@pytest.mark.parametrize('runner_class', RUNNERS.module_dict.values())
def test_save_checkpoint(runner_class):
    """Save a checkpoint with each runner type and inspect the files written.

    Verifies that a non-dict ``meta`` is rejected, that ``latest.pth`` and the
    first epoch/iter checkpoint exist, and that the result can be loaded.
    """
    runner = runner_class(model=Model(), logger=logging.getLogger())

    # meta should be None or dict
    with pytest.raises(TypeError):
        runner.save_checkpoint('.', meta=[])

    with tempfile.TemporaryDirectory() as ckpt_dir:
        runner.save_checkpoint(ckpt_dir)

        latest = osp.join(ckpt_dir, 'latest.pth')
        assert osp.exists(latest)

        # the file name of the first checkpoint depends on the runner flavour
        if isinstance(runner, EpochBasedRunner):
            first_ckpt = osp.join(ckpt_dir, 'epoch_1.pth')
        elif isinstance(runner, IterBasedRunner):
            first_ckpt = osp.join(ckpt_dir, 'iter_1.pth')

        assert osp.exists(first_ckpt)

        # On Windows a copy is used instead of a symlink, so the real paths
        # are only compared on other platforms.
        on_windows = platform.system() == 'Windows'
        if not on_windows:
            assert osp.realpath(latest) == osp.realpath(first_ckpt)

        torch.load(latest)


@pytest.mark.parametrize('runner_class', RUNNERS.module_dict.values())
def test_build_lr_momentum_hook(runner_class):
    """Register LR and momentum hooks from configs whose ``policy`` is spelled
    both capitalised and lower-case; every registration must add one hook."""
    runner = runner_class(model=Model(), logger=logging.getLogger())

    lr_configs = [
        # policy name already capitalised
        dict(
            policy='CosineAnnealing',
            by_epoch=False,
            min_lr_ratio=0,
            warmup_iters=2,
            warmup_ratio=0.9),
        # policy name already capitalised
        dict(
            policy='Cyclic',
            by_epoch=False,
            target_ratio=(10, 1),
            cyclic_times=1,
            step_ratio_up=0.4),
        # policy name in lower case
        dict(
            policy='cyclic',
            by_epoch=False,
            target_ratio=(0.85 / 0.95, 1),
            cyclic_times=1,
            step_ratio_up=0.4),
        # policy name already capitalised
        dict(
            policy='Step',
            warmup='linear',
            warmup_iters=500,
            warmup_ratio=1.0 / 3,
            step=[8, 11]),
        # policy name in lower case
        dict(
            policy='step',
            warmup='linear',
            warmup_iters=500,
            warmup_ratio=1.0 / 3,
            step=[8, 11]),
    ]
    momentum_configs = [
        # policy name already capitalised
        dict(
            policy='CosineAnnealing',
            min_momentum_ratio=0.99 / 0.95,
            by_epoch=False,
            warmup_iters=2,
            warmup_ratio=0.9 / 0.95),
        # policy name already capitalised
        dict(
            policy='Cyclic',
            by_epoch=False,
            target_ratio=(0.85 / 0.95, 1),
            cyclic_times=1,
            step_ratio_up=0.4),
        # policy name in lower case
        dict(
            policy='cyclic',
            by_epoch=False,
            target_ratio=(0.85 / 0.95, 1),
            cyclic_times=1,
            step_ratio_up=0.4),
    ]

    # running count of hooks expected on the runner
    expected_hooks = 0
    for lr_config in lr_configs:
        runner.register_lr_hook(lr_config)
        expected_hooks += 1
        assert len(runner.hooks) == expected_hooks

    for mom_config in momentum_configs:
        runner.register_momentum_hook(mom_config)
        expected_hooks += 1
        assert len(runner.hooks) == expected_hooks


@pytest.mark.parametrize('runner_class', RUNNERS.module_dict.values())
def test_register_timer_hook(runner_class):
    """``register_timer_hook`` accepts ``None``, a config dict, or a hook."""
    runner = runner_class(model=Model(), logger=logging.getLogger())

    # None: nothing gets registered
    runner.register_timer_hook(None)
    assert len(runner.hooks) == 0

    # config dict: an IterTimerHook is registered from it
    runner.register_timer_hook({'type': 'IterTimerHook'})
    assert len(runner.hooks) == 1
    assert isinstance(runner.hooks[0], IterTimerHook)

    # ready-made hook instance: registered as a second hook
    hook_instance = IterTimerHook()
    runner.register_timer_hook(hook_instance)
    assert len(runner.hooks) == 2
    assert isinstance(runner.hooks[1], IterTimerHook)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_runner/test_utils.py
================================================
import os
import random

import numpy as np
import torch

from mmcv.runner import set_random_seed
from mmcv.utils import TORCH_VERSION, digit_version

# Flag set when both ``torch.version.hip`` and ``ROCM_HOME`` are available.
# ``ROCM_HOME`` is only imported for torch >= 1.5; on older versions the flag
# stays False.
is_rocm_pytorch = False
if digit_version(TORCH_VERSION) >= digit_version('1.5'):
    from torch.utils.cpp_extension import ROCM_HOME
    is_rocm_pytorch = (torch.version.hip is not None
                       and ROCM_HOME is not None)


def test_set_random_seed():
    """Seeding twice with the same value must reproduce the random draws and
    set the cudnn flags according to the ``deterministic`` argument."""

    def _draw():
        # one sample from python ``random``, numpy and torch, in that order
        return random.randint(0, 10), np.random.rand(2, 2), torch.rand(2, 2)

    set_random_seed(0)
    first_py, first_np, first_torch = _draw()
    assert torch.backends.cudnn.deterministic is False
    assert torch.backends.cudnn.benchmark is False
    assert os.environ['PYTHONHASHSEED'] == str(0)

    set_random_seed(0, True)
    second_py, second_np, second_torch = _draw()
    assert torch.backends.cudnn.deterministic is True
    # benchmark is expected to be True exactly when running on ROCm PyTorch
    assert torch.backends.cudnn.benchmark is is_rocm_pytorch

    assert first_py == second_py
    assert (first_np == second_np).all()
    assert torch.equal(first_torch, second_torch)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_utils/test_config.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import json
import os
import os.path as osp
import shutil
import tempfile
from pathlib import Path

import pytest
import yaml

from mmcv import Config, ConfigDict, DictAction, dump, load

# Directory with the test fixtures: the ``data`` folder located one level
# above the directory that contains this file.
data_path = osp.join(osp.dirname(osp.dirname(__file__)), 'data')


def test_construct():
    """Construct ``Config`` objects directly and check text/dump round trips.

    Covers an empty config, an invalid constructor argument, and configs
    attached to ``.py``, ``.json`` and ``.yaml`` files, including files that
    use predefined variables such as ``{{ fileDirname }}``.
    """

    def _read_text(path):
        # Read through a context manager so that no file handle is leaked
        # (a bare ``open(path).read()`` leaves closing to the GC).
        with open(path, 'r') as f:
            return f.read()

    cfg = Config()
    assert cfg.filename is None
    assert cfg.text == ''
    assert len(cfg) == 0
    assert cfg._cfg_dict == {}

    with pytest.raises(TypeError):
        Config([0, 1])

    cfg_dict = dict(item1=[1, 2], item2=dict(a=0), item3=True, item4='test')
    # test a.py
    cfg_file = osp.join(data_path, 'config/a.py')
    cfg = Config(cfg_dict, filename=cfg_file)
    assert isinstance(cfg, Config)
    assert cfg.filename == cfg_file
    assert cfg.text == _read_text(cfg_file)
    assert cfg.dump() == cfg.pretty_text
    with tempfile.TemporaryDirectory() as temp_config_dir:
        dump_file = osp.join(temp_config_dir, 'a.py')
        cfg.dump(dump_file)
        assert cfg.dump() == _read_text(dump_file)
        assert Config.fromfile(dump_file)

    # test b.json
    cfg_file = osp.join(data_path, 'config/b.json')
    cfg = Config(cfg_dict, filename=cfg_file)
    assert isinstance(cfg, Config)
    assert cfg.filename == cfg_file
    assert cfg.text == _read_text(cfg_file)
    assert cfg.dump() == json.dumps(cfg_dict)
    with tempfile.TemporaryDirectory() as temp_config_dir:
        dump_file = osp.join(temp_config_dir, 'b.json')
        cfg.dump(dump_file)
        assert cfg.dump() == _read_text(dump_file)
        assert Config.fromfile(dump_file)

    # test c.yaml
    cfg_file = osp.join(data_path, 'config/c.yaml')
    cfg = Config(cfg_dict, filename=cfg_file)
    assert isinstance(cfg, Config)
    assert cfg.filename == cfg_file
    assert cfg.text == _read_text(cfg_file)
    assert cfg.dump() == yaml.dump(cfg_dict)
    with tempfile.TemporaryDirectory() as temp_config_dir:
        dump_file = osp.join(temp_config_dir, 'c.yaml')
        cfg.dump(dump_file)
        assert cfg.dump() == _read_text(dump_file)
        assert Config.fromfile(dump_file)

    # test h.py
    cfg_file = osp.join(data_path, 'config/h.py')
    path = osp.join(osp.dirname(__file__), 'data', 'config')
    # The value of osp.dirname(__file__) may be `D:\a\xxx` in a Windows
    # environment. When dumping the cfg_dict to file, `D:\a\xxx` would be
    # converted to `D:\x07\xxx`, which breaks the comparison between
    # `D:\a\xxx` and `D:\x07\xxx`. Therefore the path is forcibly converted
    # to its string representation with forward slashes (/).
    path = Path(path).as_posix()
    cfg_dict = dict(item1='h.py', item2=path, item3='abc_h')
    cfg = Config(cfg_dict, filename=cfg_file)
    assert isinstance(cfg, Config)
    assert cfg.filename == cfg_file
    assert cfg.text == _read_text(cfg_file)
    assert cfg.dump() == cfg.pretty_text
    with tempfile.TemporaryDirectory() as temp_config_dir:
        dump_file = osp.join(temp_config_dir, 'h.py')
        cfg.dump(dump_file)
        assert cfg.dump() == _read_text(dump_file)
        assert Config.fromfile(dump_file)
        assert Config.fromfile(dump_file)['item1'] == cfg_dict['item1']
        assert Config.fromfile(dump_file)['item2'] == cfg_dict['item2']
        assert Config.fromfile(dump_file)['item3'] == cfg_dict['item3']

    # test no use_predefined_variable
    cfg_dict = dict(
        item1='{{fileBasename}}',
        item2='{{ fileDirname}}',
        item3='abc_{{ fileBasenameNoExtension }}')
    assert Config.fromfile(cfg_file, False)
    assert Config.fromfile(cfg_file, False)['item1'] == cfg_dict['item1']
    assert Config.fromfile(cfg_file, False)['item2'] == cfg_dict['item2']
    assert Config.fromfile(cfg_file, False)['item3'] == cfg_dict['item3']

    # test p.yaml
    cfg_file = osp.join(data_path, 'config/p.yaml')
    cfg_dict = dict(item1=osp.join(osp.dirname(__file__), 'data', 'config'))
    cfg = Config(cfg_dict, filename=cfg_file)
    assert isinstance(cfg, Config)
    assert cfg.filename == cfg_file
    assert cfg.text == _read_text(cfg_file)
    assert cfg.dump() == yaml.dump(cfg_dict)
    with tempfile.TemporaryDirectory() as temp_config_dir:
        dump_file = osp.join(temp_config_dir, 'p.yaml')
        cfg.dump(dump_file)
        assert cfg.dump() == _read_text(dump_file)
        assert Config.fromfile(dump_file)
        assert Config.fromfile(dump_file)['item1'] == cfg_dict['item1']

    # test no use_predefined_variable
    assert Config.fromfile(cfg_file, False)
    assert Config.fromfile(cfg_file, False)['item1'] == '{{ fileDirname }}'

    # test o.json
    cfg_file = osp.join(data_path, 'config/o.json')
    cfg_dict = dict(item1=osp.join(osp.dirname(__file__), 'data', 'config'))
    cfg = Config(cfg_dict, filename=cfg_file)
    assert isinstance(cfg, Config)
    assert cfg.filename == cfg_file
    assert cfg.text == _read_text(cfg_file)
    assert cfg.dump() == json.dumps(cfg_dict)
    with tempfile.TemporaryDirectory() as temp_config_dir:
        dump_file = osp.join(temp_config_dir, 'o.json')
        cfg.dump(dump_file)
        assert cfg.dump() == _read_text(dump_file)
        assert Config.fromfile(dump_file)
        assert Config.fromfile(dump_file)['item1'] == cfg_dict['item1']

    # test no use_predefined_variable
    assert Config.fromfile(cfg_file, False)
    assert Config.fromfile(cfg_file, False)['item1'] == '{{ fileDirname }}'


def test_fromfile():
    """Load configs with ``Config.fromfile`` and check filename, text,
    custom-module import and the errors raised for unusable files."""
    for filename in ['a.py', 'a.b.py', 'b.json', 'c.yaml']:
        cfg_file = osp.join(data_path, 'config', filename)
        cfg = Config.fromfile(cfg_file)
        assert isinstance(cfg, Config)
        assert cfg.filename == cfg_file
        # read via a context manager so the handle is closed deterministically
        with open(cfg_file, 'r') as f:
            raw_text = f.read()
        assert cfg.text == osp.abspath(osp.expanduser(cfg_file)) + '\n' + \
            raw_text

    # test custom_imports for Config.fromfile
    cfg_file = osp.join(data_path, 'config', 'q.py')
    imported_file = osp.join(data_path, 'config', 'r.py')
    target_pkg = osp.join(osp.dirname(__file__), 'r.py')

    # Since the imported config will be regarded as a tmp file
    # it should be copied to the directory at the same level
    shutil.copy(imported_file, target_pkg)
    try:
        Config.fromfile(cfg_file, import_custom_modules=True)
        assert os.environ.pop('TEST_VALUE') == 'test'
    finally:
        # always remove the copied module, even when loading or the
        # assertion fails, so no stray ``r.py`` is left in the tests dir
        os.remove(target_pkg)

    with pytest.raises(FileNotFoundError):
        Config.fromfile('no_such_file.py')
    with pytest.raises(IOError):
        Config.fromfile(osp.join(data_path, 'color.jpg'))


def test_fromstring():
    """``Config.fromstring`` must reproduce what ``Config.fromfile`` loads,
    and must fail when the text does not match the given file format."""
    for filename in ['a.py', 'a.b.py', 'b.json', 'c.yaml']:
        cfg_file = osp.join(data_path, 'config', filename)
        file_format = osp.splitext(filename)[-1]
        in_cfg = Config.fromfile(cfg_file)

        out_cfg = Config.fromstring(in_cfg.pretty_text, '.py')
        assert in_cfg._cfg_dict == out_cfg._cfg_dict

        # context manager: do not leave the file handle open
        with open(cfg_file, 'r') as f:
            cfg_str = f.read()
        out_cfg = Config.fromstring(cfg_str, file_format)
        assert in_cfg._cfg_dict == out_cfg._cfg_dict

    # test pretty_text only supports py file format
    cfg_file = osp.join(data_path, 'config', 'b.json')
    in_cfg = Config.fromfile(cfg_file)
    with pytest.raises(Exception):
        Config.fromstring(in_cfg.pretty_text, '.json')

    # test file format error
    with open(cfg_file, 'r') as f:
        cfg_str = f.read()
    with pytest.raises(Exception):
        Config.fromstring(cfg_str, '.py')


def test_merge_from_base():
    """Load ``config/d.py`` and check the merged text and field values; also
    check that ``config/e.py`` is rejected with ``TypeError``."""
    cfg_file = osp.join(data_path, 'config/d.py')
    cfg = Config.fromfile(cfg_file)
    assert isinstance(cfg, Config)
    assert cfg.filename == cfg_file
    base_cfg_file = osp.join(data_path, 'config/base.py')

    # read both files through context managers so no handle is leaked
    with open(base_cfg_file, 'r') as f:
        base_text = f.read()
    with open(cfg_file, 'r') as f:
        child_text = f.read()

    # expected text: base file (path + content) followed by the child file
    merge_text = osp.abspath(osp.expanduser(base_cfg_file)) + '\n' + \
        base_text
    merge_text += '\n' + osp.abspath(osp.expanduser(cfg_file)) + '\n' + \
                  child_text
    assert cfg.text == merge_text
    assert cfg.item1 == [2, 3]
    assert cfg.item2.a == 1
    assert cfg.item3 is False
    assert cfg.item4 == 'test_base'

    with pytest.raises(TypeError):
        Config.fromfile(osp.join(data_path, 'config/e.py'))


def test_merge_from_multiple_bases():
    """Load ``config/l.py`` and verify its fields; ``config/m.py`` must be
    rejected with ``KeyError``."""
    source = osp.join(data_path, 'config/l.py')
    loaded = Config.fromfile(source)
    assert isinstance(loaded, Config)
    assert loaded.filename == source

    # field values
    assert loaded.item1 == [1, 2]
    assert loaded.item2.a == 0
    assert loaded.item3 is False
    assert loaded.item4 == 'test'
    assert loaded.item5 == {'a': 0, 'b': 1}
    assert loaded.item6 == [{'a': 0}, {'b': 1}]
    assert loaded.item7 == {'a': [0, 1, 2], 'b': {'c': [3.1, 4.2, 5.3]}}

    failing_source = osp.join(data_path, 'config/m.py')
    with pytest.raises(KeyError):
        Config.fromfile(failing_source)


def test_base_variables():
    """Check the field values of the ``t.*``, ``u.*`` and ``v.py`` fixture
    configs loaded through ``Config.fromfile``."""

    def _check_items_1_to_10(loaded, item8):
        # assertions shared by the ``t.*`` and ``u.*`` configs
        assert loaded.item1 == [1, 2]
        assert loaded.item2.a == 0
        assert loaded.item3 is False
        assert loaded.item4 == 'test'
        assert loaded.item5 == {'a': 0, 'b': 1}
        assert loaded.item6 == [{'a': 0}, {'b': 1}]
        assert loaded.item7 == {'a': [0, 1, 2], 'b': {'c': [3.1, 4.2, 5.3]}}
        assert loaded.item8 == item8
        assert loaded.item9 == {'a': 0}
        assert loaded.item10 == [3.1, 4.2, 5.3]

    for file in ('t.py', 't.json', 't.yaml'):
        cfg_file = osp.join(data_path, f'config/{file}')
        cfg = Config.fromfile(cfg_file)
        assert isinstance(cfg, Config)
        assert cfg.filename == cfg_file
        # here item8 equals the name of the loaded file
        _check_items_1_to_10(cfg, file)

    # test nested base
    for file in ('u.py', 'u.json', 'u.yaml'):
        cfg_file = osp.join(data_path, f'config/{file}')
        cfg = Config.fromfile(cfg_file)
        assert isinstance(cfg, Config)
        assert cfg.filename == cfg_file
        assert cfg.base == '_base_.item8'
        _check_items_1_to_10(cfg, 't.py')
        assert cfg.item11 == 't.py'
        assert cfg.item12 == {'a': 0}
        assert cfg.item13 == [3.1, 4.2, 5.3]
        assert cfg.item14 == [1, 2]
        expected_item15 = {
            'a': {'b': {'a': 0}},
            'b': [False],
            'c': ['test'],
            'd': [[{'e': 0}], [{'a': 0}, {'b': 1}]],
            'e': [1, 2],
        }
        assert cfg.item15 == expected_item15

    # test reference assignment for py
    cfg_file = osp.join(data_path, 'config/v.py')
    cfg = Config.fromfile(cfg_file)
    assert isinstance(cfg, Config)
    assert cfg.filename == cfg_file
    assert cfg.item21 == 't.py'
    assert cfg.item22 == 't.py'
    assert cfg.item23 == [3.1, 4.2, 5.3]
    assert cfg.item24 == [3.1, 4.2, 5.3]
    expected_item25 = {
        'a': {'b': [3.1, 4.2, 5.3]},
        'b': [[3.1, 4.2, 5.3]],
        'c': [[{'e': 't.py'}], [{'a': 0}, {'b': 1}]],
        'e': 't.py',
    }
    assert cfg.item25 == expected_item25


def test_merge_recursive_bases():
    """Load ``config/f.py`` and verify the resulting field values."""
    source = osp.join(data_path, 'config/f.py')
    loaded = Config.fromfile(source)
    assert isinstance(loaded, Config)
    assert loaded.filename == source

    # field values
    assert loaded.item1 == [2, 3]
    assert loaded.item2.a == 1
    assert loaded.item3 is False
    assert loaded.item4 == 'test_recursive_bases'


def test_merge_from_dict():
    """``merge_from_dict`` with dotted keys, with and without list indices."""
    cfg = Config.fromfile(osp.join(data_path, 'config/a.py'))
    cfg.merge_from_dict({'item2.a': 1, 'item2.b': 0.1, 'item3': False})
    assert cfg.item2 == {'a': 1, 'b': 0.1}
    assert cfg.item3 is False

    cfg = Config.fromfile(osp.join(data_path, 'config/s.py'))

    # list indices inside keys are accepted when allow_list_keys=True
    list_key_options = {'item.0.a': 1, 'item.1.b': 1}
    cfg.merge_from_dict(list_key_options, allow_list_keys=True)
    assert cfg.item == [{'a': 1}, {'b': 1, 'c': 0}]

    # ... and rejected with TypeError when allow_list_keys=False
    list_key_options = {'item.0.a': 1, 'item.1.b': 1}
    with pytest.raises(TypeError):
        cfg.merge_from_dict(list_key_options, allow_list_keys=False)

    # an index beyond the end of the list raises KeyError
    overflow_options = {'item.2.a': 1}
    with pytest.raises(KeyError):
        cfg.merge_from_dict(overflow_options, allow_list_keys=True)


def test_merge_delete():
    """Load ``config/delete.py``; ``_delete_`` must not appear in the result
    and the dict fields must be ``ConfigDict`` instances."""
    loaded = Config.fromfile(osp.join(data_path, 'config/delete.py'))

    # field values
    assert loaded.item1 == {'a': 0}
    assert loaded.item2 == {'a': 0, 'b': 0}
    assert loaded.item3 is True
    assert loaded.item4 == 'test'
    assert '_delete_' not in loaded.item2

    # related issue: https://github.com/open-mmlab/mmcv/issues/1570
    # exact type is required here, not merely a subclass instance
    assert type(loaded.item1) is ConfigDict
    assert type(loaded.item2) is ConfigDict


def test_merge_intermediate_variable():
    """Load ``config/i_child.py`` and verify the resulting field values."""
    loaded = Config.fromfile(osp.join(data_path, 'config/i_child.py'))

    # field values
    assert loaded.item1 == [1, 2]
    assert loaded.item2 == {'a': 0}
    assert loaded.item3 is True
    assert loaded.item4 == 'test'
    assert loaded.item_cfg == {'b': 2}
    assert loaded.item5 == {'cfg': {'b': 1}}
    assert loaded.item6 == {'cfg': {'b': 2}}


def test_fromfile_in_config():
    """Load ``config/code.py`` and check its nested ``cfg`` field and
    ``item5``."""
    loaded = Config.fromfile(osp.join(data_path, 'config/code.py'))

    # values under the nested ``cfg`` field
    nested = loaded.cfg
    assert nested.item1 == [1, 2]
    assert loaded.cfg.item2 == {'a': 0}
    assert loaded.cfg.item3 is True
    assert loaded.cfg.item4 == 'test'
    # top-level value
    assert loaded.item5 == 1


def test_dict():
    """Exercise the dict-like interface of ``Config`` (len, keys, values,
    items, attribute/key access, ``get``, ``in`` and ``update``)."""
    expected = {
        'item1': [1, 2],
        'item2': {'a': 0},
        'item3': True,
        'item4': 'test',
    }

    for filename in ('a.py', 'b.json', 'c.yaml'):
        cfg = Config.fromfile(osp.join(data_path, 'config', filename))

        # size and key set
        assert len(cfg) == 4
        assert set(cfg.keys()) == set(expected.keys())
        assert set(cfg._cfg_dict.keys()) == set(expected.keys())

        # values() and items()
        for value in cfg.values():
            assert value in expected.values()
        for name, value in cfg.items():
            assert name in expected
            assert value in expected.values()

        # attribute access
        assert cfg.item1 == expected['item1']
        assert cfg.item2 == expected['item2']
        assert cfg.item2.a == 0
        assert cfg.item3 == expected['item3']
        assert cfg.item4 == expected['item4']
        with pytest.raises(AttributeError):
            cfg.not_exist

        # membership, item access and get()
        for name in ('item1', 'item2', 'item3', 'item4'):
            assert name in cfg
            assert cfg[name] == expected[name]
            assert cfg.get(name) == expected[name]
            assert cfg.get('not_exist') is None
            assert cfg.get('not_exist', 0) == 0
            with pytest.raises(KeyError):
                cfg['not_exist']
        assert 'item1' in cfg
        assert 'not_exist' not in cfg

        # update() replaces existing entries
        cfg.update({'item1': 0})
        assert cfg.item1 == 0
        cfg.update({'item2': {'a': 1}})
        assert cfg.item2.a == 1


def test_setattr():
    """Values set via attribute or item assignment are readable both from
    ``_cfg_dict`` and through attribute access."""
    conf = Config()
    conf.item1 = [1, 2]
    conf.item2 = {'a': 0}
    conf['item5'] = {'a': {'b': None}}

    stored = conf._cfg_dict
    assert stored['item1'] == [1, 2]
    assert conf.item1 == [1, 2]
    assert conf._cfg_dict['item2'] == {'a': 0}
    assert conf.item2.a == 0
    assert conf._cfg_dict['item5'] == {'a': {'b': None}}
    assert conf.item5.a.b is None


def test_pretty_text():
    """Writing ``pretty_text`` to a ``.py`` file and loading it back must
    yield the same config dict."""
    original = Config.fromfile(osp.join(data_path, 'config/l.py'))

    with tempfile.TemporaryDirectory() as scratch_dir:
        written_path = osp.join(scratch_dir, '_text_config.py')
        with open(written_path, 'w') as out:
            out.write(original.pretty_text)
        reloaded = Config.fromfile(written_path)

    assert reloaded._cfg_dict == original._cfg_dict


def test_dict_action():
    """Parse ``--options key=value`` pairs through ``DictAction`` and merge
    the parsed result into a config."""
    cli = argparse.ArgumentParser(description='Train a detector')
    cli.add_argument(
        '--options', nargs='+', action=DictAction, help='custom options')

    # Nested brackets
    parsed = cli.parse_args(
        ['--options', 'item2.a=a,b', 'item2.b=[(a,b), [1,2], false]'])
    assert parsed.options == {
        'item2.a': ['a', 'b'],
        'item2.b': [('a', 'b'), [1, 2], False],
    }

    # Single Nested brackets
    parsed = cli.parse_args(['--options', 'item2.a=[[1]]'])
    assert parsed.options == {'item2.a': [[1]]}

    # Imbalance bracket
    with pytest.raises(AssertionError):
        cli.parse_args(['--options', 'item2.a=[(a,b), [1,2], false'])

    # Normal values
    parsed = cli.parse_args(
        ['--options', 'item2.a=1', 'item2.b=0.1', 'item2.c=x', 'item3=false'])
    assert parsed.options == {
        'item2.a': 1,
        'item2.b': 0.1,
        'item2.c': 'x',
        'item3': False,
    }

    # the parsed options can be merged straight into a loaded config
    cfg = Config.fromfile(osp.join(data_path, 'config/a.py'))
    cfg.merge_from_dict(parsed.options)
    assert cfg.item2 == {'a': 1, 'b': 0.1, 'c': 'x'}
    assert cfg.item3 is False


def test_dump_mapping():
    """Dumping ``config/n.py`` to a file and loading it back must yield the
    same config dict."""
    original = Config.fromfile(osp.join(data_path, 'config/n.py'))

    with tempfile.TemporaryDirectory() as scratch_dir:
        dumped_path = osp.join(scratch_dir, '_text_config.py')
        original.dump(dumped_path)
        reloaded = Config.fromfile(dumped_path)

    assert reloaded._cfg_dict == original._cfg_dict


def test_reserved_key():
    """Loading ``config/g.py`` must raise ``KeyError``."""
    offending_cfg = osp.join(data_path, 'config/g.py')
    with pytest.raises(KeyError):
        Config.fromfile(offending_cfg)


def test_syntax_error():
    """A config file containing invalid Python must raise ``SyntaxError``
    with the message 'There are syntax errors in config file'."""
    # the name can not be used to open the file a second time in windows,
    # so `delete` should be set as `False` and we need to manually remove it
    # more details can be found at https://github.com/open-mmlab/mmcv/pull/1077
    temp_cfg_file = tempfile.NamedTemporaryFile(suffix='.py', delete=False)
    temp_cfg_path = temp_cfg_file.name
    # Only the path is needed; release the handle before reopening the file.
    temp_cfg_file.close()
    try:
        # write a file with syntax error
        with open(temp_cfg_path, 'w') as f:
            f.write('a=0b=dict(c=1)')
        with pytest.raises(
                SyntaxError, match='There are syntax errors in config file'):
            Config.fromfile(temp_cfg_path)
    finally:
        # remove the file even when the expectation above is not met
        os.remove(temp_cfg_path)


def test_pickle_support():
    """A config written with ``dump`` to a ``.pkl`` file and read back with
    ``load`` must carry the same config dict."""
    original = Config.fromfile(osp.join(data_path, 'config/n.py'))

    with tempfile.TemporaryDirectory() as scratch_dir:
        pickle_path = osp.join(scratch_dir, '_pickle.pkl')
        dump(original, pickle_path)
        restored = load(pickle_path)

    assert restored._cfg_dict == original._cfg_dict


def test_deprecation():
    """Loading the deprecated fixture configs must emit a
    ``DeprecationWarning`` and still provide ``item1 == 'expected'``."""
    deprecated_cfg_files = (
        osp.join(data_path, 'config/deprecated.py'),
        osp.join(data_path, 'config/deprecated_as_base.py'),
    )

    for deprecated_path in deprecated_cfg_files:
        with pytest.warns(DeprecationWarning):
            loaded = Config.fromfile(deprecated_path)
        assert loaded.item1 == 'expected'


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_utils/test_env.py
================================================
import sys

import pytest

import mmcv


def test_collect_env():
    """``collect_env`` must report the expected keys and correct values for
    platform, Python version and MMCV version. Skipped without PyTorch."""
    try:
        import torch  # noqa: F401
    except ModuleNotFoundError:
        pytest.skip('skipping tests that require PyTorch')

    from mmcv.utils import collect_env
    report = collect_env()

    # keys that must always be present
    always_present = (
        'sys.platform', 'Python', 'CUDA available', 'PyTorch',
        'PyTorch compiling details', 'OpenCV', 'MMCV', 'MMCV Compiler',
        'MMCV CUDA Compiler')
    missing = [key for key in always_present if key not in report]
    assert not missing

    # CUDA-related keys are only required when CUDA is reported available
    if report['CUDA available']:
        assert 'CUDA_HOME' in report
        assert 'NVCC' in report

    # GCC is expected on every platform except win32
    if sys.platform != 'win32':
        assert 'GCC' in report

    assert report['sys.platform'] == sys.platform
    assert report['Python'] == sys.version.replace('\n', '')
    assert report['MMCV'] == mmcv.__version__


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_utils/test_hub.py
================================================
import pytest
from torch.utils import model_zoo

from mmcv.utils import TORCH_VERSION, digit_version, load_url


def test_load_url():
    """Compare ``model_zoo.load_url`` and mmcv's ``load_url`` on two files.

    The two checkpoints were saved with PyTorch 1.5 and 1.6 respectively;
    which loads succeed depends on the installed PyTorch version.
    """
    url1 = 'https://download.openmmlab.com/mmcv/test_data/saved_in_pt1.5.pth'
    url2 = 'https://download.openmmlab.com/mmcv/test_data/saved_in_pt1.6.pth'
    installed = digit_version(TORCH_VERSION)

    # The old-format checkpoint is loadable by `model_zoo` in every branch.
    model_zoo.load_url(url1)

    # The 1.6 release of PyTorch switched torch.save to use a new zipfile-based
    # file format. It will cause RuntimeError when a checkpoint was saved in
    # torch >= 1.6.0 but loaded in torch < 1.7.0.
    # More details at https://github.com/open-mmlab/mmpose/issues/904
    if installed < digit_version('1.7.0'):
        with pytest.raises(RuntimeError):
            model_zoo.load_url(url2)
    else:
        # high version of PyTorch can load checkpoints from url, regardless
        # of which version they were saved in
        model_zoo.load_url(url2)

    load_url(url1)
    # if a checkpoint was saved in torch >= 1.6.0 but loaded in torch < 1.5.0,
    # it will raise a RuntimeError
    if installed < digit_version('1.5.0'):
        with pytest.raises(RuntimeError):
            load_url(url2)
    else:
        load_url(url2)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_utils/test_logging.py
================================================
import logging
import os
import platform
import tempfile
from unittest.mock import patch

import pytest

from mmcv import get_logger, print_log

if platform.system() == 'Windows':
    import regex as re
else:
    import re


@patch('torch.distributed.get_rank', lambda: 0)
@patch('torch.distributed.is_initialized', lambda: True)
@patch('torch.distributed.is_available', lambda: True)
def test_get_logger_rank0():
    """Check the handlers ``get_logger`` attaches when the rank is 0.

    ``torch.distributed`` is patched so that the process looks like an
    initialized distributed run with rank 0.
    """
    logger = get_logger('rank0.pkg1')
    assert isinstance(logger, logging.Logger)
    assert len(logger.handlers) == 1
    assert isinstance(logger.handlers[0], logging.StreamHandler)
    assert logger.handlers[0].level == logging.INFO

    logger = get_logger('rank0.pkg2', log_level=logging.DEBUG)
    assert isinstance(logger, logging.Logger)
    assert len(logger.handlers) == 1
    assert logger.handlers[0].level == logging.DEBUG

    # the name can not be used to open the file a second time in windows,
    # so `delete` should be set as `False` and we need to manually remove it
    # more details can be found at https://github.com/open-mmlab/mmcv/pull/1077
    with tempfile.NamedTemporaryFile(delete=False) as f:
        logger = get_logger('rank0.pkg3', log_file=f.name)
        assert isinstance(logger, logging.Logger)
        assert len(logger.handlers) == 2
        assert isinstance(logger.handlers[0], logging.StreamHandler)
        assert isinstance(logger.handlers[1], logging.FileHandler)
        # requesting the same name again must return the very same object
        logger_pkg3 = get_logger('rank0.pkg3')
        assert id(logger_pkg3) == id(logger)
        # flushing and closing all handlers in order to remove `f.name`
        logging.shutdown()

    os.remove(f.name)

    # The previous check compared `logger_pkg3.handlers` with itself and so
    # could never fail. Verify instead that the sub-package logger is a real
    # logger attached below `rank0.pkg3` in the standard logging hierarchy.
    logger_subpkg = get_logger('rank0.pkg3.subpkg')
    assert isinstance(logger_subpkg, logging.Logger)
    assert logger_subpkg.parent is logger


@patch('torch.distributed.get_rank', lambda: 1)
@patch('torch.distributed.is_initialized', lambda: True)
@patch('torch.distributed.is_available', lambda: True)
def test_get_logger_rank1():
    """Check the handlers ``get_logger`` attaches when the rank is 1.

    ``torch.distributed`` is patched so that the process looks like an
    initialized distributed run with rank 1. Even when ``log_file`` is given
    the logger is expected to carry exactly one handler.
    """
    console_logger = get_logger('rank1.pkg1')
    assert isinstance(console_logger, logging.Logger)
    console_handlers = console_logger.handlers
    assert len(console_handlers) == 1
    only_handler = console_handlers[0]
    assert isinstance(only_handler, logging.StreamHandler)
    assert only_handler.level == logging.INFO

    # the name can not be used to open the file a second time in windows,
    # so `delete` should be set as `False` and we need to manually remove it
    # more details can be found at https://github.com/open-mmlab/mmcv/pull/1077
    with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
        file_logger = get_logger('rank1.pkg2', log_file=tmp_file.name)
        assert isinstance(file_logger, logging.Logger)
        file_handlers = file_logger.handlers
        assert len(file_handlers) == 1
        assert file_handlers[0].level == logging.INFO
        # flushing and closing all handlers in order to remove the temp file
        logging.shutdown()

    os.remove(tmp_file.name)


def test_print_log_print(capsys):
    """With ``logger=None`` the message must show up on stdout."""
    print_log('welcome', logger=None)
    captured_stdout = capsys.readouterr()[0]
    assert captured_stdout == 'welcome\n'


def test_print_log_silent(capsys, caplog):
    """With ``logger='silent'`` nothing is printed and nothing is logged."""
    print_log('welcome', logger='silent')
    captured_stdout = capsys.readouterr()[0]
    assert captured_stdout == ''
    assert len(caplog.records) == 0


def test_print_log_logger(caplog):
    """``print_log`` must log through a named logger or a logger object.

    The last part also checks the exact line written to the log file.
    """
    message = 'welcome'

    # logger given by name, default level
    print_log(message, logger='mmcv')
    assert caplog.record_tuples[-1] == ('mmcv', logging.INFO, message)

    # logger given by name, explicit level
    print_log(message, logger='mmcv', level=logging.ERROR)
    assert caplog.record_tuples[-1] == ('mmcv', logging.ERROR, message)

    # the name can not be used to open the file a second time in windows,
    # so `delete` should be set as `False` and we need to manually remove it
    # more details can be found at https://github.com/open-mmlab/mmcv/pull/1077
    with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
        file_logger = get_logger('abc', log_file=tmp_file.name)
        print_log(message, logger=file_logger)
        assert caplog.record_tuples[-1] == ('abc', logging.INFO, message)
        with open(tmp_file.name, 'r') as log_stream:
            written = log_stream.read()
            timestamp = r'\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3}'
            line_pattern = timestamp + r' - abc - INFO - welcome\n'
            assert re.fullmatch(line_pattern, written) is not None
        # flushing and closing all handlers in order to remove the temp file
        logging.shutdown()

    os.remove(tmp_file.name)


def test_print_log_exception():
    """Passing an integer as ``logger`` must raise ``TypeError``."""
    unsupported_logger = 0
    with pytest.raises(TypeError):
        print_log('welcome', logger=unsupported_logger)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_utils/test_misc.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import pytest

import mmcv
from mmcv import deprecated_api_warning
from mmcv.utils.misc import has_method


def test_to_ntuple():
    """Each ``to_<n>tuple`` helper must repeat a scalar ``n`` times."""
    value = 2

    # Helpers with a fixed length baked into their name.
    fixed_length_helpers = (
        (1, mmcv.utils.to_1tuple),
        (2, mmcv.utils.to_2tuple),
        (3, mmcv.utils.to_3tuple),
        (4, mmcv.utils.to_4tuple),
    )
    for length, helper in fixed_length_helpers:
        assert helper(value) == (value, ) * length

    # Helpers produced by the generic factory.
    for length in (5, 6):
        assert mmcv.utils.to_ntuple(length)(value) == (value, ) * length


def test_iter_cast():
    """Exercise ``list_cast``, ``tuple_cast`` and ``iter_cast``."""
    list_cases = (
        ([1, 2, 3], int, [1, 2, 3]),
        (['1.1', 2, '3'], float, [1.1, 2.0, 3.0]),
        ([1, 2, 3], str, ['1', '2', '3']),
    )
    for source, dst_type, expected in list_cases:
        assert mmcv.list_cast(source, dst_type) == expected

    assert mmcv.tuple_cast((1, 2, 3), str) == ('1', '2', '3')

    first_item = next(mmcv.iter_cast([1, 2, 3], str))
    assert first_item == '1'

    # a non-type destination and a non-iterable input are both rejected
    invalid_arguments = (([1, 2, 3], ''), (1, str))
    for source, dst_type in invalid_arguments:
        with pytest.raises(TypeError):
            mmcv.iter_cast(source, dst_type)


def test_is_seq_of():
    """Exercise ``is_seq_of``, ``is_list_of`` and ``is_tuple_of``."""
    float_list = [1.0, 2.0, 3.0]
    float_tuple = (1.0, 2.0, 3.0)

    # homogeneous sequences of the expected item type
    assert mmcv.is_seq_of(float_list, float)
    assert mmcv.is_seq_of([(1, ), (2, ), (3, )], tuple)
    assert mmcv.is_seq_of(float_tuple, float)
    assert mmcv.is_list_of(float_list, float)

    # right item type, wrong container type
    assert not mmcv.is_seq_of(float_tuple, float, seq_type=list)
    assert not mmcv.is_tuple_of(float_list, float)

    # mixed item types
    assert not mmcv.is_seq_of([1.0, 2, 3], int)
    assert not mmcv.is_seq_of((1.0, 2, 3), int)


def test_slice_list():
    """``slice_list`` splits by the given lengths and validates them."""
    source = [1, 2, 3, 4, 5, 6]
    assert mmcv.slice_list(source, [1, 2, 3]) == [[1], [2, 3], [4, 5, 6]]
    assert mmcv.slice_list(source, [len(source)]) == [source]

    # non-list lengths -> TypeError; lengths not summing up -> ValueError
    rejected = ((2.0, TypeError), ([1, 2], ValueError))
    for lens, error_type in rejected:
        with pytest.raises(error_type):
            mmcv.slice_list(source, lens)


def test_concat_list():
    """``concat_list`` flattens a list of lists by one level."""
    cases = (
        ([[1, 2]], [1, 2]),
        ([[1, 2], [3, 4, 5], [6]], [1, 2, 3, 4, 5, 6]),
    )
    for nested, flattened in cases:
        assert mmcv.concat_list(nested) == flattened


def test_requires_package(capsys):
    """``requires_package`` must reject calls when packages are missing.

    The decorated function names are part of the expected messages.
    """

    @mmcv.requires_package('nnn')
    def func_a():
        pass

    @mmcv.requires_package(['numpy', 'n1', 'n2'])
    def func_b():
        pass

    @mmcv.requires_package('numpy')
    def func_c():
        return 1

    def check_rejected(func, expected_stdout):
        # The call must fail and print exactly the expected hint.
        with pytest.raises(RuntimeError):
            func()
        printed, _ = capsys.readouterr()
        assert printed == expected_stdout

    check_rejected(
        func_a, ('Prerequisites "nnn" are required in method "func_a" but '
                 'not found, please install them first.\n'))
    check_rejected(
        func_b,
        ('Prerequisites "n1, n2" are required in method "func_b" but not found,'
         ' please install them first.\n'))

    assert func_c() == 1


def test_requires_executable(capsys):
    """``requires_executable`` must reject calls when programs are missing.

    The decorated function names are part of the expected messages.
    """

    @mmcv.requires_executable('nnn')
    def func_a():
        pass

    @mmcv.requires_executable(['ls', 'n1', 'n2'])
    def func_b():
        pass

    @mmcv.requires_executable('mv')
    def func_c():
        return 1

    def check_rejected(func, expected_stdout):
        # The call must fail and print exactly the expected hint.
        with pytest.raises(RuntimeError):
            func()
        printed, _ = capsys.readouterr()
        assert printed == expected_stdout

    check_rejected(
        func_a, ('Prerequisites "nnn" are required in method "func_a" but '
                 'not found, please install them first.\n'))
    check_rejected(
        func_b,
        ('Prerequisites "n1, n2" are required in method "func_b" but not found,'
         ' please install them first.\n'))

    assert func_c() == 1


def test_import_modules_from_strings():
    """Cover list, string, empty, invalid and failing import requests."""
    import os.path as osp_

    import sys as sys_

    import_modules = mmcv.import_modules_from_strings

    # several modules at once
    loaded_osp, loaded_sys = import_modules(['os.path', 'sys'])
    assert loaded_osp == osp_
    assert loaded_sys == sys_

    # a single module given as a plain string
    loaded_osp = import_modules('os.path')
    assert loaded_osp == osp_

    # nothing to import
    for nothing in (None, [], ''):
        assert import_modules(nothing) is None

    # unsupported argument types
    for unsupported in (1, [1]):
        with pytest.raises(TypeError):
            import_modules(unsupported)

    # failing imports raise by default ...
    with pytest.raises(ImportError):
        import_modules('_not_implemented_module')
    # ... and only warn when failures are explicitly allowed
    with pytest.warns(UserWarning):
        single_result = import_modules(
            '_not_implemented_module', allow_failed_imports=True)
        assert single_result is None
    with pytest.warns(UserWarning):
        mixed_result = import_modules(
            ['os.path', '_not_implemented'], allow_failed_imports=True)
        assert mixed_result[0] == loaded_osp
        assert mixed_result[1] is None


def test_is_method_overridden():
    """``is_method_overridden`` accepts a subclass or an instance of it.

    ``Sub`` overrides ``foo1`` only; passing an instance as the base class
    must raise ``AssertionError``.
    """

    class Base:

        def foo1():
            pass

        def foo2():
            pass

    class Sub(Base):

        def foo1():
            pass

    sub_instance = Sub()

    # the derived side may be the class itself or an instance of it
    for derived in (Sub, sub_instance):
        assert mmcv.is_method_overridden('foo1', Base, derived)
        assert not mmcv.is_method_overridden('foo2', Base, derived)

    # base_class should be a class, not instance
    base_instance = Base()
    with pytest.raises(AssertionError):
        mmcv.is_method_overridden('foo1', base_instance, sub_instance)


def test_has_method():
    """``has_method`` is falsy for a data attribute, truthy for a method."""

    class Foo:

        def __init__(self, name):
            self.name = name

        def print_name(self):
            print(self.name)

    instance = Foo('foo')
    expectations = (('name', False), ('print_name', True))
    for attribute, expected in expectations:
        assert bool(has_method(instance, attribute)) == expected


def test_deprecated_api_warning():
    """A deprecated keyword is forwarded under its new name."""

    @deprecated_api_warning(name_dict={'old_key': 'new_key'})
    def dummy_func(new_key=1):
        return new_key

    # replace `old_key` to `new_key`
    forwarded = dummy_func(old_key=2)
    assert forwarded == 2

    # The expected behavior is to replace the
    # deprecated key `old_key` to `new_key`,
    # but got them in the arguments at the same time
    with pytest.raises(AssertionError):
        dummy_func(old_key=1, new_key=2)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_utils/test_parrots_jit.py
================================================
import pytest
import torch

import mmcv
from mmcv.utils import TORCH_VERSION

# Marker that skips a test unless `TORCH_VERSION` is the string 'parrots'.
skip_no_parrots = pytest.mark.skipif(
    TORCH_VERSION != 'parrots', reason='test case under parrots environment')


class TestJit(object):
    """Tests for the ``mmcv.jit`` decorator.

    Every test compares a jit-decorated function with an identical plain
    Python function. Tests marked with ``skip_no_parrots`` additionally
    inspect ``func._cache._cache`` and only run when ``TORCH_VERSION`` is
    ``'parrots'``.
    """

    def test_add_dict(self):
        """A jit function taking and returning a dict matches plain Python."""

        @mmcv.jit
        def add_dict(oper):
            rets = oper['x'] + oper['y']
            return {'result': rets}

        def add_dict_pyfunc(oper):
            rets = oper['x'] + oper['y']
            return {'result': rets}

        a = torch.rand((3, 4))
        b = torch.rand((3, 4))
        oper = {'x': a, 'y': b}

        rets_t = add_dict(oper)
        rets = add_dict_pyfunc(oper)
        # Check the key on the jit output (as `test_add_list` does); checking
        # the plain Python output would only test the reference itself.
        assert 'result' in rets_t
        assert (rets_t['result'] == rets['result']).all()

    def test_add_list(self):
        """A jit function taking a list of dicts plus keyword tensors."""

        @mmcv.jit
        def add_list(oper, x, y):
            rets = {}
            for idx, pair in enumerate(oper):
                rets[f'k{idx}'] = pair['x'] + pair['y']
            rets[f'k{len(oper)}'] = x + y
            return rets

        def add_list_pyfunc(oper, x, y):
            rets = {}
            for idx, pair in enumerate(oper):
                rets[f'k{idx}'] = pair['x'] + pair['y']
            rets[f'k{len(oper)}'] = x + y
            return rets

        pair_num = 3
        oper = []
        for _ in range(pair_num):
            oper.append({'x': torch.rand((3, 4)), 'y': torch.rand((3, 4))})
        a = torch.rand((3, 4))
        b = torch.rand((3, 4))
        rets = add_list_pyfunc(oper, x=a, y=b)
        rets_t = add_list(oper, x=a, y=b)
        # one entry per pair plus the extra `x + y` entry
        for idx in range(pair_num + 1):
            assert f'k{idx}' in rets_t
            assert (rets[f'k{idx}'] == rets_t[f'k{idx}']).all()

    @skip_no_parrots
    def test_jit_cache(self):
        """The cache gains one entry for each value of ``oper['const']``."""

        @mmcv.jit
        def func(oper):
            if oper['const'] > 1:
                return oper['x'] * 2 + oper['y']
            else:
                return oper['x'] * 2 - oper['y']

        def pyfunc(oper):
            if oper['const'] > 1:
                return oper['x'] * 2 + oper['y']
            else:
                return oper['x'] * 2 - oper['y']

        assert len(func._cache._cache) == 0

        oper = {'const': 2, 'x': torch.rand((3, 4)), 'y': torch.rand((3, 4))}
        rets_plus = pyfunc(oper)
        rets_plus_t = func(oper)
        assert (rets_plus == rets_plus_t).all()
        assert len(func._cache._cache) == 1

        oper['const'] = 0.5
        rets_minus = pyfunc(oper)
        rets_minus_t = func(oper)
        assert (rets_minus == rets_minus_t).all()
        assert len(func._cache._cache) == 2

        # (2x - y) + (2x + y) == 4x, so dividing by 4 must give back x
        rets_a = (rets_minus_t + rets_plus_t) / 4
        assert torch.allclose(oper['x'], rets_a)

    @skip_no_parrots
    def test_jit_shape(self):
        """Inputs of a different rank add a second cache entry."""

        @mmcv.jit
        def func(a):
            return a + 1

        assert len(func._cache._cache) == 0

        a = torch.ones((3, 4))
        r = func(a)
        assert r.shape == (3, 4)
        assert (r == 2).all()
        assert len(func._cache._cache) == 1

        a = torch.ones((2, 3, 4))
        r = func(a)
        assert r.shape == (2, 3, 4)
        assert (r == 2).all()
        assert len(func._cache._cache) == 2

    @skip_no_parrots
    def test_jit_kwargs(self):
        """Positional and keyword call styles share a single cache entry."""

        @mmcv.jit
        def func(a, b):
            return torch.mean((a - b) * (a - b))

        assert len(func._cache._cache) == 0
        x = torch.rand((16, 32))
        y = torch.rand((16, 32))
        func(x, y)
        assert len(func._cache._cache) == 1
        func(x, b=y)
        assert len(func._cache._cache) == 1
        func(b=y, a=x)
        assert len(func._cache._cache) == 1

    def test_jit_derivate(self):
        """Gradients flow through a function built with ``derivate=True``."""

        @mmcv.jit(derivate=True)
        def func(x, y):
            return (x + 2) * (y - 2)

        a = torch.rand((3, 4))
        b = torch.rand((3, 4))
        a.requires_grad = True

        c = func(a, b)
        assert c.requires_grad
        d = torch.empty_like(c)
        d.fill_(1.0)
        c.backward(d)
        # d/dx of (x + 2) * (y - 2) is (y - 2); `b` does not require grad
        assert torch.allclose(a.grad, (b - 2))
        assert b.grad is None

        # reset the gradient and repeat with a different upstream gradient
        a.grad = None
        c = func(a, b)
        assert c.requires_grad
        d = torch.empty_like(c)
        d.fill_(2.7)
        c.backward(d)
        assert torch.allclose(a.grad, 2.7 * (b - 2))
        assert b.grad is None

    def test_jit_optimize(self):
        """A function built with ``optimize=True`` matches plain Python."""

        @mmcv.jit(optimize=True)
        def func(a, b):
            return torch.mean((a - b) * (a - b))

        def pyfunc(a, b):
            return torch.mean((a - b) * (a - b))

        a = torch.rand((16, 32))
        b = torch.rand((16, 32))

        c = func(a, b)
        d = pyfunc(a, b)
        assert torch.allclose(c, d)

    @mmcv.skip_no_elena
    def test_jit_coderize(self):
        """A function built with ``coderize=True`` matches plain Python.

        The tensors live on the CUDA device, so the test is reported as
        skipped (rather than silently passing) when CUDA is unavailable.
        """
        if not torch.cuda.is_available():
            pytest.skip('CUDA is not available')

        @mmcv.jit(coderize=True)
        def func(a, b):
            return (a + b) * (a - b)

        def pyfunc(a, b):
            return (a + b) * (a - b)

        a = torch.rand((16, 32), device='cuda')
        b = torch.rand((16, 32), device='cuda')

        c = func(a, b)
        d = pyfunc(a, b)
        assert torch.allclose(c, d)

    def test_jit_value_dependent(self):
        """A function that also calls ``torch.nonzero`` matches plain Python."""

        @mmcv.jit
        def func(a, b):
            torch.nonzero(a)
            return torch.mean((a - b) * (a - b))

        def pyfunc(a, b):
            torch.nonzero(a)
            return torch.mean((a - b) * (a - b))

        a = torch.rand((16, 32))
        b = torch.rand((16, 32))

        c = func(a, b)
        d = pyfunc(a, b)
        assert torch.allclose(c, d)

    @skip_no_parrots
    def test_jit_check_input(self):
        """Wrapping a randomized function with ``check_input`` must fail."""

        def func(x):
            y = torch.rand_like(x)
            return x + y

        a = torch.ones((3, 4))
        # NOTE(review): presumably the check re-runs `func` and compares the
        # outputs, which differ because of `rand_like` - confirm in mmcv.jit
        with pytest.raises(AssertionError):
            func = mmcv.jit(func, check_input=(a, ))

    @skip_no_parrots
    def test_jit_partial_shape(self):
        """With ``full_shape=False`` only a change of rank adds an entry."""

        @mmcv.jit(full_shape=False)
        def func(a, b):
            return torch.mean((a - b) * (a - b))

        def pyfunc(a, b):
            return torch.mean((a - b) * (a - b))

        a = torch.rand((3, 4))
        b = torch.rand((3, 4))
        assert torch.allclose(func(a, b), pyfunc(a, b))
        assert len(func._cache._cache) == 1

        # same rank, different sizes: no new cache entry
        a = torch.rand((6, 5))
        b = torch.rand((6, 5))
        assert torch.allclose(func(a, b), pyfunc(a, b))
        assert len(func._cache._cache) == 1

        # different rank: a second cache entry
        a = torch.rand((3, 4, 5))
        b = torch.rand((3, 4, 5))
        assert torch.allclose(func(a, b), pyfunc(a, b))
        assert len(func._cache._cache) == 2

        a = torch.rand((1, 9, 8))
        b = torch.rand((1, 9, 8))
        assert torch.allclose(func(a, b), pyfunc(a, b))
        assert len(func._cache._cache) == 2

    def test_instance_method(self):
        """``mmcv.jit`` also works on a method that reads instance state."""

        class T(object):

            def __init__(self, shape):
                self._c = torch.rand(shape)

            @mmcv.jit
            def test_method(self, x, y):
                return (x * self._c) + y

        shape = (16, 32)
        t = T(shape)
        a = torch.rand(shape)
        b = torch.rand(shape)
        res = (a * t._c) + b
        jit_res = t.test_method(a, b)
        assert torch.allclose(res, jit_res)

        # a second instance carries a different `_c` and must use its own
        t = T(shape)
        res = (a * t._c) + b
        jit_res = t.test_method(a, b)
        assert torch.allclose(res, jit_res)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_utils/test_path.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
from pathlib import Path

import pytest

import mmcv


def test_is_filepath():
    """Strings and ``Path`` objects are file paths; an integer is not."""
    accepted = (__file__, 'abc', Path('/etc'))
    for candidate in accepted:
        assert mmcv.is_filepath(candidate)
    assert not mmcv.is_filepath(0)


def test_fopen():
    """``mmcv.fopen`` must return a readable object for ``str`` and ``Path``.

    The returned handles are used as context managers so that they get
    closed; previously they were left open until garbage collection.
    """
    # assumes `mmcv.fopen` returns a regular file object (context manager)
    for filepath in (__file__, Path(__file__)):
        with mmcv.fopen(filepath) as opened:
            assert hasattr(opened, 'read')


def test_check_file_exist():
    """An existing file passes; a missing one raises FileNotFoundError."""
    existing_file = __file__
    mmcv.check_file_exist(existing_file)

    missing_file = 'no_such_file.txt'
    with pytest.raises(FileNotFoundError):
        mmcv.check_file_exist(missing_file)


def test_scandir():
    """Exercise ``mmcv.scandir`` on the ``data/for_scan`` fixture folder.

    Covers suffix filters, recursive scanning, case-insensitive matching
    and rejection of invalid argument types.
    """
    tests_root = osp.dirname(osp.dirname(__file__))
    folder = osp.join(tests_root, 'data/for_scan')

    def scan(*args, **kwargs):
        # Collect the generator into a set; order is not checked.
        return set(mmcv.scandir(*args, **kwargs))

    def with_suffix(names, suffixes):
        # Names ending with one of the given suffixes.
        return {name for name in names if name.endswith(suffixes)}

    top_level = ['a.bin', '1.txt', '2.txt', '1.json', '2.json', '3.TXT']
    assert scan(folder) == set(top_level)
    assert scan(Path(folder)) == set(top_level)
    assert scan(folder, '.txt') == with_suffix(top_level, '.txt')
    assert scan(folder, ('.json', '.txt')) == with_suffix(
        top_level, ('.txt', '.json'))
    assert scan(folder, '.png') == set()

    # path of sep is `\\` in windows but `/` in linux, so osp.join should be
    # used to join string for compatibility
    nested = top_level + [
        osp.join('sub', '1.json'),
        osp.join('sub', '1.txt'), '.file'
    ]
    # .file starts with '.' and is a file so it will not be scanned
    visible = {name for name in nested if name != '.file'}
    assert scan(folder, recursive=True) == visible
    assert scan(Path(folder), recursive=True) == visible
    assert scan(folder, '.txt', recursive=True) == with_suffix(nested, '.txt')
    assert scan(
        folder, '.TXT', recursive=True,
        case_sensitive=False) == with_suffix(nested, ('.txt', '.TXT'))
    assert scan(
        folder, ('.TXT', '.JSON'), recursive=True,
        case_sensitive=False) == with_suffix(nested,
                                             ('.txt', '.json', '.TXT'))

    # neither the directory nor the suffix may be an integer
    with pytest.raises(TypeError):
        list(mmcv.scandir(123))
    with pytest.raises(TypeError):
        list(mmcv.scandir(folder, 111))


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_utils/test_progressbar.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import os
import time

try:
    from unittest.mock import patch
except ImportError:
    from mock import patch

try:
    from StringIO import StringIO
except ImportError:
    from io import StringIO

import mmcv  # isort:skip


def reset_string_io(io):
    """Empty the given stream and move its position back to the start.

    Args:
        io: A stream object providing ``truncate`` and ``seek``.
    """
    new_size = 0
    start_offset = 0
    io.truncate(new_size)
    io.seek(start_offset)


class TestProgressBar:
    """Checks the exact text ``mmcv.ProgressBar`` writes to its ``file``."""

    def test_start(self):
        """The initial line is written on construction or on ``start()``."""
        stream = StringIO()
        bar_width = 20
        # first without a total task number, then with a total of 10
        cases = (
            ((), 'completed: 0, elapsed: 0s'),
            ((10, ), f'[{" " * bar_width}] 0/10, elapsed: 0s, ETA:'),
        )
        for positional, initial_text in cases:
            # started implicitly by the constructor
            reset_string_io(stream)
            mmcv.ProgressBar(*positional, bar_width=bar_width, file=stream)
            assert stream.getvalue() == initial_text

            # constructed with `start=False`: nothing is written yet
            reset_string_io(stream)
            deferred_bar = mmcv.ProgressBar(
                *positional, bar_width=bar_width, start=False, file=stream)
            assert stream.getvalue() == ''

            # an explicit `start()` writes the same initial line
            reset_string_io(stream)
            deferred_bar.start()
            assert stream.getvalue() == initial_text

    def test_update(self):
        """``update()`` after one second reports one task per second."""
        stream = StringIO()
        bar_width = 20

        # without total task num
        unbounded_bar = mmcv.ProgressBar(bar_width=bar_width, file=stream)
        time.sleep(1)
        reset_string_io(stream)
        unbounded_bar.update()
        assert stream.getvalue() == 'completed: 1, elapsed: 1s, 1.0 tasks/s'
        reset_string_io(stream)

        # with total task num
        bounded_bar = mmcv.ProgressBar(10, bar_width=bar_width, file=stream)
        time.sleep(1)
        reset_string_io(stream)
        bounded_bar.update()
        expected_line = (f'\r[{">" * 2 + " " * 18}] 1/10, 1.0 '
                         'task/s, elapsed: 1s, ETA:     9s')
        assert stream.getvalue() == expected_line

    def test_adaptive_length(self):
        """The length of the written line follows the ``COLUMNS`` variable."""
        with patch.dict('os.environ', {'COLUMNS': '80'}):
            stream = StringIO()
            bar_width = 20
            progress = mmcv.ProgressBar(10, bar_width=bar_width, file=stream)
            time.sleep(1)

            # (COLUMNS value, expected length of the written line)
            expectations = (('80', 66), ('30', 48), ('60', 60))
            for columns, expected_length in expectations:
                os.environ['COLUMNS'] = columns
                reset_string_io(stream)
                progress.update()
                assert len(stream.getvalue()) == expected_length


def sleep_1s(num):
    """Sleep for one second, then hand back ``num`` unchanged.

    Args:
        num: Any value; it is returned as-is after the delay.

    Returns:
        The ``num`` argument.
    """
    delay_seconds = 1
    time.sleep(delay_seconds)
    return num


def test_track_progress_list():
    """``track_progress`` over a list prints one line per finished task."""
    stream = StringIO()
    expected_output = ''.join([
        '[   ] 0/3, elapsed: 0s, ETA:',
        '\r[>  ] 1/3, 1.0 task/s, elapsed: 1s, ETA:     2s',
        '\r[>> ] 2/3, 1.0 task/s, elapsed: 2s, ETA:     1s',
        '\r[>>>] 3/3, 1.0 task/s, elapsed: 3s, ETA:     0s\n',
    ])
    results = mmcv.track_progress(
        sleep_1s, [1, 2, 3], bar_width=3, file=stream)
    assert stream.getvalue() == expected_output
    assert results == [1, 2, 3]


def test_track_progress_iterator():
    """``track_progress`` also accepts an ``(iterator, length)`` tuple."""
    stream = StringIO()
    task_iterator = (i for i in [1, 2, 3])
    expected_output = ''.join([
        '[   ] 0/3, elapsed: 0s, ETA:',
        '\r[>  ] 1/3, 1.0 task/s, elapsed: 1s, ETA:     2s',
        '\r[>> ] 2/3, 1.0 task/s, elapsed: 2s, ETA:     1s',
        '\r[>>>] 3/3, 1.0 task/s, elapsed: 3s, ETA:     0s\n',
    ])
    results = mmcv.track_progress(
        sleep_1s, (task_iterator, 3), bar_width=3, file=stream)
    assert stream.getvalue() == expected_output
    assert results == [1, 2, 3]


def test_track_iter_progress():
    """``track_iter_progress`` yields the items while printing progress."""
    stream = StringIO()
    expected_output = ''.join([
        '[   ] 0/3, elapsed: 0s, ETA:',
        '\r[>  ] 1/3, 1.0 task/s, elapsed: 1s, ETA:     2s',
        '\r[>> ] 2/3, 1.0 task/s, elapsed: 2s, ETA:     1s',
        '\r[>>>] 3/3, 1.0 task/s, elapsed: 3s, ETA:     0s\n',
    ])
    collected = []
    tracked = mmcv.track_iter_progress([1, 2, 3], bar_width=3, file=stream)
    for item in tracked:
        collected.append(sleep_1s(item))
    assert stream.getvalue() == expected_output
    assert collected == [1, 2, 3]


def test_track_enum_progress():
    """``track_iter_progress`` can be wrapped in ``enumerate``."""
    stream = StringIO()
    expected_output = ''.join([
        '[   ] 0/3, elapsed: 0s, ETA:',
        '\r[>  ] 1/3, 1.0 task/s, elapsed: 1s, ETA:     2s',
        '\r[>> ] 2/3, 1.0 task/s, elapsed: 2s, ETA:     1s',
        '\r[>>>] 3/3, 1.0 task/s, elapsed: 3s, ETA:     0s\n',
    ])
    collected = []
    indices = []
    tracked = mmcv.track_iter_progress([1, 2, 3], bar_width=3, file=stream)
    for index, item in enumerate(tracked):
        collected.append(sleep_1s(item))
        indices.append(index)
    assert stream.getvalue() == expected_output
    assert collected == [1, 2, 3]
    assert indices == [0, 1, 2]


def test_track_parallel_progress_list():
    """``track_parallel_progress`` over a list returns results in order.

    Only the returned values are checked; the printed text is not.
    """
    stream = StringIO()
    tasks = [1, 2, 3, 4]
    worker_count = 2
    outputs = mmcv.track_parallel_progress(
        sleep_1s, tasks, worker_count, bar_width=4, file=stream)
    # The following cannot pass CI on Github Action
    # assert out.getvalue() == (
    #     '[    ] 0/4, elapsed: 0s, ETA:'
    #     '\r[>   ] 1/4, 1.0 task/s, elapsed: 1s, ETA:     3s'
    #     '\r[>>  ] 2/4, 2.0 task/s, elapsed: 1s, ETA:     1s'
    #     '\r[>>> ] 3/4, 1.5 task/s, elapsed: 2s, ETA:     1s'
    #     '\r[>>>>] 4/4, 2.0 task/s, elapsed: 2s, ETA:     0s\n')
    assert outputs == [1, 2, 3, 4]


def test_track_parallel_progress_iterator():
    """``track_parallel_progress`` accepts an ``(iterator, length)`` tuple.

    Only the returned values are checked; the printed text is not.
    """
    stream = StringIO()
    task_iterator = (i for i in [1, 2, 3, 4])
    worker_count = 2
    outputs = mmcv.track_parallel_progress(
        sleep_1s, (task_iterator, 4), worker_count, bar_width=4, file=stream)
    # The following cannot pass CI on Github Action
    # assert out.getvalue() == (
    #     '[    ] 0/4, elapsed: 0s, ETA:'
    #     '\r[>   ] 1/4, 1.0 task/s, elapsed: 1s, ETA:     3s'
    #     '\r[>>  ] 2/4, 2.0 task/s, elapsed: 1s, ETA:     1s'
    #     '\r[>>> ] 3/4, 1.5 task/s, elapsed: 2s, ETA:     1s'
    #     '\r[>>>>] 4/4, 2.0 task/s, elapsed: 2s, ETA:     0s\n')
    assert outputs == [1, 2, 3, 4]


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_utils/test_registry.py
================================================
import pytest

import mmcv


def test_registry():
    """Exercise registration, lookup and ``repr`` of ``mmcv.Registry``.

    The assertions depend on the cumulative state of ``CATS``, so the order
    of the statements matters. The expected ``repr`` string embeds the
    qualified names of the classes defined inside this function.
    """
    CATS = mmcv.Registry('cat')
    assert CATS.name == 'cat'
    assert CATS.module_dict == {}
    assert len(CATS) == 0

    # register through the decorator form
    @CATS.register_module()
    class BritishShorthair:
        pass

    assert len(CATS) == 1
    assert CATS.get('BritishShorthair') is BritishShorthair

    class Munchkin:
        pass

    # register through a plain call with the class as argument
    CATS.register_module(Munchkin)
    assert len(CATS) == 2
    assert CATS.get('Munchkin') is Munchkin
    assert 'Munchkin' in CATS

    # registering the same name twice is rejected unless `force=True`
    with pytest.raises(KeyError):
        CATS.register_module(Munchkin)

    CATS.register_module(Munchkin, force=True)
    assert len(CATS) == 2

    # force=False
    with pytest.raises(KeyError):

        @CATS.register_module()
        class BritishShorthair:
            pass

    @CATS.register_module(force=True)
    class BritishShorthair:
        pass

    assert len(CATS) == 2

    # unknown names give `None` and are not contained in the registry
    assert CATS.get('PersianCat') is None
    assert 'PersianCat' not in CATS

    # one class registered under several explicit names
    @CATS.register_module(name=['Siamese', 'Siamese2'])
    class SiameseCat:
        pass

    assert CATS.get('Siamese').__name__ == 'SiameseCat'
    assert CATS.get('Siamese2').__name__ == 'SiameseCat'

    class SphynxCat:
        pass

    CATS.register_module(name='Sphynx', module=SphynxCat)
    assert CATS.get('Sphynx') is SphynxCat

    CATS.register_module(name=['Sphynx1', 'Sphynx2'], module=SphynxCat)
    assert CATS.get('Sphynx2') is SphynxCat

    # expected repr: every registered name mapped to its class
    repr_str = 'Registry(name=cat, items={'
    repr_str += ("'BritishShorthair': <class 'test_registry.test_registry."
                 "<locals>.BritishShorthair'>, ")
    repr_str += ("'Munchkin': <class 'test_registry.test_registry."
                 "<locals>.Munchkin'>, ")
    repr_str += ("'Siamese': <class 'test_registry.test_registry."
                 "<locals>.SiameseCat'>, ")
    repr_str += ("'Siamese2': <class 'test_registry.test_registry."
                 "<locals>.SiameseCat'>, ")
    repr_str += ("'Sphynx': <class 'test_registry.test_registry."
                 "<locals>.SphynxCat'>, ")
    repr_str += ("'Sphynx1': <class 'test_registry.test_registry."
                 "<locals>.SphynxCat'>, ")
    repr_str += ("'Sphynx2': <class 'test_registry.test_registry."
                 "<locals>.SphynxCat'>")
    repr_str += '})'
    assert repr(CATS) == repr_str

    # name type
    with pytest.raises(TypeError):
        CATS.register_module(name=7474741, module=SphynxCat)

    # the registered module should be a class
    with pytest.raises(TypeError):
        CATS.register_module(0)

    # can only decorate a class
    with pytest.raises(TypeError):

        @CATS.register_module()
        def some_method():
            pass

    # begin: test old APIs
    with pytest.warns(DeprecationWarning):
        CATS.register_module(SphynxCat)
        assert CATS.get('SphynxCat').__name__ == 'SphynxCat'

    with pytest.warns(DeprecationWarning):
        CATS.register_module(SphynxCat, force=True)
        assert CATS.get('SphynxCat').__name__ == 'SphynxCat'

    with pytest.warns(DeprecationWarning):

        # decorator used without parentheses
        @CATS.register_module
        class NewCat:
            pass

        assert CATS.get('NewCat').__name__ == 'NewCat'

    with pytest.warns(DeprecationWarning):
        CATS.deprecated_register_module(SphynxCat, force=True)
        assert CATS.get('SphynxCat').__name__ == 'SphynxCat'

    with pytest.warns(DeprecationWarning):

        @CATS.deprecated_register_module
        class CuteCat:
            pass

        assert CATS.get('CuteCat').__name__ == 'CuteCat'

    with pytest.warns(DeprecationWarning):

        @CATS.deprecated_register_module(force=True)
        class NewCat2:
            pass

        assert CATS.get('NewCat2').__name__ == 'NewCat2'

    # end: test old APIs


def test_multi_scope_registry():
    """Exercise scoped lookups across a parent/child registry tree.

    The tree built here is ``dogs`` -> ``hound`` -> {``little_hound``,
    ``mid_hound``}.  Each registry must resolve its own entries by bare name
    and entries of related registries through scope-prefixed keys.
    """
    # Root registry: no explicit scope is passed to the constructor.
    root = mmcv.Registry('dogs')
    assert (root.name, root.scope) == ('dogs', 'test_registry')
    assert root.module_dict == {}
    assert len(root) == 0

    @root.register_module()
    class GoldenRetriever:
        pass

    assert len(root) == 1
    assert root.get('GoldenRetriever') is GoldenRetriever

    # First-level child, registered under the 'hound' scope.
    hounds = mmcv.Registry('dogs', parent=root, scope='hound')

    @hounds.register_module()
    class BloodHound:
        pass

    assert len(hounds) == 1
    for registry, key in ((hounds, 'BloodHound'),
                          (root, 'hound.BloodHound'),
                          (hounds, 'hound.BloodHound')):
        assert registry.get(key) is BloodHound

    # Second-level child below 'hound'.
    little_hounds = mmcv.Registry(
        'dogs', parent=hounds, scope='little_hound')

    @little_hounds.register_module()
    class Dachshund:
        pass

    assert len(little_hounds) == 1
    assert little_hounds.get('Dachshund') is Dachshund
    assert little_hounds.get('hound.BloodHound') is BloodHound
    for registry, key in ((hounds, 'little_hound.Dachshund'),
                          (root, 'hound.little_hound.Dachshund')):
        assert registry.get(key) is Dachshund

    # Sibling of 'little_hound', also below 'hound'.
    mid_hounds = mmcv.Registry('dogs', parent=hounds, scope='mid_hound')

    @mid_hounds.register_module()
    class Beagle:
        pass

    for registry, key in ((mid_hounds, 'Beagle'),
                          (hounds, 'mid_hound.Beagle'),
                          (root, 'hound.mid_hound.Beagle'),
                          (little_hounds, 'hound.mid_hound.Beagle')):
        assert registry.get(key) is Beagle
    assert mid_hounds.get('hound.BloodHound') is BloodHound
    # Dachshund lives in 'little_hound', so this key resolves to nothing.
    assert mid_hounds.get('hound.Dachshund') is None


def test_build_from_cfg():
    """Cover the successful and failing paths of ``mmcv.build_from_cfg``."""
    backbones = mmcv.Registry('backbone')

    @backbones.register_module()
    class ResNet:

        def __init__(self, depth, stages=4):
            self.depth, self.stages = depth, stages

    @backbones.register_module()
    class ResNeXt:

        def __init__(self, depth, stages=4):
            self.depth, self.stages = depth, stages

    # type given by its registered name
    net = mmcv.build_from_cfg({'type': 'ResNet', 'depth': 50}, backbones)
    assert isinstance(net, ResNet)
    assert (net.depth, net.stages) == (50, 4)

    # default_args supplies a constructor argument missing from cfg
    net = mmcv.build_from_cfg({
        'type': 'ResNet',
        'depth': 50
    },
                              backbones,
                              default_args={'stages': 3})
    assert isinstance(net, ResNet)
    assert (net.depth, net.stages) == (50, 3)

    # every constructor argument given in cfg
    net = mmcv.build_from_cfg({
        'type': 'ResNeXt',
        'depth': 50,
        'stages': 3
    }, backbones)
    assert isinstance(net, ResNeXt)
    assert (net.depth, net.stages) == (50, 3)

    # type given as the class object itself
    net = mmcv.build_from_cfg({'type': ResNet, 'depth': 50}, backbones)
    assert isinstance(net, ResNet)
    assert (net.depth, net.stages) == (50, 4)

    # type defined using default_args, first by name and then by class
    for type_value in ('ResNet', ResNet):
        net = mmcv.build_from_cfg({'depth': 50},
                                  backbones,
                                  default_args=dict(type=type_value))
        assert isinstance(net, ResNet)
        assert (net.depth, net.stages) == (50, 4)

    # not a registry
    with pytest.raises(TypeError):
        mmcv.build_from_cfg({'type': 'VGG'}, 'BACKBONES')

    # non-registered class
    with pytest.raises(KeyError):
        mmcv.build_from_cfg({'type': 'VGG'}, backbones)

    # default_args must be a dict or None
    with pytest.raises(TypeError):
        mmcv.build_from_cfg({
            'type': 'ResNet',
            'depth': 50
        },
                            backbones,
                            default_args=1)

    # cfg['type'] should be a str or class
    with pytest.raises(TypeError):
        mmcv.build_from_cfg({'type': 1000}, backbones)

    # cfg should contain the key "type"
    with pytest.raises(KeyError, match='must contain the key "type"'):
        mmcv.build_from_cfg({'depth': 50, 'stages': 4}, backbones)

    # cfg or default_args should contain the key "type"
    with pytest.raises(KeyError, match='must contain the key "type"'):
        mmcv.build_from_cfg({'depth': 50},
                            backbones,
                            default_args=dict(stages=4))

    # incorrect registry type
    with pytest.raises(TypeError):
        mmcv.build_from_cfg({'type': 'ResNet', 'depth': 50}, 'BACKBONES')

    # incorrect default_args type
    with pytest.raises(TypeError):
        mmcv.build_from_cfg({
            'type': 'ResNet',
            'depth': 50
        },
                            backbones,
                            default_args=0)

    # incorrect arguments
    with pytest.raises(TypeError):
        mmcv.build_from_cfg({
            'type': 'ResNet',
            'non_existing_arg': 50
        }, backbones)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_utils/test_testing.py
================================================
import numpy as np
import pytest

import mmcv

try:
    import torch
except ImportError:
    torch = None
else:
    import torch.nn as nn


def test_assert_dict_contains_subset():
    """Check ``mmcv.assert_dict_contains_subset`` on plain, array and tensor
    values."""
    contains = mmcv.assert_dict_contains_subset
    plain = {'a': 'test1', 'b': 2, 'c': (4, 6)}

    # case 1: the subset equals the whole dict
    assert contains(plain, {'a': 'test1', 'b': 2, 'c': (4, 6)})

    # cases 2-4: reordered tuple, None value, key absent from the dict
    rejected_subsets = (
        {'a': 'test1', 'b': 2, 'c': (6, 4)},
        {'a': 'test1', 'b': 2, 'c': None},
        {'a': 'test1', 'b': 2, 'd': (4, 6)},
    )
    for subset in rejected_subsets:
        assert not contains(plain, subset)

    # case 5: numpy arrays that differ in a single element
    holder = dict(plain, d=np.array([[5, 3, 5], [1, 2, 3]]))
    altered = dict(plain, d=np.array([[5, 3, 5], [6, 2, 3]]))
    assert not contains(holder, altered)

    # case 6: equal single-element numpy arrays
    holder = dict(plain, d=np.array([[1]]))
    same = dict(plain, d=np.array([[1]]))
    assert contains(holder, same)

    if torch is None:
        return

    holder = dict(plain, d=torch.tensor([5, 3, 5]))

    # case 7: same shape, different values
    assert not contains(holder, {'d': torch.tensor([5, 5, 5])})

    # case 8: different shape
    two_rows = torch.tensor([[5, 3, 5], [4, 1, 2]])
    assert not contains(holder, {'d': two_rows})


def test_assert_attrs_equal():
    """Check ``mmcv.assert_attrs_equal`` against class attributes holding
    plain Python values, a function and (when available) tensors."""
    attrs_equal = mmcv.assert_attrs_equal

    class TestExample(object):
        a = 1
        b = ('wvi', 3)
        c = [4.5, 3.14]

        def test_func(self):
            return self.b

    # case 1: every attribute matches
    assert attrs_equal(TestExample, dict(a=1, b=('wvi', 3), c=[4.5, 3.14]))

    # case 2: one list value has an extra element
    assert not attrs_equal(TestExample,
                           dict(a=1, b=('wvi', 3), c=[4.5, 3.14, 2]))

    # case 3: 'bc' is not an attribute of the class
    assert not attrs_equal(TestExample, dict(bc=54, c=[4.5, 3.14]))

    # case 4: a function attribute is compared as well
    assert attrs_equal(
        TestExample, dict(b=('wvi', 3), test_func=TestExample.test_func))

    if torch is None:
        return

    class TensorExample(object):
        a = torch.tensor([1])
        b = torch.tensor([4, 5])

    # case 5: tensors with equal content
    assert attrs_equal(TensorExample,
                       dict(a=torch.tensor([1]), b=torch.tensor([4, 5])))

    # case 6: one tensor element differs
    assert not attrs_equal(
        TensorExample, dict(a=torch.tensor([1]), b=torch.tensor([4, 6])))


# Dicts under test (a single case).
assert_dict_has_keys_data_1 = [
    dict(res_layer=1, norm_layer=2, dense_layer=3),
]
# (keys to look for, expected return value) pairs.
assert_dict_has_keys_data_2 = [
    (['res_layer', 'dense_layer'], True),
    (['res_layer', 'conv_layer'], False),
]


@pytest.mark.parametrize('obj', assert_dict_has_keys_data_1)
@pytest.mark.parametrize('expected_keys, ret_value',
                         assert_dict_has_keys_data_2)
def test_assert_dict_has_keys(obj, expected_keys, ret_value):
    """``mmcv.assert_dict_has_keys`` must return ``ret_value`` for the given
    dict and key list."""
    outcome = mmcv.assert_dict_has_keys(obj, expected_keys)
    assert outcome == ret_value


# Key lists under test (a single case).
assert_keys_equal_data_1 = [
    ['res_layer', 'norm_layer', 'dense_layer'],
]
# (target key list, expected return value) pairs.
assert_keys_equal_data_2 = [
    (['res_layer', 'norm_layer', 'dense_layer'], True),
    (['res_layer', 'dense_layer', 'norm_layer'], True),
    (['res_layer', 'norm_layer'], False),
    (['res_layer', 'conv_layer', 'norm_layer'], False),
]


@pytest.mark.parametrize('result_keys', assert_keys_equal_data_1)
@pytest.mark.parametrize('target_keys, ret_value', assert_keys_equal_data_2)
def test_assert_keys_equal(result_keys, target_keys, ret_value):
    """``mmcv.assert_keys_equal`` must return ``ret_value`` for the given
    pair of key lists."""
    outcome = mmcv.assert_keys_equal(result_keys, target_keys)
    assert outcome == ret_value


@pytest.mark.skipif(torch is None, reason='requires torch library')
def test_assert_is_norm_layer():
    """``mmcv.assert_is_norm_layer`` accepts normalisation layers only."""
    # (layer, whether it should be reported as a norm layer), cases 1-4
    layer_cases = (
        (nn.Conv3d(3, 64, 3), False),
        (nn.BatchNorm3d(128), True),
        (nn.GroupNorm(8, 64), True),
        (nn.Sigmoid(), False),
    )
    for layer, is_norm in layer_cases:
        assert bool(mmcv.assert_is_norm_layer(layer)) is is_norm


@pytest.mark.skipif(torch is None, reason='requires torch library')
def test_assert_params_all_zeros():
    """``mmcv.assert_params_all_zeros`` on layers with and without a bias."""
    all_zeros = mmcv.assert_params_all_zeros

    # Convolution: weight and bias both zeroed.
    conv = nn.Conv2d(3, 64, 3)
    nn.init.constant_(conv.weight, 0)
    nn.init.constant_(conv.bias, 0)
    assert all_zeros(conv)

    # Re-initialised weight with the bias still zero.
    nn.init.xavier_normal_(conv.weight)
    nn.init.constant_(conv.bias, 0)
    assert not all_zeros(conv)

    # Linear layer created without a bias term.
    linear = nn.Linear(2048, 400, bias=False)
    nn.init.constant_(linear.weight, 0)
    assert all_zeros(linear)

    nn.init.normal_(linear.weight, mean=0, std=0.01)
    assert not all_zeros(linear)


def test_check_python_script(capsys):
    """Run the hello script through ``mmcv.utils.check_python_script`` and
    compare what it prints."""
    for who in ('zz', 'agent'):
        mmcv.utils.check_python_script(f'./tests/data/scripts/hello.py {who}')
        printed = capsys.readouterr().out
        assert printed == f'hello {who}!\n'

    # Make sure that wrong cmd raises an error
    bad_cmd = './tests/data/scripts/hello.py li zz'
    with pytest.raises(SystemExit):
        mmcv.utils.check_python_script(bad_cmd)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_utils/test_timer.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import time

import pytest

import mmcv


def test_timer_init():
    """A timer starts only on request when built with ``start=False`` and
    immediately when built with defaults."""
    lazy_timer = mmcv.Timer(start=False)
    assert not lazy_timer.is_running
    lazy_timer.start()
    assert lazy_timer.is_running

    eager_timer = mmcv.Timer()
    assert eager_timer.is_running


def test_timer_run():
    """Elapsed-time queries on a running timer, and errors on a stopped one.

    The three queries must stay in this order: the expected values are
    written for this exact call sequence.
    """
    tolerance = 1e-2

    running = mmcv.Timer()
    time.sleep(1)
    assert abs(running.since_start() - 1) < tolerance
    time.sleep(1)
    assert abs(running.since_last_check() - 1) < tolerance
    assert abs(running.since_start() - 2) < tolerance

    # A timer that was never started rejects both queries.
    stopped = mmcv.Timer(False)
    for query in (stopped.since_start, stopped.since_last_check):
        with pytest.raises(mmcv.TimerError):
            query()


def test_timer_context(capsys):
    """Used as a context manager, the timer prints the elapsed time."""
    # Default template: the printed text parses as a float close to 1.
    with mmcv.Timer():
        time.sleep(1)
    printed = capsys.readouterr().out
    assert abs(float(printed) - 1) < 1e-2

    # Custom template.
    with mmcv.Timer(print_tmpl='time: {:.1f}s'):
        time.sleep(1)
    printed = capsys.readouterr().out
    assert printed == 'time: 1.0s\n'


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_utils/test_trace.py
================================================
import pytest
import torch

from mmcv.utils import digit_version, is_jit_tracing


@pytest.mark.skipif(
    digit_version(torch.__version__) < digit_version('1.6.0'),
    reason='torch.jit.is_tracing is not available before 1.6.0')
def test_is_jit_tracing():
    """``is_jit_tracing`` must be false in eager mode and true under
    ``torch.jit.trace``."""

    def branch_on_tracing(tensor):
        # Tensor passthrough while tracing, Python list otherwise.
        return tensor if is_jit_tracing() else tensor.tolist()

    sample = torch.rand(3)

    # test without trace
    eager_result = branch_on_tracing(sample)
    assert isinstance(eager_result, list)

    # test with trace
    traced = torch.jit.trace(branch_on_tracing, (torch.rand(1), ))
    traced_result = traced(sample)
    assert isinstance(traced_result, torch.Tensor)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_utils/test_version_utils.py
================================================
from unittest.mock import patch

import pytest

from mmcv import get_git_hash, parse_version_info
from mmcv.utils import digit_version


def test_digit_version():
    """Table-driven checks of ``digit_version``: exact tuples, equalities,
    strict ordering and rejected inputs."""
    exact_tuples = (
        ('0.2.16', (0, 2, 16, 0, 0, 0)),
        ('1.2.3', (1, 2, 3, 0, 0, 0)),
        ('1.2.3rc0', (1, 2, 3, 0, -1, 0)),
        ('1.2.3rc1', (1, 2, 3, 0, -1, 1)),
        ('1.0rc0', (1, 0, 0, 0, -1, 0)),
        ('v1', (1, 0, 0, 0, 0, 0)),
        ('v1.1.5', (1, 1, 5, 0, 0, 0)),
    )
    for text, expected in exact_tuples:
        assert digit_version(text) == expected

    # Pairs that must parse to the same tuple.
    equal_pairs = (
        ('1.0', '1.0.0'),
        ('1.5.0+cuda90_cudnn7.6.3_lms', '1.5'),
    )
    for left, right in equal_pairs:
        assert digit_version(left) == digit_version(right)

    # Pairs where the first must sort strictly before the second.
    ordered_pairs = (
        ('1.0.0dev', '1.0.0a'),
        ('1.0.0a', '1.0.0a1'),
        ('1.0.0a', '1.0.0b'),
        ('1.0.0b', '1.0.0rc'),
        ('1.0.0rc1', '1.0.0'),
        ('1.0.0', '1.0.0post'),
        ('1.0.0post', '1.0.0post1'),
    )
    for older, newer in ordered_pairs:
        assert digit_version(older) < digit_version(newer)

    # Strings that are rejected with an AssertionError.
    for malformed in ('a', '1x', '1.x'):
        with pytest.raises(AssertionError):
            digit_version(malformed)


def test_parse_version_info():
    """``parse_version_info`` keeps the release tag ('rc') as a string."""
    expectations = {
        '0.2.16': (0, 2, 16, 0, 0, 0),
        '1.2.3': (1, 2, 3, 0, 0, 0),
        '1.2.3rc0': (1, 2, 3, 0, 'rc', 0),
        '1.2.3rc1': (1, 2, 3, 0, 'rc', 1),
        '1.0rc0': (1, 0, 0, 0, 'rc', 0),
    }
    for text, expected in expectations.items():
        assert parse_version_info(text) == expected


def _mock_cmd_success(cmd):
    return '3b46d33e90c397869ad5103075838fdfc9812aa0'.encode('ascii')


def _mock_cmd_fail(cmd):
    raise OSError


def test_get_git_hash():
    """``get_git_hash`` with the underlying command runner patched to
    succeed and then to fail."""
    runner_path = 'mmcv.utils.version_utils._minimal_ext_cmd'
    full_hash = '3b46d33e90c397869ad5103075838fdfc9812aa0'

    # Runner returns a hash: full, truncated, and over-long digit counts.
    with patch(runner_path, _mock_cmd_success):
        assert get_git_hash() == full_hash
        assert get_git_hash(digits=6) == '3b46d3'
        assert get_git_hash(digits=100) == get_git_hash()

    # Runner raises OSError: the fallback string is returned instead.
    with patch(runner_path, _mock_cmd_fail):
        assert get_git_hash() == 'unknown'
        assert get_git_hash(fallback='n/a') == 'n/a'


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_video/test_optflow.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import os
import os.path as osp
import tempfile

import cv2
import numpy as np
import pytest
from numpy.testing import assert_array_almost_equal, assert_array_equal

import mmcv


def test_flowread():
    """Read optical flow from a .flo file, an array and quantized JPEGs,
    then check the rejected inputs."""
    data_dir = osp.join(osp.dirname(__file__), '../data')
    expected_shape = (60, 80, 2)

    # read .flo file
    flo_flow = mmcv.flowread(osp.join(data_dir, 'optflow.flo'))
    assert flo_flow.shape == expected_shape

    # pseudo read: an array argument comes back with equal content
    assert_array_equal(flo_flow, mmcv.flowread(flo_flow))

    # read quantized flow concatenated vertically
    vertical_path = osp.join(data_dir, 'optflow_concat0.jpg')
    vertical_flow = mmcv.flowread(vertical_path, quantize=True, denorm=True)
    assert vertical_flow.shape == expected_shape

    # read quantized flow concatenated horizontally
    horizontal_path = osp.join(data_dir, 'optflow_concat1.jpg')
    horizontal_flow = mmcv.flowread(
        horizontal_path, quantize=True, concat_axis=1, denorm=True)
    assert horizontal_flow.shape == expected_shape

    # test exceptions: (expected error, positional args, keyword args)
    notflow_file = osp.join(data_dir, 'color.jpg')
    failing_calls = (
        (TypeError, (1, ), {}),
        (IOError, (notflow_file, ), {}),
        (IOError, (notflow_file, ), {'quantize': True}),
        (ValueError, (np.zeros((100, 100, 1)), ), {}),
    )
    for error_type, args, kwargs in failing_calls:
        with pytest.raises(error_type):
            mmcv.flowread(*args, **kwargs)


def test_flowwrite():
    """Write optical flow as a .flo file and as quantized JPEGs.

    Temporary files are cleaned up in ``finally`` blocks, so a failing
    assertion does not leak the ``mkstemp`` descriptor or leave files behind
    in the system temp directory.
    """
    flow = np.random.rand(100, 100, 2).astype(np.float32)

    # write to a .flo file
    tmp_filehandler, filename = tempfile.mkstemp()
    try:
        mmcv.flowwrite(flow, filename)
        flow_from_file = mmcv.flowread(filename)
        assert_array_equal(flow, flow_from_file)
    finally:
        # mkstemp returns an open OS-level descriptor that the caller owns.
        os.close(tmp_filehandler)
        os.remove(filename)

    # write to two .jpg files
    tmp_filename = osp.join(tempfile.gettempdir(), 'mmcv_test_flow.jpg')
    for concat_axis in range(2):
        try:
            mmcv.flowwrite(
                flow, tmp_filename, quantize=True, concat_axis=concat_axis)
            shape = (200, 100) if concat_axis == 0 else (100, 200)
            assert osp.isfile(tmp_filename)
            assert mmcv.imread(tmp_filename, flag='unchanged').shape == shape
        finally:
            # The file may be missing if flowwrite itself failed.
            if osp.isfile(tmp_filename):
                os.remove(tmp_filename)

    # test exceptions
    with pytest.raises(AssertionError):
        mmcv.flowwrite(flow, tmp_filename, quantize=True, concat_axis=2)


def test_quantize_flow():
    """Compare ``mmcv.quantize_flow`` with an element-wise reference, with
    and without normalisation."""
    flow = (np.random.rand(10, 8, 2).astype(np.float32) - 0.5) * 15

    # Without normalisation.
    max_val = 5.0
    dx, dy = mmcv.quantize_flow(flow, max_val=max_val, norm=False)
    expected = np.zeros_like(flow, dtype=np.uint8)
    for i, j, k in np.ndindex(expected.shape):
        shifted = flow[i, j, k] + max_val
        clipped = min(max(shifted, 0), 2 * max_val)
        expected[i, j, k] = min(np.floor(255 * clipped / (2 * max_val)), 254)
    assert_array_equal(dx, expected[..., 0])
    assert_array_equal(dy, expected[..., 1])

    # With normalisation: channel 0 is divided by the size of axis 1,
    # channel 1 by the size of axis 0.
    max_val = 0.5
    dx, dy = mmcv.quantize_flow(flow, max_val=max_val, norm=True)
    expected = np.zeros_like(flow, dtype=np.uint8)
    for i, j, k in np.ndindex(expected.shape):
        scale = flow.shape[1] if k == 0 else flow.shape[0]
        shifted = flow[i, j, k] / scale + max_val
        clipped = min(max(shifted, 0), 2 * max_val)
        expected[i, j, k] = min(np.floor(255 * clipped / (2 * max_val)), 254)
    assert_array_equal(dx, expected[..., 0])
    assert_array_equal(dy, expected[..., 1])


def test_dequantize_flow():
    """Compare ``mmcv.dequantize_flow`` with an element-wise reference, with
    and without denormalisation."""
    dx = np.random.randint(256, size=(10, 8), dtype=np.uint8)
    dy = np.random.randint(256, size=(10, 8), dtype=np.uint8)

    # Without denormalisation.
    max_val = 5.0
    flow = mmcv.dequantize_flow(dx, dy, max_val=max_val, denorm=False)
    expected = np.zeros_like(flow, dtype=np.float32)
    for i, j in np.ndindex(expected.shape[:2]):
        expected[i, j, 0] = float(dx[i, j] +
                                  0.5) * 2 * max_val / 255 - max_val
        expected[i, j, 1] = float(dy[i, j] +
                                  0.5) * 2 * max_val / 255 - max_val
    assert_array_almost_equal(flow, expected)

    # With denormalisation: channel 0 is scaled by the width, channel 1 by
    # the height.
    max_val = 0.5
    flow = mmcv.dequantize_flow(dx, dy, max_val=max_val, denorm=True)
    h, w = dx.shape
    expected = np.zeros_like(flow, dtype=np.float32)
    for i, j in np.ndindex(expected.shape[:2]):
        unit_x = float(dx[i, j] + 0.5) * 2 * max_val / 255 - max_val
        unit_y = float(dy[i, j] + 0.5) * 2 * max_val / 255 - max_val
        expected[i, j, 0] = unit_x * w
        expected[i, j, 1] = unit_y * h
    assert_array_almost_equal(flow, expected)


def test_flow2rgb():
    """``mmcv.flow2rgb`` on a one-row flow field, including a vector with an
    infinite component."""
    flow_vectors = [[0, 0], [0.5, 0.5], [1, 1], [2, 1], [3, np.inf]]
    flow = np.array([flow_vectors], dtype=np.float32)
    # yapf: disable
    expected_rgb = np.array([[[1., 1., 1.],
                              [1., 0.826074731, 0.683772236],
                              [1., 0.652149462, 0.367544472],
                              [1., 0.265650552, 5.96046448e-08],
                              [0., 0., 0.]]],
                            dtype=np.float32)
    # yapf: enable
    assert_array_almost_equal(mmcv.flow2rgb(flow), expected_rgb)


def test_flow_warp():
    """``mmcv.flow_warp`` with integer and fractional flow, plus the invalid
    argument paths."""
    # Unit flow everywhere: both interpolation modes must agree.
    image = np.zeros((5, 5, 3))
    image[2, 2, 0] = 1
    unit_flow = np.ones((5, 5, 2))
    nearest = mmcv.flow_warp(image, unit_flow, interpolate_mode='nearest')
    bilinear = mmcv.flow_warp(image, unit_flow, interpolate_mode='bilinear')
    assert_array_almost_equal(nearest, bilinear, decimal=5)

    # A single fractional flow vector at the centre pixel.
    image = np.zeros((5, 5, 1))
    image[2, 2, 0] = 1
    image[2, 3, 0] = 0.75
    centre_flow = np.zeros((5, 5, 2))
    centre_flow[2, 2, :] = [0.5, 0.7]

    expected = np.copy(image)
    expected[2, 2] = 0.5 * 0.3 + 0.75 * 0.5 * 0.3
    bilinear = mmcv.flow_warp(image, centre_flow, interpolate_mode='bilinear')
    assert_array_almost_equal(expected, bilinear, decimal=5)

    # Unknown interpolation mode.
    with pytest.raises(NotImplementedError):
        mmcv.flow_warp(image, centre_flow, interpolate_mode='xxx')

    # Flow argument reduced to two dimensions.
    with pytest.raises(AssertionError):
        mmcv.flow_warp(image, centre_flow[:, :, 0], interpolate_mode='xxx')


def test_make_color_wheel():
    """Compare ``mmcv.make_color_wheel`` output with hard-coded tables.

    Two wheels are checked with exact equality: the one built with default
    arguments (55 rows of 3 float32 values) and the one built from
    ``[2, 2, 2, 2, 2, 2]`` (12 rows of 3 float32 values).
    """
    default_color_wheel = mmcv.make_color_wheel()
    color_wheel = mmcv.make_color_wheel([2, 2, 2, 2, 2, 2])
    # The tables below are hand-aligned, so yapf is switched off for them.
    # yapf: disable
    # Expected wheel for the default arguments.
    assert_array_equal(default_color_wheel, np.array(
        [[1.       , 0.        , 0.        ],  # noqa
        [1.        , 0.06666667, 0.        ],  # noqa
        [1.        , 0.13333334, 0.        ],  # noqa
        [1.        , 0.2       , 0.        ],  # noqa
        [1.        , 0.26666668, 0.        ],  # noqa
        [1.        , 0.33333334, 0.        ],  # noqa
        [1.        , 0.4       , 0.        ],  # noqa
        [1.        , 0.46666667, 0.        ],  # noqa
        [1.        , 0.53333336, 0.        ],  # noqa
        [1.        , 0.6       , 0.        ],  # noqa
        [1.        , 0.6666667 , 0.        ],  # noqa
        [1.        , 0.73333335, 0.        ],  # noqa
        [1.        , 0.8       , 0.        ],  # noqa
        [1.        , 0.8666667 , 0.        ],  # noqa
        [1.        , 0.93333334, 0.        ],  # noqa
        [1.        , 1.        , 0.        ],  # noqa
        [0.8333333 , 1.        , 0.        ],  # noqa
        [0.6666667 , 1.        , 0.        ],  # noqa
        [0.5       , 1.        , 0.        ],  # noqa
        [0.33333334, 1.        , 0.        ],  # noqa
        [0.16666667, 1.        , 0.        ],  # noqa
        [0.        , 1.        , 0.        ],  # noqa
        [0.        , 1.        , 0.25      ],  # noqa
        [0.        , 1.        , 0.5       ],  # noqa
        [0.        , 1.        , 0.75      ],  # noqa
        [0.        , 1.        , 1.        ],  # noqa
        [0.        , 0.90909094, 1.        ],  # noqa
        [0.        , 0.8181818 , 1.        ],  # noqa
        [0.        , 0.72727275, 1.        ],  # noqa
        [0.        , 0.6363636 , 1.        ],  # noqa
        [0.        , 0.54545456, 1.        ],  # noqa
        [0.        , 0.45454547, 1.        ],  # noqa
        [0.        , 0.36363637, 1.        ],  # noqa
        [0.        , 0.27272728, 1.        ],  # noqa
        [0.        , 0.18181819, 1.        ],  # noqa
        [0.        , 0.09090909, 1.        ],  # noqa
        [0.        , 0.        , 1.        ],  # noqa
        [0.07692308, 0.        , 1.        ],  # noqa
        [0.15384616, 0.        , 1.        ],  # noqa
        [0.23076923, 0.        , 1.        ],  # noqa
        [0.30769232, 0.        , 1.        ],  # noqa
        [0.3846154 , 0.        , 1.        ],  # noqa
        [0.46153846, 0.        , 1.        ],  # noqa
        [0.53846157, 0.        , 1.        ],  # noqa
        [0.61538464, 0.        , 1.        ],  # noqa
        [0.6923077 , 0.        , 1.        ],  # noqa
        [0.7692308 , 0.        , 1.        ],  # noqa
        [0.84615386, 0.        , 1.        ],  # noqa
        [0.9230769 , 0.        , 1.        ],  # noqa
        [1.        , 0.        , 1.        ],  # noqa
        [1.        , 0.        , 0.8333333 ],  # noqa
        [1.        , 0.        , 0.6666667 ],  # noqa
        [1.        , 0.        , 0.5       ],  # noqa
        [1.        , 0.        , 0.33333334],  # noqa
        [1.        , 0.        , 0.16666667]], dtype=np.float32))  # noqa

    # Expected wheel for the [2, 2, 2, 2, 2, 2] argument.
    assert_array_equal(
        color_wheel,
        np.array([[1., 0. , 0. ],  # noqa
                 [1. , 0.5, 0. ],  # noqa
                 [1. , 1. , 0. ],  # noqa
                 [0.5, 1. , 0. ],  # noqa
                 [0. , 1. , 0. ],  # noqa
                 [0. , 1. , 0.5],  # noqa
                 [0. , 1. , 1. ],  # noqa
                 [0. , 0.5, 1. ],  # noqa
                 [0. , 0. , 1. ],  # noqa
                 [0.5, 0. , 1. ],  # noqa
                 [1. , 0. , 1. ],  # noqa
                 [1. , 0. , 0.5]], dtype=np.float32))  # noqa
    # yapf: enable


def test_flow_from_bytes():
    """Decoding the raw bytes of a .flo file must match reading the file."""
    flow_file = osp.join(osp.dirname(__file__), '../data', 'optflow.flo')
    expected_shape = (60, 80, 2)

    # read .flo file
    from_file = mmcv.flowread(flow_file)

    # decode the same file from its raw bytes
    with open(flow_file, 'rb') as stream:
        raw_bytes = stream.read()
    from_bytes = mmcv.flow_from_bytes(raw_bytes)

    assert from_bytes.shape == expected_shape
    assert np.all(from_bytes == from_file)


def test_sparse_flow_from_bytes():
    """Decoding a sparse-flow PNG from bytes must match a reference decoding
    done directly with OpenCV."""
    flow_file = osp.join(
        osp.dirname(__file__), '../data', 'sparse_flow.png')

    # read flow from bytes
    with open(flow_file, 'rb') as stream:
        raw_bytes = stream.read()
    decoded_flow, decoded_valid = mmcv.sparse_flow_from_bytes(raw_bytes)

    # test flow shape is [H, W, 2] and valid shape is [H, W]
    assert decoded_flow.shape[:2] == decoded_valid.shape
    assert decoded_flow.shape[2] == 2

    # read flow from file: reverse the channel order, then split the first
    # two channels (flow) from the third (valid mask)
    pixels = cv2.imread(flow_file, cv2.IMREAD_ANYDEPTH | cv2.IMREAD_COLOR)
    pixels = pixels[:, :, ::-1].astype(np.float32)
    reference_valid = pixels[:, :, 2]
    reference_flow = (pixels[:, :, :2] - 2**15) / 64.0

    assert np.all(decoded_flow == reference_flow)
    assert np.all(decoded_valid == reference_valid)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_video/test_processing.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import os
import os.path as osp
import platform
import tempfile

import pytest

import mmcv


class TestVideoEditor:
    """Tests for the video cutting, concatenating and resizing helpers."""

    @classmethod
    def setup_class(cls):
        # Sample clip shared by every test; the tests assume 168 frames.
        here = osp.dirname(__file__)
        cls.video_path = osp.join(here, '../data/test.mp4')
        cls.num_frames = 168

    @pytest.mark.skipif(platform.system() == 'Windows', reason='skip windows')
    def test_cut_concat_video(self):
        """Cut the clip in two at the 3 mark, then join the parts again."""
        tmp_dir = tempfile.gettempdir()
        head_file = osp.join(tmp_dir, '.mmcv_test1.mp4')
        tail_file = osp.join(tmp_dir, '.mmcv_test2.mp4')
        mmcv.cut_video(self.video_path, head_file, end=3, vcodec='h264')
        mmcv.cut_video(self.video_path, tail_file, start=3, vcodec='h264')
        head = mmcv.VideoReader(head_file)
        tail = mmcv.VideoReader(tail_file)
        assert len(head) == 75
        assert len(tail) == self.num_frames - 75

        joined_file = osp.join(tmp_dir, '.mmcv_test.mp4')
        mmcv.concat_video([head_file, tail_file], joined_file)
        joined = mmcv.VideoReader(joined_file)
        assert len(joined) == self.num_frames
        for path in (head_file, tail_file, joined_file):
            os.remove(path)

    @pytest.mark.skipif(platform.system() == 'Windows', reason='skip windows')
    def test_resize_video(self):
        """Resize by explicit size and by ratio, with and without
        ``keep_ar``."""
        out_file = osp.join(tempfile.gettempdir(), '.mmcv_test.mp4')
        # (extra positional args, keyword args, expected resolution)
        resize_cases = (
            (((200, 100), ), dict(log_level='panic'), (200, 100)),
            ((), dict(ratio=2), (294 * 2, 240 * 2)),
            (((1000, 480), ), dict(keep_ar=True), (294 * 2, 240 * 2)),
            ((), dict(ratio=(2, 1.5), keep_ar=True), (294 * 2, 360)),
        )
        for extra_args, options, resolution in resize_cases:
            mmcv.resize_video(self.video_path, out_file, *extra_args,
                              **options)
            reader = mmcv.VideoReader(out_file)
            assert reader.resolution == resolution
            os.remove(out_file)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_video/test_reader.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import os
import os.path as osp
import shutil
import tempfile
from collections import OrderedDict

import pytest

import mmcv


class TestCache:
    """Tests for ``mmcv.Cache``: construction, insertion and lookup."""

    def test_init(self):
        # A capacity of 0 is rejected.
        with pytest.raises(ValueError):
            mmcv.Cache(0)

        fresh = mmcv.Cache(100)
        assert (fresh.capacity, fresh.size) == (100, 0)

    def test_put(self):
        cache = mmcv.Cache(3)
        # Fill the cache up to its capacity.
        for count, key in enumerate(('k1', 'k2', 'k3'), start=1):
            cache.put(key, count)
            assert cache.size == count
        assert cache._cache == OrderedDict([('k1', 1), ('k2', 2), ('k3', 3)])

        # A fourth key pushes out the first one inserted.
        cache.put('k4', 4)
        assert cache.size == 3
        after_eviction = OrderedDict([('k2', 2), ('k3', 3), ('k4', 4)])
        assert cache._cache == after_eviction

        # Putting an existing key again changes neither content nor order.
        cache.put('k2', 2)
        assert cache._cache == after_eviction

    def test_get(self):
        cache = mmcv.Cache(3)
        # A missing key gives None, or the supplied default.
        assert cache.get('key_none') is None
        assert cache.get('key_none', 0) == 0

        cache.put('k1', 1)
        assert cache.get('k1') == 1


class TestVideoReader:
    """Tests for ``mmcv.VideoReader`` and the frame <-> video helpers.

    The expected values (resolution, fps, frame count, per-frame mean
    intensities) are tied to the ``../data/test.mp4`` fixture and, for part of
    ``test_load``, to a remote sample video.
    """

    @classmethod
    def setup_class(cls):
        """Store the fixture locations and expected frame count on the class."""
        # Local fixture, resolved relative to this test file.
        cls.video_path = osp.join(osp.dirname(__file__), '../data/test.mp4')
        # Number of frames the tests expect in the local fixture.
        cls.num_frames = 168
        # Remote video opened by test_load, so that test needs network access.
        cls.video_url = 'https://www.learningcontainer.com/wp-content/uploads/2020/05/sample-mp4-file.mp4'  # noqa: E501

    def test_load(self):
        """Check the metadata exposed after opening a local file and a URL."""
        # read from video file
        v = mmcv.VideoReader(self.video_path)
        assert v.width == 294
        assert v.height == 240
        assert v.fps == 25
        assert v.frame_cnt == self.num_frames
        assert len(v) == self.num_frames
        assert v.opened
        import cv2
        # The underlying capture object must be an OpenCV VideoCapture.
        assert isinstance(v.vcap, type(cv2.VideoCapture()))

        # read from video url
        v = mmcv.VideoReader(self.video_url)
        assert v.width == 320
        assert v.height == 240
        assert v.fps == 15
        assert v.frame_cnt == 1889
        assert len(v) == 1889
        assert v.opened
        assert isinstance(v.vcap, type(cv2.VideoCapture()))

    def test_read(self):
        """Check sequential reads, random access and out-of-range indexing.

        The statements are order dependent: the final ``v.read()`` is expected
        to return the frame following the last randomly accessed one.
        """
        v = mmcv.VideoReader(self.video_path)
        img = v.read()
        assert int(round(img.mean())) == 94
        img = v.get_frame(63)
        assert int(round(img.mean())) == 94
        img = v[64]
        assert int(round(img.mean())) == 205
        # Negative indices count from the end: -104 and 64 give the same mean.
        img = v[-104]
        assert int(round(img.mean())) == 205
        img = v[63]
        assert int(round(img.mean())) == 94
        img = v[-105]
        assert int(round(img.mean())) == 94
        # After accessing frame 63 the next sequential read has the mean of frame 64.
        img = v.read()
        assert int(round(img.mean())) == 205
        with pytest.raises(IndexError):
            v.get_frame(self.num_frames + 1)
        with pytest.raises(IndexError):
            v[-self.num_frames - 1]

    def test_slice(self):
        """Check slicing with positive, negative, reversed and open bounds."""
        v = mmcv.VideoReader(self.video_path)
        imgs = v[-105:-103]
        assert int(round(imgs[0].mean())) == 94
        assert int(round(imgs[1].mean())) == 205
        assert len(imgs) == 2
        imgs = v[63:65]
        assert int(round(imgs[0].mean())) == 94
        assert int(round(imgs[1].mean())) == 205
        assert len(imgs) == 2
        # Negative step: frames come back in reverse order.
        imgs = v[64:62:-1]
        assert int(round(imgs[0].mean())) == 205
        assert int(round(imgs[1].mean())) == 94
        assert len(imgs) == 2
        imgs = v[:5]
        assert len(imgs) == 5
        for img in imgs:
            assert int(round(img.mean())) == 94
        # The last three frames of the fixture are expected to have mean 0.
        imgs = v[165:]
        assert len(imgs) == 3
        for img in imgs:
            assert int(round(img.mean())) == 0
        imgs = v[-3:]
        assert len(imgs) == 3
        for img in imgs:
            assert int(round(img.mean())) == 0

    def test_current_frame(self):
        """``current_frame()`` is None before any read, then the last frame read."""
        v = mmcv.VideoReader(self.video_path)
        assert v.current_frame() is None
        v.read()
        img = v.current_frame()
        assert int(round(img.mean())) == 94

    def test_position(self):
        """``position`` advances with reads and follows ``get_frame``."""
        v = mmcv.VideoReader(self.video_path)
        assert v.position == 0
        for _ in range(10):
            v.read()
        assert v.position == 10
        # Fetching frame 99 is expected to leave the position at 100.
        v.get_frame(99)
        assert v.position == 100

    def test_iterator(self):
        """Iterating a reader yields every frame with shape (240, 294, 3)."""
        cnt = 0
        for img in mmcv.VideoReader(self.video_path):
            cnt += 1
            assert img.shape == (240, 294, 3)
        assert cnt == self.num_frames

    def test_with(self):
        """The reader is open inside a ``with`` block and closed after it."""
        with mmcv.VideoReader(self.video_path) as v:
            assert v.opened
        assert not v.opened

    def test_cvt2frames(self):
        """Dump frames to a temporary directory with several option sets."""
        v = mmcv.VideoReader(self.video_path)
        frame_dir = tempfile.mkdtemp()
        # Default options: one '{index:06d}.jpg' file per frame.
        v.cvt2frames(frame_dir)
        assert osp.isdir(frame_dir)
        for i in range(self.num_frames):
            filename = f'{frame_dir}/{i:06d}.jpg'
            assert osp.isfile(filename)
            os.remove(filename)

        # Same output expected with the progress bar disabled.
        v = mmcv.VideoReader(self.video_path)
        v.cvt2frames(frame_dir, show_progress=False)
        assert osp.isdir(frame_dir)
        for i in range(self.num_frames):
            filename = f'{frame_dir}/{i:06d}.jpg'
            assert osp.isfile(filename)
            os.remove(filename)

        # Custom template and range: files 100.JPEG .. 119.JPEG are expected.
        v = mmcv.VideoReader(self.video_path)
        v.cvt2frames(
            frame_dir,
            file_start=100,
            filename_tmpl='{:03d}.JPEG',
            start=100,
            max_num=20)
        assert osp.isdir(frame_dir)
        for i in range(100, 120):
            filename = f'{frame_dir}/{i:03d}.JPEG'
            assert osp.isfile(filename)
            os.remove(filename)
        shutil.rmtree(frame_dir)

    def test_frames2video(self):
        """Round-trip: dump frames, rebuild a video, and check fps and length."""
        v = mmcv.VideoReader(self.video_path)
        frame_dir = tempfile.mkdtemp()
        v.cvt2frames(frame_dir)
        assert osp.isdir(frame_dir)
        for i in range(self.num_frames):
            filename = f'{frame_dir}/{i:06d}.jpg'
            assert osp.isfile(filename)

        # With default arguments the rebuilt video is expected at 30 fps with all frames.
        out_filename = osp.join(tempfile.gettempdir(), 'mmcv_test.avi')
        mmcv.frames2video(frame_dir, out_filename)
        v = mmcv.VideoReader(out_filename)
        assert v.fps == 30
        assert len(v) == self.num_frames

        # Explicit fps and a [10, 50) frame range: 40 frames are expected.
        mmcv.frames2video(
            frame_dir,
            out_filename,
            fps=25,
            start=10,
            end=50,
            show_progress=False)

        with mmcv.VideoReader(out_filename) as v:
            assert v.fps == 25
            assert len(v) == 40

            # NOTE(review): the cleanup below runs inside the `with` block, and
            # `out_filename` itself is never removed by this test.
            for i in range(self.num_frames):
                filename = f'{frame_dir}/{i:06d}.jpg'
                os.remove(filename)
            shutil.rmtree(frame_dir)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/mmcv/tests/test_visualization.py
================================================
# Copyright (c) OpenMMLab. All rights reserved.
import numpy as np
import pytest

import mmcv


def test_color():
    """Check ``mmcv.color_val`` on accepted inputs and on rejected ones."""
    # (input, expected 3-tuple) pairs for every accepted input kind.
    accepted = [
        (mmcv.Color.blue, (255, 0, 0)),
        ('green', (0, 255, 0)),
        ((1, 2, 3), (1, 2, 3)),
        (100, (100, 100, 100)),
        (np.zeros(3, dtype=int), (0, 0, 0)),
    ]
    for color, expected in accepted:
        assert mmcv.color_val(color) == expected

    # (input, expected exception) pairs for rejected inputs.
    rejected = [
        ([255, 255, 255], TypeError),
        (1.0, TypeError),
        ((0, 0, 500), AssertionError),
    ]
    for color, error_type in rejected:
        with pytest.raises(error_type):
            mmcv.color_val(color)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/common/dataset.py
================================================
# GPL License
# Copyright (C) 2021 , UESTC
# All Rights Reserved
# @Author  : Xiao Wu, LiangJian Deng
# @reference:
import torch.utils.data as data
import torch
import h5py
import cv2
import numpy as np

class Dataset_Pro(data.Dataset):
    """In-memory pansharpening dataset loaded from a single HDF5 file.

    The file must provide the datasets ``gt``, ``ms``, ``lms`` and ``pan``.
    Each one is read completely, converted to float32, divided by
    ``img_scale`` and stored as a tensor.

    Args:
        file_path: path of the HDF5 file. When the path contains ``'valid'``
            the ``gt`` tensor is permuted with ``[0, 2, 3, 1]``.
        img_scale: divisor applied to every array.
            # presumably the sensor's maximum value — confirm with the caller.
    """

    def __init__(self, file_path, img_scale):
        super(Dataset_Pro, self).__init__()

        # Open read-only and close the handle once the arrays are in memory.
        # Every dataset is fully materialised with `[...]`, so nothing refers
        # to the file after the `with` block ends.
        with h5py.File(file_path, 'r') as h5_file:  # NxCxHxW = 0x1x2x3
            print(f"loading Dataset_Pro: {file_path} with {img_scale}")
            gt1 = np.array(h5_file["gt"][...], dtype=np.float32) / img_scale
            ms1 = np.array(h5_file["ms"][...], dtype=np.float32) / img_scale
            lms1 = np.array(h5_file["lms"][...], dtype=np.float32) / img_scale
            pan1 = np.array(h5_file['pan'][...], dtype=np.float32) / img_scale  # Nx1xHxW

        self.gt = torch.from_numpy(gt1)  # NxCxHxW
        self.ms = torch.from_numpy(ms1)
        self.lms = torch.from_numpy(lms1)
        self.pan = torch.from_numpy(pan1)  # Nx1xHxW

        if 'valid' in file_path:
            # Validation files get `gt` with axis 1 moved to the last position.
            self.gt = self.gt.permute([0, 2, 3, 1])

        print(pan1.shape, lms1.shape, gt1.shape, ms1.shape)

    # Required by torch.utils.data.Dataset
    def __getitem__(self, index):
        """Return sample ``index`` as a dict of float tensors (gt, lms, ms, pan)."""
        return {'gt': self.gt[index, :, :, :].float(),
                'lms': self.lms[index, :, :, :].float(),
                'ms': self.ms[index, :, :, :].float(),
                'pan': self.pan[index, :, :, :].float()}

    # Required by torch.utils.data.Dataset
    def __len__(self):
        """Number of samples (size of the first axis of ``gt``)."""
        return self.gt.shape[0]


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/common/dataset_hp.py
================================================
# GPL License
# Copyright (C) 2021 , UESTC
# All Rights Reserved
# @Author  : Xiao Wu, LiangJian Deng
# @reference:
import torch.utils.data as data
import torch
import h5py
import cv2
import numpy as np


def get_edge(data):  # for training: HxWxC
    """Return the high-pass residual of every sample in ``data``.

    Each sample along the first axis has its 5x5 box-filtered version
    (``cv2.boxFilter`` with ddepth ``-1``) subtracted from it.

    Args:
        data: array whose first axis indexes samples. A 3-D input is handled
            as N x H x W, anything else is indexed as a 4-D array.

    Returns:
        Array with the same shape and dtype as ``data``.
    """
    residual = np.zeros_like(data)
    kernel = (5, 5)
    single_band = len(data.shape) == 3
    for idx in range(data.shape[0]):
        if single_band:
            sample = data[idx, :, :]
            residual[idx, :, :] = sample - cv2.boxFilter(sample, -1, kernel)
        else:
            sample = data[idx, :, :, :]
            residual[idx, :, :, :] = sample - cv2.boxFilter(sample, -1, kernel)
    return residual


class Dataset_Pro(data.Dataset):
    """In-memory pansharpening dataset that also stores high-pass inputs.

    The HDF5 file must provide ``gt``, ``lms``, ``ms`` and ``pan``. ``gt`` and
    ``lms`` are stored as scaled tensors; ``ms`` and ``pan`` are scaled and
    passed through ``get_edge`` and stored as ``ms_hp`` / ``pan_hp``.

    Args:
        file_path: path of the HDF5 file.
        img_scale: divisor applied to every array.
            # presumably the sensor's maximum value — confirm with the caller.
    """

    def __init__(self, file_path, img_scale):
        super(Dataset_Pro, self).__init__()

        # Open read-only and close the handle once the arrays are in memory.
        # `[...]` materialises each dataset, so the file is not needed later.
        with h5py.File(file_path, 'r') as h5_file:  # NxCxHxW = 0x1x2x3=8806x8x64x64
            gt1 = h5_file["gt"][...]
            lms1 = h5_file["lms"][...]
            ms1 = h5_file["ms"][...]  # NxCxHxW=0,1,2,3
            pan1 = h5_file['pan'][...]  # Nx1xHxW

        gt1 = np.array(gt1, dtype=np.float32) / img_scale
        self.gt = torch.from_numpy(gt1)  # NxCxHxW

        lms1 = np.array(lms1, dtype=np.float32) / img_scale
        self.lms = torch.from_numpy(lms1)

        # get_edge filters per sample with the band axis last, so move the
        # band axis to the end for filtering and back to axis 1 afterwards.
        ms1 = np.array(ms1.transpose(0, 2, 3, 1), dtype=np.float32) / img_scale  # NxHxWxC
        ms1_tmp = get_edge(ms1)  # NxHxWxC
        self.ms_hp = torch.from_numpy(ms1_tmp).permute(0, 3, 1, 2)  # NxCxHxW

        pan1 = np.array(pan1.transpose(0, 2, 3, 1), dtype=np.float32) / img_scale  # NxHxWx1
        pan1 = np.squeeze(pan1, axis=3)  # NxHxW
        pan_hp_tmp = get_edge(pan1)  # NxHxW
        pan_hp_tmp = np.expand_dims(pan_hp_tmp, axis=3)  # NxHxWx1
        self.pan_hp = torch.from_numpy(pan_hp_tmp).permute(0, 3, 1, 2)  # Nx1xHxW
        print(
            f"gt: {self.gt.size()}, lms: {self.lms.size()}, pan_hp: {self.pan_hp.size()}, ms_hp: {self.ms_hp.size()} with {img_scale}")

    # Required by torch.utils.data.Dataset
    def __getitem__(self, index):
        """Return sample ``index`` as a dict of float tensors (gt, lms, ms_hp, pan_hp)."""
        return {'gt': self.gt[index, :, :, :].float(),
                'lms': self.lms[index, :, :, :].float(),
                'ms_hp': self.ms_hp[index, :, :, :].float(),
                'pan_hp': self.pan_hp[index, :, :, :].float()}

    # Required by torch.utils.data.Dataset
    def __len__(self):
        """Number of samples (size of the first axis of ``gt``)."""
        return self.gt.shape[0]


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/common/evaluate.py
================================================
# GPL License
# Copyright (C) 2021 , UESTC
# All Rights Reserved
# @Author  : Xiao Wu, LiangJian Deng
# @reference:
import math
import torch
import torch.nn.functional as F
import numpy as np


def q2n(gt, x, q_blocks_size, q_shift):
    """Compute the block-wise Q2n quality-index map of ``x`` against ``gt``.

    Both inputs are laid out as (N, rows, cols, bands). They are padded at the
    bottom/right so that the block grid covers the whole image, cast to
    ``uint16``, zero-padded along the band axis up to the next power of two,
    and compared block by block with ``onions_quality``.

    Changes with respect to the previous revision:
      * the band padding used ``np.concatenate(gt, dif, axis=-1)``, which
        raises ``TypeError`` whenever the band count is not a power of two;
        the arrays are now passed as a sequence;
      * the spatial padding is a true mirror of the last ``est`` rows/columns
        (edge value included). The earlier slice only mirrored when a single
        row/column was needed and replicated the edge otherwise.

    Args:
        gt: reference image, ``torch.Tensor`` or array of shape (N, H, W, C).
        x: image under test, same shape as ``gt``.
        q_blocks_size: side length of the square analysis block.
        q_shift: stride between consecutive blocks.

    Returns:
        ``numpy.ndarray`` of shape (N, stepx, stepy) holding the Euclidean
        norm, over bands, of the value ``onions_quality`` returns per block.

    Raises:
        ValueError: if ``gt`` and ``x`` do not have the same shape.

    NOTE(review): the values are cast to ``uint16``, so inputs scaled to
    [0, 1] lose almost all information — confirm the scale callers use.
    NOTE(review): ``onions_quality`` appears to assume N == 1 — confirm.
    """
    # Convert each input on its own so mixed tensor/array calls also work.
    if isinstance(gt, torch.Tensor):
        gt = gt.detach().cpu().numpy()
    if isinstance(x, torch.Tensor):
        x = x.detach().cpu().numpy()
    gt = np.asarray(gt, dtype=np.float64)
    x = np.asarray(x, dtype=np.float64)
    if gt.shape != x.shape:
        raise ValueError(f"gt and x must have the same shape, got {gt.shape} and {x.shape}")

    N, N1, N2, N3 = gt.shape
    size2 = q_blocks_size

    stepx = math.ceil(N1 / q_shift)
    stepy = math.ceil(N2 / q_shift)

    if stepy <= 0:
        stepy = 1
        stepx = 1

    # Rows/columns missing for the last block of each axis to fit.
    est1 = (stepx - 1) * q_shift + q_blocks_size - N1
    est2 = (stepy - 1) * q_shift + q_blocks_size - N2

    # 'symmetric' mirrors about the array edge including the edge sample,
    # applied to rows and columns of every band at once.
    pad_width = ((0, 0), (0, est1), (0, est2), (0, 0))
    gt = np.pad(gt, pad_width, mode='symmetric')
    x = np.pad(x, pad_width, mode='symmetric')

    x = np.array(x, dtype=np.uint16)
    gt = np.array(gt, dtype=np.uint16)

    _, N1, N2, N3 = gt.shape

    # Pad the band axis with zeros up to the next power of two.
    padded_bands = 2 ** math.ceil(math.log2(N3))
    if padded_bands != N3:
        dif = np.zeros(shape=[N, N1, N2, padded_bands - N3], dtype=np.uint16)
        gt = np.concatenate([gt, dif], axis=-1)
        x = np.concatenate([x, dif], axis=-1)

    _, _, _, N3 = gt.shape

    valori = np.zeros(shape=[N, stepx, stepy, N3])

    for j in range(stepx):
        rows = slice(j * q_shift, j * q_shift + q_blocks_size)
        for i in range(stepy):
            cols = slice(i * q_shift, i * q_shift + size2)
            valori[:, j, i, :] = onions_quality(gt[:, rows, cols, :],
                                                x[:, rows, cols, :],
                                                q_blocks_size)
    q2n_idx_map = np.sqrt(np.sum(valori ** 2, axis=-1))
    # q2n_index = np.mean(q2n_idx_map)
    return q2n_idx_map


def norm_blocco(x, eps=1e-8):
    """Standardise ``x`` with its global mean and std, then shift by one.

    Args:
        x: array providing ``mean()`` and ``std()``.
        eps: value used in place of the standard deviation when it is zero.

    Returns:
        Tuple ``((x - mean) / scale + 1, mean, scale)`` where ``scale`` is the
        standard deviation, or ``eps`` when the standard deviation equals 0.
    """
    mean_val = x.mean()
    std_val = x.std()
    scale = eps if std_val == 0 else std_val
    normalised = (x - mean_val) / scale + 1
    return normalised, mean_val, scale


def onions_quality(dat1, dat2, size1):
    """Compute the per-block quality vector used by ``q2n``.

    Args:
        dat1: reference block, 4-D array indexed as (N, rows, cols, bands).
        dat2: block under test, same layout as ``dat1``.
        size1: block side length; the block is treated as size1 x size1.

    Returns:
        Array ``q``: shape (N, 1, N3) when ``termine3`` is zero, otherwise the
        (N, N3) array ``qv`` minus a scaled ``qm`` (broadcast), times
        ``mean_bias * cbm``.

    NOTE(review): the accumulators ``mod_q1``/``mod_q2`` are (size1, size2) and
    ``termine3 == 0`` is used as a scalar condition, so this looks like it
    only supports N == 1 — confirm.
    """
    dat1 = np.float64(dat1)
    dat2 = np.float64(dat2)

    # Negate every band of dat2 except the first one.
    dat2 = np.concatenate([dat2[..., 0, np.newaxis], -dat2[..., 1:]], axis=-1)
    N, _, _, N3 = dat1.shape
    size2 = size1

    # Normalise each band of dat1 with its own mean/std (s, t) and apply the
    # same shift/scale to the matching band of dat2, keeping the sign flip
    # applied above on bands other than the first.
    for i in range(N3):
        a1, s, t = norm_blocco(np.squeeze(dat1[..., i]))
        # print(s,t)
        dat1[..., i] = a1
        if s == 0:
            # Zero mean: only shift, do not divide by t.
            if i == 0:
                dat2[..., i] = dat2[..., i] - s + 1
            else:
                dat2[..., i] = -(-dat2[..., i] - s + 1)
        else:
            if i == 0:
                dat2[..., i] = ((dat2[..., i] - s) / t) + 1
            else:
                dat2[..., i] = -(((-dat2[..., i] - s) / t) + 1)
    # Per-band means of both (normalised) blocks.
    m1 = np.zeros(shape=[N, N3])
    m2 = m1.copy()

    # mod_q*m: norm of the vector of band means; mod_q*: per-pixel norm across bands.
    mod_q1m = 0
    mod_q2m = 0
    mod_q1 = np.zeros(shape=[size1, size2])
    mod_q2 = np.zeros(shape=[size1, size2])

    for i in range(N3):
        m1[..., i] = np.mean(np.squeeze(dat1[..., i]))
        m2[..., i] = np.mean(np.squeeze(dat2[..., i]))
        mod_q1m += m1[..., i] ** 2
        mod_q2m += m2[..., i] ** 2
        mod_q1 += np.squeeze(dat1[..., i]) ** 2
        mod_q2 += np.squeeze(dat2[..., i]) ** 2

    mod_q1m = np.sqrt(mod_q1m)
    mod_q2m = np.sqrt(mod_q2m)
    mod_q1 = np.sqrt(mod_q1)
    mod_q2 = np.sqrt(mod_q2)

    termine2 = mod_q1m * mod_q2m  # 7.97
    termine4 = mod_q1m ** 2 + mod_q2m ** 2  #
    # n / (n - 1) factor with n = size1 * size2 (number of pixels in the block).
    int1 = (size1 * size2) / (size1 * size2 - 1) * np.mean(mod_q1 ** 2)
    int2 = (size1 * size2) / (size1 * size2 - 1) * np.mean(mod_q2 ** 2)
    termine3 = int1 + int2 - (size1 * size2) / ((size1 * size2 - 1)) * (mod_q1m ** 2 + mod_q2m ** 2)  # 17.8988  ** 2
    mean_bias = 2 * termine2 / termine4  # 1
    if termine3 == 0:
        # Degenerate case: only the last component carries mean_bias.
        q = np.zeros(shape=[N, 1, N3])
        q[:, :, N3 - 1] = mean_bias
    else:
        cbm = 2 / termine3
        # 32 32 8
        # Element-wise product of the blocks and product of the mean vectors.
        qu = onion_mult2D(dat1, dat2)
        qm = onion_mult(m1.reshape(-1), m2.reshape(-1))
        qv = np.zeros(shape=[N, N3])
        for i in range(N3):
            qv[..., i] = (size1 * size2) / ((size1 * size2) - 1) * np.mean(np.squeeze(qu[:, :, i]))
        q = qv - (size1 * size2) / ((size1 * size2) - 1) * qm
        q = q * mean_bias * cbm
    return q


def onion_mult2D(onion1, onion2):
    """Recursive band-wise product of two 4-D arrays along the last axis.

    The last axis is split in two halves, the second half has every component
    but its first negated, and the halves are combined recursively. With two
    components this reduces to ``[a*c - d*b, a*d + c*b]``; with one component
    it is a plain element-wise product.

    Args:
        onion1: 4-D array; its last-axis length drives the recursion.
        onion2: array combined with ``onion1``.

    Returns:
        Array produced by concatenating the two recursive halves on the last axis.
    """
    _, _, _, n_comp = onion1.shape

    # Base case: a single component is a plain product.
    if n_comp <= 1:
        return onion1 * onion2

    def _negate_tail(arr):
        # Keep the first component, flip the sign of all the others.
        return np.concatenate([arr[..., 0, np.newaxis], -arr[..., 1:]], axis=-1)

    half = n_comp // 2
    a = onion1[..., : half]
    b = _negate_tail(onion1[..., half:])
    c = onion2[..., : half]
    d = _negate_tail(onion2[..., half:])

    if n_comp == 2:
        return np.concatenate([a * c - d * b, a * d + c * b], axis=-1)

    first_half = onion_mult2D(a, c) - onion_mult2D(d, _negate_tail(b))
    second_half = onion_mult2D(_negate_tail(a), d) + onion_mult2D(c, b)
    return np.concatenate([first_half, second_half], axis=-1)


def onion_mult(onion1, onion2):
    """Recursive product of two 1-D component vectors.

    Same recursion as ``onion_mult2D`` but on flat vectors: split in halves,
    negate all but the first component of the second halves, and combine.
    With two components the result is ``[a*c - d*b, a*d + c*b]``.

    Args:
        onion1: 1-D array; its length drives the recursion.
        onion2: 1-D array combined with ``onion1``.

    Returns:
        1-D array with the combined components.
    """
    n_comp = len(onion1)

    # Base case: zero or one component is a plain (flattened) product.
    if n_comp <= 1:
        return np.array(onion1).reshape(-1) * np.array(onion2).reshape(-1)

    def _negate_tail(vec):
        # Keep the first component, flip the sign of all the others.
        return np.append(np.array(vec[0]), -vec[1:])

    half = n_comp // 2
    a = onion1[:half]
    b = _negate_tail(onion1[half:])
    c = onion2[:half]
    d = _negate_tail(onion2[half:])

    if n_comp == 2:
        return np.append(a * c - d * b, a * d + c * b)

    first_half = onion_mult(a, c) - onion_mult(d, _negate_tail(b))
    second_half = onion_mult(_negate_tail(a), d) + onion_mult(c, b)
    return np.append(first_half, second_half)


def compute_index(img_base, img_out, ratio):
    """Compute SAM (in degrees) and ERGAS between two images.

    The radian-to-degree conversion now uses ``math.pi``; the previous
    literal ``3.14159256`` had two digits transposed.

    Args:
        img_base: reference tensor, indexed as (rows, cols, bands).
        img_out: tensor under test, same layout as ``img_base``.
        ratio: value used as ``1 / ratio`` in the ERGAS formula.
            # presumably the PAN/MS resolution ratio — confirm with callers.

    Returns:
        Tuple ``(SAM, ERGAS)`` of scalar tensors.
    """
    chanel = img_out.shape[2]

    # SAM: mean spectral angle over pixels whose two spectra are both non-zero.
    sum1 = torch.sum(img_base * img_out, 2)
    sum2 = torch.sum(img_base * img_base, 2)
    sum3 = torch.sum(img_out * img_out, 2)
    t = (sum2 * sum3) ** 0.5
    num = torch.sum(torch.gt(t, 0))
    angle = torch.acos(sum1 / t)
    # NaN angles (e.g. 0/0 for all-zero spectra) contribute nothing to the sum.
    sumangle = torch.where(torch.isnan(angle), torch.full_like(angle, 0), angle).sum()
    if num == 0:
        averangle = sumangle
    else:
        averangle = sumangle / num
    SAM = averangle * 180 / math.pi

    # ERGAS: per-band MSE relative to the squared mean of the reference band.
    summ = 0
    for i in range(chanel):
        a1 = torch.mean((img_base[:, :, i] - img_out[:, :, i]) ** 2)
        m1 = torch.mean(img_base[:, :, i])
        a2 = m1 * m1
        summ = summ + a1 / a2
    ERGAS = 100 * (1 / ratio) * ((summ / chanel) ** 0.5)

    return SAM, ERGAS


import decimal

# Module-level side effect: sets the rounding mode of the current decimal context.
decimal.getcontext().rounding = "ROUND_HALF_UP"
# Number of decimal places analysis_accu rounds the mean spectral angle to.
n_digits = 6


def analysis_accu(img_base, img_out, ratio, flag_cut_bounds=True, dim_cut=21, choices=4):
    """Compute SAM, ERGAS and PSNR (optionally CC) between two images.

    The radian-to-degree conversion now uses ``math.pi``; the previous
    literal ``3.14159256`` had two digits transposed.

    Args:
        img_base: reference tensor, indexed as (rows, cols, bands).
        img_out: tensor under test, same layout as ``img_base``.
        ratio: value used as ``1 / ratio`` in the ERGAS formula.
        flag_cut_bounds: when true, both images are cropped to
            ``[dim_cut - 1:-dim_cut]`` on the two spatial axes first.
        dim_cut: border width used by the crop.
        choices: when equal to 5 the correlation coefficient is also returned.

    Returns:
        Dict with keys ``'SAM'``, ``'ERGAS'``, ``'PSNR'`` and, when
        ``choices == 5``, ``'CC'``.
    """
    if flag_cut_bounds:
        img_base = img_base[dim_cut - 1:-dim_cut, dim_cut - 1:-dim_cut, :]
        img_out = img_out[dim_cut - 1:-dim_cut, dim_cut - 1:-dim_cut, :]

    h = img_out.shape[0]
    w = img_out.shape[1]
    chanel = img_out.shape[2]

    # SAM: mean spectral angle over pixels whose two spectra are both non-zero.
    sum1 = torch.sum(img_base * img_out, 2)
    sum2 = torch.sum(img_base * img_base, 2)
    sum3 = torch.sum(img_out * img_out, 2)
    t = (sum2 * sum3) ** 0.5
    num = torch.sum(torch.gt(t, 0))
    angle = torch.acos(sum1 / t)
    # NaN angles (e.g. 0/0 for all-zero spectra) contribute nothing to the sum.
    sumangle = torch.where(torch.isnan(angle), torch.full_like(angle, 0), angle).sum()
    if num == 0:
        averangle = sumangle
    else:
        averangle = sumangle / num

    # Round the angle (still in radians) to n_digits decimals before converting.
    averangle = (averangle * 10 ** n_digits).round() / (10 ** n_digits)
    SAM = averangle * 180 / math.pi

    # ERGAS: per-band MSE relative to the squared mean of the reference band.
    summ = 0
    for i in range(chanel):
        a1 = torch.mean((img_base[:, :, i] - img_out[:, :, i]) ** 2)
        m1 = torch.mean(img_base[:, :, i])
        a2 = m1 * m1
        summ = summ + a1 / a2
    ERGAS = 100 * (1 / ratio) * ((summ / chanel) ** 0.5)

    # PSNR per band with a peak value of 1.0, then averaged over bands.
    PSNR = 10 * torch.log10(math.pow(1.0, 2) / torch.mean((img_out - img_base) ** 2, [0, 1]))
    PSNR = torch.mean(PSNR)

    if choices == 5:
        # CC: per-band correlation coefficient, averaged over bands.
        C1 = torch.sum(torch.sum(img_base * img_out, 0), 0) - h * w * (
                torch.mean(torch.mean(img_base, 0), 0) * torch.mean(torch.mean(img_out, 0), 0))
        C2 = torch.sum(torch.sum(img_out ** 2, 0), 0) - h * w * (torch.mean(torch.mean(img_out, 0), 0) ** 2)
        C3 = torch.sum(torch.sum(img_base ** 2, 0), 0) - h * w * (torch.mean(torch.mean(img_base, 0), 0) ** 2)
        CC = C1 / ((C2 * C3) ** 0.5)
        CC = torch.mean(CC)
        return {'SAM': SAM, 'ERGAS': ERGAS, 'PSNR': PSNR, 'CC': CC}  # , q2n_index

    return {'SAM': SAM, 'ERGAS': ERGAS, 'PSNR': PSNR, }


def _ssim(img1, img2):
    """Compute the mean SSIM per image and channel with a Gaussian window.

    The window is now placed on ``img1``'s device. It used to be moved with
    ``.cuda()``, which fails on machines without CUDA and mismatches inputs
    that live on the CPU or on a non-default GPU.

    Args:
        img1: 4-D tensor; axis 1 is used as the channel axis.
        img2: tensor with the same shape as ``img1``.

    Returns:
        Tensor of shape (batch, channel): the SSIM map averaged over the two
        spatial axes. Values are computed with ``max_val = 1``.
    """
    img1 = img1.float()
    img2 = img2.float()

    channel = img1.shape[1]
    max_val = 1
    _, _, dim_a, dim_b = img1.size()
    # Shrink the window (and its sigma) for images smaller than 11 pixels.
    window_size = min(dim_a, dim_b, 11)
    sigma = 1.5 * window_size / 11
    window = create_window(window_size, sigma, channel).to(img1.device)
    mu1 = F.conv2d(img1, window, padding=window_size // 2, groups=channel)
    mu2 = F.conv2d(img2, window, padding=window_size // 2, groups=channel)

    mu1_sq = mu1.pow(2)
    mu2_sq = mu2.pow(2)
    mu1_mu2 = mu1 * mu2

    sigma1_sq = F.conv2d(img1 * img1, window, padding=window_size // 2, groups=channel) - mu1_sq
    sigma2_sq = F.conv2d(img2 * img2, window, padding=window_size // 2, groups=channel) - mu2_sq
    sigma12 = F.conv2d(img1 * img2, window, padding=window_size // 2, groups=channel) - mu1_mu2
    C1 = (0.01 * max_val) ** 2
    C2 = (0.03 * max_val) ** 2
    V1 = 2.0 * sigma12 + C2
    V2 = sigma1_sq + sigma2_sq + C2
    ssim_map = ((2 * mu1_mu2 + C1) * V1) / ((mu1_sq + mu2_sq + C1) * V2)
    return ssim_map.mean(2).mean(2)


from torch.autograd import Variable


def gaussian(window_size, sigma):
    """Return a 1-D Gaussian kernel of length ``window_size`` that sums to one.

    Args:
        window_size: number of taps; the bell is centred on ``window_size // 2``.
        sigma: standard deviation of the Gaussian.

    Returns:
        1-D ``torch.Tensor`` with the normalised weights.
    """
    center = window_size // 2
    two_sigma_sq = float(2 * sigma ** 2)
    weights = []
    for pos in range(window_size):
        weights.append(math.exp(-(pos - center) ** 2 / two_sigma_sq))
    kernel = torch.Tensor(weights)
    return kernel / kernel.sum()


def create_window(window_size, sigma, channel):
    """Build a per-channel 2-D Gaussian window for a grouped convolution.

    Args:
        window_size: side length of the square window.
        sigma: standard deviation passed to ``gaussian``.
        channel: number of channels; the window is repeated once per channel.

    Returns:
        Contiguous tensor of shape (channel, 1, window_size, window_size).
    """
    column = gaussian(window_size, sigma).unsqueeze(1)
    # The outer product of the 1-D kernel with itself gives the 2-D kernel.
    kernel_2d = column.mm(column.t()).float()
    kernel_4d = kernel_2d.unsqueeze(0).unsqueeze(0)
    tiled = kernel_4d.expand(channel, 1, window_size, window_size).contiguous()
    return Variable(tiled)


def compare_index(A):
    """Pick the iteration whose metrics win a five-way vote.

    ``A`` is indexed as ``A[metric, band, iteration]`` and only band 0 is used
    for the vote. Walking over the iterations, the incumbent scores a point on
    metrics 0-2 when it is strictly larger and on metrics 3-4 when it is
    strictly smaller; the candidate gets every other point and replaces the
    incumbent when it has more points.

    Args:
        A: 3-D array with at least five entries on its first axis.

    Returns:
        Tuple ``(C_best, best_ind)``: ``A[:, :, k]`` of the winning iteration
        ``k`` and its 1-based index ``k + 1``.
    """
    n_iters = A.shape[2]
    incumbent = A[:, 0, 0]
    winner = 0
    for it in range(n_iters):
        candidate = A[:, 0, it]
        incumbent_pts = sum(1 for row in (0, 1, 2) if incumbent[row] > candidate[row])
        incumbent_pts += sum(1 for row in (3, 4) if incumbent[row] < candidate[row])
        candidate_pts = 5 - incumbent_pts
        if candidate_pts > incumbent_pts:
            incumbent = candidate
            winner = it

    return A[:, :, winner], winner + 1


if __name__ == "__main__":
    # Manual check: load two .mat images, scale them by 2047 and print the
    # metrics computed by analysis_accu.
    from scipy import io as sio

    ms = torch.from_numpy(sio.loadmat('../../tests/I_MS.mat')['I_MS'] / 2047.0).float()
    gt = torch.from_numpy(sio.loadmat('../../tests/I_GT.mat')['I_GT'] / 2047.0).float()
    metrics = analysis_accu(ms, gt, ratio=4, dim_cut=21)
    print(metrics)

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/common/psdata.py
================================================
# GPL License
# Copyright (C) 2021 , UESTC
# All Rights Reserved
# @Author  : Xiao Wu, LiangJian Deng
# @reference:
import glob
import torch
from torch.utils.data import DataLoader


class PansharpeningSession():
    """Builds the training and evaluation data loaders for pansharpening.

    The ``args`` object must expose ``samples_per_gpu`` and
    ``workers_per_gpu``; the loader builders additionally read
    ``args.data_dir`` and ``args.img_range``.
    """

    def __init__(self, args):
        self.args = args
        self.samples_per_gpu = args.samples_per_gpu
        self.workers_per_gpu = args.workers_per_gpu
        # self.patch_size = args.patch_size
        self.dataloaders = {}
        self.writers = {}

    def get_dataloader(self, dataset_name, distributed):
        """Return ``(DataLoader, sampler)`` for a training set.

        ``dataset_name`` must contain one of 'wv2', 'wv3', 'wv4', 'qb'; a name
        containing 'hp' selects the high-pass dataset class and only the part
        before the first '_' is used in the file name.

        Raises:
            NotImplementedError: for any other ``dataset_name``.
        """
        supported = ('wv2', 'wv3', 'wv4', 'qb')
        if not any(tag in dataset_name for tag in supported):
            print(f"train_{dataset_name} is not supported.")
            raise NotImplementedError

        if "hp" in dataset_name:
            # high-pass filter
            from UDL.pansharpening.common.dataset_hp import Dataset_Pro
            dataset_name = dataset_name.split('_')[0]  # 'wv2_hp'
        else:
            from UDL.pansharpening.common.dataset import Dataset_Pro

        h5_path = '/'.join([self.args.data_dir, 'training_data', f'train_{dataset_name}_10000.h5'])
        dataset = Dataset_Pro(h5_path, img_scale=self.args.img_range)

        if distributed:
            sampler = torch.utils.data.distributed.DistributedSampler(dataset)
        else:
            sampler = None

        # Shuffle only when no sampler is in charge of the ordering.
        loader = DataLoader(dataset,
                            batch_size=self.samples_per_gpu,
                            persistent_workers=self.workers_per_gpu > 0,
                            pin_memory=True,
                            shuffle=(sampler is None),
                            num_workers=self.workers_per_gpu,
                            drop_last=True,
                            sampler=sampler)

        return loader, sampler

    def get_eval_dataloader(self, dataset_name, distributed):
        """Return ``(DataLoader, sampler)`` for a validation or test set.

        The branch is chosen by substring: 'valid' (HDF5 validation file),
        'TestData' (multi-example HDF5 test file) or 'RR'/'FR' (single .mat
        file). A name containing 'hp' selects the high-pass variant.

        Raises:
            NotImplementedError: when ``dataset_name`` matches no branch.
        """
        uses_hp = "hp" in dataset_name

        if 'valid' in dataset_name:
            if uses_hp:
                from UDL.pansharpening.common.dataset_hp import Dataset_Pro
            else:
                from UDL.pansharpening.common.dataset import Dataset_Pro
            h5_path = '/'.join([self.args.data_dir, 'validation_data', f'{dataset_name}.h5'])
            dataset = Dataset_Pro(h5_path, img_scale=self.args.img_range)

        elif 'TestData' in dataset_name:
            # The satellite tag is the last '_' field, or the one before '_hp'.
            satellite = dataset_name.split('_')[-2 if uses_hp else -1]

            from UDL.pansharpening.evaluation.ps_evaluate import MultiExmTest_h5
            h5_path = '/'.join([self.args.data_dir, 'test_data', satellite.lower(),
                                f"{dataset_name.replace('_hp', '')}.h5"])
            dataset = MultiExmTest_h5(h5_path, dataset_name, img_scale=self.args.img_range)

        elif 'RR' in dataset_name or 'FR' in dataset_name:
            # The satellite tag is the field before the RR/FR suffix.
            splits = dataset_name.split('_')
            satellite = splits[-3 if uses_hp else -2]

            from UDL.pansharpening.evaluation.ps_evaluate import SingleDataset
            mat_path = '/'.join([self.args.data_dir, 'test_data', satellite.lower(),
                                 dataset_name.replace('_hp', '') + ".mat"])
            dataset = SingleDataset([mat_path], dataset_name, img_scale=self.args.img_range)

        else:
            print(f"{dataset_name} is not supported.")
            raise NotImplementedError

        if distributed:
            sampler = torch.utils.data.distributed.DistributedSampler(dataset)
        else:
            sampler = None

        loader = DataLoader(dataset,
                            batch_size=1,
                            shuffle=False,
                            num_workers=1,
                            drop_last=False,
                            sampler=sampler)
        return loader, sampler


if __name__ == '__main__':
    # Manual smoke test: build the training loader and print its length.
    # from option import args
    import argparse
    parser = argparse.ArgumentParser()

    args = parser.parse_args()
    args.samples_per_gpu = 8
    args.workers_per_gpu = 0
    args.data_dir = "C:/Datasets/pansharpening_2"
    # get_dataloader() only accepts names containing wv2/wv3/wv4/qb; the
    # previous value 'gf2' always ended in NotImplementedError.
    args.dataset = 'wv3'
    # get_dataloader() reads args.img_range as the dataset's img_scale; it was
    # never set here. NOTE(review): 2047.0 is an assumed range for wv3 — confirm.
    args.img_range = 2047.0

    # survey
    # wv3 9714 16-64
    # wv2 15084 16-64
    # gf2 19809 16-64
    # qb  17139 16-64
    sess = PansharpeningSession(args)
    # The session has no get_test_dataloader(); the training loader is built
    # with get_dataloader().
    train_loader, _ = sess.get_dataloader(args.dataset, False)
    print(len(train_loader))

    # import scipy.io as sio
    #
    # x = sio.loadmat("D:/Datasets/pansharpening/training_data/train1.mat")
    # print(x.keys())


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/configs/__init__.py
================================================
# from .models.builder import PANSHARPENING_MODELS, build_model
from importlib import import_module
import os
join = os.path.join
dirname = os.path.dirname

# Import every sibling module so that each config module is executed once
# when the package is imported.
# Only real '*.py' files are taken: the previous substring test ('.py' in pkg)
# also matched names such as 'x.pyc' or 'x.py.bak' and turned them into bogus
# module names, and it made the package import its own '__init__' again.
pkg_list = [import_module('.' + pkg[:-len('.py')], package="UDL.pansharpening.configs")
            for pkg in os.listdir(dirname(__file__))
            if pkg.endswith('.py') and pkg != '__init__.py']
del pkg_list

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/configs/hook_configs.py
================================================
# Runner/hook settings expressed as plain module-level values.
# NOTE(review): how each entry is consumed is not visible in this file —
# confirm the key semantics against the runner that loads it.

# checkpoint saving
# checkpoint_config = dict(interval=1)
checkpoint_config = dict(type='ModelCheckpoint', indicator='loss')
# yapf:disable
# Logging: text logger enabled, Tensorboard logger left commented out.
log_config = dict(
    interval=100,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])
# yapf:enable

# dist_params = dict(backend='nccl')
log_level = 'INFO'
# NOTE(review): both paths are absolute, machine-specific Windows paths to the
# same checkpoint file — confirm they are overridden before use elsewhere.
load_from = "D:/ProjectSets/NDA/UDL/UDL/results/pansharpening/wv3/FusionNet/Test/model_2022-04-02-12-02-55/275.pth.tar"
resume_from = "D:/ProjectSets/NDA/UDL/UDL/results/pansharpening/wv3/FusionNet/Test/model_2022-04-02-12-02-55/275.pth.tar"
workflow = [('train', 1)]

# optimizer
optimizer = dict(type='Adam', lr=3e-4)
optimizer_config = dict(grad_clip=None)
# No learning-rate schedule is configured.
lr_config = None
# learning policy
runner = dict(type='EpochBasedRunner', max_epochs=275)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/configs/option_bdpn.py
================================================
import argparse
from UDL.AutoDL import TaskDispatcher
import os

class parser_args(TaskDispatcher, name='BDPN'):
    """Default options and command-line parser for the BDPN model.

    The class is declared with ``name='BDPN'``, which is passed to
    ``TaskDispatcher`` (presumably its registry key -- confirm in UDL.AutoDL).
    """

    def __init__(self, cfg=None):
        """Parse ``sys.argv`` and merge the resulting options into ``cfg``.

        Args:
            cfg: base configuration object. When ``None`` a new
                ``panshaprening_cfg()`` is created.
        """
        super(parser_args, self).__init__()
        if cfg is None:
            from UDL.Basis.option import panshaprening_cfg
            cfg = panshaprening_cfg()

        def _str2bool(value):
            # argparse's ``type=bool`` turns every non-empty string (including
            # "False") into True, so booleans are parsed explicitly instead.
            if isinstance(value, bool):
                return value
            text = str(value).strip().lower()
            if text in ('true', 't', 'yes', 'y', '1'):
                return True
            if text in ('false', 'f', 'no', 'n', '0', ''):
                return False
            raise argparse.ArgumentTypeError(f'expected a boolean value, got {value!r}')

        # Everything in front of the task name inside this file's path is
        # used as the root under which results are stored.
        script_path = os.path.dirname(os.path.dirname(__file__))
        root_dir = script_path.split(cfg.task)[0]

        model_path = f'{root_dir}/results/{cfg.task}/wv3/BDPN/Test/.pth.tar'

        parser = argparse.ArgumentParser(description='PyTorch Pansharpening Training')
        # * Logger
        parser.add_argument('--out_dir', metavar='DIR', default=f'{root_dir}/results/{cfg.task}',
                            help='path to save model')
        # * Training
        parser.add_argument('--lr', default=0.0001, type=float)  # 1e-4 2e-4 8
        parser.add_argument('--lr_scheduler', default=True, type=_str2bool)
        parser.add_argument('--samples_per_gpu', default=8, type=int,  # 8
                            metavar='N', help='mini-batch size (default: %(default)s)')
        parser.add_argument('--print-freq', '-p', default=50, type=int,
                            metavar='N', help='print frequency (default: %(default)s)')
        parser.add_argument('--epochs', default=1000, type=int)
        parser.add_argument('--workers_per_gpu', default=0, type=int)
        parser.add_argument('--resume_from',
                            default=model_path,
                            type=str, metavar='PATH',
                            help='path to latest checkpoint (default: %(default)s)')
        # * Model and Dataset
        # 'BDPN' is listed so that passing the default explicitly is accepted.
        parser.add_argument('--arch', '-a', metavar='ARCH', default='BDPN', type=str,
                            choices=['PanNet', 'DiCNN', 'PNN', 'FusionNet', 'BDPN'])
        # NOTE(review): the default is a dict (argparse does not validate
        # defaults against ``choices``), while a value given on the command
        # line is a single string -- confirm that consumers handle both.
        parser.add_argument('--dataset', default={'train': 'wv3', 'val': 'NY1_WV3_RR'}, type=str,
                            choices=[None, 'wv2', 'wv3', 'wv4', 'qb',
                                     'TestData_qb', 'TestData_wv2', 'TestData_wv3', 'TestData_wv4',
                                     'San_Francisco_QB_RR', 'San_Francisco_QB_FR', 'NY1_WV3_FR',
                                     'NY1_WV3_RR', 'Alice_WV4_FR', 'Alice_WV4_RR', 'Rio_WV2_FR', 'Rio_WV2_RR'],
                            help="training choices: ['wv2', 'wv3', 'wv4', 'qb'],"
                                 "validation choices: ['valid_wv2_10000','valid_wv3_10000', 'valid_wv4_10000', 'valid_qb_10000']"
                                 "test choices is ['TestData_wv2', 'TestData_wv3', 'TestData_wv4', 'TestData_qb'], and others with RR/FR")
        parser.add_argument('--eval', default=False, type=_str2bool,
                            help="performing evalution for patch2entire")

        args = parser.parse_args()
        args.start_epoch = args.best_epoch = 1
        args.experimental_desc = "Test"
        # cfg.save_fmt = 'png'
        cfg.img_range = 2047.0

        cfg.merge_args2cfg(args)
        print(cfg.pretty_text)
        # cfg.workflow = [('train', 50), ('val', 1)]
        # cfg.workflow = [('val', 1)]  # only val workflow means perform test.
        cfg.workflow = [('train', 50)]
        self.merge_from_dict(cfg)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/configs/option_dicnn.py
================================================
import argparse
# from UDL.Basis.option import panshaprening_cfg, Config, os
from UDL.AutoDL import TaskDispatcher
import os

class parser_args(TaskDispatcher, name='DiCNN1'):
    """Default options and command-line parser for the DiCNN1 model.

    The class is declared with ``name='DiCNN1'``, which is passed to
    ``TaskDispatcher`` (presumably its registry key -- confirm in UDL.AutoDL).
    """

    def __init__(self, cfg=None):
        """Parse ``sys.argv`` and merge the resulting options into ``cfg``.

        Args:
            cfg: base configuration object. When ``None`` a new
                ``panshaprening_cfg()`` is created.
        """
        super(parser_args, self).__init__()
        if cfg is None:
            from UDL.Basis.option import panshaprening_cfg
            cfg = panshaprening_cfg()

        def _str2bool(value):
            # argparse's ``type=bool`` turns every non-empty string (including
            # "False") into True, so booleans are parsed explicitly instead.
            if isinstance(value, bool):
                return value
            text = str(value).strip().lower()
            if text in ('true', 't', 'yes', 'y', '1'):
                return True
            if text in ('false', 'f', 'no', 'n', '0', ''):
                return False
            raise argparse.ArgumentTypeError(f'expected a boolean value, got {value!r}')

        # Everything in front of the task name inside this file's path is
        # used as the root under which results are stored.
        script_path = os.path.dirname(os.path.dirname(__file__))
        root_dir = script_path.split(cfg.task)[0]

        # model_path = f'{root_dir}/results/{cfg.task}/gf2/DiCNN1/Test/.pth.tar'
        # model_path = f'{root_dir}/results/{cfg.task}/qb/DiCNN1/Test/m.pth.tar'
        model_path = f'{root_dir}/results/{cfg.task}/wv3/DiCNN1/Test/.pth.tar'

        parser = argparse.ArgumentParser(description='PyTorch Pansharpening Training')
        # * Logger
        parser.add_argument('--out_dir', metavar='DIR', default=f'{root_dir}/results/{cfg.task}',
                            help='path to save model')
        # * Training
        parser.add_argument('--lr', default=2e-4, type=float)  # 1e-4 2e-4 8
        parser.add_argument('--lr_scheduler', default=True, type=_str2bool)
        parser.add_argument('--samples_per_gpu', default=64, type=int,  # 8
                            metavar='N', help='mini-batch size (default: %(default)s)')
        parser.add_argument('--print-freq', '-p', default=1, type=int,
                            metavar='N', help='print frequency (default: %(default)s)')
        parser.add_argument('--epochs', default=5000, type=int)
        parser.add_argument('--workers_per_gpu', default=0, type=int)
        parser.add_argument('--resume_from',
                            default=model_path,
                            type=str, metavar='PATH',
                            help='path to latest checkpoint (default: %(default)s)')
        # * Model and Dataset
        # 'DiCNN1' is listed so that passing the default explicitly is accepted.
        parser.add_argument('--arch', '-a', metavar='ARCH', default='DiCNN1', type=str,
                            choices=['PanNet', 'DiCNN', 'PNN', 'FusionNet', 'DiCNN1'])
        # NOTE(review): the default is a dict (argparse does not validate
        # defaults against ``choices``), while a value given on the command
        # line is a single string -- confirm that consumers handle both.
        parser.add_argument('--dataset', default={'train': 'wv3', 'val': 'NY1_WV3_RR'}, type=str,
                            choices=[None, 'wv2', 'wv3', 'wv4', 'qb',
                                     'TestData_qb', 'TestData_wv2', 'TestData_wv3', 'TestData_wv4',
                                     'San_Francisco_QB_RR', 'San_Francisco_QB_FR', 'NY1_WV3_FR',
                                     'NY1_WV3_RR', 'Alice_WV4_FR', 'Alice_WV4_RR', 'Rio_WV2_FR', 'Rio_WV2_RR'],
                            help="training choices: ['wv2', 'wv3', 'wv4', 'qb'],"
                                 "validation choices: ['valid_wv2_10000','valid_wv3_10000', 'valid_wv4_10000', 'valid_qb_10000']"
                                 "test choices is ['TestData_wv2', 'TestData_wv3', 'TestData_wv4', 'TestData_qb'], and others with RR/FR")
        parser.add_argument('--eval', default=False, type=_str2bool,
                            help="performing evalution for patch2entire")

        args = parser.parse_args()
        args.start_epoch = args.best_epoch = 1
        args.experimental_desc = "Test"
        # cfg.save_fmt = 'png'
        cfg.img_range = 2047.0#1023.0

        cfg.merge_args2cfg(args)
        print(cfg.pretty_text)
        cfg.workflow = [('train', 1)]
        self.merge_from_dict(cfg)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/configs/option_drpnn.py
================================================
import argparse
# from UDL.Basis.option import panshaprening_cfg, Config, os
from UDL.AutoDL import TaskDispatcher
import os

class parser_args(TaskDispatcher, name='DRPNN'):
    """Default options and command-line parser for the DRPNN model.

    The class is declared with ``name='DRPNN'``, which is passed to
    ``TaskDispatcher`` (presumably its registry key -- confirm in UDL.AutoDL).
    """

    def __init__(self, cfg=None):
        """Parse ``sys.argv`` and merge the resulting options into ``cfg``.

        Args:
            cfg: base configuration object. When ``None`` a new
                ``panshaprening_cfg()`` is created.
        """
        super(parser_args, self).__init__()
        if cfg is None:
            from UDL.Basis.option import panshaprening_cfg
            cfg = panshaprening_cfg()

        def _str2bool(value):
            # argparse's ``type=bool`` turns every non-empty string (including
            # "False") into True, so booleans are parsed explicitly instead.
            if isinstance(value, bool):
                return value
            text = str(value).strip().lower()
            if text in ('true', 't', 'yes', 'y', '1'):
                return True
            if text in ('false', 'f', 'no', 'n', '0', ''):
                return False
            raise argparse.ArgumentTypeError(f'expected a boolean value, got {value!r}')

        # Everything in front of the task name inside this file's path is
        # used as the root under which results are stored.
        script_path = os.path.dirname(os.path.dirname(__file__))
        root_dir = script_path.split(cfg.task)[0]

        model_path = f'{root_dir}/results/{cfg.task}/wv3/DRPNN/Test/.pth.tar'

        parser = argparse.ArgumentParser(description='PyTorch Pansharpening Training')
        # * Logger
        parser.add_argument('--out_dir', metavar='DIR', default=f'{root_dir}/results/{cfg.task}',
                            help='path to save model')
        # * Training
        parser.add_argument('--lr', default=2e-4, type=float)  # 1e-4 2e-4 8
        parser.add_argument('--lr_scheduler', default=True, type=_str2bool)
        parser.add_argument('--samples_per_gpu', default=32, type=int,  # 8
                            metavar='N', help='mini-batch size (default: %(default)s)')
        parser.add_argument('--print-freq', '-p', default=50, type=int,
                            metavar='N', help='print frequency (default: %(default)s)')
        parser.add_argument('--epochs', default=500, type=int)
        parser.add_argument('--workers_per_gpu', default=0, type=int)
        parser.add_argument('--resume_from',
                            default=model_path,
                            type=str, metavar='PATH',
                            help='path to latest checkpoint (default: %(default)s)')
        # * Model and Dataset
        # 'DRPNN' is listed so that passing the default explicitly is accepted.
        parser.add_argument('--arch', '-a', metavar='ARCH', default='DRPNN', type=str,
                            choices=['PanNet', 'DiCNN', 'PNN', 'FusionNet', 'DRPNN'])
        # NOTE(review): the default is a dict (argparse does not validate
        # defaults against ``choices``), while a value given on the command
        # line is a single string -- confirm that consumers handle both.
        parser.add_argument('--dataset', default={'train': 'wv3', 'val': 'NY1_WV3_RR'}, type=str,
                            choices=[None, 'wv2', 'wv3', 'wv4', 'qb',
                                     'TestData_qb', 'TestData_wv2', 'TestData_wv3', 'TestData_wv4',
                                     'San_Francisco_QB_RR', 'San_Francisco_QB_FR', 'NY1_WV3_FR',
                                     'NY1_WV3_RR', 'Alice_WV4_FR', 'Alice_WV4_RR', 'Rio_WV2_FR', 'Rio_WV2_RR'],
                            help="training choices: ['wv2', 'wv3', 'wv4', 'qb'],"
                                 "validation choices: ['valid_wv2_10000','valid_wv3_10000', 'valid_wv4_10000', 'valid_qb_10000']"
                                 "test choices is ['TestData_wv2', 'TestData_wv3', 'TestData_wv4', 'TestData_qb'], and others with RR/FR")
        parser.add_argument('--eval', default=False, type=_str2bool,
                            help="performing evalution for patch2entire")

        args = parser.parse_args()
        args.start_epoch = args.best_epoch = 1
        args.experimental_desc = "Test"
        # cfg.save_fmt = 'png'
        cfg.img_range = 2047.0
        cfg.seed = 1
        cfg.merge_args2cfg(args)
        print(cfg.pretty_text)
        # cfg.workflow = [('train', 50), ('val', 1)]
        cfg.workflow = [('val', 1)]
        self.merge_from_dict(cfg)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/configs/option_fusionnet.py
================================================
import argparse
# from UDL.Basis.option import panshaprening_cfg, Config, os
from UDL.AutoDL import TaskDispatcher
import os

class parser_args(TaskDispatcher, name='FusionNet'):
    """Default options and command-line parser for the FusionNet model.

    The class is declared with ``name='FusionNet'``, which is passed to
    ``TaskDispatcher`` (presumably its registry key -- confirm in UDL.AutoDL).
    """

    def __init__(self, cfg=None):
        """Parse ``sys.argv`` and merge the resulting options into ``cfg``.

        Args:
            cfg: base configuration object. When ``None`` a new
                ``panshaprening_cfg()`` is created.
        """
        super(parser_args, self).__init__()
        if cfg is None:
            from UDL.Basis.option import panshaprening_cfg
            cfg = panshaprening_cfg()

        def _str2bool(value):
            # argparse's ``type=bool`` turns every non-empty string (including
            # "False") into True, so booleans are parsed explicitly instead.
            if isinstance(value, bool):
                return value
            text = str(value).strip().lower()
            if text in ('true', 't', 'yes', 'y', '1'):
                return True
            if text in ('false', 'f', 'no', 'n', '0', ''):
                return False
            raise argparse.ArgumentTypeError(f'expected a boolean value, got {value!r}')

        # Everything in front of the task name inside this file's path is
        # used as the root under which results are stored.
        script_path = os.path.dirname(os.path.dirname(__file__))
        root_dir = script_path.split(cfg.task)[0]

        model_path = './.pth.tar'

        parser = argparse.ArgumentParser(description='PyTorch Pansharpening Training')
        # * Logger
        parser.add_argument('--out_dir', metavar='DIR', default=f'{root_dir}/results/{cfg.task}',
                            help='path to save model')
        parser.add_argument('--mode', default=argparse.SUPPRESS, help='protective declare, please ignore it')

        parser.add_argument('--lr', default=3e-4, type=float)
        # parser.add_argument('--lr_scheduler', default=True, type=bool)
        parser.add_argument('--samples_per_gpu', default=32, type=int,
                            metavar='N', help='mini-batch size (default: %(default)s)')
        parser.add_argument('--print-freq', '-p', default=50, type=int,
                            metavar='N', help='print frequency (default: %(default)s)')
        parser.add_argument('--seed', default=1, type=int,
                            help='seed for initializing training. ')
        parser.add_argument('--epochs', default=400, type=int)
        parser.add_argument('--workers_per_gpu', default=0, type=int)
        parser.add_argument('--resume_from',
                            default=model_path,
                            type=str, metavar='PATH',
                            help='path to latest checkpoint (default: %(default)s)')
        ##
        parser.add_argument('--arch', '-a', metavar='ARCH', default='FusionNet', type=str,
                            choices=['PanNet', 'DiCNN', 'PNN', 'FusionNet'])
        # NOTE(review): the default is a dict (argparse does not validate
        # defaults against ``choices``), while a value given on the command
        # line is a single string -- confirm that consumers handle both.
        parser.add_argument('--dataset', default={'train': 'wv3', 'val': 'NY1_WV3_RR'}, type=str,
                            choices=[None, 'wv2', 'wv3', 'wv4', 'qb',
                                     'TestData_qb', 'TestData_wv2', 'TestData_wv3', 'TestData_wv4',
                                     'San_Francisco_QB_RR', 'San_Francisco_QB_FR', 'NY1_WV3_FR',
                                     'NY1_WV3_RR', 'Alice_WV4_FR', 'Alice_WV4_RR', 'Rio_WV2_FR', 'Rio_WV2_RR'],
                            help="training choices: ['wv2', 'wv3', 'wv4', 'qb'],"
                                 "validation choices: ['valid_wv2_10000','valid_wv3_10000', 'valid_wv4_10000', 'valid_qb_10000']"
                                 "test choices is ['TestData_wv2', 'TestData_wv3', 'TestData_wv4', 'TestData_qb'], and others with RR/FR")
        parser.add_argument('--eval', default=False, type=_str2bool,
                            help="performing evalution for patch2entire")


        args = parser.parse_args()
        args.start_epoch = args.best_epoch = 1
        args.experimental_desc = 'Test'
        cfg.merge_args2cfg(args)
        cfg.save_fmt = "mat"
        # cfg.workflow = [('train', 10), ('val', 1)]
        cfg.workflow = [('val', 1), ('train', 1)]
        # cfg.config = f"{script_path}/configs/hook_configs.py"
        cfg.use_tfb = False
        cfg.img_range = 2047.0#1023.0

        self.merge_from_dict(cfg)

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/configs/option_msdcnn.py
================================================
import argparse
# from UDL.Basis.option import panshaprening_cfg, Config, os
from UDL.AutoDL import TaskDispatcher
import os

class parser_args(TaskDispatcher, name='MSDCNN'):
    """Default options and command-line parser for the MSDCNN model.

    The class is declared with ``name='MSDCNN'``, which is passed to
    ``TaskDispatcher`` (presumably its registry key -- confirm in UDL.AutoDL).
    """

    def __init__(self, cfg=None):
        """Parse ``sys.argv`` and merge the resulting options into ``cfg``.

        Args:
            cfg: base configuration object. When ``None`` a new
                ``panshaprening_cfg()`` is created.
        """
        super(parser_args, self).__init__()
        if cfg is None:
            from UDL.Basis.option import panshaprening_cfg
            cfg = panshaprening_cfg()

        def _str2bool(value):
            # argparse's ``type=bool`` turns every non-empty string (including
            # "False") into True, so booleans are parsed explicitly instead.
            if isinstance(value, bool):
                return value
            text = str(value).strip().lower()
            if text in ('true', 't', 'yes', 'y', '1'):
                return True
            if text in ('false', 'f', 'no', 'n', '0', ''):
                return False
            raise argparse.ArgumentTypeError(f'expected a boolean value, got {value!r}')

        # Everything in front of the task name inside this file's path is
        # used as the root under which results are stored.
        script_path = os.path.dirname(os.path.dirname(__file__))
        root_dir = script_path.split(cfg.task)[0]

        model_path = f'{root_dir}/results/{cfg.task}/wv3/MSDCNN/Test/.pth.tar'

        parser = argparse.ArgumentParser(description='PyTorch Pansharpening Training')
        # * Logger
        parser.add_argument('--out_dir', metavar='DIR', default=f'{root_dir}/results/{cfg.task}',
                            help='path to save model')
        # * Training
        parser.add_argument('--lr', default=0.000001, type=float)  # 1e-4 2e-4 8
        parser.add_argument('--lr_scheduler', default=True, type=_str2bool)
        parser.add_argument('--samples_per_gpu', default=64, type=int,  # 8
                            metavar='N', help='mini-batch size (default: %(default)s)')
        parser.add_argument('--print-freq', '-p', default=50, type=int,
                            metavar='N', help='print frequency (default: %(default)s)')
        parser.add_argument('--epochs', default=500, type=int)
        parser.add_argument('--workers_per_gpu', default=0, type=int)
        parser.add_argument('--resume_from',
                            default=model_path,
                            type=str, metavar='PATH',
                            help='path to latest checkpoint (default: %(default)s)')
        # * Model and Dataset
        # 'MSDCNN' is listed so that passing the default explicitly is accepted.
        parser.add_argument('--arch', '-a', metavar='ARCH', default='MSDCNN', type=str,
                            choices=['PanNet', 'DiCNN', 'PNN', 'FusionNet', 'MSDCNN'])
        # NOTE(review): the default is a dict (argparse does not validate
        # defaults against ``choices``), while a value given on the command
        # line is a single string -- confirm that consumers handle both.
        parser.add_argument('--dataset', default={'train': 'wv3', 'val': 'NY1_WV3_RR'}, type=str,
                            choices=[None, 'wv2', 'wv3', 'wv4', 'qb',
                                     'TestData_qb', 'TestData_wv2', 'TestData_wv3', 'TestData_wv4',
                                     'San_Francisco_QB_RR', 'San_Francisco_QB_FR', 'NY1_WV3_FR',
                                     'NY1_WV3_RR', 'Alice_WV4_FR', 'Alice_WV4_RR', 'Rio_WV2_FR', 'Rio_WV2_RR'],
                            help="training choices: ['wv2', 'wv3', 'wv4', 'qb'],"
                                 "validation choices: ['valid_wv2_10000','valid_wv3_10000', 'valid_wv4_10000', 'valid_qb_10000']"
                                 "test choices is ['TestData_wv2', 'TestData_wv3', 'TestData_wv4', 'TestData_qb'], and others with RR/FR")
        parser.add_argument('--eval', default=False, type=_str2bool,
                            help="performing evalution for patch2entire")

        args = parser.parse_args()
        args.start_epoch = args.best_epoch = 1
        args.experimental_desc = "Test"
        # cfg.save_fmt = 'png'
        cfg.img_range = 2047.0

        cfg.merge_args2cfg(args)
        print(cfg.pretty_text)
        # cfg.workflow = [('train', 50), ('val', 1)]
        # cfg.workflow = [('val', 1)]
        cfg.workflow = [('train', 50)]
        self.merge_from_dict(cfg)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/configs/option_pannet.py
================================================
import argparse
# from UDL.Basis.option import panshaprening_cfg, Config, os
from UDL.AutoDL import TaskDispatcher
import os

class parser_args(TaskDispatcher, name='PanNet'):
    """Default options and command-line parser for the PanNet model.

    The class is declared with ``name='PanNet'``, which is passed to
    ``TaskDispatcher`` (presumably its registry key -- confirm in UDL.AutoDL).
    The dataset names used here carry an ``_hp`` suffix.
    """

    def __init__(self, cfg=None):
        """Parse ``sys.argv`` and merge the resulting options into ``cfg``.

        Args:
            cfg: base configuration object. When ``None`` a new
                ``panshaprening_cfg()`` is created.
        """
        super(parser_args, self).__init__()
        if cfg is None:
            from UDL.Basis.option import panshaprening_cfg
            cfg = panshaprening_cfg()

        def _str2bool(value):
            # argparse's ``type=bool`` turns every non-empty string (including
            # "False") into True, so booleans are parsed explicitly instead.
            if isinstance(value, bool):
                return value
            text = str(value).strip().lower()
            if text in ('true', 't', 'yes', 'y', '1'):
                return True
            if text in ('false', 'f', 'no', 'n', '0', ''):
                return False
            raise argparse.ArgumentTypeError(f'expected a boolean value, got {value!r}')

        # Everything in front of the task name inside this file's path is
        # used as the root under which results are stored.
        script_path = os.path.dirname(os.path.dirname(__file__))
        root_dir = script_path.split(cfg.task)[0]

        # model_path = f'{root_dir}/results/{cfg.task}/qb_hp/PanNet/Test/.pth.tar'
        model_path = f'{root_dir}/results/{cfg.task}/wv3_hp/PanNet/Test/.pth.tar'
        # model_path = f''
        parser = argparse.ArgumentParser(description='PyTorch Pansharpening Training')
        # * Logger
        parser.add_argument('--out_dir', metavar='DIR', default=f'{root_dir}/results/{cfg.task}',
                            help='path to save model')
        # * Training
        parser.add_argument('--lr', default=1e-3, type=float)  # 1e-4 2e-4 8
        parser.add_argument('--lr_scheduler', default=True, type=_str2bool)
        parser.add_argument('--samples_per_gpu', default=32, type=int,  # 8
                            metavar='N', help='mini-batch size (default: %(default)s)')
        parser.add_argument('--print-freq', '-p', default=50, type=int,
                            metavar='N', help='print frequency (default: %(default)s)')
        parser.add_argument('--seed', default=1, type=int,
                            help='seed for initializing training. ')
        parser.add_argument('--epochs', default=450, type=int)
        parser.add_argument('--workers_per_gpu', default=0, type=int)
        parser.add_argument('--resume_from',
                            default=model_path,
                            type=str, metavar='PATH',
                            help='path to latest checkpoint (default: %(default)s)')

        # * Model and Dataset
        parser.add_argument('--arch', '-a', metavar='ARCH', default='PanNet', type=str,
                            choices=['PanNet', 'DiCNN', 'PNN', 'FusionNet'])
        # 'Alice_WV4_FR_hp' is added for consistency with the other *_hp
        # names; the un-suffixed 'Alice_WV4_FR' stays accepted so existing
        # command lines keep working.
        # NOTE(review): the default is a dict (argparse does not validate
        # defaults against ``choices``), while a value given on the command
        # line is a single string -- confirm that consumers handle both.
        parser.add_argument('--dataset', default={'train': 'wv3_hp', 'val': 'NY1_WV3_RR_hp'}, type=str,
                            choices=[None, 'wv2_hp', 'wv3_hp', 'wv4_hp', 'qb_hp',
                                     'TestData_qb_hp', 'TestData_wv2_hp', 'TestData_wv3_hp', 'TestData_wv4_hp',
                                     'San_Francisco_QB_RR_hp', 'San_Francisco_QB_FR_hp', 'NY1_WV3_FR_hp',
                                     'NY1_WV3_RR_hp', 'Alice_WV4_FR', 'Alice_WV4_FR_hp', 'Alice_WV4_RR_hp',
                                     'Rio_WV2_FR_hp', 'Rio_WV2_RR_hp'],
                            help="training choices: ['wv2', 'wv3', 'wv4', 'qb'],"
                                 "validation choices: ['valid_wv2_10000','valid_wv3_10000', 'valid_wv4_10000', 'valid_qb_10000']"
                                 "test choices is ['TestData_wv2', 'TestData_wv3', 'TestData_wv4', 'TestData_qb'], and others with RR/FR")
        parser.add_argument('--eval', default=False, type=_str2bool,
                            help="performing evalution for patch2entire")

        args = parser.parse_args()
        args.start_epoch = args.best_epoch = 1
        args.experimental_desc = "Test"


        cfg.merge_args2cfg(args)
        cfg.img_range = 2047.0
        cfg.reg = True
        # Set once, before printing, so the printed config shows the final value.
        cfg.workflow = [('train', 1)]
        print(cfg.pretty_text)
        self.merge_from_dict(cfg)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/configs/option_pnn.py
================================================
import argparse
# from UDL.Basis.option import panshaprening_cfg, Config, os
from UDL.AutoDL import TaskDispatcher
import os

class parser_args(TaskDispatcher, name='PNN'):
    """Default options and command-line parser for the PNN model.

    The class is declared with ``name='PNN'``, which is passed to
    ``TaskDispatcher`` (presumably its registry key -- confirm in UDL.AutoDL).
    """

    def __init__(self, cfg=None):
        """Parse ``sys.argv`` and merge the resulting options into ``cfg``.

        Args:
            cfg: base configuration object. When ``None`` a new
                ``panshaprening_cfg()`` is created.
        """
        super(parser_args, self).__init__()
        if cfg is None:
            from UDL.Basis.option import panshaprening_cfg
            cfg = panshaprening_cfg()

        def _str2bool(value):
            # argparse's ``type=bool`` turns every non-empty string (including
            # "False") into True, so booleans are parsed explicitly instead.
            if isinstance(value, bool):
                return value
            text = str(value).strip().lower()
            if text in ('true', 't', 'yes', 'y', '1'):
                return True
            if text in ('false', 'f', 'no', 'n', '0', ''):
                return False
            raise argparse.ArgumentTypeError(f'expected a boolean value, got {value!r}')

        # Everything in front of the task name inside this file's path is
        # used as the root under which results are stored; backslashes are
        # normalised to forward slashes.
        script_path = os.path.dirname(os.path.dirname(__file__))
        root_dir = script_path.split(cfg.task)[0].replace('\\', '/')

        model_path = '.pth.tar'

        parser = argparse.ArgumentParser(description='PyTorch Pansharpening Training')
        # * Logger
        parser.add_argument('--out_dir', metavar='DIR', default=f'{root_dir}/results/{cfg.task}',
                            help='path to save model')
        # * Training
        parser.add_argument('--lr', default=1e-3, type=float)  # 1e-4 2e-4 8
        parser.add_argument('--lr_scheduler', default=True, type=_str2bool)
        parser.add_argument('--samples_per_gpu', default=64, type=int,  # 8
                            metavar='N', help='mini-batch size (default: %(default)s)')
        parser.add_argument('--print-freq', '-p', default=500, type=int,
                            metavar='N', help='print frequency (default: %(default)s)')
        parser.add_argument('--seed', default=1, type=int,
                            help='seed for initializing training. ')
        parser.add_argument('--epochs', default=12000, type=int)
        parser.add_argument('--workers_per_gpu', default=0, type=int)
        parser.add_argument('--resume_from',
                            default=model_path,
                            type=str, metavar='PATH',
                            help='path to latest checkpoint (default: %(default)s)')
        # * Model and Dataset
        parser.add_argument('--arch', '-a', metavar='ARCH', default='PNN', type=str,
                            choices=['PanNet', 'DiCNN', 'PNN', 'FusionNet'])
        # NOTE(review): the default is a dict (argparse does not validate
        # defaults against ``choices``), while a value given on the command
        # line is a single string -- confirm that consumers handle both.
        parser.add_argument('--dataset', default={'train': 'wv3', 'val': 'NY1_WV3_RR'}, type=str,
                            choices=[None, 'wv2', 'wv3', 'wv4', 'qb',
                                     'TestData_qb', 'TestData_wv2', 'TestData_wv3', 'TestData_wv4',
                                     'San_Francisco_QB_RR', 'San_Francisco_QB_FR', 'NY1_WV3_FR',
                                     'NY1_WV3_RR', 'Alice_WV4_FR', 'Alice_WV4_RR', 'Rio_WV2_FR', 'Rio_WV2_RR'],
                            help="training choices: ['wv2', 'wv3', 'wv4', 'qb'],"
                                 "validation choices: ['valid_wv2_10000','valid_wv3_10000', 'valid_wv4_10000', 'valid_qb_10000']"
                                 "test choices is ['TestData_wv2', 'TestData_wv3', 'TestData_wv4', 'TestData_qb'], and others with RR/FR")
        parser.add_argument('--eval', default=False, type=_str2bool,
                            help="performing evalution for patch2entire")
        args = parser.parse_args()
        args.start_epoch = args.best_epoch = 1
        args.experimental_desc = 'Test'
        cfg.merge_args2cfg(args)
        cfg.workflow = [('train', 1)]
        cfg.img_range = 2047.0
        print(cfg.pretty_text)
        self.merge_from_dict(cfg)

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/evaluation/ps_evaluate.py
================================================
import os
import datetime
import imageio
import numpy as np
import cv2
import h5py
import torch
import torch.nn.functional as F
from scipy import io as sio
from torch.utils.data import DataLoader, Dataset
from UDL.Basis.auxiliary import MetricLogger, SmoothedValue, set_random_seed
from UDL.Basis.dist_utils import init_dist, dist_train_v1, get_dist_info, reduce_mean
from UDL.pansharpening.common.evaluate import analysis_accu
from UDL.Basis.postprocess import showimage8
import matplotlib.pyplot as plt
# from UDL.Basis.zoom_image_region import show_region_images
from logging import info as log_string

# dmd
def load_gt_compared(file_path_gt, file_path_compared):
    """Load a ground-truth .mat file and a result .mat file for comparison.

    Args:
        file_path_gt: path of a .mat file holding the array ``'gt'``.
        file_path_compared: path of a .mat file holding the array
            ``'output_dmdnet_newdata6'``.

    Returns:
        ``(gt, compared_data)`` as torch tensors, where ``gt`` is the stored
        array divided by 2047.0 and ``compared_data`` is the stored array
        multiplied by 2047.0.

    Raises:
        KeyError: if ``'gt'`` is missing from ``file_path_gt``; the message
            lists the keys that are present.
    """
    data1 = sio.loadmat(file_path_gt)  # HxWxC
    data2 = sio.loadmat(file_path_compared)
    if 'gt' not in data1:
        # Fail here with a useful message instead of printing the keys and
        # then crashing on an unbound ``gt`` at the return statement.
        raise KeyError(f"'gt' not found in {file_path_gt}; available keys: {list(data1.keys())}")
    gt = torch.from_numpy(data1['gt'] / 2047.0)
    compared_data = torch.from_numpy(data2['output_dmdnet_newdata6'] * 2047.0)
    return gt, compared_data


def get_edge(data):
    """Return the high-frequency part of ``data``.

    Each 2-D plane has its 5x5 ``cv2.boxFilter`` response subtracted from it.
    A 4-D input is processed plane by plane over its first two axes, a 3-D
    input over its last axis, and any other input is filtered as a whole.
    """
    def _high_pass(plane):
        # plane minus its 5x5 box-filtered version
        return plane - cv2.boxFilter(plane, -1, (5, 5))

    edges = np.zeros_like(data)
    rank = edges.ndim
    if rank == 4:
        for n, c in np.ndindex(data.shape[0], data.shape[1]):
            edges[n, c, :, :] = _high_pass(data[n, c, :, :])
    elif rank == 3:
        for c in range(data.shape[2]):
            edges[:, :, c] = _high_pass(data[:, :, c])
    else:
        edges = _high_pass(data)

    return edges


def load_dataset_singlemat_hp(file_path, scale):
    """Load one .mat sample, high-pass filtering the MS and PAN inputs.

    Reads ``'I_MS'``, ``'I_MS_LR'``, ``'I_PAN'`` and ``'I_GT'`` from
    ``file_path`` and divides each by ``scale``. ``I_MS_LR`` and ``I_PAN``
    are additionally passed through ``get_edge``.

    Returns:
        ``(lms, ms_hp, pan_hp, gt)`` as float tensors. ``lms`` and ``ms_hp``
        are permuted from HxWxC to CxHxW and squeezed; ``pan_hp`` and ``gt``
        keep the layout stored in the file.
    """
    mat = sio.loadmat(file_path)  # HxWxC

    upsampled = torch.from_numpy(mat['I_MS'] / scale).permute(2, 0, 1)

    ms_edges = torch.from_numpy(get_edge(mat['I_MS_LR'] / scale))
    ms_edges = ms_edges.permute(2, 0, 1).unsqueeze(dim=0)

    pan_edges = torch.from_numpy(get_edge(mat['I_PAN'] / scale))
    reference = torch.from_numpy(mat['I_GT'] / scale)

    lms = upsampled.squeeze().float()
    ms_hp = ms_edges.squeeze().float()
    return lms, ms_hp, pan_edges.float(), reference.float()


def load_dataset_singlemat(file_path, scale):
    """Load one .mat sample and divide its arrays by ``scale``.

    Reads ``'I_MS'``, ``'I_MS_LR'`` and ``'I_PAN'`` from ``file_path``.
    ``'I_GT'`` is used as ground truth when present; otherwise ``'I_MS'``
    takes its place. The keys found in the file are printed.

    Returns:
        ``(lms, ms, pan, gt)`` as float tensors. ``lms`` and ``ms`` are
        permuted from HxWxC to CxHxW and squeezed; ``pan`` and ``gt`` keep
        the layout stored in the file.
    """
    mat = sio.loadmat(file_path)  # HxWxC
    print("load_dataset_singlemat: ", mat.keys())

    def _scaled(key):
        # tensor view of one stored array, normalised by ``scale``
        return torch.from_numpy(mat[key] / scale)

    lms = _scaled('I_MS').permute(2, 0, 1)
    ms = _scaled('I_MS_LR').permute(2, 0, 1).unsqueeze(dim=0)
    pan = _scaled('I_PAN')

    # Fall back to the up-sampled MS image when no ground truth is stored.
    gt_key = 'I_MS' if mat.get('I_GT', None) is None else 'I_GT'
    gt = _scaled(gt_key)

    return lms.squeeze().float(), ms.squeeze().float(), pan.float(), gt.float()


def load_dataset_H5_hp(file_path, scale, use_cuda=True):
    """Load an HDF5 dataset with high-pass filtered ``ms`` and ``pan``.

    Args:
        file_path: path of the HDF5 file; it must contain ``'lms'``, ``'ms'``
            and ``'pan'`` and may contain ``'gt'``.
        scale: divisor applied to ``lms``, ``ms`` and ``pan``.
        use_cuda: accepted for signature parity with ``load_dataset_H5``;
            not used, all tensors stay on the CPU.

    Returns:
        dict of float tensors:
          * ``'lms'``  - ``lms / scale``
          * ``'ms'``   - ``get_edge(ms / scale)``
          * ``'mms'``  - ``'ms'`` bilinearly up-sampled by 2 in its last two
            axes; also available under the legacy key ``'mms:'``
          * ``'pan'``  - ``get_edge(pan / scale)`` with a singleton axis at
            position 1
          * ``'gt'``   - un-scaled ``gt`` (or ``lms`` when ``gt`` is absent),
            permuted with ``[0, 2, 3, 1]``
    """
    # Open read-only and close the file as soon as the arrays are in memory.
    with h5py.File(file_path, 'r') as data:
        lms_np = data['lms'][...]
        ms_np = data['ms'][...]
        pan = np.squeeze(data['pan'][...])
        gt_np = data['gt'][...] if data.get('gt', None) is not None else lms_np

    # tensor type: NxCxHxW
    lms = torch.from_numpy(lms_np / scale).float()
    ms_hp = torch.from_numpy(get_edge(ms_np / scale)).float()
    mms_hp = torch.nn.functional.interpolate(ms_hp, size=(ms_hp.size(2) * 2, ms_hp.size(3) * 2),
                                             mode="bilinear", align_corners=True)

    if pan.ndim == 2:
        # np.squeeze also removed the batch axis of a single-sample file.
        pan = pan[np.newaxis]
    pan = pan[:, np.newaxis, :, :]  # NxCxHxW (C=1)
    pan_hp = torch.from_numpy(get_edge(pan / scale)).float()

    gt = torch.from_numpy(gt_np).float()

    return {'lms': lms,
            'mms': mms_hp,
            # Legacy key with a stray colon, kept so existing lookups work.
            'mms:': mms_hp,
            'ms': ms_hp,
            'pan': pan_hp,
            'gt': gt.permute([0, 2, 3, 1])
            }

def load_dataset_H5(file_path, scale, use_cuda=True):
    """Load an HDF5 dataset into float tensors.

    Args:
        file_path: path of the HDF5 file; it must contain ``'lms'``, ``'ms'``
            and ``'pan'`` and may contain ``'gt'``.
        scale: divisor applied to ``lms``, ``ms`` and ``pan`` (``gt`` is
            returned un-scaled).
        use_cuda: when true, every tensor is moved with ``.cuda()``.

    Returns:
        dict with the keys ``'lms'``, ``'ms'``, ``'pan'`` and ``'gt'``.
        ``'gt'`` is permuted with ``[0, 2, 3, 1]``; when the file has no
        ``'gt'`` dataset, the un-scaled ``'lms'`` is used in its place on
        both the CPU and the CUDA path.
    """
    # Open read-only and close the file as soon as the arrays are in memory.
    with h5py.File(file_path, 'r') as data:
        print(data.keys())
        lms = torch.from_numpy(data['lms'][...] / scale)
        ms = torch.from_numpy(data['ms'][...] / scale)
        pan = torch.from_numpy(data['pan'][...] / scale)
        gt_key = 'gt' if data.get('gt', None) is not None else 'lms'
        gt = torch.from_numpy(data[gt_key][...])

    tensors = [lms, ms, pan, gt]
    if use_cuda:
        # Same order as before: move to the GPU first, then cast to float32.
        tensors = [t.cuda() for t in tensors]
    lms, ms, pan, gt = (t.float() for t in tensors)

    return {'lms': lms,
            'ms': ms,
            'pan': pan,
            'gt': gt.permute([0, 2, 3, 1])
            }


class MultiExmTest_h5(Dataset):
    """Test dataset whose samples all come from one HDF5 file held in memory.

    Each item is a dict with the keys ``'lms'``, ``'ms'``, ``'pan'`` and
    ``'gt'``, obtained by indexing the first axis of the loaded tensors.
    """

    def __init__(self, file_path, dataset_name, img_scale, suffix='.h5'):
        """Load the whole file through one of the H5 loader functions.

        Args:
            file_path: path of the HDF5 file. For ``hp`` datasets every
                ``'_hp'`` in the path is removed before loading.
            dataset_name: dataset identifier; if it contains ``'hp'`` the
                high-pass loader ``load_dataset_H5_hp`` is used, otherwise
                ``load_dataset_H5``.
            img_scale: divisor handed to the loader.
            suffix: when ``'.mat'`` the loaded tensors are re-arranged with
                ``permute(0, 3, 1, 2)`` and ``pan`` gets an extra axis at
                position 1; any other value stores them unchanged.
        """
        super(MultiExmTest_h5, self).__init__()

        # self.scale = 2047.0
        # if 'gf' in dataset_name:
        #     self.scale = 1023.0
        self.img_scale = img_scale
        print(f"loading MultiExmTest_h5: {file_path} with {img_scale}")
        # Load everything into memory at once. The name either contains 'hp'
        # or it does not, so two branches cover every case.
        if 'hp' in dataset_name:
            file_path = file_path.replace('_hp', '')
            data = load_dataset_H5_hp(file_path, img_scale, False)
        else:
            data = load_dataset_H5(file_path, img_scale, False)

        if suffix == '.mat':
            self.lms = data['lms'].permute(0, 3, 1, 2)  # CxHxW = 8x256x256
            self.ms = data['ms'].permute(0, 3, 1, 2)  # CxHxW= 8x64x64
            self.pan = data['pan'].unsqueeze(1)
            self.gt = data['gt'].permute(0, 3, 1, 2)
        else:
            self.lms = data['lms']
            self.ms = data['ms']
            self.pan = data['pan']
            self.gt = data['gt']

        print(f"lms: {self.lms.shape}, ms: {self.ms.shape}, pan: {self.pan.shape}, gt: {self.gt.shape}")

    def __getitem__(self, item):
        """Return the ``item``-th sample as a dict of tensors."""
        return {'lms': self.lms[item, ...],
                'ms': self.ms[item, ...],
                'pan': self.pan[item, ...],
                'gt': self.gt[item, ...]
                }

    def __len__(self):
        """Number of samples, i.e. the size of the first axis of ``gt``."""
        return self.gt.shape[0]


class SingleDataset(Dataset):
    """Dataset in which every sample is stored in its own .mat file.

    Files are read when an item is requested, using the loader selected in
    ``__init__``.
    """

    def __init__(self, file_lists, dataset_name, img_scale, dataset=None):
        """Remember the file list and select the loader function.

        Args:
            file_lists: sequence of .mat file paths, one per sample.
            dataset_name: dataset identifier; if it contains ``'hp'`` the
                high-pass loader ``load_dataset_singlemat_hp`` is used,
                otherwise ``load_dataset_singlemat``.
            img_scale: divisor handed to the loader.
            dataset: unused; kept for signature compatibility.
        """
        self.img_scale = img_scale
        self.file_lists = file_lists
        print(f"loading SingleDataset: {file_lists} with {img_scale}")
        self.file_nums = len(file_lists)
        self.dataset_name = dataset_name

        # ``self.dataset`` holds the loader callable. The name either
        # contains 'hp' or it does not, so two cases cover everything.
        if 'hp' in dataset_name:
            self.dataset = load_dataset_singlemat_hp
        else:
            self.dataset = load_dataset_singlemat

    def __getitem__(self, idx):
        """Load and return sample ``idx`` (taken modulo the number of files).

        Returns:
            dict with ``'gt'``, ``'lms'``, ``'ms'``, ``'pan'`` (with a new
            leading axis) and ``'filename'`` (the path that was read).
        """
        file_path = self.file_lists[idx % self.file_nums]
        test_lms, test_ms, test_pan, gt = self.dataset(file_path, self.img_scale)

        # Both loaders return the same 4-tuple, so one return shape suffices.
        return {'gt': gt,
                'lms': test_lms,
                'ms': test_ms,
                'pan': test_pan.unsqueeze(dim=0),
                'filename': file_path}

    def __len__(self):
        """Number of files in the dataset."""
        return self.file_nums


def save_results(idx, save_model_output, filename, save_fmt, output):
    """Write a network output tensor to a ``.mat`` file under key ``'sr'``.

    Args:
        idx: sample index, used in the file name when ``filename`` is None.
        save_model_output: directory the result is written into.
        filename: source file path or None.  Only the part of its base name
            before the first dot is reused.
        save_fmt: ``'mat'`` writes a ``.mat`` file; any other value only runs
            ``showimage8`` and builds a ``.png`` path.
        output: tensor to save; it is moved to CPU, detached and converted to NumPy.
    """
    if filename is None:
        mat_name = f"output_mulExm_{idx}.mat"
        target = os.path.join(format(save_model_output), mat_name)
        sio.savemat(target, {'sr': output.cpu().detach().numpy()})
        return

    stem = os.path.basename(filename).split('.')[0]

    if save_fmt == 'mat':
        target = '/'.join([save_model_output, "output_" + stem + ".mat"])
        sio.savemat(target, {'sr': output.cpu().detach().numpy()})
        return

    # Image branch: the plotting/saving calls are disabled in this file, so the
    # converted image and the .png path are computed but nothing is written.
    output = showimage8(output)
    filename = '/'.join([save_model_output, stem + ".png"])


def mpl_save_fig(filename):
    """Save via ``plt.savefig`` to ``filename`` as SVG with tight, unpadded bounds.

    ``plt`` is imported outside this block (presumably ``matplotlib.pyplot``).
    """
    save_options = dict(format='svg', dpi=300, pad_inches=0, bbox_inches='tight')
    plt.savefig(f"{filename}", **save_options)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/models/APNN/data_qb.py
================================================
import torch.utils.data as data
import torch
import h5py
import numpy as np


class Dataset_Ft(data.Dataset):
    """Placeholder dataset: accepts a file path but loads nothing."""

    def __init__(self, file_path):
        # ``file_path`` is accepted for signature parity and is not used.
        super().__init__()


class Dataset_Pro(data.Dataset):
    """In-memory dataset read from an HDF5 file with ``gt``, ``lms``, ``ms`` and ``pan``.

    Every array is read in full, converted to float32 and divided by 2047.
    The array layout is kept exactly as stored in the file (the original
    comments describe it as N x C x H x W, with a single-channel ``pan``).
    """

    def __init__(self, file_path):
        super(Dataset_Pro, self).__init__()
        # Open read-only and release the handle once everything is copied into
        # memory.  The local is not named ``data`` so it does not shadow the
        # ``torch.utils.data`` module alias used by the class statement.
        with h5py.File(file_path, 'r') as h5_file:
            self.gt = self._load_scaled(h5_file, "gt")
            print(self.gt.size())
            self.lms = self._load_scaled(h5_file, "lms")
            self.ms = self._load_scaled(h5_file, "ms")
            self.pan = self._load_scaled(h5_file, "pan")

    @staticmethod
    def _load_scaled(h5_file, key):
        """Read dataset ``key`` fully and return it as a float32 tensor divided by 2047."""
        scaled = np.array(h5_file[key][...], dtype=np.float32) / 2047
        return torch.from_numpy(scaled)

    # Required by the Dataset protocol.
    def __getitem__(self, index):
        """Return ``(gt, lms, ms, pan)`` for sample ``index`` as float tensors."""
        return self.gt[index, :, :, :].float(), \
               self.lms[index, :, :, :].float(), \
               self.ms[index, :, :, :].float(), \
               self.pan[index, :, :, :].float()

    # Required by the Dataset protocol.
    def __len__(self):
        """Number of samples (leading dimension of ``gt``)."""
        return self.gt.shape[0]


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/models/APNN/data_single_read.py
================================================
import torch.nn.modules as nn
import torch
import cv2
import numpy as np
import h5py
import scipy.io as sio
import os


def load_set(file_path, blk):
    """Load one ``.mat`` sample and return CUDA tensors ready for the network.

    The ``lms`` and ``pan`` arrays are divided by 2047, stacked along the last
    axis, edge-padded by ``blk`` pixels on each spatial side and moved to the
    GPU channel-first.  ``ms`` and ``gt`` are only scaled and moved.

    Args:
        file_path: path of the ``.mat`` file with keys 'lms', 'pan', 'ms', 'gt'.
        blk: number of pixels padded on every spatial border of the stacked input.

    Returns:
        ``(lms, ms_hp, pan_hp, gt)``.  ``lms`` is the padded lms+pan stack,
        ``ms_hp`` and ``pan_hp`` are channel-first, ``gt`` keeps the file layout.
    """
    mat = sio.loadmat(file_path)  # arrays are stored H x W x C
    scale = 2047.0

    # Panchromatic band with an explicit trailing channel axis: H x W x 1.
    pan_hwc = np.expand_dims(np.array(mat['pan'] / scale, dtype=np.float32), axis=-1)

    # Network input: upsampled MS bands followed by the pan band, then padded.
    lms_hwc = np.array(mat['lms'] / scale, dtype=np.float32)
    stacked = np.concatenate([lms_hwc, pan_hwc], axis=-1)
    border = (blk, blk)
    stacked = np.pad(stacked, (border, border, (0, 0)), mode='edge')

    lms = torch.from_numpy(stacked).cuda().permute(2, 0, 1)
    pan_hp = torch.from_numpy(pan_hwc).cuda().permute(2, 0, 1)
    ms_hp = torch.from_numpy(mat['ms'] / scale).cuda().permute(2, 0, 1)
    gt = torch.from_numpy(mat['gt'] / scale).cuda()

    return lms, ms_hp, pan_hp, gt


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/models/APNN/data_wv2.py
================================================
import torch.utils.data as data
import torch
import h5py
import numpy as np


class Dataset_Ft(data.Dataset):
    """Placeholder dataset: accepts a file path but loads nothing."""

    def __init__(self, file_path):
        # ``file_path`` is accepted for signature parity and is not used.
        super().__init__()


class Dataset_Pro(data.Dataset):
    """In-memory dataset read from an HDF5 file with ``gt``, ``lms``, ``ms`` and ``pan``.

    Every array is read in full, converted to float32 and divided by 2047.
    The array layout is kept exactly as stored in the file (the original
    comments describe it as N x C x H x W, with a single-channel ``pan``).
    """

    def __init__(self, file_path):
        super(Dataset_Pro, self).__init__()
        # Open read-only and release the handle once everything is copied into
        # memory.  The local is not named ``data`` so it does not shadow the
        # ``torch.utils.data`` module alias used by the class statement.
        with h5py.File(file_path, 'r') as h5_file:
            self.gt = self._load_scaled(h5_file, "gt")
            print(self.gt.size())
            self.lms = self._load_scaled(h5_file, "lms")
            self.ms = self._load_scaled(h5_file, "ms")
            self.pan = self._load_scaled(h5_file, "pan")

    @staticmethod
    def _load_scaled(h5_file, key):
        """Read dataset ``key`` fully and return it as a float32 tensor divided by 2047."""
        scaled = np.array(h5_file[key][...], dtype=np.float32) / 2047
        return torch.from_numpy(scaled)

    # Required by the Dataset protocol.
    def __getitem__(self, index):
        """Return ``(gt, lms, ms, pan)`` for sample ``index`` as float tensors."""
        return self.gt[index, :, :, :].float(), \
               self.lms[index, :, :, :].float(), \
               self.ms[index, :, :, :].float(), \
               self.pan[index, :, :, :].float()

    # Required by the Dataset protocol.
    def __len__(self):
        """Number of samples (leading dimension of ``gt``)."""
        return self.gt.shape[0]


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/models/APNN/data_wv3.py
================================================
import torch.utils.data as data
import torch
import h5py
import numpy as np


class Dataset_Ft(data.Dataset):
    """Placeholder dataset: accepts a file path but loads nothing."""

    def __init__(self, file_path):
        # ``file_path`` is accepted for signature parity and is not used.
        super().__init__()


class Dataset_Pro(data.Dataset):
    """In-memory dataset read from an HDF5 file with ``gt``, ``lms``, ``ms`` and ``pan``.

    Every array is read in full, converted to float32 and divided by 2047.
    The array layout is kept exactly as stored in the file (the original
    comments describe it as N x C x H x W, with a single-channel ``pan``).
    """

    def __init__(self, file_path):
        super(Dataset_Pro, self).__init__()
        # Open read-only and release the handle once everything is copied into
        # memory.  The local is not named ``data`` so it does not shadow the
        # ``torch.utils.data`` module alias used by the class statement.
        with h5py.File(file_path, 'r') as h5_file:
            self.gt = self._load_scaled(h5_file, "gt")
            print(self.gt.size())
            self.lms = self._load_scaled(h5_file, "lms")
            self.ms = self._load_scaled(h5_file, "ms")
            self.pan = self._load_scaled(h5_file, "pan")

    @staticmethod
    def _load_scaled(h5_file, key):
        """Read dataset ``key`` fully and return it as a float32 tensor divided by 2047."""
        scaled = np.array(h5_file[key][...], dtype=np.float32) / 2047
        return torch.from_numpy(scaled)

    # Required by the Dataset protocol.
    def __getitem__(self, index):
        """Return ``(gt, lms, ms, pan)`` for sample ``index`` as float tensors."""
        return self.gt[index, :, :, :].float(), \
               self.lms[index, :, :, :].float(), \
               self.ms[index, :, :, :].float(), \
               self.pan[index, :, :, :].float()

    # Required by the Dataset protocol.
    def __len__(self):
        """Number of samples (leading dimension of ``gt``)."""
        return self.gt.shape[0]


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/models/APNN/data_wv4.py
================================================
import torch.utils.data as data
import torch
import h5py
import numpy as np


class Dataset_Ft(data.Dataset):
    """Placeholder dataset: accepts a file path but loads nothing."""

    def __init__(self, file_path):
        # ``file_path`` is accepted for signature parity and is not used.
        super().__init__()


class Dataset_Pro(data.Dataset):
    """In-memory dataset read from an HDF5 file with ``gt``, ``lms``, ``ms`` and ``pan``.

    Every array is read in full, converted to float32 and divided by 2047.
    The array layout is kept exactly as stored in the file (the original
    comments describe it as N x C x H x W, with a single-channel ``pan``).
    """

    def __init__(self, file_path):
        super(Dataset_Pro, self).__init__()
        # Open read-only and release the handle once everything is copied into
        # memory.  The local is not named ``data`` so it does not shadow the
        # ``torch.utils.data`` module alias used by the class statement.
        with h5py.File(file_path, 'r') as h5_file:
            self.gt = self._load_scaled(h5_file, "gt")
            print(self.gt.size())
            self.lms = self._load_scaled(h5_file, "lms")
            self.ms = self._load_scaled(h5_file, "ms")
            self.pan = self._load_scaled(h5_file, "pan")

    @staticmethod
    def _load_scaled(h5_file, key):
        """Read dataset ``key`` fully and return it as a float32 tensor divided by 2047."""
        scaled = np.array(h5_file[key][...], dtype=np.float32) / 2047
        return torch.from_numpy(scaled)

    # Required by the Dataset protocol.
    def __getitem__(self, index):
        """Return ``(gt, lms, ms, pan)`` for sample ``index`` as float tensors."""
        return self.gt[index, :, :, :].float(), \
               self.lms[index, :, :, :].float(), \
               self.ms[index, :, :, :].float(), \
               self.pan[index, :, :, :].float()

    # Required by the Dataset protocol.
    def __len__(self):
        """Number of samples (leading dimension of ``gt``)."""
        return self.gt.shape[0]


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/models/APNN/evaluate.py
================================================
import math
import torch
import torch.nn.functional as F
import numpy as np


# Rounding differences in ``dat`` and its variance leave the final result with
# an error of roughly 0.001.
def q2n(gt, x, q_blocks_size, q_shift):
    '''Compute the block-wise Q2n quality map between a reference and a fused image.

    Both images are symmetrically (mirror) padded at the bottom/right so that a
    whole number of blocks fits, cast to uint16, zero-padded along the band axis
    up to the next power of two, and then scored block by block with
    ``onions_quality``.

    Args:
        gt: reference image, N x H x W x C (NumPy array or torch tensor).
        x: fused image with the same shape as ``gt``.
        q_blocks_size: side length of the square blocks.
        q_shift: stride between consecutive blocks.

    Returns:
        NumPy array of shape (N, stepx, stepy) holding, for every block, the
        Euclidean norm of the vector returned by ``onions_quality``.

    Note:
        Pixel values are truncated to uint16 before scoring, so inputs scaled to
        [0, 1] collapse to 0; pass data in its integer range.
    '''
    if isinstance(gt, torch.Tensor):
        gt = gt.cpu().numpy()
    if isinstance(x, torch.Tensor):
        x = x.cpu().numpy()

    N, N1, N2, N3 = gt.shape
    size2 = q_blocks_size

    stepx = math.ceil(N1 / q_shift)
    stepy = math.ceil(N2 / q_shift)

    if stepy <= 0:
        stepy = 1
        stepx = 1

    # Rows/columns missing for the last block to be complete.
    est1 = (stepx - 1) * q_shift + q_blocks_size - N1
    est2 = (stepy - 1) * q_shift + q_blocks_size - N2

    # Mirror the last est1 rows / est2 columns (edge sample included).  The
    # previous slice ``N2 - 1:-1:N2 - est2 + 1`` selected a single column and
    # was only correct for a pad of 0 or 1.
    pad_width = ((0, 0), (0, est1), (0, est2), (0, 0))
    gt = np.pad(np.asarray(gt, dtype=np.float64), pad_width, mode='symmetric').astype(np.uint16)
    x = np.pad(np.asarray(x, dtype=np.float64), pad_width, mode='symmetric').astype(np.uint16)

    _, N1, N2, N3 = gt.shape

    # The hypercomplex product needs a power-of-two number of components:
    # append all-zero bands when required.
    bands_pow2 = 2 ** math.ceil(math.log2(N3))
    if bands_pow2 != N3:
        dif = np.zeros(shape=[N, N1, N2, bands_pow2 - N3], dtype=np.uint16)
        # np.concatenate takes a sequence of arrays as its first argument.
        gt = np.concatenate([gt, dif], axis=-1)
        x = np.concatenate([x, dif], axis=-1)

    N3 = gt.shape[-1]

    valori = np.zeros(shape=[N, stepx, stepy, N3])

    for j in range(stepx):
        rows = slice(j * q_shift, j * q_shift + q_blocks_size)
        for i in range(stepy):
            cols = slice(i * q_shift, i * q_shift + size2)
            valori[:, j, i, :] = onions_quality(gt[:, rows, cols, :],
                                                x[:, rows, cols, :],
                                                q_blocks_size)

    q2n_idx_map = np.sqrt(np.sum(valori ** 2, axis=-1))
    return q2n_idx_map


def norm_blocco(x, eps=1e-8):
    """Standardise a block to zero mean / unit deviation and shift it by +1.

    Args:
        x: array-like with ``mean()`` and ``std()`` methods.
        eps: value substituted for the deviation when it is exactly zero.

    Returns:
        ``(normalised, mean, deviation)`` where ``deviation`` is the value
        actually used for the division.
    """
    # NOTE(review): ndarray.std() is the population deviation (ddof=0); confirm
    # whether the reference implementation expects the sample deviation.
    block_mean = x.mean()
    block_std = x.std()
    block_std = eps if block_std == 0 else block_std
    normalised = (x - block_mean) / block_std + 1
    return normalised, block_mean, block_std


def onions_quality(dat1, dat2, size1):
    """Hypercomplex quality index between two image blocks.

    Args:
        dat1: reference block, N x size1 x size1 x C.
        dat2: block under test, same shape as ``dat1``.
        size1: side length of the (square) block.

    Returns:
        Array of shape (N, C) with the components of the index.  In the
        degenerate case (``termine3 == 0``) the shape is (N, 1, C) and only the
        last component is set, to the mean-bias term.

    Note:
        The per-pixel accumulators have shape (size1, size1), so the code
        effectively expects a batch of one (N == 1).
    """
    # Work on private float64 copies: the blocks are normalised in place below
    # and must not leak back into the caller's arrays.
    dat1 = np.array(dat1, dtype=np.float64)
    dat2 = np.array(dat2, dtype=np.float64)

    # Conjugate the second operand: keep component 0, negate the others.
    dat2 = np.concatenate([dat2[..., 0, np.newaxis], -dat2[..., 1:]], axis=-1)
    N, _, _, N3 = dat1.shape
    size2 = size1

    # Block normalisation: every band of dat2 is normalised with the mean (s)
    # and deviation (t) of the matching band of dat1.
    for i in range(N3):
        a1, s, t = norm_blocco(np.squeeze(dat1[..., i]))
        dat1[..., i] = a1
        if s == 0:
            if i == 0:
                dat2[..., i] = dat2[..., i] - s + 1
            else:
                dat2[..., i] = -(-dat2[..., i] - s + 1)
        else:
            if i == 0:
                dat2[..., i] = ((dat2[..., i] - s) / t) + 1
            else:
                dat2[..., i] = -(((-dat2[..., i] - s) / t) + 1)

    m1 = np.zeros(shape=[N, N3])
    m2 = m1.copy()

    mod_q1m = 0
    mod_q2m = 0
    mod_q1 = np.zeros(shape=[size1, size2])
    mod_q2 = np.zeros(shape=[size1, size2])

    # Per-band means and squared moduli (of the means and of every pixel).
    for i in range(N3):
        m1[..., i] = np.mean(np.squeeze(dat1[..., i]))
        m2[..., i] = np.mean(np.squeeze(dat2[..., i]))
        mod_q1m += m1[..., i] ** 2
        mod_q2m += m2[..., i] ** 2
        mod_q1 += np.squeeze(dat1[..., i]) ** 2
        mod_q2 += np.squeeze(dat2[..., i]) ** 2

    mod_q1m = np.sqrt(mod_q1m)
    mod_q2m = np.sqrt(mod_q2m)
    mod_q1 = np.sqrt(mod_q1)
    mod_q2 = np.sqrt(mod_q2)

    # n / (n - 1) factor applied to every second-order moment.
    unbias = (size1 * size2) / (size1 * size2 - 1)

    termine2 = mod_q1m * mod_q2m
    termine4 = mod_q1m ** 2 + mod_q2m ** 2
    int1 = unbias * np.mean(mod_q1 ** 2)
    int2 = unbias * np.mean(mod_q2 ** 2)
    termine3 = int1 + int2 - unbias * (mod_q1m ** 2 + mod_q2m ** 2)
    mean_bias = 2 * termine2 / termine4
    if termine3 == 0:
        q = np.zeros(shape=[N, 1, N3])
        q[:, :, N3 - 1] = mean_bias
    else:
        cbm = 2 / termine3
        qu = onion_mult2D(dat1, dat2)
        qm = onion_mult(m1.reshape(-1), m2.reshape(-1))
        qv = np.zeros(shape=[N, N3])
        for i in range(N3):
            # Average component i over the block.  The components live on the
            # last axis, so index it with ``...`` (``qu[:, :, i]`` would pick
            # image column i instead).
            qv[..., i] = unbias * np.mean(np.squeeze(qu[..., i]))
        q = qv - unbias * qm
        q = q * mean_bias * cbm
    return q


def onion_mult2D(onion1, onion2):
    """Recursive hypercomplex product applied along the last axis of two 4-D arrays.

    The last axis holds the components; it is split in halves recursively
    until two components (or one) remain.

    Args:
        onion1: 4-D array, components on the last axis.
        onion2: array with the same shape as ``onion1``.

    Returns:
        Array with the same shape holding the component-wise product result.
    """
    _, _, _, n_comp = onion1.shape

    if n_comp <= 1:
        return onion1 * onion2

    def conj(v):
        # Keep component 0, flip the sign of all the others.
        return np.concatenate([v[..., 0, np.newaxis], -v[..., 1:]], axis=-1)

    half = n_comp // 2
    a = onion1[..., :half]
    b = conj(onion1[..., half:])
    c = onion2[..., :half]
    d = conj(onion2[..., half:])

    if n_comp == 2:
        return np.concatenate([a * c - d * b, a * d + c * b], axis=-1)

    first_half = onion_mult2D(a, c) - onion_mult2D(d, conj(b))
    second_half = onion_mult2D(conj(a), d) + onion_mult2D(c, b)
    return np.concatenate([first_half, second_half], axis=-1)


def onion_mult(onion1, onion2):
    """Recursive hypercomplex product of two 1-D component vectors.

    Same recursion as ``onion_mult2D`` but for plain vectors of components.

    Args:
        onion1: 1-D array of components.
        onion2: 1-D array with the same length.

    Returns:
        1-D array with the components of the product.
    """
    n_comp = len(onion1)

    if n_comp <= 1:
        return np.array(onion1).reshape(-1) * np.array(onion2).reshape(-1)

    def conj(v):
        # Keep component 0, flip the sign of all the others.
        return np.append(np.array(v[0]), -v[1:])

    half = n_comp // 2
    a = onion1[:half]
    b = conj(onion1[half:])
    c = onion2[:half]
    d = conj(onion2[half:])

    if n_comp == 2:
        return np.append(a * c - d * b, a * d + c * b)

    first_half = onion_mult(a, c) - onion_mult(d, conj(b))
    second_half = onion_mult(conj(a), d) + onion_mult(c, b)
    return np.append(first_half, second_half)


def compute_index(img_base, img_out, ratio):
    """Compute SAM (in degrees) and ERGAS between a reference and an output image.

    Args:
        img_base: reference tensor, H x W x C.
        img_out: output tensor with the same shape.
        ratio: resolution ratio; ERGAS is scaled by ``1 / ratio``.

    Returns:
        ``(SAM, ERGAS)`` as tensors.
    """
    chanel = img_out.shape[2]

    # SAM: mean spectral angle over the pixels whose two spectra are non-zero.
    sum1 = torch.sum(img_base * img_out, 2)
    sum2 = torch.sum(img_base * img_base, 2)
    sum3 = torch.sum(img_out * img_out, 2)
    t = (sum2 * sum3) ** 0.5
    numlocal = torch.gt(t, 0)
    num = torch.sum(numlocal)
    t = sum1 / t
    angle = torch.acos(t)
    # Undefined angles (0/0 or |cos| marginally above 1) count as zero.
    sumangle = torch.where(torch.isnan(angle), torch.full_like(angle, 0), angle).sum()
    if num == 0:
        averangle = sumangle
    else:
        averangle = sumangle / num
    # Radians -> degrees with the exact constant (the old literal 3.14159256
    # had two digits of pi transposed).
    SAM = averangle * 180 / math.pi

    # ERGAS: per-band MSE relative to the squared band mean of the reference.
    summ = 0
    for i in range(chanel):
        a1 = torch.mean((img_base[:, :, i] - img_out[:, :, i]) ** 2)
        m1 = torch.mean(img_base[:, :, i])
        a2 = m1 * m1
        summ = summ + a1 / a2
    ERGAS = 100 * (1 / ratio) * ((summ / chanel) ** 0.5)

    return SAM, ERGAS


import decimal

# Make the decimal context of the importing thread round halves away from zero.
decimal.getcontext().rounding = decimal.ROUND_HALF_UP
# Number of decimal digits kept when the mean spectral angle is rounded.
n_digits = 6


# panHrnet: 2.6565  |1.4651  | 0.98364  | 0.98024  | 0.98089-Q8
def analysis_accu(img_base, img_out, ratio, flag_cut_bounds=True, dim_cut=1):
    """Compute CC, PSNR, SSIM, SAM and ERGAS between a reference and an output image.

    Args:
        img_base: reference tensor, H x W x C.
        img_out: output tensor with the same shape.
        ratio: resolution ratio; ERGAS is scaled by ``1 / ratio``.
        flag_cut_bounds: when True, crop both images to
            ``[dim_cut - 1:-dim_cut]`` along height and width first.
        dim_cut: border size used by the crop.

    Returns:
        ``(CC, PSNR, SSIM, SAM, ERGAS)``.  CC, PSNR and SSIM are averaged over
        the bands; SAM is in degrees.
    """
    if flag_cut_bounds:
        img_base = img_base[dim_cut - 1:-dim_cut, dim_cut - 1:-dim_cut, :]
        img_out = img_out[dim_cut - 1:-dim_cut, dim_cut - 1:-dim_cut, :]

    h = img_out.shape[0]
    w = img_out.shape[1]
    chanel = img_out.shape[2]

    # CC: per-band correlation coefficient.
    C1 = torch.sum(torch.sum(img_base * img_out, 0), 0) - h * w * (
            torch.mean(torch.mean(img_base, 0), 0) * torch.mean(torch.mean(img_out, 0), 0))
    C2 = torch.sum(torch.sum(img_out ** 2, 0), 0) - h * w * (torch.mean(torch.mean(img_out, 0), 0) ** 2)
    C3 = torch.sum(torch.sum(img_base ** 2, 0), 0) - h * w * (torch.mean(torch.mean(img_base, 0), 0) ** 2)
    CC = C1 / ((C2 * C3) ** 0.5)

    # SAM: mean spectral angle over the pixels whose two spectra are non-zero.
    sum1 = torch.sum(img_base * img_out, 2)
    sum2 = torch.sum(img_base * img_base, 2)
    sum3 = torch.sum(img_out * img_out, 2)
    t = (sum2 * sum3) ** 0.5
    numlocal = torch.gt(t, 0)
    num = torch.sum(numlocal)
    t = sum1 / t
    angle = torch.acos(t)
    # Undefined angles (0/0 or |cos| marginally above 1) count as zero.
    sumangle = torch.where(torch.isnan(angle), torch.full_like(angle, 0), angle).sum()
    if num == 0:
        averangle = sumangle
    else:
        averangle = sumangle / num

    # Round the mean angle (radians) to n_digits decimals before converting.
    # An alternative way to adjust the precision is described at
    # https://segmentfault.com/a/1190000018929994
    averangle = (averangle * 10 ** n_digits).round() / (10 ** n_digits)
    # Radians -> degrees with the exact constant (the old literal 3.14159256
    # had two digits of pi transposed).
    SAM = averangle * 180 / math.pi

    # ERGAS: per-band MSE relative to the squared band mean of the reference.
    summ = 0
    for i in range(chanel):
        a1 = torch.mean((img_base[:, :, i] - img_out[:, :, i]) ** 2)
        m1 = torch.mean(img_base[:, :, i])
        a2 = m1 * m1
        summ = summ + a1 / a2
    ERGAS = 100 * (1 / ratio) * ((summ / chanel) ** 0.5)

    # PSNR per band, for a peak value of 1.
    mse = torch.mean((img_base - img_out) ** 2, 0)
    mse = torch.mean(mse, 0)
    rmse = mse ** 0.5
    temp = torch.log(1 / rmse) / math.log(10)
    PSNR = 20 * temp

    # SSIM expects 1 x C x H x W input.
    img_base = img_base.permute(2, 0, 1)
    img_out = img_out.permute(2, 0, 1)
    img_base = img_base.unsqueeze(0)
    img_out = img_out.unsqueeze(0)
    SSIM = _ssim(img_base, img_out)

    CC = torch.mean(CC)
    PSNR = torch.mean(PSNR)
    SSIM = torch.mean(SSIM)

    return CC, PSNR, SSIM, SAM, ERGAS


def _ssim(img1, img2):
    img1 = img1.float()
    img2 = img2.float()

    channel = img1.shape[1]
    max_val = 1
    _, c, w, h = img1.size()
    window_size = min(w, h, 11)
    sigma = 1.5 * window_size / 11
    window = create_window(window_size, sigma, channel).cuda()
    mu1 = F.conv2d(img1, window, padding=window_size // 2, groups=channel)
    mu2 = F.conv2d(img2, window, padding=window_size // 2, groups=channel)

    mu1_sq = mu1.pow(2)
    mu2_sq = mu2.pow(2)
    mu1_mu2 = mu1 * mu2

    sigma1_sq = F.conv2d(img1 * img1, window, padding=window_size // 2, groups=channel) - mu1_sq
    sigma2_sq = F.conv2d(img2 * img2, window, padding=window_size // 2, groups=channel) - mu2_sq
    sigma12 = F.conv2d(img1 * img2, window, padding=window_size // 2, groups=channel) - mu1_mu2
    C1 = (0.01 * max_val) ** 2
    C2 = (0.03 * max_val) ** 2
    V1 = 2.0 * sigma12 + C2
    V2 = sigma1_sq + sigma2_sq + C2
    ssim_map = ((2 * mu1_mu2 + C1) * V1) / ((mu1_sq + mu2_sq + C1) * V2)
    t = ssim_map.shape
    return ssim_map.mean(2).mean(2)


from torch.autograd import Variable


def gaussian(window_size, sigma):
    """Return a 1-D Gaussian kernel of ``window_size`` taps that sums to 1.

    The kernel is centred on index ``window_size // 2``.
    """
    centre = window_size // 2
    two_var = float(2 * sigma ** 2)
    taps = [math.exp(-(pos - centre) ** 2 / two_var) for pos in range(window_size)]
    weights = torch.Tensor(taps)
    return weights / weights.sum()


def create_window(window_size, sigma, channel):
    """Build a depth-wise 2-D Gaussian convolution kernel.

    Args:
        window_size: side length of the square kernel.
        sigma: standard deviation forwarded to ``gaussian``.
        channel: number of channels; the same kernel is repeated for each.

    Returns:
        Contiguous float tensor of shape (channel, 1, window_size, window_size).
    """
    kernel_1d = gaussian(window_size, sigma).unsqueeze(1)
    # Outer product of the 1-D kernel with itself gives the separable 2-D kernel.
    kernel_2d = kernel_1d.mm(kernel_1d.t()).float().unsqueeze(0).unsqueeze(0)
    # Plain tensors are used directly; the deprecated ``Variable`` wrapper adds
    # nothing and is no longer needed.
    return kernel_2d.expand(channel, 1, window_size, window_size).contiguous()


def compare_index(A):
    """Pick the iteration with the best overall metric column.

    ``A`` is indexed as ``A[metric, column, iteration]`` and only column 0 is
    compared.  For metrics 0-2 larger is better, for metrics 3-4 smaller is
    better.  A candidate replaces the current best when it wins (or ties) on
    more of the five metrics than the current best wins outright.

    Returns:
        ``(C_best, best_ind)``: the slice ``A[:, :, ind]`` of the winning
        iteration and its 1-based index.
    """
    n_iterations = A.shape[2]
    higher_is_better = (0, 1, 2)
    lower_is_better = (3, 4)
    n_metrics = len(higher_is_better) + len(lower_is_better)

    best = A[:, 0, 0]
    ind = 0
    for it in range(n_iterations):
        candidate = A[:, 0, it]

        # Count the metrics on which the current best strictly beats the candidate.
        wins_best = 0
        for k in higher_is_better:
            if best[k] > candidate[k]:
                wins_best += 1
        for k in lower_is_better:
            if best[k] < candidate[k]:
                wins_best += 1
        wins_candidate = n_metrics - wins_best

        if wins_candidate > wins_best:
            best = candidate
            ind = it

    C_best = A[:, :, ind]
    best_ind = ind + 1
    return C_best, best_ind


if __name__ == "__main__":
    # Smoke check: print the shape of a 255 x 255 crop of a 256 x 256 array.
    demo = np.zeros(shape=[256, 256])
    cropped = demo[:255, :255]
    print(cropped.shape)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/models/APNN/main_pre_train_trainData_qb.py
================================================
import os
import torch
import torch.backends.cudnn as cudnn
import torch.nn as nn
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter
from torch.autograd import Variable
from torch.utils.data import DataLoader
from data_qb import Dataset_Pro
from model_qb import APNN, summaries, loss_with_l2_regularization, weights_init
from logger import create_logger, log_string
import numpy as np


import argparse

# Command-line options: only the output/log locations and the architecture
# name are configurable here.
parser = argparse.ArgumentParser(description='PyTorch ImageNet Training')

parser.add_argument('--out_dir', metavar='DIR', default='../results',
                    help='path to save model')
parser.add_argument('--log_dir', metavar='DIR', default='logs',
                    help='path to save log')
parser.add_argument('--tfb_dir', metavar='DIR', default=None,
                    help='useless in this script.')
parser.add_argument('--arch', '-a', metavar='ARCH', default='APNN')

args = parser.parse_args()
# Fixed for this script: experiment description and dataset tag.
args.experimental_desc = "APNN"
args.dataset = "QB"

# create_logger is a project helper; its three return values are the output,
# checkpoint and tensorboard directories (only model_save_dir is used below).
out_dir, model_save_dir, tfb_dir = create_logger(args, args.experimental_desc)

###################################################################
# ------------------- Pre-Define Part----------------------
###################################################################
# ================== Pre-Define =================== #
# Seed every torch RNG (CPU and all CUDA devices) for repeatable runs.
SEED = 10
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
# cudnn.benchmark = True  ### automatically search for the fastest algorithm
cudnn.deterministic = True

# ============= 2) HYPER PARAMS(Pre-Defined) ==========#

sensor = 'QB'
nr_bands = 4 #selected by user or taken from data?
# Base rate 1e-4 scaled by 17*17*nr_bands.
# NOTE(review): presumably compensates for a loss averaged over patch size and
# bands (see the CHANGES note in the WV2 script) - confirm.
lr = 0.0001*17*17*nr_bands
epochs = 15000
ckpt = 50  # checkpoint period in epochs (stored as 'model_sampling_period')
batch_size = 128
# Optional pretrained weights; loading is skipped when the file does not exist.
model_path = "Weights/qb/.pth"

# ============= 3) Load Model + Loss + Optimizer + Learn_rate_update ==========#
model = APNN().cuda()
model.apply(weights_init)
if os.path.isfile(model_path):
    model.load_state_dict(torch.load(model_path))   ## Load the pretrained Encoder
    log_string('APNN is Successfully Loaded from %s' % (model_path))

# summaries(model, grad=True)    ## Summary the Network
criterion = nn.L1Loss(reduction='mean').cuda()
regularization = loss_with_l2_regularization().cuda()
# Split the parameters into conv3 (identified by object id) and all the others,
# so that conv3 can get its own learning rate.
target_layerParam = list(map(id, model.conv3.parameters()))
base_layerParam = filter(lambda p: id(p) not in target_layerParam, model.parameters())

# conv3 trains at lr/10; the remaining parameters use the optimizer default lr.
training_parameters = [{'params': model.conv3.parameters(), 'lr': lr/10},
                       {'params': base_layerParam}]

optimizer = optim.SGD(training_parameters, lr=lr, momentum=0.9)

log_string("inspect optimizer setting: {}\n".format(optimizer.state_dict()))
print("target id: {}".format(target_layerParam))

# (input_size - kernel_size + 1) // 2 = 2* pad = 2 * blk = net_scope
# Sum (kernel_width - 1) over every conv weight tensor, then add 1.
net_scope = 0
for name, layer in model.named_parameters():
    if 'conv' in name and 'bias' not in name:
        net_scope += layer.shape[-1]-1

# np.sum on the scalar leaves the value unchanged; blk is half of net_scope and
# is the border cropped from the target in train().
net_scope = np.sum(net_scope) + 1
blk = net_scope//2 #8

# Path that train() deletes when it exists; nothing in this script writes it.
save_best_file = './results/PNN/PNN_model.pth.tar'

# Training configuration stored inside every checkpoint as 'train_params'.
PNN_model = {'sensor': sensor,
             'lr': lr,
             'epochs': epochs,
             'model_sampling_period': ckpt,
             'net_scope': net_scope,
             'batch_size': batch_size}


writer = SummaryWriter('./train_logs')    ## Tensorboard_show: case 2

def save_checkpoint(model, epoch):
    """Save ``model.state_dict()`` to ``Weights/<epoch>.pth`` (relative to the cwd)."""
    checkpoint_path = '/'.join(['Weights', "{}.pth".format(epoch)])
    weights = model.state_dict()
    torch.save(weights, checkpoint_path)

###################################################################
# ------------------- Main Train (Run second)----------------------------------
###################################################################

def train(training_data_loader, validate_data_loader, start_epoch=0):
    """Run the train/validate loop, log the losses and save checkpoints.

    Uses the module-level objects of this script (``model``, ``optimizer``,
    ``criterion``, ``regularization``, ``writer``, ``blk``, ``epochs``,
    ``ckpt``, ``sensor``, ``PNN_model``, ``model_save_dir``, ``save_best_file``).

    Args:
        training_data_loader: yields ``(gt, lms, ms, pan)`` batches for training.
        validate_data_loader: yields the same tuple for validation.
        start_epoch: number of epochs already done; training covers epochs
            ``start_epoch + 1 .. epochs`` (1-based).
    """
    log_string('Start training...')
    vmin = 10000  # lowest validation L1 loss seen so far
    mse_loss = nn.MSELoss()
    sensor_best_path = './pretrained_models/' + sensor + '_PNNplus_model.pth.tar'

    for epoch in range(start_epoch + 1, epochs + 1):  # 1-based epoch counter
        epoch_train_mae, epoch_train_mse, epoch_val_mae, epoch_val_mse = [], [], [], []

        # ============Epoch Train=============== #
        model.train()

        for batch in training_data_loader:
            gt, lms, pan = batch[0].cuda(), batch[1].cuda(), batch[3].cuda()
            gt = gt - lms  # regression target is the residual gt - lms

            lms = torch.cat([lms, pan], dim=1)  # network input: lms bands + pan
            optimizer.zero_grad()

            sr = model(lms)

            # Drop blk pixels on every side of the target before comparing it
            # with the network output.
            gt = gt[:, :, blk:-blk, blk:-blk]

            loss = criterion(sr, gt)
            new_loss = regularization(loss, model, flag=False)
            epoch_train_mae.append(loss.item())

            new_loss.backward()
            optimizer.step()

            # MSE is tracked for monitoring only.
            with torch.no_grad():
                epoch_train_mse.append(mse_loss(sr, gt).item())

        t_loss1 = np.nanmean(np.array(epoch_train_mae))  # mean L1 loss of the epoch
        t_loss2 = np.nanmean(np.array(epoch_train_mse))  # mean L2 loss of the epoch

        writer.add_scalar('mae_loss/t_mae', t_loss1, epoch)
        writer.add_scalar('mae_loss/t_mse', t_loss2, epoch)
        # Report "current/total" (the arguments used to be passed swapped).
        log_string('Epoch: {}/{} training L1-loss: {:.7f}, L2-loss: {:.7f}'.format(epoch, epochs, t_loss1, t_loss2))

        # ============Epoch Validate=============== #
        model.eval()
        with torch.no_grad():
            for batch in validate_data_loader:
                gt, lms, pan = batch[0].cuda(), batch[1].cuda(), batch[3].cuda()
                gt = gt - lms
                lms = torch.cat([lms, pan], dim=1)
                sr = model(lms)

                gt = gt[:, :, blk:-blk, blk:-blk]

                epoch_val_mae.append(criterion(sr, gt).item())
                epoch_val_mse.append(mse_loss(sr, gt).item())

            v_loss1 = np.nanmean(np.array(epoch_val_mae))
            v_loss2 = np.nanmean(np.array(epoch_val_mse))
            writer.add_scalar('val/v_mae', v_loss1, epoch)
            writer.add_scalar('val/v_mse', v_loss2, epoch)
            log_string('Epoch: {}/{} validate L1-loss: {:.7f}, L2-loss: {:.7f}'.format(epoch, epochs, v_loss1, v_loss2))

        ### periodic save and simple best save ###
        # ``epoch`` is already 1-based, so save on exact multiples of ``ckpt``
        # and label the file with the epoch that was really completed.
        if epoch % ckpt == 0:
            torch.save(dict(model=model,
                            model_state=model.state_dict(),
                            loss=v_loss1,
                            train_params=PNN_model),
                       '{}/PNN_model_{}.pth.tar'.format(model_save_dir, epoch))

        if v_loss1 < vmin:
            if os.path.isfile(save_best_file):
                os.remove(save_best_file)
            best_state = dict(model=model,
                              model_state=model.state_dict(),
                              loss1=v_loss1,
                              train_params=PNN_model)
            torch.save(best_state, '{}/best_PNN_model_{}.pth.tar'.format(model_save_dir, epoch))
            # The shared directory is not created anywhere else in this script.
            os.makedirs(os.path.dirname(sensor_best_path), exist_ok=True)
            torch.save(best_state, sensor_best_path)
            vmin = v_loss1

    writer.close()  # close tensorboard

###################################################################
# ------------------- Main Function (Run first) -------------------
###################################################################
if __name__ == "__main__":
    # Loader settings shared by the training and the validation set.
    loader_options = dict(num_workers=0, batch_size=batch_size, pin_memory=True, drop_last=True)

    # Training data: reshuffled every epoch.
    train_set = Dataset_Pro('../training_data/train_qb_10000.h5')
    training_data_loader = DataLoader(dataset=train_set, shuffle=True, **loader_options)

    # Validation data: fixed order.
    validate_set = Dataset_Pro('../training_data/valid_qb_10000.h5')
    validate_data_loader = DataLoader(dataset=validate_set, shuffle=False, **loader_options)

    train(training_data_loader, validate_data_loader)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/models/APNN/main_pre_train_trainData_wv2.py
================================================
import os
import torch
import torch.backends.cudnn as cudnn
import torch.nn as nn
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter
from torch.autograd import Variable
from torch.utils.data import DataLoader
from data_wv2 import Dataset_Pro
from model_wv2 import APNN, summaries, loss_with_l2_regularization, weights_init
from logger import create_logger, log_string
import numpy as np

import argparse

# Command-line options; the parsed namespace is passed to create_logger below.
parser = argparse.ArgumentParser(description='PyTorch ImageNet Training')

parser.add_argument('--out_dir', metavar='DIR', default='../results',
                    help='path to save model')
parser.add_argument('--log_dir', metavar='DIR', default='logs',
                    help='path to save log')
parser.add_argument('--tfb_dir', metavar='DIR', default=None,
                    help='useless in this script.')
parser.add_argument('--arch', '-a', metavar='ARCH', default='APNN')

args = parser.parse_args()
# Experiment description and dataset tag are fixed here rather than read from the CLI.
args.experimental_desc = "APNN"
args.dataset = "WV2"

# create_logger (imported from `logger`) returns three directories; model_save_dir is
# the one train() writes its checkpoints into.
out_dir, model_save_dir, tfb_dir = create_logger(args, args.experimental_desc)
print(model_save_dir)
# import shutil
# from torch.utils.tensorboard import SummaryWriter

###################################################################
# ------------------- Pre-Define Part----------------------
###################################################################
# ================== Pre-Define =================== #
# Seed the CPU generator and all CUDA generators with the same value.
SEED = 10
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
# cudnn.benchmark = True  # let cuDNN automatically search for the best algorithm (disabled)
cudnn.deterministic = True

# ============= 2) HYPER PARAMS(Pre-Defined) ==========#
""" CHANGES: 
    1. row 48:         in APNN the L1 loss is averaged only on the minibatch size,
                       for the learning rate in case the loss is averaged on minibatches, patches size,
                       and bands is lr=0.0001*17*17*nr_bands
                           a. nr_bands takes into account the number of bands
                           
    2. row 46:          <sensor> should be indicated by user here, or taken from data 
    
    3. row 46:          <nr_bands>  depends on the <sensor>
    
    4. the dataset is already normalized, so we do not need anymore <L> and <ratio> 
    
    5. row 49:          in APNN epochs=10000
    
    6. row 71:          in APNN weight_decay=0
    
    7. rows 182-195:    in pretrained_models the best PNN model is saved
"""

# Sensor tag; train() uses it to build the '<sensor>_PNNplus_model.pth.tar' file name.
sensor = 'WV2'
nr_bands = 8  # selected by user or taken from data?
# Base learning rate. The alternative 0.0001 * 17 * 17 * nr_bands evaluates to 0.2312
# for nr_bands = 8 (the earlier inline note quoted 0.2302).
lr = 0.1#0.0001 * 17 * 17 * nr_bands
# train() counts epochs up to this value.
epochs = 15000
# Period (in epochs) of the regular checkpoints written by train().
ckpt = 50
batch_size = 128
# Checkpoint to resume from; it is only loaded further down if the file exists.
model_path = "../results/WV2/best_PNN_model_4765.pth.tar"
# Best validation L1 loss so far; replaced by the checkpoint's "loss1" when one is loaded.
v_min = 10000
'''
- Epoch: 15000/4765 training L1-loss: 0.0145116, L2-loss: 0.0005868
- Epoch: 15000/4765 validate L1-loss: 0.0145294, L2-loss: 0.000590
- Epoch: 15000/4766 training L1-loss: 0.0145040, L2-loss: 0.0005864
- Epoch: 15000/4766 validate L1-loss: 0.0145357, L2-loss: 0.000590
- Epoch: 15000/4767 training L1-loss: 0.0145072, L2-loss: 0.0005866
- Epoch: 15000/4767 validate L1-loss: 0.0145413, L2-loss: 0.000591
'''
# TODO L2 norm to do where
# ============= 3) Load Model + Loss + Optimizer + Learn_rate_update ==========#
# Build the network on the GPU and apply the project's weight initialiser.
model = APNN().cuda()
model.apply(weights_init)
# Optionally resume: restore the weights and the best validation loss from model_path.
if os.path.isfile(model_path):
    log_string("loading")
    checkpoint = torch.load(model_path)
    model.load_state_dict(checkpoint["model_state"])  ## Load the pretrained Encoder
    log_string('APNN is Successfully Loaded from %s' % (model_path))
    v_min = checkpoint["loss1"]
# summaries(model, grad=True)    ## Summary the Network
# L1 training criterion; `regularization` wraps the loss inside train().
criterion = nn.L1Loss(reduction='mean').cuda()
regularization = loss_with_l2_regularization().cuda()
# Use the ids of conv3's parameters to single out the remaining parameters of the model,
# instead of iterating over model.parameters() directly.
target_layerParam = list(map(id, model.conv3.parameters()))
base_layerParam = filter(lambda p: id(p) not in target_layerParam, model.parameters())

# Two parameter groups: conv3 trains with lr / 10, every other parameter with lr.
training_parameters = [{'params': model.conv3.parameters(), 'lr': lr / 10},
                       {'params': base_layerParam}]

optimizer = optim.SGD(training_parameters, lr=lr, momentum=0.9)

log_string("inspect optimizer setting: {}\n".format(optimizer.state_dict()))
log_string("target id: {}".format(target_layerParam))

# Extent that zero padding would have to cover for the model's ("wide") convolution layers.
# (input_size - kernel_size + 1) // 2 = 2* pad = 2 * blk = net_scope
# Sum (last weight dimension - 1) over every parameter whose name contains 'conv'
# but not 'bias', then add 1.
net_scope = 0
for name, layer in model.named_parameters():
    if 'conv' in name and 'bias' not in name:
        net_scope += layer.shape[-1] - 1

net_scope = np.sum(net_scope) + 1
# Number of border pixels train() crops from each side of the target before the loss.
blk = net_scope // 2  # 8

# train() deletes this file, if present, before saving a new best model; no code
# visible in this script writes it.
save_best_file = './results/WV2/PNN_model.pth.tar'

# Training settings stored inside every checkpoint under the 'train_params' key.
PNN_model = {'sensor': sensor,
             'lr': lr,
             'epochs': epochs,
             'model_sampling_period': ckpt,
             'net_scope': net_scope,
             'batch_size': batch_size}

# TensorBoard event writer used by train(); closed at the end of train().
writer = SummaryWriter('../train_logs')  ## Tensorboard_show: case 2


def save_checkpoint(model, epoch):  # save model function
    """Write ``model.state_dict()`` to ``Weights/<epoch>.pth``.

    Args:
        model: object exposing ``state_dict()`` (normally an ``nn.Module``).
        epoch: value formatted into the file name.

    The ``Weights`` directory is created when missing, because ``torch.save``
    does not create parent directories and would otherwise raise.
    """
    weights_dir = 'Weights'
    os.makedirs(weights_dir, exist_ok=True)
    model_out_path = weights_dir + '/' + "{}.pth".format(epoch)
    torch.save(model.state_dict(), model_out_path)


###################################################################
# ------------------- Main Train (Run second)----------------------------------
###################################################################

def train(training_data_loader, validate_data_loader, start_epoch=0, v_min=10000):
    """Train the module-level ``model`` and checkpoint it as validation improves.

    Each epoch runs one optimisation pass over ``training_data_loader`` (L1
    ``criterion`` wrapped by ``regularization``) and one gradient-free pass over
    ``validate_data_loader``. The epoch means of the L1 and MSE values are
    logged through ``log_string`` and written to the TensorBoard ``writer``.

    Args:
        training_data_loader: iterable of batches; items 0, 1 and 3 of each
            batch are used as ground truth, upsampled MS and PAN (presumably
            N x C x H x W tensors -- confirm against ``Dataset_Pro``).
        validate_data_loader: iterable with the same batch layout.
        start_epoch: number of epochs already done; epochs are numbered
            ``start_epoch + 1`` .. ``epochs``.
        v_min: best validation L1 loss so far; a checkpoint is written whenever
            an epoch's validation L1 loss is lower.

    Side effects:
        Writes ``PNN_model_<epoch>.pth.tar`` every ``ckpt`` epochs and
        ``best_PNN_model_<epoch>.pth.tar`` on improvement into
        ``model_save_dir``, plus ``../pretrained_models/<sensor>_PNNplus_model.pth.tar``.
        Closes ``writer`` when the loop ends.
    """
    log_string('Start training...')

    mse_criterion = nn.MSELoss()
    pretrained_dir = '../pretrained_models'
    best_model_file = pretrained_dir + '/' + sensor + '_PNNplus_model.pth.tar'

    # torch.save does not create missing parent directories.
    for out_path in (model_save_dir, pretrained_dir):
        if out_path:
            os.makedirs(out_path, exist_ok=True)

    # Epochs are 1-based: the first epoch run is start_epoch + 1, the last is `epochs`.
    for epoch in range(start_epoch + 1, epochs + 1):
        epoch_train_mae, epoch_train_mse, epoch_val_mae, epoch_val_mse = [], [], [], []

        # ============Epoch Train=============== #
        model.train()

        for iteration, batch in enumerate(training_data_loader, 1):
            gt, lms, pan = batch[0].cuda(), batch[1].cuda(), batch[3].cuda()
            # The network is trained on the residual between ground truth and upsampled MS.
            gt = gt - lms

            lms = torch.cat([lms, pan], dim=1)
            optimizer.zero_grad()

            sr = model(lms)

            # Drop `blk` border pixels on every side of the target before comparing.
            gt = gt[:, :, blk:-blk, blk:-blk]

            loss = criterion(sr, gt)
            new_loss = regularization(loss, model, flag=False)
            epoch_train_mae.append(loss.item())  # the plain L1 value is what gets reported

            new_loss.backward()
            optimizer.step()

            # MSE is tracked for monitoring only.
            with torch.no_grad():
                epoch_train_mse.append(mse_criterion(sr, gt).item())

        t_loss1 = np.nanmean(np.array(epoch_train_mae))  # epoch mean of the per-batch L1 losses
        t_loss2 = np.nanmean(np.array(epoch_train_mse))

        writer.add_scalar('mae_loss/t_mae', t_loss1, epoch)
        writer.add_scalar('mae_loss/t_mse', t_loss2, epoch)
        log_string('Epoch: {}/{} training L1-loss: {:.7f}, L2-loss: {:.7f}'.format(epochs, epoch, t_loss1,
                                                                              t_loss2))

        # ============Epoch Validate=============== #
        model.eval()
        with torch.no_grad():
            for iteration, batch in enumerate(validate_data_loader, 1):
                gt, lms, pan = batch[0].cuda(), batch[1].cuda(), batch[3].cuda()
                gt = gt - lms
                lms = torch.cat([lms, pan], dim=1)
                sr = model(lms)

                gt = gt[:, :, blk:-blk, blk:-blk]

                epoch_val_mae.append(criterion(sr, gt).item())
                epoch_val_mse.append(mse_criterion(sr, gt).item())

        v_loss1 = np.nanmean(np.array(epoch_val_mae))
        v_loss2 = np.nanmean(np.array(epoch_val_mse))
        writer.add_scalar('val/v_mae', v_loss1, epoch)
        writer.add_scalar('val/v_mse', v_loss2, epoch)
        # '{:.7f}' for both values: the former '{:7f}' set a field width, not a precision.
        log_string('Epoch: {}/{} validate L1-loss: {:.7f}, L2-loss: {:.7f}'.format(epochs, epoch, v_loss1,
                                                                              v_loss2))

        ### periodic save and simple best save ###
        # `epoch` is already 1-based, so the file labelled N holds the model after N epochs
        # (the former `(epoch + 1) % ckpt` test stored epoch N - 1 under the label N).
        if epoch % ckpt == 0:
            torch.save(dict(model=model,
                            model_state=model.state_dict(),
                            loss=v_loss1,
                            train_params=PNN_model),
                       '{}/PNN_model_{}.pth.tar'.format(model_save_dir, epoch))

        if v_loss1 < v_min:
            if os.path.isfile(save_best_file):
                os.remove(save_best_file)
            best_state = dict(model=model,
                              model_state=model.state_dict(),
                              loss1=v_loss1,
                              train_params=PNN_model)
            torch.save(best_state, '{}/best_PNN_model_{}.pth.tar'.format(model_save_dir, epoch))
            torch.save(best_state, best_model_file)
            v_min = v_loss1

    writer.close()  # close tensorboard


###################################################################
# ------------------- Main Function (Run first) -------------------
###################################################################
if __name__ == "__main__":
    # Settings shared by the training and validation loaders; only `shuffle` differs.
    _loader_opts = dict(num_workers=0, batch_size=batch_size, pin_memory=True, drop_last=True)

    # Training samples, drawn in random order.
    train_set = Dataset_Pro('../training_data/train_wv2_10000.h5')
    training_data_loader = DataLoader(dataset=train_set, shuffle=True, **_loader_opts)

    # Validation samples, visited in file order.
    validate_set = Dataset_Pro('../training_data/valid_wv2_10000.h5')
    validate_data_loader = DataLoader(dataset=validate_set, shuffle=False, **_loader_opts)

    # NOTE(review): epoch counting resumes at a hard-coded 6700 whether or not a
    # checkpoint was loaded above -- confirm this matches the checkpoint in model_path.
    train(training_data_loader, validate_data_loader, 6700, v_min=v_min)
'''
- Epoch: 15000/1942 validate L1-loss: 0.0152082, L2-loss: 0.000656
- Epoch: 15000/1943 training L1-loss: 0.0151794, L2-loss: 0.0006520
- Epoch: 15000/1943 validate L1-loss: 0.0152087, L2-loss: 0.000656
- Epoch: 15000/1944 training L1-loss: 0.0151762, L2-loss: 0.0006518
- Epoch: 15000/1944 validate L1-loss: 0.0152076, L2-loss: 0.000656
- Epoch: 15000/1945 training L1-loss: 0.0151769, L2-loss: 0.0006519
- Epoch: 15000/1945 validate L1-loss: 0.0152089, L2-loss: 0.000656
- Epoch: 15000/1946 training L1-loss: 0.0151799, L2-loss: 0.0006520
- Epoch: 15000/1946 validate L1-loss: 0.0152077, L2-loss: 0.000656
'''

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/models/APNN/main_pre_train_trainData_wv3.py
================================================
import os
import torch
import torch.backends.cudnn as cudnn
import torch.nn as nn
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter
from torch.autograd import Variable
from torch.utils.data import DataLoader
from data_wv3 import Dataset_Pro
from model_wv3 import APNN, loss_with_l2_regularization, weights_init
import numpy as np
import shutil
from torch.utils.tensorboard import SummaryWriter
from torchstat import stat
###################################################################
# ------------------- Pre-Define Part----------------------
###################################################################
# ================== Pre-Define =================== #
# Seed the CPU generator and all CUDA generators with the same value.
SEED = 10
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
# cudnn.benchmark = True  # let cuDNN automatically search for the best algorithm (disabled)
cudnn.deterministic = True

# ============= 2) HYPER PARAMS(Pre-Defined) ==========#


# Sensor tag; train() uses it to build the '<sensor>_PNNplus_model.pth.tar' file name.
sensor = 'WV3'
nr_bands = 8  # selected by user or taken from data?
# Base learning rate: 0.0001 * 17 * 17 * nr_bands.
lr = 0.0001 * 17 * 17 * nr_bands
# train() counts epochs up to this value.
epochs = 10000
# Period (in epochs) of the regular checkpoints written by train().
ckpt = 50
batch_size = 128
# 0.010094023841832365
# Checkpoint to resume from; it is only loaded below if the file exists.
model_path = "results/PNN/.pth.tar"
# ============= 3) Load Model + Loss + Optimizer + Learn_rate_update ==========#
# Build the network on the GPU and apply the project's weight initialiser.
model = APNN().cuda()
model.apply(weights_init)

# torchstat report for a single 9 x 64 x 64 input.
stat(model, input_size=[(9, 64, 64)])

# Optionally resume: restore the weights and report the stored "loss1" if present.
if os.path.isfile(model_path):
    checkpoint = torch.load(model_path)
    model.load_state_dict(checkpoint["model_state"])  ## Load the pretrained Encoder
    print('APNN is Successfully Loaded from %s' % (model_path))
    if "loss1" in dict(checkpoint).keys():
        print("loss: {}".format(checkpoint["loss1"]))


# L1 training criterion; `regularization` wraps the loss inside train().
criterion = nn.L1Loss(reduction='mean').cuda()
regularization = loss_with_l2_regularization().cuda()
# ids of conv3's parameters, used to single out the remaining parameters of the model.
target_layerParam = list(map(id, model.conv3.parameters()))
base_layerParam = filter(lambda p: id(p) not in target_layerParam, model.parameters())

# Two parameter groups: conv3 trains with lr / 10, every other parameter with lr.
training_parameters = [{'params': model.conv3.parameters(), 'lr': lr / 10},
                       {'params': base_layerParam}]

optimizer = optim.SGD(training_parameters, lr=lr, momentum=0.9, weight_decay=0)

print("inspect optimizer setting:\n", optimizer.state_dict())
print("target id:", target_layerParam)

# (input_size - kernel_size + 1) // 2 = 2* pad = 2 * blk = net_scope
# Sum (last weight dimension - 1) over every parameter whose name contains 'conv'
# but not 'bias', then add 1.
net_scope = 0
for name, layer in model.named_parameters():
    if 'conv' in name and 'bias' not in name:
        net_scope += layer.shape[-1] - 1

net_scope = np.sum(net_scope) + 1
# Number of border pixels the training loops crop from each side of the target.
blk = net_scope // 2  # 8

# train() deletes this file, if present, before saving a new best model; no code
# visible in this script writes it.
save_best_file = './results/PNN/PNN_model.pth.tar'

# Training settings stored inside every checkpoint under the 'train_params' key.
PNN_model = {'sensor': sensor,
             'lr': lr,
             'epochs': epochs,
             'model_sampling_period': ckpt,
             'net_scope': net_scope,
             'batch_size': batch_size}

# TensorBoard event writer used by train(); closed at the end of train().
writer = SummaryWriter('./train_logs')  ## Tensorboard_show: case 2


def save_checkpoint(model, epoch):  # save model function
    """Write ``model.state_dict()`` to ``Weights/wv3/<epoch>.pth``.

    Args:
        model: object exposing ``state_dict()`` (normally an ``nn.Module``).
        epoch: value formatted into the file name.

    The ``Weights/wv3`` directory is created when missing, because
    ``torch.save`` does not create parent directories and would otherwise raise.
    """
    weights_dir = 'Weights' + '/wv3/'
    os.makedirs(weights_dir, exist_ok=True)
    model_out_path = weights_dir + "{}.pth".format(epoch)
    torch.save(model.state_dict(), model_out_path)


###################################################################
# ------------------- Main Train (Run second)----------------------------------
###################################################################

def train(training_data_loader, validate_data_loader, start_epoch=0):
    """Train the module-level ``model`` and checkpoint it as validation improves.

    Each epoch runs one optimisation pass over ``training_data_loader`` (L1
    ``criterion`` wrapped by ``regularization``) and one gradient-free pass over
    ``validate_data_loader``. The epoch means of the L1 and MSE values are
    printed and written to the TensorBoard ``writer``.

    Args:
        training_data_loader: iterable of batches; items 0, 1 and 3 of each
            batch are used as ground truth, upsampled MS and PAN (presumably
            N x C x H x W tensors -- confirm against ``Dataset_Pro``).
        validate_data_loader: iterable with the same batch layout.
        start_epoch: number of epochs already done; epochs are numbered
            ``start_epoch + 1`` .. ``epochs``.

    Side effects:
        Writes ``./results/PNN/PNN_model_<epoch>.pth.tar`` every ``ckpt`` epochs,
        and on every validation improvement
        ``./results/PNN/best_PNN_model_<epoch>.pth.tar`` plus
        ``./pretrained_models/<sensor>_PNNplus_model.pth.tar``.
        Closes ``writer`` when the loop ends.
    """
    print('Start training...')
    print(model.conv1.weight.data[0, 0, 0, 0])

    mse_criterion = nn.MSELoss()
    results_dir = './results/PNN'
    pretrained_dir = './pretrained_models'
    # torch.save does not create missing parent directories.
    for out_path in (results_dir, pretrained_dir):
        os.makedirs(out_path, exist_ok=True)

    vmin = 10000  # best validation L1 loss seen in this call
    # Epochs are 1-based: the first epoch run is start_epoch + 1, the last is `epochs`.
    for epoch in range(start_epoch + 1, epochs + 1):
        # Passed to `regularization`; true on epoch 1 and on the final epoch.
        # NOTE(review): the original also OR-ed in `iteration == 0`, which can never be
        # true because enumerate() below starts at 1 -- confirm whether 1 was intended.
        flag = epoch == epochs or epoch == 1
        epoch_train_mae, epoch_train_mse, epoch_val_mae, epoch_val_mse = [], [], [], []

        # ============Epoch Train=============== #
        model.train()

        for iteration, batch in enumerate(training_data_loader, 1):
            gt, lms, pan = batch[0].cuda(), batch[1].cuda(), batch[3].cuda()
            # The network is trained on the residual between ground truth and upsampled MS.
            gt = gt - lms

            lms = torch.cat([lms, pan], dim=1)
            optimizer.zero_grad()

            sr = model(lms)

            # Drop `blk` border pixels on every side of the target before comparing.
            gt = gt[:, :, blk:-blk, blk:-blk]

            loss = criterion(sr, gt)
            new_loss = regularization(loss, model, flag=flag)
            epoch_train_mae.append(loss.item())  # the plain L1 value is what gets reported

            new_loss.backward()
            optimizer.step()

            # MSE is tracked for monitoring only.
            with torch.no_grad():
                epoch_train_mse.append(mse_criterion(sr, gt).item())

        t_loss1 = np.nanmean(np.array(epoch_train_mae))  # epoch mean of the per-batch L1 losses
        t_loss2 = np.nanmean(np.array(epoch_train_mse))

        writer.add_scalar('mae_loss/t_mae', t_loss1, epoch)
        writer.add_scalar('mae_loss/t_mse', t_loss2, epoch)
        print('Epoch: {}/{} training L1-loss: {:.7f}, L2-loss: {:.7f}'.format(epochs, epoch, t_loss1,
                                                                              t_loss2))

        # ============Epoch Validate=============== #
        model.eval()
        with torch.no_grad():
            for iteration, batch in enumerate(validate_data_loader, 1):
                gt, lms, pan = batch[0].cuda(), batch[1].cuda(), batch[3].cuda()
                gt = gt - lms
                lms = torch.cat([lms, pan], dim=1)
                sr = model(lms)

                gt = gt[:, :, blk:-blk, blk:-blk]

                epoch_val_mae.append(criterion(sr, gt).item())
                epoch_val_mse.append(mse_criterion(sr, gt).item())

        v_loss1 = np.nanmean(np.array(epoch_val_mae))
        v_loss2 = np.nanmean(np.array(epoch_val_mse))
        writer.add_scalar('val/v_mae', v_loss1, epoch)
        writer.add_scalar('val/v_mse', v_loss2, epoch)
        # '{:.7f}' for both values: the former '{:7f}' set a field width, not a precision.
        print('Epoch: {}/{} validate L1-loss: {:.7f}, L2-loss: {:.7f}'.format(epochs, epoch, v_loss1,
                                                                              v_loss2))

        ### periodic save and simple best save ###
        # `epoch` is already 1-based, so the file labelled N holds the model after N epochs
        # (the former `(epoch + 1) % ckpt` test stored epoch N - 1 under the label N).
        if epoch % ckpt == 0:
            torch.save(dict(model=model,
                            model_state=model.state_dict(),
                            optim_state=optimizer.state_dict(),
                            loss=v_loss1,
                            train_params=PNN_model),
                       './results/PNN/PNN_model_{}.pth.tar'.format(epoch))

        if v_loss1 < vmin:
            if os.path.isfile(save_best_file):
                os.remove(save_best_file)
            best_state = dict(model=model,
                              model_state=model.state_dict(),
                              optim_state=optimizer.state_dict(),
                              loss1=v_loss1,
                              train_params=PNN_model)
            torch.save(best_state, './results/PNN/best_PNN_model_{}.pth.tar'.format(epoch))
            torch.save(best_state, './pretrained_models/' + sensor + '_PNNplus_model.pth.tar')
            vmin = v_loss1

    writer.close()  # close tensorboard


def fine_tune_test(file_path, training_data_loader):
    """Fine-tune a (pretrained or fresh) APNN and evaluate it on a test file.

    If ``./results/PNN/.pth.tar`` exists, the pickled model, its weights, the
    stored learning rate and (when available) the optimizer state are restored
    and 1000 fine-tuning epochs are run. Otherwise a new ``APNN`` is trained
    for ``epochs`` epochs with the default learning rate.

    Args:
        file_path: path handed to ``Tester`` from ``main_test_wv3``.
        training_data_loader: iterable of batches; items 0, 1 and 3 of each
            batch are used as ground truth, upsampled MS and PAN.

    Side effects:
        Calls ``eval_test`` once before training and again whenever the mean
        training L1 loss of an epoch reaches a new minimum; on each new minimum
        the network is also saved to ``./ft_network/net.pth.tar``.
    """
    from main_test_wv3 import Tester, eval_test

    # tester = Tester(file_path, mode='ft')  # call initial model
    evaluator = Tester(file_path)
    criterion = nn.L1Loss(reduction='mean').cuda()

    # ---- load the pretrained model, if any ----
    model_path = "./results/PNN/.pth.tar"
    # Stays None when no checkpoint file is found, so the optimizer-state lookup below
    # is explicit (it used to rely on a NameError swallowed by a bare `except`).
    checkpoint = None
    if os.path.isfile(model_path):
        print("loading model")
        # NOTE: torch.load unpickles the file; only load checkpoints you trust.
        checkpoint = torch.load(model_path)
        # checkpoint = torch.load('./pretrained_models/' + sensor_model)
        print(checkpoint.keys())
        net = checkpoint['model']
        print(net.conv1.weight.data[0, 0, 0, 0])
        net.load_state_dict(checkpoint['model_state'])
        train_params = checkpoint['train_params']
        lr = train_params['lr']  # learning rate
        print("lr", lr)
        FT_epochs = 1000  # number of fine tuning epochs

    else:
        lr = 0.0001 * 17 * 17 * nr_bands
        FT_epochs = epochs
        net = APNN().cuda()
        print(net.conv1.weight.data[0, 0, 0, 0])
    # Values recorded by the original author for the print below:
    #   tensor(-0.0003, device='cuda:0')
    #   tensor(-0.0204, device='cuda:0')
    print(net.conv1.weight.data[0, 0, 0, 0])
    print(net)
    # print("pretrain loss: ", checkpoint["loss1"])

    print(dict(net.named_parameters()).keys())
    # Two parameter groups: conv3 trains with lr / 10, every other parameter with lr.
    target_layerParam = list(map(id, net.conv3.parameters()))
    base_layerParam = filter(lambda p: id(p) not in target_layerParam, net.parameters())
    training_parameters = [{'params': net.conv3.parameters(), 'lr': lr/10},
                           {'params': base_layerParam}]

    optimizer = optim.SGD(training_parameters, lr=lr, momentum=0.9, weight_decay=0)

    # Restoring the optimizer state is best-effort: fall back to the fresh optimizer when
    # there is no checkpoint, no stored state, or the stored state does not fit.
    optim_state = checkpoint.get("optim_state") if checkpoint is not None else None
    if optim_state is None:
        print("default optim_state")
    else:
        try:
            optimizer.load_state_dict(optim_state)
        except Exception as err:
            print("default optim_state")
            print("optimizer state not restored: {}".format(err))

    v_min = 10000
    ft_loss = np.zeros(FT_epochs)  # per-epoch mean training loss; unfilled epochs stay 0

    eval_test(net, evaluator, mode="eval", mode2="pre")#0.0114576
    for epoch in range(FT_epochs):
        net.train()
        epoch_train_mae = []
        for iteration, batch in enumerate(training_data_loader, 1):
            gt, lms, pan = batch[0].cuda(), batch[1].cuda(), batch[3].cuda()
            # The network is trained on the residual between ground truth and upsampled MS.
            gt = gt - lms

            lms = torch.cat([lms, pan], dim=1)
            optimizer.zero_grad()

            sr = net(lms)

            # Drop `blk` border pixels on every side of the target before comparing.
            gt = gt[:, :, blk:-blk, blk:-blk]

            loss = criterion(sr, gt)
            new_loss = regularization(loss, net, flag=False)

            epoch_train_mae.append(loss.item())  # the plain L1 value is what gets reported

            new_loss.backward()
            optimizer.step()

        running_loss = np.nanmean(epoch_train_mae)
        ft_loss[epoch] = running_loss

        # New best training loss: save the network and re-evaluate it.
        if running_loss < v_min:
            v_min = running_loss
            PATH = './ft_network/'
            os.makedirs(PATH, exist_ok=True)
            torch.save(dict(model=net,
                            model_state=net.state_dict(),
                            loss=ft_loss),
                       os.path.join(PATH, 'net.pth.tar'))
            net.eval()
            eval_test(net, evaluator, mode="eval", mode2="ft")
        print('[%d] loss: %.20f' % (epoch + 1, running_loss))


###################################################################
# ------------------- Main Function (Run first) -------------------
###################################################################
if __name__ == "__main__":
    # Settings shared by the training and validation loaders; only `shuffle` differs.
    _loader_opts = dict(num_workers=0, batch_size=batch_size, pin_memory=True, drop_last=True)

    # Training samples, drawn in random order.
    train_set = Dataset_Pro('./training_data/train_wv3_10000.h5')
    training_data_loader = DataLoader(dataset=train_set, shuffle=True, **_loader_opts)

    # Validation samples, visited in file order.
    validate_set = Dataset_Pro('./training_data/valid_wv3_10000.h5')
    validate_data_loader = DataLoader(dataset=validate_set, shuffle=False, **_loader_opts)

    # Hand both loaders to the training loop defined above.
    train(training_data_loader, validate_data_loader)
    # Optional fine-tuning run (disabled):
    # file_path = "./test_data/TestData_wv3.h5"
    # fine_tune_test(file_path, training_data_loader)

'''

'''


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/models/APNN/main_pre_train_trainData_wv4.py
================================================
import os
import torch
import torch.backends.cudnn as cudnn
import torch.nn as nn
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter
from torch.autograd import Variable
from torch.utils.data import DataLoader
from data_wv4 import Dataset_Pro
from model_wv4 import APNN, summaries, loss_with_l2_regularization, weights_init
from logger import create_logger, log_string
import numpy as np


import argparse

# Command-line options; the parsed namespace is passed to create_logger below.
parser = argparse.ArgumentParser(description='PyTorch ImageNet Training')

parser.add_argument('--out_dir', metavar='DIR', default='../results',
                    help='path to save model')
parser.add_argument('--log_dir', metavar='DIR', default='logs',
                    help='path to save log')
parser.add_argument('--tfb_dir', metavar='DIR', default=None,
                    help='useless in this script.')
parser.add_argument('--arch', '-a', metavar='ARCH', default='APNN')

args = parser.parse_args()
# Experiment description and dataset tag are fixed here rather than read from the CLI.
args.experimental_desc = "APNN"
args.dataset = "WV4"

# create_logger (imported from `logger`) returns three directories; model_save_dir is
# the one train() writes its checkpoints into.
out_dir, model_save_dir, tfb_dir = create_logger(args, args.experimental_desc)
print(model_save_dir)
#import shutil
#from torch.utils.tensorboard import SummaryWriter

###################################################################
# ------------------- Pre-Define Part----------------------
###################################################################
# ================== Pre-Define =================== #
# Seed the CPU generator and all CUDA generators with the same value.
SEED = 10
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
# cudnn.benchmark = True  # let cuDNN automatically search for the best algorithm (disabled)
cudnn.deterministic = True

# ============= 2) HYPER PARAMS(Pre-Defined) ==========#
""" CHANGES: 
    1. row 48:         in APNN the L1 loss is averaged only on the minibatch size,
                       for the learning rate in case the loss is averaged on minibatches, patches size,
                       and bands is lr=0.0001*17*17*nr_bands
                           a. nr_bands takes into account the number of bands
                           
    2. row 46:          <sensor> should be indicated by user here, or taken from data 
    
    3. row 46:          <nr_bands>  depends on the <sensor>
    
    4. the dataset is already normalized, so we do not need anymore <L> and <ratio> 
    
    5. row 49:          in APNN epochs=10000
    
    6. row 71:          in APNN weight_decay=0
    
    7. rows 182-195:    in pretrained_models the best PNN model is saved
"""
# os.environ["CUDA_VISIBLE_DEVICES"] = "1"
# Sensor tag; train() uses it to build the '<sensor>_PNNplus_model.pth.tar' file name.
sensor = 'WV4'
nr_bands = 4 #selected by user or taken from data?
# Base learning rate: 0.0001 * 17 * 17 * nr_bands.
lr = 0.0001*17*17*nr_bands
# train() counts epochs up to this value.
epochs = 15000
# Period (in epochs) of the regular checkpoints written by train().
ckpt = 50
batch_size = 128
# Checkpoint to resume from; it is only loaded below if the file exists.
# NOTE(review): the name ends in a doubled ".pth.tar.pth.tar" -- confirm it matches the file on disk.
model_path = "../results/wv4/best_PNN_model_1706.pth.tar.pth.tar"
# Best validation L1 loss so far; replaced by the checkpoint's "loss1" when one is loaded.
v_min = 10000
#TODO L2 norm to do where
# ============= 3) Load Model + Loss + Optimizer + Learn_rate_update ==========#
# Build the network on the GPU and apply the project's weight initialiser.
model = APNN().cuda()
model.apply(weights_init)
# Optionally resume: restore the weights and the best validation loss from model_path.
if os.path.isfile(model_path):
    log_string("loading")
    checkpoint = torch.load(model_path)
    model.load_state_dict(checkpoint["model_state"])   ## Load the pretrained Encoder
    v_min = checkpoint["loss1"]
    print("best_loss {:.7f}".format(v_min))
    log_string('APNN is Successfully Loaded from %s' % (model_path))

# summaries(model, grad=True)    ## Summary the Network
# L1 training criterion; `regularization` wraps the loss inside train().
criterion = nn.L1Loss(reduction='mean').cuda()
regularization = loss_with_l2_regularization().cuda()
# Use the ids of conv3's parameters to single out the remaining parameters of the model,
# instead of iterating over model.parameters() directly.
target_layerParam = list(map(id, model.conv3.parameters()))
base_layerParam = filter(lambda p: id(p) not in target_layerParam, model.parameters())

# Two parameter groups: conv3 trains with lr / 10, every other parameter with lr.
training_parameters = [{'params': model.conv3.parameters(), 'lr': lr/10},
                       {'params': base_layerParam}]

optimizer = optim.SGD(training_parameters, lr=lr, momentum=0.9)

log_string("inspect optimizer setting: {}\n".format(optimizer.state_dict()))
log_string("target id: {}".format(target_layerParam))

# Extent that zero padding would have to cover for the model's ("wide") convolution layers.
# (input_size - kernel_size + 1) // 2 = 2* pad = 2 * blk = net_scope
# Sum (last weight dimension - 1) over every parameter whose name contains 'conv'
# but not 'bias', then add 1.
net_scope = 0
for name, layer in model.named_parameters():
    if 'conv' in name and 'bias' not in name:
        net_scope += layer.shape[-1]-1

net_scope = np.sum(net_scope) + 1
# Number of border pixels train() crops from each side of the target before the loss.
blk = net_scope//2 #8

# train() deletes this file, if present, before saving a new best model; no code
# visible in this script writes it.
save_best_file = '../results/PNN/PNN_model.pth.tar'

# Training settings stored inside every checkpoint under the 'train_params' key.
PNN_model = {'sensor': sensor,
             'lr': lr,
             'epochs': epochs,
             'model_sampling_period': ckpt,
             'net_scope': net_scope,
             'batch_size': batch_size}


# TensorBoard event writer used by train(); closed at the end of train().
writer = SummaryWriter('../train_logs')    ## Tensorboard_show: case 2

def save_checkpoint(model, epoch):  # save model function
    """Write ``model.state_dict()`` to ``Weights/<epoch>.pth``.

    Args:
        model: object exposing ``state_dict()`` (normally an ``nn.Module``).
        epoch: value formatted into the file name.

    The ``Weights`` directory is created when missing, because ``torch.save``
    does not create parent directories and would otherwise raise.
    """
    weights_dir = 'Weights'
    os.makedirs(weights_dir, exist_ok=True)
    model_out_path = weights_dir + '/' + "{}.pth".format(epoch)
    torch.save(model.state_dict(), model_out_path)

###################################################################
# ------------------- Main Train (Run second)----------------------------------
###################################################################

def train(training_data_loader, validate_data_loader, start_epoch=0, v_min=10000):
    """Train the module-level ``model`` and checkpoint it as validation improves.

    Each epoch runs one optimisation pass over ``training_data_loader`` (L1
    ``criterion`` wrapped by ``regularization``) and one gradient-free pass over
    ``validate_data_loader``. The epoch means of the L1 and MSE values are
    logged through ``log_string`` and written to the TensorBoard ``writer``.

    Args:
        training_data_loader: iterable of batches; items 0, 1 and 3 of each
            batch are used as ground truth, upsampled MS and PAN (presumably
            N x C x H x W tensors -- confirm against ``Dataset_Pro``).
        validate_data_loader: iterable with the same batch layout.
        start_epoch: number of epochs already done; epochs are numbered
            ``start_epoch + 1`` .. ``epochs``.
        v_min: best validation L1 loss so far; a checkpoint is written whenever
            an epoch's validation L1 loss is lower.

    Side effects:
        Writes ``PNN_model_<epoch>.pth.tar`` every ``ckpt`` epochs and
        ``best_PNN_model_<epoch>.pth.tar`` on improvement into
        ``model_save_dir``, plus ``../pretrained_models/<sensor>_PNNplus_model.pth.tar``.
        Closes ``writer`` when the loop ends.
    """
    log_string('Start training...')

    mse_criterion = nn.MSELoss()
    pretrained_dir = '../pretrained_models'
    best_model_file = pretrained_dir + '/' + sensor + '_PNNplus_model.pth.tar'

    # torch.save does not create missing parent directories.
    for out_path in (model_save_dir, pretrained_dir):
        if out_path:
            os.makedirs(out_path, exist_ok=True)

    # Epochs are 1-based: the first epoch run is start_epoch + 1, the last is `epochs`.
    for epoch in range(start_epoch + 1, epochs + 1):
        epoch_train_mae, epoch_train_mse, epoch_val_mae, epoch_val_mse = [], [], [], []

        # ============Epoch Train=============== #
        model.train()

        for iteration, batch in enumerate(training_data_loader, 1):
            gt, lms, pan = batch[0].cuda(), batch[1].cuda(), batch[3].cuda()
            # The network is trained on the residual between ground truth and upsampled MS.
            gt = gt - lms

            lms = torch.cat([lms, pan], dim=1)
            optimizer.zero_grad()

            sr = model(lms)

            # Drop `blk` border pixels on every side of the target before comparing.
            gt = gt[:, :, blk:-blk, blk:-blk]

            loss = criterion(sr, gt)
            new_loss = regularization(loss, model, flag=False)
            epoch_train_mae.append(loss.item())  # the plain L1 value is what gets reported

            new_loss.backward()
            optimizer.step()

            # MSE is tracked for monitoring only.
            with torch.no_grad():
                epoch_train_mse.append(mse_criterion(sr, gt).item())

        t_loss1 = np.nanmean(np.array(epoch_train_mae))  # epoch mean of the per-batch L1 losses
        t_loss2 = np.nanmean(np.array(epoch_train_mse))

        writer.add_scalar('mae_loss/t_mae', t_loss1, epoch)
        writer.add_scalar('mae_loss/t_mse', t_loss2, epoch)
        log_string('Epoch: {}/{} training L1-loss: {:.7f}, L2-loss: {:.7f}'.format(epochs, epoch, t_loss1, t_loss2))

        # ============Epoch Validate=============== #
        model.eval()
        with torch.no_grad():
            for iteration, batch in enumerate(validate_data_loader, 1):
                gt, lms, pan = batch[0].cuda(), batch[1].cuda(), batch[3].cuda()
                gt = gt - lms
                lms = torch.cat([lms, pan], dim=1)
                sr = model(lms)

                gt = gt[:, :, blk:-blk, blk:-blk]

                epoch_val_mae.append(criterion(sr, gt).item())
                epoch_val_mse.append(mse_criterion(sr, gt).item())

        v_loss1 = np.nanmean(np.array(epoch_val_mae))
        v_loss2 = np.nanmean(np.array(epoch_val_mse))
        writer.add_scalar('val/v_mae', v_loss1, epoch)
        writer.add_scalar('val/v_mse', v_loss2, epoch)
        # '{:.7f}' for both values: the former '{:7f}' set a field width, not a precision.
        log_string('Epoch: {}/{} validate L1-loss: {:.7f}, L2-loss: {:.7f}'.format(epochs, epoch, v_loss1, v_loss2))

        ### periodic save and simple best save ###
        # `epoch` is already 1-based, so the file labelled N holds the model after N epochs
        # (the former `(epoch + 1) % ckpt` test stored epoch N - 1 under the label N).
        if epoch % ckpt == 0:
            torch.save(dict(model=model,
                            model_state=model.state_dict(),
                            loss=v_loss1,
                            train_params=PNN_model),
                       '{}/PNN_model_{}.pth.tar'.format(model_save_dir, epoch))

        if v_loss1 < v_min:
            if os.path.isfile(save_best_file):
                os.remove(save_best_file)
            best_state = dict(model=model,
                              model_state=model.state_dict(),
                              loss1=v_loss1,
                              train_params=PNN_model)
            torch.save(best_state, '{}/best_PNN_model_{}.pth.tar'.format(model_save_dir, epoch))
            torch.save(best_state, best_model_file)
            v_min = v_loss1

    writer.close()  # close tensorboard

###################################################################
# ------------------- Main Function (Run first) -------------------
###################################################################
if __name__ == "__main__":
    # Settings shared by the training and validation loaders; only `shuffle` differs.
    _loader_opts = dict(num_workers=0, batch_size=batch_size, pin_memory=True, drop_last=True)

    # Training samples, drawn in random order.
    train_set = Dataset_Pro('../training_data/train_wv4_10000.h5')
    training_data_loader = DataLoader(dataset=train_set, shuffle=True, **_loader_opts)

    # Validation samples, visited in file order.
    validate_set = Dataset_Pro('../training_data/valid_wv4_10000.h5')
    validate_data_loader = DataLoader(dataset=validate_set, shuffle=False, **_loader_opts)

    # NOTE(review): epoch counting resumes at a hard-coded 1707 whether or not a
    # checkpoint was loaded above -- confirm this matches the checkpoint in model_path.
    train(training_data_loader, validate_data_loader, 1707, v_min)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/models/APNN/main_test_qb.py
================================================
import os
import torch
import torch.backends.cudnn as cudnn
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import DataLoader
from data_qb import Dataset_Pro
import h5py
from data_single_read import load_set
from evaluate import compute_index
from model_qb import APNN, summaries, weights_init, loss_with_l2_regularization
import numpy as np
import scipy.io as sio
from time import time
from evaluate import analysis_accu


###################################################################
# ------------------- Sub-Functions (will be used) -------------------
###################################################################

def load_set(file_path, blk):
    """Load a pansharpening test set and build the padded network input.

    Two layouts are supported, selected by the file extension:

    * ``.h5``  -- datasets ``ms``, ``lms``, ``pan`` and ``gt`` stored as NxCxHxW.
    * ``.mat`` -- arrays ``I_MS_LR``, ``I_MS``, ``I_PAN`` and ``I_GT`` stored as
      HxWxC (``I_PAN`` as HxW); a leading batch axis of size 1 is added.

    Every array is converted to float32 and divided by 2047.0.

    Args:
        file_path: path to the data file. If the two characters right before the
            extension are ``FR``, ``I_MS_LR`` is reused as ``I_GT``.
        blk: number of pixels of edge-replicated padding added on each side of
            both spatial axes of the network input.

    Returns:
        ``(test_I_in, ms, pan, gt)`` as torch tensors in NxCxHxW layout, where
        ``test_I_in`` is the interpolated MS stacked with PAN along the channel
        axis and padded by ``blk``.

    Raises:
        NotImplementedError: if the extension is neither ``h5`` nor ``mat``.
        ValueError: if the low-resolution MS array is neither 3-D nor 4-D.
    """
    suffix = file_path.split('.')
    if suffix[-1] == 'h5':
        data = h5py.File(file_path)  # NxCxHxW
        shape_size = len(data["ms"][...].shape)
    elif suffix[-1] == 'mat':
        data = sio.loadmat(file_path)  # HxWxC
        print(data.keys())
        shape_size = len(data["I_MS_LR"][...].shape)
    else:
        # `NotImplemented` is a comparison sentinel, not an exception class;
        # raising/calling it produces an unrelated TypeError.
        raise NotImplementedError("file format is not supported")

    # Full-resolution files carry no ground truth: use the original MS instead.
    if suffix[-2][-2:] == 'FR':
        data['I_GT'] = data['I_MS_LR']

    pad_width = ((0, 0), (0, 0), (blk, blk), (blk, blk))

    if shape_size == 4:  # NxCxHxW
        lms1 = np.array(data['lms'][...], dtype=np.float32) / 2047.0
        pan1 = np.array(data['pan'][...], dtype=np.float32) / 2047.0
        pan = torch.from_numpy(pan1)

        # Stack MS and PAN along the channel axis, then replicate the borders.
        test_I_in1 = np.concatenate([lms1, pan1], axis=1)
        test_I_in1 = np.pad(test_I_in1, pad_width, mode='edge')
        test_I_in = torch.from_numpy(test_I_in1)

        ms1 = np.array(data['ms'][...], dtype=np.float32) / 2047.0
        ms = torch.from_numpy(ms1)

        gt1 = np.array(data['gt'][...], dtype=np.float32) / 2047.0
        gt = torch.from_numpy(gt1)

        return test_I_in, ms, pan, gt

    if shape_size == 3:  # HxWxC
        lms1 = np.expand_dims(data['I_MS'][...], axis=0)  # 1xHxWxC
        lms1 = np.array(lms1, dtype=np.float32) / 2047.0

        pan1 = np.expand_dims(data['I_PAN'][...], axis=0)  # 1xHxW
        pan1 = np.expand_dims(pan1, axis=3)  # 1xHxWx1
        pan1 = np.array(pan1, dtype=np.float32) / 2047.
        pan = torch.from_numpy(pan1).permute(0, 3, 1, 2)  # 1x1xHxW

        test_I_in1 = np.concatenate([lms1, pan1], axis=3)  # 1xHxWx(C+1)
        test_I_in1 = np.transpose(test_I_in1, (0, 3, 1, 2))  # 1x(C+1)xHxW
        test_I_in1 = np.pad(test_I_in1, pad_width, mode='edge')
        test_I_in = torch.from_numpy(test_I_in1)

        ms1 = np.expand_dims(data['I_MS_LR'][...], axis=0)  # 1xHxWxC
        ms1 = np.array(ms1, dtype=np.float32) / 2047.0
        ms = torch.from_numpy(ms1).permute(0, 3, 1, 2)  # 1xCxHxW

        gt1 = np.expand_dims(data['I_GT'][...], axis=0)  # 1xHxWxC
        gt1 = np.array(gt1, dtype=np.float32) / 2047.0
        gt = torch.from_numpy(gt1).permute(0, 3, 1, 2)  # 1xCxHxW

        return test_I_in, ms, pan, gt

    # Previously fell through and returned None, which only failed later at unpacking.
    raise ValueError("unsupported MS array rank: {}".format(shape_size))


###################################################################
# ------------------- Main Test (Run second) -------------------
###################################################################

## 1) initial test by model ##
# Border width in pixels: load_set() pads the network input by `blk` on each spatial side,
# and Tester / test / eval_test crop `blk:-blk` from it before adding the residual.
blk = 8  # 4 (presumably an earlier setting -- confirm)


class Tester():
    """Holds one test set and runs a model on its padded input.

    In ``'ft'`` mode the input is rebuilt from a resolution-degraded MS/PAN pair
    (via ``wald_protocol``) and the original MS becomes the target. In ``'RR'``
    or ``'test'`` mode, calling the instance also prints quality indexes
    against the ground truth.
    """

    def __init__(self, file_path, mode):
        """Load ``file_path`` and prepare the input/target for ``mode``.

        Args:
            file_path: data file passed to ``load_set``.
            mode: ``'ft'`` for fine-tuning data; any other value keeps the
                tensors exactly as returned by ``load_set``.
        """
        test_I_in, test_ms, test_pan, test_gt = load_set(file_path, blk)
        self.test_I_in = test_I_in
        self.test_ms = test_ms
        self.test_pan = test_pan
        self.test_gt = test_gt
        self.mode = mode
        self.file_path = file_path
        # The fine-tuning phase requires a downgraded input resolution.
        if mode == 'ft':
            from wald_utilities import wald_protocol
            ms_lr, pan_lr = wald_protocol(test_ms, test_pan, 4., 'QB', channels=4)
            self.test_I_in = torch.cat((ms_lr, pan_lr), dim=1)
            # Pad by `blk` (was a hard-coded 8) so the padding always matches the
            # `blk:-blk` crop applied everywhere the input is used.
            self.test_I_in = torch.nn.functional.pad(self.test_I_in, (blk, blk, blk, blk), mode='reflect')  # NCHW
            self.test_gt = self.test_ms

    def __call__(self, model):
        """Run ``model`` on the stored input (moved to CUDA) and return its raw output.

        For modes ``'test'`` and ``'RR'`` the output is added to the cropped MS
        channels of the input and the first sample is scored with
        ``analysis_accu``; the indexes are printed, not returned.
        """
        x = self.test_I_in.cuda().float()
        out2 = model(x)
        if self.mode == 'test' or self.mode == 'RR':
            # Skip connection: all channels but the last (PAN), without the padding border.
            sr = out2 + self.test_I_in[:, :-1, blk:-blk, blk:-blk].cuda()  # NxCxHxW
            sr = sr.permute(0, 2, 3, 1)
            gt = self.test_gt.permute(0, 2, 3, 1).cuda()
            our_CC, our_PSNR, our_SSIM, our_SAM, our_ERGAS = analysis_accu(gt[0, ...], sr[0, ...], 4)
            print(f'[{self.file_path}]: our_CC: {our_CC}, our_PSNR: {our_PSNR}, '
                  f'our_SSIM: {our_SSIM},\n'
                  f'our_SAM: {our_SAM} our_ERGAS: {our_ERGAS}')
        return out2


## 2) target-adaptive fine-tuning, i.e., PNNplus ##

def test(file_path, sensor_model):
    """Fine-tune APNN on one test file, checkpoint the best weights and save the results.

    Steps: build a fine-tuning ``Tester`` (mode ``'ft'``) and an evaluation
    ``Tester``; load a pretrained checkpoint if ``model_path`` exists, otherwise
    start from a freshly initialised ``APNN``; run ``FT_epochs`` full-batch SGD
    steps on the L1 loss of the residual; save a checkpoint each time the loss
    improves; finally reload the saved checkpoint and evaluate it.

    Args:
        file_path: data file; the two characters before its extension select
            the evaluation mode (``'RR'``, ``'FR'``, otherwise ``'test'``).
        sensor_model: NOTE(review): not used in the body -- the checkpoint path
            is hard-coded in ``model_path`` below.
    """
    suffix = file_path.split('.')[-2][-2:]
    if suffix == 'RR' or suffix == 'FR':
        simulated = suffix
    else:
        simulated = 'test'

    tester = Tester(file_path, mode='ft')  # fine-tuning data (degraded input, MS as target)
    evaluator = Tester(file_path, mode=simulated)  # data used for evaluation / saved outputs
    criterion = nn.L1Loss(reduction='mean').cuda()
    regularization = loss_with_l2_regularization().cuda()
    " LOAD PRETRAINED MODEL"
    init_loss = 0  # stays 0: the assignment from the checkpoint below is commented out
    model_path = "../pretrained_models/1QB_PNNplus_model.pth.tar"
    if os.path.isfile(model_path):
        print("loading model")
        checkpoint = torch.load(model_path)
        # checkpoint = torch.load('./pretrained_models/' + sensor_model)
        print(checkpoint.keys())
        # The checkpoint stores the whole module under 'model' and its weights under 'model_state'.
        net = checkpoint['model']
        print(net.conv1.weight.data[0, 0, 0, 0])
        net.load_state_dict(checkpoint['model_state'])
        '''
        ft: lr setting
        1-SF
        2e-4  epoch 150
        1-ik
        2e-4
        7-h5
        1e-5 
        '''
        lr_ = 2e-4#0.0001 * 17 * 17 * 8  # 1e-3#2e-4#0.0001 * 17 * 17 * 8
        FT_epochs = 50  # number of fine tuning epochs
        # init_loss = checkpoint["loss1"]
    else:
        # No pretrained checkpoint: train from scratch with a larger learning rate and more epochs.
        nr_bands = 4  # selected by user or taken from data?
        lr_ = 0.0001 * 17 * 17 * nr_bands
        FT_epochs = 5000
        net = APNN().cuda()
        net.apply(weights_init)
        print(net.conv1.weight.data[0, 0, 0, 0])

    print(net.conv1.weight.data[0, 0, 0, 0])
    print(net)

    # Loss of the starting network on the fine-tuning residual, then a first evaluation ("pre").
    test_gt = tester.test_gt - tester.test_I_in[:, :-1, blk:-blk, blk:-blk]
    pretrain_inIt_loss = criterion(net(tester.test_I_in.cuda()), test_gt.cuda()).item()
    print("init loss: {:.20f} pretrain_inIt loss: {:.20f}".format(init_loss, pretrain_inIt_loss))
    eval_test(net, evaluator, mode="eval", mode2="pre")
    print('-'*100)
    "scaling learning rate on last layer"
    # conv3 gets lr_ / 10; every other parameter uses the optimizer default lr_.
    target_layerParam = list(map(id, net.conv3.parameters()))
    base_layerParam = filter(lambda p: id(p) not in target_layerParam, net.parameters())

    training_parameters = [{'params': net.conv3.parameters(), 'lr': lr_ / 10},
                           {'params': base_layerParam}]

    optimizer = optim.SGD(training_parameters, lr=lr_, momentum=0.9)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    v_min = 10000  # best (lowest) fine-tuning loss seen so far
    ft_loss = np.zeros(FT_epochs)  # per-epoch loss history, stored in the checkpoint
    Train_time = time()

    ## 2.1) "FINE TUNING"--training
    for epoch in range(FT_epochs):  # loop over the testing image multiple times
        net.train()
        # running_loss = 0.0
        # loading testing image
        test_I_in = tester.test_I_in
        test_gt = tester.test_gt

        # residual target: ground truth minus the cropped MS channels of the input
        test_gt = test_gt - test_I_in[:, :-1, blk:-blk, blk:-blk]

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        x1 = test_I_in  # network input
        x2 = test_gt  # residual target
        x1 = x1.cuda().float()  # move to the GPU as float32
        x2 = x2.cuda().float()  # move to the GPU as float32

        outputs = net(x1)

        loss = criterion(outputs, x2)  # compute loss
        # loss_with_l2_regularization is imported from model_qb; its effect with flag=False
        # is not visible in this file.
        new_loss = regularization(loss, net, flag=False)

        new_loss.backward()
        optimizer.step()

        running_loss = loss.item()  # the un-regularised loss is what gets tracked
        ft_loss[epoch] = running_loss

        if running_loss < v_min:
            # NOTE(review): running_loss was measured before optimizer.step(), so the weights
            # saved here are the ones obtained after the update that followed that loss.
            PATH = '../ft_network/QB'
            if not os.path.exists(PATH):
                os.makedirs(PATH)
            torch.save(dict(model=net,
                            model_state=net.state_dict(),
                            loss=ft_loss),
                       PATH + '/net.pth.tar')
            # if np.abs(running_loss - v_min) > 1e-3:
            net.eval()
            eval_test(net, evaluator, mode="eval", mode2="ft")
            v_min = running_loss

        print('[%d] loss: %.7f' % (epoch + 1, running_loss))

    Train_time = time() - Train_time

    ## 2.2) "FINE TUNING"--testing
    " LOAD BEST MODEL"
    # Reload the last checkpoint written by the loop above.
    checkpoint = torch.load('../ft_network/QB/net.pth.tar')
    net = checkpoint['model']
    net.load_state_dict(checkpoint['model_state'])

    " PANSHARPENING "
    net.to(device)
    net.eval()
    print(net.conv3.weight.data[0, 0, 0, 0], net.conv2.weight.data[0, 0, 0, 0], net.conv1.weight.data[0, 0, 0, 0])
    eval_test(net, evaluator, mode="eval", mode2="ft")


def eval_test(net, evaluator, mode="pre", mode2="pre"):
    """Run ``net`` through ``evaluator`` and save the fused image(s) as ``.mat`` files.

    The network output is treated as a residual: the MS channels of the
    evaluator input (all channels but the last, with the ``blk`` border cropped)
    are added back, the result is reordered to NxHxWxC and clipped to [0, 1].

    Args:
        net: model handed to ``evaluator``.
        evaluator: callable with ``test_I_in`` and ``mode`` attributes (a ``Tester``).
        mode: unused; kept for backward compatibility with existing callers.
        mode2: tag inserted into the output file names (e.g. ``"pre"``, ``"ft"``).

    Files are written to ``../results``, which is created if missing.
    """
    out_dir = "../results"
    with torch.no_grad():
        residual = evaluator(net)  # NxCxHxW

        # Skip connection: add the low-resolution MS to the predicted residual.
        base = evaluator.test_I_in[:, :-1, blk:-blk, blk:-blk].cpu().detach().numpy()
        sr = residual.cpu().detach().numpy() + base  # NxCxHxW

        sr = np.transpose(sr, (0, 2, 3, 1))  # to: NxHxWxC
        sr = np.clip(sr, 0, 1)

        # savemat does not create missing directories.
        os.makedirs(out_dir, exist_ok=True)

        num_exm = sr.shape[0]
        if num_exm == 1:
            # "FR" results use the "os" tag; every other mode uses "rs". Previously a mode
            # other than "RR"/"FR" left file_name unbound and raised UnboundLocalError.
            prefix = "apnn_qb_os" if evaluator.mode == "FR" else "apnn_qb_rs"
            file_name = prefix + '_ik_' + mode2 + ".mat"
            save_name = os.path.join(out_dir, file_name)
            sio.savemat(save_name, {'apnn_qb': sr[0, :, :, :]})
        else:
            for index in range(num_exm):  # one file per example
                file_name = "apnn_qb_rs" + str(index) + mode2 + ".mat"
                save_name = os.path.join(out_dir, file_name)
                sio.savemat(save_name, {'apnn_qb_rs': sr[index, :, :, :]})


###################################################################
# ------------------- Main Function (Run first) -------------------
###################################################################
if __name__ == '__main__':
    # Other inputs that have been used with this script:
    #   "E:/01-DL-Pansharpening-Toolbox/01-Data-Simulation(Matlab)/Dataset_Gemine/01-DataSimu/QB/TestData_qb.h5"
    #   "../test_data/imgs/San_Francisco_QB_RR.mat"
    #   "../test_data/TestData_qb.h5"
    file_path = "../test_data/imgs/Toulouse_IKONOS_RR.mat"
    "SELECT SENSOR AND TESTING IMAGE"
    available_models = ['IKONOS', 'GeoEye1', 'WV2', 'WV3', 'WV4', 'QB']
    sensor_model = 'QB'
    # A known sensor name is expanded to the file name of its pretrained checkpoint.
    sensor_model = (sensor_model + '_PNNplus_model.pth.tar'
                    if sensor_model in available_models else sensor_model)

    test(file_path, sensor_model)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/models/APNN/main_test_wv2.py
================================================
import os
import torch
import torch.backends.cudnn as cudnn
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import DataLoader
from data_wv2 import Dataset_Pro
import h5py
from data_single_read import load_set
from evaluate import compute_index
from model_wv2 import APNN, summaries, weights_init, loss_with_l2_regularization
import numpy as np
import scipy.io as sio
from time import time
from evaluate import analysis_accu


###################################################################
# ------------------- Sub-Functions (will be used) -------------------
###################################################################

def load_set(file_path, blk):
    """Load a pansharpening test set and build the padded network input.

    Two layouts are supported, selected by the file extension:

    * ``.h5``  -- datasets ``ms``, ``lms``, ``pan`` and ``gt`` stored as NxCxHxW.
    * ``.mat`` -- arrays ``I_MS_LR``, ``I_MS``, ``I_PAN`` and ``I_GT`` stored as
      HxWxC (``I_PAN`` as HxW); a leading batch axis of size 1 is added.

    Every array is converted to float32 and divided by 2047.0.

    Args:
        file_path: path to the data file. If the two characters right before the
            extension are ``FR``, ``I_MS_LR`` is reused as ``I_GT``.
        blk: number of pixels of edge-replicated padding added on each side of
            both spatial axes of the network input.

    Returns:
        ``(test_I_in, ms, pan, gt)`` as torch tensors in NxCxHxW layout, where
        ``test_I_in`` is the interpolated MS stacked with PAN along the channel
        axis and padded by ``blk``.

    Raises:
        NotImplementedError: if the extension is neither ``h5`` nor ``mat``.
        ValueError: if the low-resolution MS array is neither 3-D nor 4-D.
    """
    suffix = file_path.split('.')

    if suffix[-1] == 'h5':
        data = h5py.File(file_path)  # NxCxHxW
        shape_size = len(data["ms"][...].shape)
    elif suffix[-1] == 'mat':
        data = sio.loadmat(file_path)  # HxWxC
        print(data.keys())
        shape_size = len(data["I_MS_LR"][...].shape)
    else:
        # `raise NotImplemented` fails with a TypeError because NotImplemented is
        # not an exception; NotImplementedError is the intended class.
        raise NotImplementedError("file format is not supported")

    # Full-resolution files carry no ground truth: use the original MS instead.
    if suffix[-2][-2:] == 'FR':
        data['I_GT'] = data['I_MS_LR']

    pad_width = ((0, 0), (0, 0), (blk, blk), (blk, blk))

    if shape_size == 4:  # NxCxHxW
        lms1 = np.array(data['lms'][...], dtype=np.float32) / 2047.0
        pan1 = np.array(data['pan'][...], dtype=np.float32) / 2047.0
        pan = torch.from_numpy(pan1)

        # Stack MS and PAN along the channel axis, then replicate the borders.
        test_I_in1 = np.concatenate([lms1, pan1], axis=1)
        test_I_in1 = np.pad(test_I_in1, pad_width, mode='edge')
        test_I_in = torch.from_numpy(test_I_in1)

        ms1 = np.array(data['ms'][...], dtype=np.float32) / 2047.0
        ms = torch.from_numpy(ms1)

        gt1 = np.array(data['gt'][...], dtype=np.float32) / 2047.0
        gt = torch.from_numpy(gt1)

        return test_I_in, ms, pan, gt

    if shape_size == 3:  # HxWxC
        lms1 = np.expand_dims(data['I_MS'][...], axis=0)  # 1xHxWxC
        lms1 = np.array(lms1, dtype=np.float32) / 2047.0

        pan1 = np.expand_dims(data['I_PAN'][...], axis=0)  # 1xHxW
        pan1 = np.expand_dims(pan1, axis=3)  # 1xHxWx1
        pan1 = np.array(pan1, dtype=np.float32) / 2047.
        pan = torch.from_numpy(pan1).permute(0, 3, 1, 2)  # 1x1xHxW

        test_I_in1 = np.concatenate([lms1, pan1], axis=3)  # 1xHxWx(C+1)
        test_I_in1 = np.transpose(test_I_in1, (0, 3, 1, 2))  # 1x(C+1)xHxW
        test_I_in1 = np.pad(test_I_in1, pad_width, mode='edge')
        test_I_in = torch.from_numpy(test_I_in1)

        ms1 = np.expand_dims(data['I_MS_LR'][...], axis=0)  # 1xHxWxC
        ms1 = np.array(ms1, dtype=np.float32) / 2047.0
        ms = torch.from_numpy(ms1).permute(0, 3, 1, 2)  # 1xCxHxW

        gt1 = np.expand_dims(data['I_GT'][...], axis=0)  # 1xHxWxC
        gt1 = np.array(gt1, dtype=np.float32) / 2047.0
        gt = torch.from_numpy(gt1).permute(0, 3, 1, 2)  # 1xCxHxW

        return test_I_in, ms, pan, gt

    # Previously fell through and returned None, which only failed later at unpacking.
    raise ValueError("unsupported MS array rank: {}".format(shape_size))


###################################################################
# ------------------- Main Test (Run second) -------------------
###################################################################

## 1) initial test by model ##
# Border width in pixels: load_set() pads the network input by `blk` on each spatial side,
# and Tester / test / eval_test crop `blk:-blk` from it before adding the residual.
blk = 8


class Tester():
    """Holds one test set and runs a model on its padded input.

    In ``'ft'`` mode the input is rebuilt from a resolution-degraded MS/PAN pair
    (via ``wald_protocol``) and the original MS becomes the target. In ``'RR'``
    or ``'test'`` mode, calling the instance also prints quality indexes
    against the ground truth.
    """

    def __init__(self, file_path, mode):
        """Load ``file_path`` and prepare the input/target for ``mode``.

        Args:
            file_path: data file passed to ``load_set``.
            mode: ``'ft'`` for fine-tuning data; any other value keeps the
                tensors exactly as returned by ``load_set``.
        """
        test_I_in, test_ms, test_pan, test_gt = load_set(file_path, blk)
        self.test_I_in = test_I_in
        self.test_ms = test_ms
        self.test_pan = test_pan
        self.test_gt = test_gt
        self.mode = mode
        self.file_path = file_path
        # The fine-tuning phase requires a downgraded input resolution.
        if mode == 'ft':
            from wald_utilities import wald_protocol
            ms_lr, pan_lr = wald_protocol(test_ms, test_pan, 4., 'WV2', channels=8)
            self.test_I_in = torch.cat((ms_lr, pan_lr), dim=1)
            # Pad by `blk` (was a hard-coded 8) so the padding always matches the
            # `blk:-blk` crop applied everywhere the input is used.
            self.test_I_in = torch.nn.functional.pad(self.test_I_in, (blk, blk, blk, blk), mode='reflect')  # NCHW
            self.test_gt = self.test_ms

    def __call__(self, model):
        """Run ``model`` on the stored input (moved to CUDA) and return its raw output.

        For modes ``'test'`` and ``'RR'`` the output is added to the cropped MS
        channels of the input and the first sample is scored with
        ``analysis_accu``; the indexes are printed, not returned.
        """
        x = self.test_I_in.cuda().float()
        out2 = model(x)

        if self.mode == 'test' or self.mode == 'RR':
            # Skip connection: all channels but the last (PAN), without the padding border.
            sr = out2 + self.test_I_in[:, :-1, blk:-blk, blk:-blk].cuda()  # NxCxHxW
            sr = sr.permute(0, 2, 3, 1)
            gt = self.test_gt.permute(0, 2, 3, 1).cuda()
            our_CC, our_PSNR, our_SSIM, our_SAM, our_ERGAS = analysis_accu(gt[0, ...], sr[0, ...], 4)
            print(f'[{self.file_path}]: our_CC: {our_CC}, our_PSNR: {our_PSNR}, '
                  f'our_SSIM: {our_SSIM},\n'
                  f'our_SAM: {our_SAM} our_ERGAS: {our_ERGAS}')

        return out2


## 2) target-adaptive fine-tuning, i.e., PNNplus ##

def test(file_path, sensor_model):
    """Fine-tune APNN on one test file, checkpoint the best weights and save the results.

    Steps: build a fine-tuning ``Tester`` (mode ``'ft'``) and an evaluation
    ``Tester``; load a pretrained checkpoint if ``model_path`` exists, otherwise
    start from a freshly initialised ``APNN``; run ``FT_epochs`` full-batch SGD
    steps on the L1 loss of the residual; save a checkpoint each time the loss
    improves; finally reload the saved checkpoint and evaluate it.

    Args:
        file_path: data file; the two characters before its extension select
            the evaluation mode (``'RR'``, ``'FR'``, otherwise ``'test'``).
        sensor_model: NOTE(review): not used in the body -- the checkpoint path
            is hard-coded in ``model_path`` below.
    """

    suffix = file_path.split('.')[-2][-2:]
    if suffix== 'RR' or suffix == 'FR':
        simulated = suffix
    else:
        simulated = 'test'

    tester = Tester(file_path, mode='ft')  # fine-tuning data (degraded input, MS as target)
    evaluator = Tester(file_path, mode=simulated)  # data used for evaluation / saved outputs
    criterion = nn.L1Loss(reduction='mean').cuda()
    regularization = loss_with_l2_regularization().cuda()
    " LOAD PRETRAINED MODEL"
    init_loss = 0  # stays 0: the assignment from the checkpoint below is commented out
    model_path = "../pretrained_models/1WV2_PNNplus_model.pth.tar"
    if os.path.isfile(model_path):
        print("loading model")
        checkpoint = torch.load(model_path)
        # checkpoint = torch.load('./pretrained_models/' + sensor_model)
        print(checkpoint.keys())
        # The checkpoint stores the whole module under 'model' and its weights under 'model_state'.
        net = checkpoint['model']
        print(net.conv1.weight.data[0, 0, 0, 0])
        net.load_state_dict(checkpoint['model_state'])
        lr_ = 1e-6
        FT_epochs = 200  # number of fine tuning epochs
        # init_loss = checkpoint["loss1"]
    else:
        # No pretrained checkpoint: train from scratch with a larger learning rate and more epochs.
        nr_bands = 8  # selected by user or taken from data?
        lr_ = 0.0001 * 17 * 17 * nr_bands
        FT_epochs = 5000
        net = APNN().cuda()
        net.apply(weights_init)
        print(net.conv1.weight.data[0, 0, 0, 0])

    print(net.conv1.weight.data[0, 0, 0, 0])
    print(net)

    # Loss of the starting network on the fine-tuning residual, then a first evaluation ("pre").
    test_gt = tester.test_gt - tester.test_I_in[:, :-1, blk:-blk, blk:-blk]
    pretrain_inIt_loss = criterion(net(tester.test_I_in.cuda()), test_gt.cuda()).item()
    print("init loss: {:.20f} pretrain_inIt loss: {:.20f}".format(init_loss, pretrain_inIt_loss))
    eval_test(net, evaluator, mode="eval", mode2="pre")
    print('-' * 100)
    "scaling learning rate on last layer"
    # conv3 gets lr_ / 10; every other parameter uses the optimizer default lr_.
    target_layerParam = list(map(id, net.conv3.parameters()))
    base_layerParam = filter(lambda p: id(p) not in target_layerParam, net.parameters())

    training_parameters = [{'params': net.conv3.parameters(), 'lr': lr_ / 10},
                           {'params': base_layerParam}]

    optimizer = optim.SGD(training_parameters, lr=lr_, momentum=0.9)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    v_min = 10000  # best (lowest) fine-tuning loss seen so far
    ft_loss = np.zeros(FT_epochs)  # per-epoch loss history, stored in the checkpoint
    Train_time = time()
    print(tester.test_gt.shape)
    ## 2.1) "FINE TUNING"--training
    for epoch in range(FT_epochs):  # loop over the testing image multiple times
        net.train()
        # running_loss = 0.0
        # loading testing image
        test_I_in = tester.test_I_in
        test_gt = tester.test_gt

        # residual target: ground truth minus the cropped MS channels of the input
        test_gt = test_gt - test_I_in[:, :-1, blk:-blk, blk:-blk]

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        x1 = test_I_in  # network input
        x2 = test_gt  # residual target
        x1 = x1.cuda().float()  # move to the GPU as float32
        x2 = x2.cuda().float()  # move to the GPU as float32

        outputs = net(x1)

        loss = criterion(outputs, x2)  # compute loss
        # loss_with_l2_regularization is imported from model_wv2; its effect with flag=False
        # is not visible in this file.
        new_loss = regularization(loss, net, flag=False)

        new_loss.backward()
        optimizer.step()

        running_loss = loss.item()  # the un-regularised loss is what gets tracked
        ft_loss[epoch] = running_loss

        if running_loss < v_min:
            # NOTE(review): running_loss was measured before optimizer.step(), so the weights
            # saved here are the ones obtained after the update that followed that loss.
            PATH = '../ft_network/WV2'
            if not os.path.exists(PATH):
                os.makedirs(PATH)
            torch.save(dict(model=net,
                            model_state=net.state_dict(),
                            loss=ft_loss),
                       PATH + '/net.pth.tar')
            # Only re-run the evaluation when the improvement exceeds 1e-3.
            if np.abs(running_loss - v_min) > 1e-3:
                net.eval()
                eval_test(net, evaluator, mode="eval", mode2="ft")
            v_min = running_loss

        print('[%d] loss: %.20f' % (epoch + 1, running_loss))

    Train_time = time() - Train_time

    ## 2.2) "FINE TUNING"--testing
    " LOAD BEST MODEL"
    # Reload the last checkpoint written by the loop above.
    checkpoint = torch.load('../ft_network/WV2/net.pth.tar')
    net = checkpoint['model']
    net.load_state_dict(checkpoint['model_state'])

    " PANSHARPENING "
    net.to(device)
    net.eval()
    print(net.conv3.weight.data[0, 0, 0, 0], net.conv2.weight.data[0, 0, 0, 0], net.conv1.weight.data[0, 0, 0, 0])
    eval_test(net, evaluator, mode="eval", mode2="ft")


def eval_test(net, evaluator, mode="pre", mode2="pre"):
    """Run ``net`` through ``evaluator`` and save the fused image(s) as ``.mat`` files.

    The network output is treated as a residual: the MS channels of the
    evaluator input (all channels but the last, with the ``blk`` border cropped)
    are added back, the result is reordered to NxHxWxC and clipped to [0, 1].

    Args:
        net: model handed to ``evaluator``.
        evaluator: callable with ``test_I_in`` and ``mode`` attributes (a ``Tester``).
        mode: unused; kept for backward compatibility with existing callers.
        mode2: tag inserted into the output file names (e.g. ``"pre"``, ``"ft"``).

    Files are written to ``../results``, which is created if missing.
    """
    out_dir = "../results"
    with torch.no_grad():
        residual = evaluator(net)  # NxCxHxW

        # Skip connection: add the low-resolution MS to the predicted residual.
        base = evaluator.test_I_in[:, :-1, blk:-blk, blk:-blk].cpu().detach().numpy()
        sr = residual.cpu().detach().numpy() + base  # NxCxHxW

        sr = np.transpose(sr, (0, 2, 3, 1))  # to: NxHxWxC
        sr = np.clip(sr, 0, 1)

        # savemat does not create missing directories.
        os.makedirs(out_dir, exist_ok=True)

        num_exm = sr.shape[0]
        if num_exm == 1:
            # "FR" results use the "os" tag; every other mode uses "rs". Previously a mode
            # other than "RR"/"FR" left file_name unbound and raised UnboundLocalError.
            prefix = "apnn_wv2_os" if evaluator.mode == "FR" else "apnn_wv2_rs"
            file_name = prefix + '_rio_' + mode2 + ".mat"
            save_name = os.path.join(out_dir, file_name)
            sio.savemat(save_name, {'apnn_wv2': sr[0, :, :, :]})
        else:
            for index in range(num_exm):  # one file per example
                file_name = "apnn_wv2_rs" + str(index) + mode2 + ".mat"
                save_name = os.path.join(out_dir, file_name)
                sio.savemat(save_name, {'apnn_wv2_rs': sr[index, :, :, :]})


###################################################################
# ------------------- Main Function (Run first) -------------------
###################################################################
if __name__ == '__main__':
    # Alternative input that has been used with this script:
    #   "../test_data/TestData_wv2.h5"
    file_path = "../test_data/imgs/Rio_WV2_FR.mat"
    "SELECT SENSOR AND TESTING IMAGE"
    available_models = ['IKONOS', 'GeoEye1', 'WV2', 'WV3', 'WV4', 'QB']
    sensor_model = 'WV2'
    # A known sensor name is expanded to the file name of its pretrained checkpoint.
    sensor_model = (sensor_model + '_PNNplus_model.pth.tar'
                    if sensor_model in available_models else sensor_model)

    test(file_path, sensor_model)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/models/APNN/main_test_wv3.py
================================================
import os
import torch
import torch.backends.cudnn as cudnn
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import DataLoader
from data_wv3 import Dataset_Pro
import h5py
from data_single_read import load_set
from evaluate import compute_index
from model_wv3 import APNN, summaries, weights_init, loss_with_l2_regularization
import numpy as np
import scipy.io as sio
from time import time
from evaluate import analysis_accu

import math

class L1_Loss_clip(nn.Module):
    """Loss on an optionally residual-ised prediction.

    With ``clip_flag`` set, ``lms`` is subtracted from the prediction first.
    With ``inter`` set, the mean squared error is returned; otherwise the
    mean absolute error. ``multiple`` is stored but not used by ``forward``.
    """

    def __init__(self, multiple=2048.0, clip_flag=True, inter=False):
        super().__init__()
        self.multiple = multiple
        self.inter = inter
        self.clip_flag = clip_flag
        self.criterion = nn.L1Loss(reduction='mean')

    def forward(self, x, target, lms):
        # Turn the prediction into a residual when requested.
        prediction = x - lms if self.clip_flag else x
        if not self.inter:
            return self.criterion(prediction, target)
        return torch.mean((prediction - target) ** 2)


###################################################################
# ------------------- Sub-Functions (will be used) -------------------
###################################################################
def load_set(file_path, blk):
    """Load a pansharpening test set and build the padded network input.

    Two layouts are supported, selected by the file extension:

    * ``.h5``  -- datasets ``ms``, ``lms``, ``pan`` and ``gt`` stored as NxCxHxW.
    * ``.mat`` -- arrays ``I_MS_LR``, ``I_MS``, ``I_PAN`` and ``I_GT`` stored as
      HxWxC (``I_PAN`` as HxW); a leading batch axis of size 1 is added.

    Every array is converted to float32 and divided by 2047.0.

    Args:
        file_path: path to the data file. If the two characters right before the
            extension are ``FR``, ``I_MS_LR`` is reused as ``I_GT``.
        blk: number of pixels of edge-replicated padding added on each side of
            both spatial axes of the network input.

    Returns:
        ``(test_I_in, ms_lr, pan, gt)`` as torch tensors in NxCxHxW layout, where
        ``test_I_in`` is the interpolated MS stacked with PAN along the channel
        axis and padded by ``blk``.

    Raises:
        NotImplementedError: if the extension is neither ``h5`` nor ``mat``.
        ValueError: if the low-resolution MS array is neither 3-D nor 4-D.
    """
    suffix = file_path.split('.')

    if suffix[-1] == 'h5':
        # case 1: NxCxHxW
        data = h5py.File(file_path)
        shape_size = len(data["ms"][...].shape)
    elif suffix[-1] == 'mat':
        # case 2: HxWxC
        data = sio.loadmat(file_path)
        shape_size = len(data["I_MS_LR"][...].shape)
    else:
        # `NotImplemented` is a comparison sentinel, not an exception class;
        # calling it produces an unrelated TypeError.
        raise NotImplementedError("file format is not supported")

    # Exception to be addressed: RR data has four fields, FR has no GT. Fine-tuning
    # needs a GT in both cases, so for FR files I_MS_LR is taken as the GT.
    if suffix[-2][-2:] == 'FR':
        data['I_GT'] = data['I_MS_LR']

    pad_width = ((0, 0), (0, 0), (blk, blk), (blk, blk))

    if shape_size == 4:  # NxCxHxW
        lms1 = np.array(data['lms'][...], dtype=np.float32) / 2047.0
        pan1 = np.array(data['pan'][...], dtype=np.float32) / 2047.0
        pan = torch.from_numpy(pan1)

        # Stack MS and PAN along the channel axis, then replicate the borders.
        test_I_in1 = np.concatenate([lms1, pan1], axis=1)
        test_I_in1 = np.pad(test_I_in1, pad_width, mode='edge')
        test_I_in = torch.from_numpy(test_I_in1)

        ms1 = np.array(data['ms'][...], dtype=np.float32) / 2047.0
        ms = torch.from_numpy(ms1)

        gt1 = np.array(data['gt'][...], dtype=np.float32) / 2047.0
        gt = torch.from_numpy(gt1)

        return test_I_in, ms, pan, gt

    if shape_size == 3:  # HxWxC
        lms1 = np.expand_dims(data['I_MS'][...], axis=0)  # 1xHxWxC
        lms1 = np.array(lms1, dtype=np.float32) / 2047.0

        pan1 = np.expand_dims(data['I_PAN'][...], axis=0)  # 1xHxW
        pan1 = np.expand_dims(pan1, axis=3)  # 1xHxWx1
        pan1 = np.array(pan1, dtype=np.float32) / 2047.
        pan = torch.from_numpy(pan1).permute(0, 3, 1, 2)  # 1x1xHxW

        test_I_in1 = np.concatenate([lms1, pan1], axis=3)  # 1xHxWx(C+1)
        test_I_in1 = np.transpose(test_I_in1, (0, 3, 1, 2))  # 1x(C+1)xHxW
        test_I_in1 = np.pad(test_I_in1, pad_width, mode='edge')
        test_I_in = torch.from_numpy(test_I_in1)

        # Low-resolution MS (original author's note: pre-simulated with the Wald protocol).
        ms1 = np.expand_dims(data['I_MS_LR'][...], axis=0)  # 1xHxWxC
        ms1 = np.array(ms1, dtype=np.float32) / 2047.0
        ms_lr = torch.from_numpy(ms1).permute(0, 3, 1, 2)  # 1xCxHxW

        gt1 = np.expand_dims(data['I_GT'][...], axis=0)  # 1xHxWxC
        gt1 = np.array(gt1, dtype=np.float32) / 2047.0
        gt = torch.from_numpy(gt1).permute(0, 3, 1, 2)  # 1xCxHxW

        return test_I_in, ms_lr, pan, gt

    # Previously fell through and returned None, which only failed later at unpacking.
    raise ValueError("unsupported MS array rank: {}".format(shape_size))


###################################################################
# ------------------- Main Test (Run second) -------------------
###################################################################

## 1) initial test by model ##
# Border width in pixels: load_set() pads the network input by `blk` on each spatial side,
# and Tester / test crop `blk:-blk` from it before adding the residual.
blk = 8


class Tester():
    """Holds one test set and runs a model on its padded input.

    In ``'ft'`` mode the input is rebuilt from the low-resolution MS and a
    resolution-degraded PAN (via ``wald_protocol``) and the low-resolution MS
    becomes the target. In ``'RR'`` or ``'test'`` mode, calling the instance
    also prints quality indexes against the ground truth.
    """

    def __init__(self, file_path, mode):
        """Load ``file_path`` and prepare the input/target for ``mode``.

        Args:
            file_path: data file passed to ``load_set``.
            mode: ``'ft'`` for fine-tuning data; any other value keeps the
                tensors exactly as returned by ``load_set``.
        """
        test_I_in, test_ms_lr, test_pan, test_gt = load_set(file_path, blk)
        # Shape examples below are the original author's notes for one dataset.
        self.test_I_in = test_I_in  # padded MS+PAN input, e.g. 1x9x528x528
        self.test_pan = test_pan  # e.g. 1x1x512x512
        self.test_gt = test_gt  # e.g. 1x8x512x512
        self.test_ms = test_ms_lr  # e.g. 1x8x128x128
        self.mode = mode
        self.file_path = file_path
        self.loss_clip = L1_Loss_clip(inter=True, clip_flag=True).cuda()
        # The fine-tuning phase requires a downgraded input resolution.
        if mode == 'ft':
            from wald_utilities import wald_protocol
            # NOTE(review): the degraded MS returned here (ms_lr) is not used; the input is
            # built from the undegraded test_ms_lr, which is also the target below. The QB/WV2
            # scripts use ms_lr instead -- confirm which is intended.
            ms_lr, pan_lr = wald_protocol(test_ms_lr, test_pan, 4., 'WV3')
            self.test_I_in = torch.cat((test_ms_lr, pan_lr), dim=1)
            # Pad by `blk` (was a hard-coded 8) so the padding always matches the
            # `blk:-blk` crop applied everywhere the input is used.
            self.test_I_in = torch.nn.functional.pad(self.test_I_in, (blk, blk, blk, blk), mode='reflect')  # NCHW
            self.test_gt = self.test_ms
            # Original author's open questions (translated):
            # If the data were already simulated in MATLAB we have ms_lr: 512, pan 512,
            # ms: 512, pan 2048, so it should not need to be processed a second time.
            # lms is 128 but pan is 512; there is no simulated pan (i.e. pan_lr 128),
            # and there is no 128 gt either.
            # So pan should already be simulated (originally 2048, now 512); does ms
            # still need to go through the Wald protocol?
            # from wald_utilities import wald_protocol_v2
            #
            # pan_lr = wald_protocol_v2(None, test_pan, 4., 'WV3')
            # self.test_I_in = torch.cat((test_lms, pan_lr), dim=1)
            # self.test_I_in = torch.nn.functional.pad(self.test_I_in, (8, 8, 8, 8), mode='reflect')  # NCHW

    def __call__(self, model, err=None):
        """Run ``model`` on the stored input (moved to CUDA) and return its raw output.

        For modes ``'RR'`` and ``'test'`` the output is added to the cropped MS
        channels of the input and the first sample is scored with
        ``analysis_accu``; the indexes are printed, not returned.
        ``err`` is accepted but not used.
        """
        x = self.test_I_in.cuda().float()
        out2 = model(x)

        if self.mode == 'RR' or self.mode == 'test':
            # Skip connection: all channels but the last (PAN), without the padding border.
            test_in = self.test_I_in[:, :-1, blk:-blk, blk:-blk].cuda()
            sr = out2 + test_in  # NxCxHxW
            sr = sr.permute(0, 2, 3, 1)
            gt = self.test_gt.permute(0, 2, 3, 1).cuda()

            our_CC, our_PSNR, our_SSIM, our_SAM, our_ERGAS, our_Q8 = analysis_accu(gt[0, ...], sr[0, ...], 4)
            print(f'[{self.file_path}]: our_CC: {our_CC}, our_PSNR: {our_PSNR}, '
                  f'our_SSIM: {our_SSIM},\n'
                  f'our_SAM: {our_SAM} our_ERGAS: {our_ERGAS} our_Q8: {our_Q8}')

        return out2


## 2) target-adaptive fine-tuning, i.e., PNNplus ##

def test(file_path, sensor_model):
    """Fine-tune APNN on one test image and save the pansharpened results.

    Two ``Tester`` objects are built from ``file_path``: one in ``'ft'`` mode
    that supplies the fine-tuning input/target, and one in the mode derived
    from the file name (``'RR'``, ``'FR'`` or ``'test'``) used for evaluation.
    A pretrained network is loaded when the checkpoint file exists; otherwise a
    freshly initialised ``APNN`` is trained for more epochs. Every epoch that
    reaches a new lowest loss saves the network and evaluates it; the best
    checkpoint is reloaded and evaluated once more at the end.

    Args:
        file_path: path of the test file; the two characters before the
            extension select the evaluation mode.
        sensor_model: accepted for compatibility with the caller; not used here.
    """
    suffix = file_path.split('.')[-2][-2:]
    simulated = suffix if suffix in ('RR', 'FR') else 'test'

    tester = Tester(file_path, mode='ft')  # supplies the fine-tuning input/target
    evaluator = Tester(file_path, mode=simulated)
    criterion = nn.MSELoss(reduction='mean').cuda()
    regularization = loss_with_l2_regularization().cuda()

    # ---- load the pretrained model, or start from a fresh one ----
    init_loss = 0
    checkpoint = None  # remains None when no pretrained checkpoint exists
    model_path = "./pretrained_models/1WV3_PNNplus_model.tar"
    if os.path.isfile(model_path):
        print("loading model")
        # NOTE(review): torch.load unpickles arbitrary objects (the checkpoint
        # stores the whole model); only load checkpoints from a trusted source.
        checkpoint = torch.load(model_path)
        print(checkpoint.keys())
        net = checkpoint['model']
        print(net.conv1.weight.data[0, 0, 0, 0])
        net.load_state_dict(checkpoint['model_state'])
        lr_ = 1e-4
        FT_epochs = 50  # number of fine tuning epochs
    else:
        nr_bands = 8  # selected by user or taken from data?
        lr_ = 0.0001 * 17 * 17 * nr_bands
        FT_epochs = 5000
        net = APNN().cuda()
        net.apply(weights_init)
        print(net.conv1.weight.data[0, 0, 0, 0])
    print(net.conv1.weight.data[0, 0, 0, 0])
    print(net)

    # ---- loss of the starting network on the fine-tuning pair ----
    test_gt = tester.test_gt - tester.test_I_in[:, :-1, blk:-blk, blk:-blk]
    # No gradient is needed here; without no_grad the autograd graph would stay
    # alive for as long as the loss tensor is referenced below.
    with torch.no_grad():
        pretrain_inIt_loss = criterion(net(tester.test_I_in.cuda()), test_gt.cuda())
    print("init loss: {:.20f} pretrain_inIt loss: {:.20f}".format(init_loss, pretrain_inIt_loss.item()))
    eval_test(net, evaluator, mode="eval", mode2="pre", err=pretrain_inIt_loss)

    # ---- optimizer: the last layer gets a learning rate ten times smaller ----
    target_layerParam = list(map(id, net.conv3.parameters()))
    base_layerParam = [p for p in net.parameters() if id(p) not in target_layerParam]

    training_parameters = [{'params': net.conv3.parameters(), 'lr': lr_ / 10},
                           {'params': base_layerParam}]

    optimizer = optim.SGD(training_parameters, lr=lr_, momentum=0.9, weight_decay=0)

    # Restoring the optimizer state is best effort: fall back to the fresh
    # optimizer when there is no checkpoint, no saved state, or the saved state
    # does not fit the parameter groups.
    if checkpoint is not None and "optim_state" in checkpoint:
        try:
            optimizer.load_state_dict(checkpoint["optim_state"])
        except Exception:
            print("default optim_state")
    else:
        print("default optim_state")
    print(net.conv3.weight.requires_grad)
    print("inspect optimizer setting:\n", optimizer.state_dict())
    print("target id:", target_layerParam)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    save_dir = './ft_network/'
    best_path = os.path.join(save_dir, 'wv_3_net.pth.tar')
    os.makedirs(save_dir, exist_ok=True)

    v_min = 10000
    ft_loss = np.zeros(FT_epochs)
    Train_time = time()
    print(tester.test_gt.shape)

    # The training pair never changes, so it is moved to the GPU once instead
    # of once per epoch. The target is the residual between the reference and
    # the cropped input (last channel dropped).
    lms_crop = tester.test_I_in[:, :-1, blk:-blk, blk:-blk].cuda()
    x1 = tester.test_I_in.cuda().float()
    x2 = (tester.test_gt.cuda() - lms_crop).cuda().float()

    ## 2.1) "FINE TUNING"--training
    for epoch in range(FT_epochs):  # loop over the testing image multiple times
        net.train()
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(x1)
        loss = criterion(outputs, x2)
        new_loss = regularization(loss, net, flag=False)
        new_loss.backward()
        optimizer.step()

        # The tracked/printed value is the plain criterion, not new_loss.
        running_loss = loss.item()
        ft_loss[epoch] = running_loss

        if running_loss < v_min:
            torch.save(dict(model=net,
                            model_state=net.state_dict(),
                            loss=ft_loss),
                       best_path)
            net.eval()
            eval_test(net, evaluator, mode="eval", mode2="ft", err=loss)
            v_min = running_loss
        print('[%d] loss: %.20f' % (epoch + 1, running_loss))
        print('-')
    Train_time = time() - Train_time  # measured but currently not reported

    ## 2.2) "FINE TUNING"--testing
    # Reload the checkpoint with the lowest fine-tuning loss.
    checkpoint = torch.load(best_path)
    net = checkpoint['model']
    net.load_state_dict(checkpoint['model_state'])
    print("-" * 100)
    print("pretrain_InIt_loss {:.20f}".format(pretrain_inIt_loss))
    # Entries still at zero belong to epochs after the saved one.
    for epoch_loss in checkpoint['loss']:
        if epoch_loss > 0:
            print(epoch_loss, " ")

    # Final pansharpening with the evaluator's input.
    net.to(device)
    net.eval()
    print(net.conv3.weight.data[0, 0, 0, 0], net.conv2.weight.data[0, 0, 0, 0], net.conv1.weight.data[0, 0, 0, 0])
    eval_test(net, evaluator, mode="eval", mode2="ft")


def eval_test(net, evaluator, mode="pre", mode2="pre", err=None):
    """Run ``evaluator`` with ``net`` and, when ``mode == "eval"``, save the result as .mat.

    The network output is treated as a residual: the evaluator's input (last
    channel dropped, ``blk``-pixel border cropped) is added back, the result is
    rearranged to NxHxWxC and clipped to [0, 1].

    Args:
        net: network passed to ``evaluator``.
        evaluator: callable ``evaluator(net, err=...)`` exposing ``test_I_in``
            and ``mode``.
        mode: results are written to ``./results/`` only when this is ``"eval"``.
        mode2: tag inserted in the output file name (e.g. ``"pre"`` / ``"ft"``).
        err: forwarded unchanged to ``evaluator``.
    """
    with torch.no_grad():
        Test_time = time()
        sr = evaluator(net, err=err)  # NxCxHxW
        Test_time = time() - Test_time  # measured but currently not reported

        # skip connection: add the cropped input back to the predicted residual
        lms = evaluator.test_I_in[:, :-1, blk:-blk, blk:-blk].cpu().detach().numpy()
        sr = sr.cpu().detach().numpy() + lms  # NxCxHxW

        sr = np.transpose(sr, (0, 2, 3, 1))  # to: NxHxWxC
        sr = np.clip(sr, 0, 1)

        num_exm = sr.shape[0]
        if mode == "eval":
            out_dir = './results/'
            # savemat does not create missing directories.
            os.makedirs(out_dir, exist_ok=True)

            if num_exm == 1:
                # 'FR' results use the "os" key; every other mode ('RR', 'test')
                # uses the "rs" key, so the key is always defined.
                key = "apnn_wv3_os" if evaluator.mode == "FR" else "apnn_wv3_rs"
                file_name = key + '_ny_' + mode2 + ".mat"
                sio.savemat(os.path.join(out_dir, file_name), {key: sr[0, :, :, :]})
            else:
                for index in range(num_exm):  # one file per example
                    file_name = "apnn_wv3_rs" + str(index) + mode2 + ".mat"
                    sio.savemat(os.path.join(out_dir, file_name), {'apnn_wv3_rs': sr[index, :, :, :]})


###################################################################
# ------------------- Main Function (Run first) -------------------
###################################################################
if __name__ == '__main__':
    # Imported by the original script; neither name is used below.
    import random
    import torch.backends.cudnn as cudnn

    # Sensor selection: a known sensor name is expanded to a checkpoint file name.
    available_models = ['IKONOS', 'GeoEye1', 'WV2', 'WV3', 'WV4', 'QB']
    sensor_model = 'WV3'
    if sensor_model in available_models:
        sensor_model += '_PNNplus_model.pth.tar'

    # Test image; the "_RR" suffix selects the evaluation mode inside test().
    file_path = "./test_data/imgs/NY1_WV3_RR.mat"

    test(file_path, sensor_model)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/models/APNN/main_test_wv4.py
================================================
import os
import torch
import torch.backends.cudnn as cudnn
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import DataLoader
from data_wv4 import Dataset_Pro
import h5py
from data_single_read import load_set
from evaluate import compute_index
from model_wv4 import APNN, summaries, weights_init, loss_with_l2_regularization
import numpy as np
import scipy.io as sio
from time import time
from evaluate import analysis_accu
import math

class L1_Loss_clip(nn.Module):
    """Loss on ``x`` (optionally minus ``lms``) against ``target``.

    Args:
        multiple: stored on the module; not used by ``forward``.
        clip_flag: when true, ``lms`` is subtracted from ``x`` before the loss.
        inter: when true the mean squared error is returned, otherwise the
            mean absolute error.
    """

    def __init__(self, multiple=2048.0, clip_flag=True, inter=False):
        super(L1_Loss_clip, self).__init__()
        self.multiple = multiple
        self.inter = inter
        self.clip_flag = clip_flag
        self.criterion = nn.L1Loss(reduction='mean')

    def forward(self, x, target, lms):
        """Return the scalar loss between ``x`` (minus ``lms`` if enabled) and ``target``."""
        residual = x - lms if self.clip_flag else x
        if not self.inter:
            return self.criterion(residual, target)
        return torch.mean((residual - target) ** 2)

###################################################################
# ------------------- Sub-Functions (will be used) -------------------
###################################################################
def load_set(file_path, blk):
    """Load a test set and return ``(test_I_in, ms, pan, gt)`` as float32 tensors.

    Two layouts are supported, selected by the rank of the low-resolution MS data:

    * ``.h5`` with 4-D datasets ``ms``, ``lms``, ``pan``, ``gt`` (NxCxHxW);
    * ``.mat`` with 3-D arrays ``I_MS_LR``, ``I_MS``, ``I_GT`` (HxWxC) and a
      2-D ``I_PAN``; a leading batch axis of size 1 is added.

    All values are divided by 2047. ``test_I_in`` is the upsampled MS image
    concatenated with PAN along the channel axis and edge-padded by ``blk``
    pixels on every spatial side. When the file name ends in ``FR`` (before
    the extension) ``I_GT`` is replaced by ``I_MS_LR``.

    Args:
        file_path: path to a ``.h5`` or ``.mat`` file.
        blk: number of pixels padded on each side of the network input.

    Returns:
        Tuple ``(test_I_in, ms, pan, gt)`` of NxCxHxW tensors.

    Raises:
        NotImplementedError: if the file extension is neither ``h5`` nor ``mat``.
        ValueError: if the MS data is neither 3-D nor 4-D.
    """
    suffix = file_path.split('.')
    if suffix[-1] == 'h5':
        # case 1: NxCxHxW. Read the arrays eagerly so the file can be closed.
        with h5py.File(file_path, 'r') as h5_file:
            data = {name: h5_file[name][...]
                    for name in ('ms', 'lms', 'pan', 'gt') if name in h5_file}
        ms1 = data["ms"]
    elif suffix[-1] == 'mat':
        # case 2: HxWxC
        data = sio.loadmat(file_path)
        print(data.keys())
        ms1 = data["I_MS_LR"]
    else:
        print("file format is not supported")
        # NotImplemented is a constant, not an exception; raising it would
        # produce an unrelated TypeError.
        raise NotImplementedError("file format is not supported: {}".format(file_path))
    shape_size = len(ms1.shape)

    if suffix[-2][-2:] == 'FR' and 'I_MS_LR' in data:
        data['I_GT'] = data['I_MS_LR']  # exception: full-resolution files use I_MS_LR as I_GT

    def scaled(array):
        """Convert to float32 and divide by 2047."""
        return np.array(array, dtype=np.float32) / 2047.0

    spatial_pad = ((0, 0), (0, 0), (blk, blk), (blk, blk))  # pad H and W only (NCHW)

    if shape_size == 4:  # NxCxHxW
        lms1 = scaled(data['lms'])
        pan1 = scaled(data['pan'])

        test_I_in1 = np.concatenate([lms1, pan1], axis=1)  # Nx(C+1)xHxW
        test_I_in1 = np.pad(test_I_in1, spatial_pad, mode='edge')  # padding
        test_I_in = torch.from_numpy(test_I_in1)

        ms = torch.from_numpy(scaled(data['ms']))
        pan = torch.from_numpy(pan1)
        gt = torch.from_numpy(scaled(data['gt']))

        return test_I_in, ms, pan, gt

    if shape_size == 3:  # HxWxC
        lms1 = scaled(np.expand_dims(data['I_MS'], axis=0))  # 1xHxWxC

        pan1 = np.expand_dims(data['I_PAN'], axis=0)  # 1xHxW
        pan1 = scaled(np.expand_dims(pan1, axis=3))  # 1xHxWx1
        pan = torch.from_numpy(pan1).permute(0, 3, 1, 2)  # 1x1xHxW

        test_I_in1 = np.concatenate([lms1, pan1], axis=3)  # 1xHxWx(C+1)
        test_I_in1 = np.transpose(test_I_in1, (0, 3, 1, 2))  # 1x(C+1)xHxW
        test_I_in1 = np.pad(test_I_in1, spatial_pad, mode='edge')
        test_I_in = torch.from_numpy(test_I_in1)

        ms1 = scaled(np.expand_dims(data['I_MS_LR'], axis=0))  # 1xHxWxC
        ms = torch.from_numpy(ms1).permute(0, 3, 1, 2)  # 1xCxHxW

        gt1 = scaled(np.expand_dims(data['I_GT'], axis=0))  # 1xHxWxC
        gt = torch.from_numpy(gt1).permute(0, 3, 1, 2)  # 1xCxHxW

        return test_I_in, ms, pan, gt

    # Previously fell through and returned None, which failed later at the
    # caller's tuple unpacking with an unrelated message.
    raise ValueError("expected 3-D or 4-D MS data, got {} dimensions".format(shape_size))


###################################################################
# ------------------- Main Test (Run second) -------------------
###################################################################

## 1) initial test by model ##
# Border width in pixels: load_set() pads the network input by `blk` on every
# spatial side and the scripts crop it back with `blk:-blk`.
# (4 is the alternative value left by the author.)
blk = 8#4


class Tester():
    """Holds the data of one test file and runs a network on it.

    In ``'ft'`` mode the stored input/target are replaced by a reduced-resolution
    pair produced by ``wald_protocol`` (the original MS image becomes the
    target). In ``'RR'`` and ``'test'`` mode, calling the object also prints
    the quality indexes of the fused result.
    """

    def __init__(self, file_path, mode):
        """Load ``file_path`` with ``load_set`` and prepare the data for ``mode``."""
        test_I_in, test_ms, test_pan, test_gt = load_set(file_path, blk)
        self.test_I_in = test_I_in
        self.test_ms = test_ms
        self.test_pan = test_pan
        self.test_gt = test_gt
        self.mode = mode
        self.file_path = file_path
        self.loss_clip = L1_Loss_clip(inter=True, clip_flag=True).cuda()
        # the fine tuning phase requires downgraded input resolution
        if mode == 'ft':
            from wald_utilities import wald_protocol
            ms_lr, pan_lr = wald_protocol(test_ms, test_pan, 4., 'WV4', channels=4)
            self.test_I_in = torch.cat((ms_lr, pan_lr), dim=1)
            # Pad by `blk` (previously a literal 8) so the padding always
            # matches the `blk:-blk` crop used everywhere else.
            self.test_I_in = torch.nn.functional.pad(self.test_I_in, (blk, blk, blk, blk), mode='reflect')  # NCHW
            self.test_gt = self.test_ms

    def __call__(self, model):
        """Run ``model`` on the stored input and return its raw output.

        In 'RR'/'test' mode the input (last channel dropped, border cropped) is
        added to the output and the indexes from ``analysis_accu`` are printed.
        """
        x = self.test_I_in.cuda().float()  # the model runs on the GPU in float32
        out2 = model(x)

        if self.mode == 'RR' or self.mode == 'test':
            # Reuse the tensor already on the GPU instead of copying it again;
            # load_set() provides float32 data, so .float() does not change it.
            test_in = x[:, :-1, blk:-blk, blk:-blk]
            sr = out2 + test_in  # NxCxHxW
            sr = sr.permute(0, 2, 3, 1)
            gt = self.test_gt.permute(0, 2, 3, 1).cuda()

            # Only the first example of the batch is scored.
            our_CC, our_PSNR, our_SSIM, our_SAM, our_ERGAS = analysis_accu(gt[0, ...], sr[0, ...], 4)
            print(f'[{self.file_path}]: our_CC: {our_CC}, our_PSNR: {our_PSNR}, '
                  f'our_SSIM: {our_SSIM},\n'
                  f'our_SAM: {our_SAM} our_ERGAS: {our_ERGAS}')
        return out2


## 2) target-adaptive fine-tuning, i.e., PNNplus ##

def test(file_path, sensor_model):
    """Fine-tune APNN on one test image and save the pansharpened results.

    Two ``Tester`` objects are built from ``file_path``: one in ``'ft'`` mode
    that supplies the fine-tuning input/target, and one in the mode derived
    from the file name (``'RR'``, ``'FR'`` or ``'test'``) used for evaluation.
    A pretrained network is loaded when the checkpoint file exists; otherwise a
    freshly initialised ``APNN`` is trained for more epochs. Every epoch that
    reaches a new lowest loss saves the network and evaluates it; the best
    checkpoint is reloaded and evaluated once more at the end.

    Args:
        file_path: path of the test file; the two characters before the
            extension select the evaluation mode.
        sensor_model: accepted for compatibility with the caller; not used here.
    """
    suffix = file_path.split('.')[-2][-2:]
    simulated = suffix if suffix in ('RR', 'FR') else 'test'

    tester = Tester(file_path, mode='ft')  # supplies the fine-tuning input/target
    evaluator = Tester(file_path, mode=simulated)
    criterion = nn.L1Loss(reduction='mean').cuda()
    regularization = loss_with_l2_regularization().cuda()

    # ---- load the pretrained model, or start from a fresh one ----
    init_loss = 0
    model_path = "../pretrained_models/1WV4_PNNplus_model.pth.tar"
    if os.path.isfile(model_path):
        print("loading model")
        # NOTE(review): torch.load unpickles arbitrary objects (the checkpoint
        # stores the whole model); only load checkpoints from a trusted source.
        checkpoint = torch.load(model_path)
        print(checkpoint.keys())
        net = checkpoint['model']
        print(net.conv1.weight.data[0, 0, 0, 0])
        net.load_state_dict(checkpoint['model_state'])
        lr_ = 1e-4
        FT_epochs = 50  # number of fine tuning epochs
    else:
        nr_bands = 4  # selected by user or taken from data?
        lr_ = 0.0001 * 17 * 17 * nr_bands
        FT_epochs = 5000
        net = APNN().cuda()
        net.apply(weights_init)
        print(net.conv1.weight.data[0, 0, 0, 0])

    print(net.conv1.weight.data[0, 0, 0, 0])
    print(net)

    # ---- loss of the starting network on the fine-tuning pair ----
    test_gt = tester.test_gt - tester.test_I_in[:, :-1, blk:-blk, blk:-blk]
    pretrain_inIt_loss = criterion(net(tester.test_I_in.cuda()), test_gt.cuda()).item()
    print("init loss: {:.20f} pretrain_inIt loss: {:.20f}".format(init_loss, pretrain_inIt_loss))
    eval_test(net, evaluator, mode="eval", mode2="pre")
    print('-' * 100)

    # ---- optimizer: the last layer gets a learning rate ten times smaller ----
    target_layerParam = list(map(id, net.conv3.parameters()))
    base_layerParam = [p for p in net.parameters() if id(p) not in target_layerParam]

    training_parameters = [{'params': net.conv3.parameters(), 'lr': lr_ / 10},
                           {'params': base_layerParam}]

    optimizer = optim.SGD(training_parameters, lr=lr_, momentum=0.9)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    save_dir = '../ft_network/WV4'
    best_path = save_dir + '/net.pth.tar'
    os.makedirs(save_dir, exist_ok=True)

    v_min = 10000
    ft_loss = np.zeros(FT_epochs)
    Train_time = time()
    print(tester.test_gt.shape)

    # The training pair never changes, so it is moved to the GPU once instead
    # of once per epoch. The target is the residual between the reference and
    # the cropped input (last channel dropped).
    lms_crop = tester.test_I_in[:, :-1, blk:-blk, blk:-blk].cuda()
    x1 = tester.test_I_in.cuda().float()
    x2 = (tester.test_gt.cuda() - lms_crop).cuda().float()

    ## 2.1) "FINE TUNING"--training
    for epoch in range(FT_epochs):  # loop over the testing image multiple times
        net.train()
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(x1)
        loss = criterion(outputs, x2)
        new_loss = regularization(loss, net, flag=False)
        new_loss.backward()
        optimizer.step()

        # The tracked/printed value is the plain criterion, not new_loss.
        running_loss = loss.item()
        ft_loss[epoch] = running_loss

        if running_loss < v_min:
            torch.save(dict(model=net,
                            model_state=net.state_dict(),
                            loss=ft_loss),
                       best_path)
            net.eval()
            eval_test(net, evaluator, mode="eval", mode2="ft")
            v_min = running_loss

        print('[%d] loss: %.20f' % (epoch + 1, running_loss))

    Train_time = time() - Train_time  # measured but currently not reported

    ## 2.2) "FINE TUNING"--testing
    # Reload the checkpoint with the lowest fine-tuning loss.
    checkpoint = torch.load(best_path)
    net = checkpoint['model']
    net.load_state_dict(checkpoint['model_state'])
    print("-" * 100)
    print("pretrain_InIt_loss {:.20f}".format(pretrain_inIt_loss))
    # Entries still at zero belong to epochs after the saved one.
    for epoch_loss in checkpoint['loss']:
        if epoch_loss > 0:
            print(epoch_loss, " ")

    # Final pansharpening with the evaluator's input.
    net.to(device)
    net.eval()
    print(net.conv3.weight.data[0, 0, 0, 0], net.conv2.weight.data[0, 0, 0, 0], net.conv1.weight.data[0, 0, 0, 0])
    eval_test(net, evaluator, mode="eval", mode2="ft")

def eval_test(net, evaluator, mode="pre", mode2="pre"):
    """Run ``evaluator`` with ``net`` and save the fused result as .mat in ``../results``.

    The network output is treated as a residual: the evaluator's input (last
    channel dropped, ``blk``-pixel border cropped) is added back, the result is
    rearranged to NxHxWxC and clipped to [0, 1].

    Args:
        net: network passed to ``evaluator``.
        evaluator: callable ``evaluator(net)`` exposing ``test_I_in`` and ``mode``.
        mode: accepted for compatibility; not used here.
        mode2: tag inserted in the output file name (e.g. ``"pre"`` / ``"ft"``).
    """
    with torch.no_grad():
        Test_time = time()
        sr = evaluator(net)  # NxCxHxW
        Test_time = time() - Test_time  # measured but currently not reported

        # skip connection: add the cropped input back to the predicted residual
        lms = evaluator.test_I_in[:, :-1, blk:-blk, blk:-blk].cpu().detach().numpy()
        sr = sr.cpu().detach().numpy() + lms  # NxCxHxW

        sr = np.transpose(sr, (0, 2, 3, 1))  # to: NxHxWxC
        sr = np.clip(sr, 0, 1)

        out_dir = "../results"
        # savemat does not create missing directories.
        os.makedirs(out_dir, exist_ok=True)

        num_exm = sr.shape[0]

        if num_exm == 1:
            # 'FR' results use the "os" names; every other mode ('RR', 'test')
            # uses the "rs" names, so key/file_name are always defined.
            scale_tag = "os" if evaluator.mode == "FR" else "rs"
            key = 'apnn_wv4_' + scale_tag + '_alice'
            file_name = key + '_' + mode2 + ".mat"
            sio.savemat(os.path.join(out_dir, file_name), {key: sr[0, :, :, :]})
        else:
            for index in range(num_exm):  # one file per example
                file_name = "apnn_wv4_rs" + str(index) + mode2 + ".mat"
                sio.savemat(os.path.join(out_dir, file_name), {'apnn_wv4_rs': sr[index, :, :, :]})

###################################################################
# ------------------- Main Function (Run first) -------------------
###################################################################
if __name__ == '__main__':
    # Sensor selection: a known sensor name is expanded to a checkpoint file name.
    available_models = ['IKONOS', 'GeoEye1', 'WV2', 'WV3', 'WV4', 'QB']
    sensor_model = 'WV4'
    if sensor_model in available_models:
        sensor_model += '_PNNplus_model.pth.tar'

    # Single HxWxC test image; the "_RR" suffix selects the evaluation mode
    # inside test(). Batched alternative: "../test_data/TestData_wv4.h5"
    file_path = "../test_data/imgs/Alice_WV4_RR.mat"

    test(file_path, sensor_model)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/models/APNN/model_qb.py
================================================
import torch
import torch.nn as nn
import math

class loss_with_l2_regularization(nn.Module):
    """Adds an L2 penalty over the convolution weights of a model to a loss value.

    NOTE(review): the penalty is computed from ``parameter.data``, so it changes
    the returned value but carries no gradient back to the weights.
    """

    def __init__(self):
        super(loss_with_l2_regularization, self).__init__()

    def forward(self, criterion, model, weight_decay=1e-5, flag=True):
        """Return ``criterion`` plus ``weight_decay * sum(w ** 2) / 2`` per conv weight.

        Args:
            criterion: loss value the penalty is added to.
            model: module whose parameters named with both 'conv' and 'weight'
                are penalised.
            weight_decay: scale of the penalty.
            flag: when true, print each parameter name with its penalty.
        """
        penalties = {}
        for name, param in model.named_parameters():
            if 'conv' not in name or 'weight' not in name:
                continue
            term = weight_decay * ((param.data ** 2).sum() / 2)
            penalties[name] = term
            if flag:
                print("{} : {}".format(name, term))

        return criterion + sum(penalties.values())

# def weights_init(m):                                               # 1
#     classname = m.__class__.__name__                               # 2
#     if classname.find('Conv') != -1:                               # 3
#         variance_scaling_initializer(m.weight.data)

# -------------Initialization----------------------------------------
def init_weights(*modules):
    """Initialise every Conv2d, BatchNorm2d and Linear layer found in ``modules``.

    * Conv2d: weights via ``variance_scaling_initializer``, bias set to 0.
    * BatchNorm2d: weight set to 1, bias set to 0.
    * Linear: Kaiming-normal weights (fan_in, relu), bias set to 0.

    Other layer types are left untouched.
    """
    for root in modules:
        for layer in root.modules():
            if isinstance(layer, nn.BatchNorm2d):
                nn.init.constant_(layer.weight, 1.0)
                nn.init.constant_(layer.bias, 0.0)
                continue

            if isinstance(layer, nn.Conv2d):
                variance_scaling_initializer(layer.weight)
            elif isinstance(layer, nn.Linear):
                nn.init.kaiming_normal_(layer.weight, mode='fan_in', nonlinearity='relu')
            else:
                continue

            # Conv2d and Linear share the bias handling.
            if layer.bias is not None:
                nn.init.constant_(layer.bias, 0.0)


class APNN(nn.Module):
    """Three-layer CNN: 9x9, 5x5 and 5x5 convolutions without padding.

    The input has ``spectral_num + 1`` channels (4 + 1 here) and the output has
    ``spectral_num`` channels; each spatial side shrinks by 16 pixels in total.
    The layers are initialised with ``init_weights`` on construction.
    """

    def __init__(self):
        super(APNN, self).__init__()

        spectral_num = 4
        channel = 48
        shared = dict(stride=1, bias=True)  # options common to the three layers

        self.conv1 = nn.Conv2d(in_channels=spectral_num + 1, out_channels=channel, kernel_size=9, **shared)
        self.conv2 = nn.Conv2d(in_channels=channel, out_channels=32, kernel_size=5, **shared)
        self.conv3 = nn.Conv2d(in_channels=32, out_channels=spectral_num, kernel_size=5, **shared)

        self.relu = nn.ReLU(inplace=True)

        init_weights(self.conv1, self.conv2, self.conv3)

    def forward(self, x):
        """Apply conv1 -> ReLU -> conv2 -> ReLU -> conv3 to ``x`` and return the result."""
        hidden = self.relu(self.conv1(x))
        hidden = self.relu(self.conv2(hidden))
        return self.conv3(hidden)


# ----------------- End-Main-Part ------------------------------------
# QB
def variance_scaling_initializer(tensor):
    """Fill ``tensor`` in place with truncated-normal values of std 0.001 and return it.

    For every element four standard-normal candidates are drawn and one lying
    in (-2, 2) is kept, then the result is scaled by 0.001.

    NOTE(review): despite the name, the fan-based standard deviation is computed
    but not applied; the fixed 0.001 is what reaches the tensor.
    """
    from scipy.stats import truncnorm  # imported by the original code; not used

    # Kept from the original flow: the fan computation runs before any value
    # is written, and its result only feeds the unused `stddev`.
    fan_in, fan_out = torch.nn.init._calculate_fan_in_and_fan_out(tensor)
    scale = 1.0 / max(1., fan_in)
    # constant taken from scipy.stats.truncnorm.std(a=-2, b=2, loc=0., scale=1.)
    stddev = math.sqrt(scale) / .87962566103423978

    with torch.no_grad():
        candidates = tensor.new_empty(tensor.shape + (4,)).normal_()
        in_range = (candidates < 2) & (candidates > -2)
        chosen = in_range.max(-1, keepdim=True)[1]
        tensor.data.copy_(candidates.gather(-1, chosen).squeeze(-1))
        tensor.data.mul_(0.001).add_(0.0)

    return tensor


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/models/APNN/model_wv2.py
================================================
import torch
import torch.nn as nn
import math

class loss_with_l2_regularization(nn.Module):
    """Adds an L2 penalty over the convolution weights of a model to a loss value.

    NOTE(review): the penalty is computed from ``parameter.data``, so it changes
    the returned value but carries no gradient back to the weights.
    """

    def __init__(self):
        super(loss_with_l2_regularization, self).__init__()

    def forward(self, criterion, model, weight_decay=1e-5, flag=True):
        """Return ``criterion`` plus ``weight_decay * sum(w ** 2) / 2`` per conv weight.

        Args:
            criterion: loss value the penalty is added to.
            model: module whose parameters named with both 'conv' and 'weight'
                are penalised.
            weight_decay: scale of the penalty.
            flag: when true, print each parameter name with its penalty.
        """
        penalties = {}
        for name, param in model.named_parameters():
            if 'conv' not in name or 'weight' not in name:
                continue
            term = weight_decay * ((param.data ** 2).sum() / 2)
            penalties[name] = term
            if flag:
                print("{} : {}".format(name, term))

        return criterion + sum(penalties.values())

def weights_init(m):
    """Re-initialise ``m.weight`` when the class name of ``m`` contains 'Conv'.

    Intended for ``net.apply(weights_init)``; modules of any other class are
    left unchanged.
    """
    if 'Conv' in m.__class__.__name__:
        variance_scaling_initializer(m.weight.data)

# -------------Initialization----------------------------------------
def init_weights(*modules):
    """Initialise every Conv2d, BatchNorm2d and Linear layer found in ``modules``.

    * Conv2d: weights via ``variance_scaling_initializer``, bias set to 0.
    * BatchNorm2d: weight set to 1, bias set to 0.
    * Linear: Kaiming-normal weights (fan_in, relu), bias set to 0.

    Other layer types are left untouched.
    """
    for root in modules:
        for layer in root.modules():
            if isinstance(layer, nn.BatchNorm2d):
                nn.init.constant_(layer.weight, 1.0)
                nn.init.constant_(layer.bias, 0.0)
                continue

            if isinstance(layer, nn.Conv2d):
                variance_scaling_initializer(layer.weight)  # method 1: initialization
            elif isinstance(layer, nn.Linear):
                nn.init.kaiming_normal_(layer.weight, mode='fan_in', nonlinearity='relu')
            else:
                continue

            # Conv2d and Linear share the bias handling.
            if layer.bias is not None:
                nn.init.constant_(layer.bias, 0.0)


class APNN(nn.Module):
    """Three-layer CNN: 9x9, 5x5 and 5x5 convolutions without padding.

    The input has ``spectral_num + 1`` channels (8 + 1 here) and the output has
    ``spectral_num`` channels; each spatial side shrinks by 16 pixels in total.
    The layers keep PyTorch's default initialisation (the ``init_weights`` call
    is disabled in this variant).
    """

    def __init__(self):
        super(APNN, self).__init__()

        spectral_num = 8
        channel = 48
        shared = dict(stride=1, bias=True)  # options common to the three layers

        self.conv1 = nn.Conv2d(in_channels=spectral_num + 1, out_channels=channel, kernel_size=9, **shared)
        self.conv2 = nn.Conv2d(in_channels=channel, out_channels=32, kernel_size=5, **shared)
        self.conv3 = nn.Conv2d(in_channels=32, out_channels=spectral_num, kernel_size=5, **shared)

        self.relu = nn.ReLU(inplace=True)

        # init_weights(self.conv1, self.conv2, self.conv3)

    def forward(self, x):
        """Apply conv1 -> ReLU -> conv2 -> ReLU -> conv3 to ``x`` and return the result."""
        hidden = self.relu(self.conv1(x))
        hidden = self.relu(self.conv2(hidden))
        return self.conv3(hidden)


# ----------------- End-Main-Part ------------------------------------
def variance_scaling_initializer(tensor):
    """Fill ``tensor`` in place with truncated-normal values of std 0.001 and return it.

    For every element four standard-normal candidates are drawn and one lying
    in (-2, 2) is kept, then the result is scaled by 0.001.

    NOTE(review): despite the name, the fan-based standard deviation is computed
    but not applied; the fixed 0.001 is what reaches the tensor.
    """
    from scipy.stats import truncnorm  # imported by the original code; not used

    # Kept from the original flow: the fan computation runs before any value
    # is written, and its result only feeds the unused `stddev`.
    fan_in, fan_out = torch.nn.init._calculate_fan_in_and_fan_out(tensor)
    scale = 1.0 / max(1., fan_in)
    # constant taken from scipy.stats.truncnorm.std(a=-2, b=2, loc=0., scale=1.)
    stddev = math.sqrt(scale) / .87962566103423978

    with torch.no_grad():
        candidates = tensor.new_empty(tensor.shape + (4,)).normal_()
        in_range = (candidates < 2) & (candidates > -2)
        chosen = in_range.max(-1, keepdim=True)[1]
        tensor.data.copy_(candidates.gather(-1, chosen).squeeze(-1))
        tensor.data.mul_(0.001).add_(0.0)

    return tensor

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/models/APNN/model_wv3.py
================================================
import torch
import torch.nn as nn
import math
from variance_sacling_initializer import variance_scaling_initializer
class loss_with_l2_regularization(nn.Module):
    """Adds an L2 penalty over convolution weights to an already-computed loss.

    The penalty for each selected parameter is ``weight_decay * sum(w**2) / 2``.
    A parameter is selected when its name (as reported by
    ``model.named_parameters()``) contains both ``'conv'`` and ``'weight'``.
    """

    def __init__(self):
        super(loss_with_l2_regularization, self).__init__()

    def forward(self, criterion, model, weight_decay=1e-5, flag=True):
        """Return ``criterion`` plus the summed L2 penalties.

        Args:
            criterion: the loss value to regularize (a tensor or number).
            model: module whose named parameters are scanned.
            weight_decay: multiplier applied to each penalty term.
            flag: when true, print every parameter name with its penalty.

        Returns:
            ``criterion + sum(penalties)``.
        """
        regularizations = []
        for k, v in model.named_parameters():
            if 'conv' in k and 'weight' in k:
                # Use the parameter itself rather than ``v.data``: ``.data``
                # is detached from autograd, which made the penalty a constant
                # that never influenced the gradients.
                penality = weight_decay * (v.pow(2).sum() / 2)
                regularizations.append(penality)
                if flag:
                    # Detach only for display so the log format is unchanged.
                    print("{} : {}".format(k, penality.detach()))

        loss = criterion + sum(regularizations)
        return loss

def weights_init(m):
    """Re-initialise the weight of any module whose class name contains 'Conv'.

    Shaped for use as a per-module callback (see the commented
    ``netG.apply(weights_init)`` below); other modules are left untouched.
    """
    is_conv_like = 'Conv' in m.__class__.__name__
    if is_conv_like:
        variance_scaling_initializer(m.weight.data)

# netG.apply(weights_init)                                           # 8


class APNN(nn.Module):
    """Three-layer convolutional network (9x9 -> 5x5 -> 5x5 kernels).

    The band count and first-layer width used to be hard-coded; they are now
    constructor arguments whose defaults reproduce the previous network, so
    ``APNN()`` is unchanged.

    Args:
        spectral_num: number of output bands. The network expects
            ``spectral_num + 1`` input channels (default 8, i.e. 9 inputs).
        channel: number of feature maps produced by the first convolution
            (default 48).

    NOTE(review): the original inline notes ("x= lms; y = pan",
    "Bsx9x64x64") suggest the extra input channel is the PAN band stacked
    onto the LMS bands; confirm against the caller. An earlier note quoted a
    paper about batch normalization, but no normalization layer exists here.
    """

    def __init__(self, spectral_num=8, channel=48):
        super(APNN, self).__init__()

        # No padding is used, so each convolution shrinks height and width by
        # kernel_size - 1 (8 + 4 + 4 = 16 pixels over the whole network).
        self.conv1 = nn.Conv2d(in_channels=spectral_num + 1, out_channels=channel, kernel_size=9, stride=1,
                               bias=True)
        self.conv2 = nn.Conv2d(in_channels=channel, out_channels=32, kernel_size=5, stride=1,
                               bias=True)
        self.conv3 = nn.Conv2d(in_channels=32, out_channels=spectral_num, kernel_size=5, stride=1,
                               bias=True)

        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Apply conv1 -> ReLU -> conv2 -> ReLU -> conv3 to ``x``."""
        rs = self.relu(self.conv1(x))
        rs = self.relu(self.conv2(rs))

        # The final convolution is linear (no activation).
        output = self.conv3(rs)

        return output


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/models/APNN/model_wv4.py
================================================
import torch
import torch.nn as nn
import math
class loss_with_l2_regularization(nn.Module):
    """Adds an L2 penalty over convolution weights to an already-computed loss.

    The penalty for each selected parameter is ``weight_decay * sum(w**2) / 2``.
    A parameter is selected when its name (as reported by
    ``model.named_parameters()``) contains both ``'conv'`` and ``'weight'``.
    """

    def __init__(self):
        super(loss_with_l2_regularization, self).__init__()

    def forward(self, criterion, model, weight_decay=1e-5, flag=True):
        """Return ``criterion`` plus the summed L2 penalties.

        Args:
            criterion: the loss value to regularize (a tensor or number).
            model: module whose named parameters are scanned.
            weight_decay: multiplier applied to each penalty term.
            flag: when true, print every parameter name with its penalty.

        Returns:
            ``criterion + sum(penalties)``.
        """
        regularizations = []
        for k, v in model.named_parameters():
            if 'conv' in k and 'weight' in k:
                # Use the parameter itself rather than ``v.data``: ``.data``
                # is detached from autograd, which made the penalty a constant
                # that never influenced the gradients.
                penality = weight_decay * (v.pow(2).sum() / 2)
                regularizations.append(penality)
                if flag:
                    # Detach only for display so the log format is unchanged.
                    print("{} : {}".format(k, penality.detach()))

        loss = criterion + sum(regularizations)
        return loss


class APNN(nn.Module):
    """Three-layer convolutional network (9x9 -> 5x5 -> 5x5 kernels).

    The band count and first-layer width used to be hard-coded; they are now
    constructor arguments whose defaults reproduce the previous network, so
    ``APNN()`` is unchanged.

    Args:
        spectral_num: number of output bands. The network expects
            ``spectral_num + 1`` input channels (default 4, i.e. 5 inputs).
        channel: number of feature maps produced by the first convolution
            (default 48).

    NOTE(review): the original inline note ("x= lms; y = pan") suggests the
    extra input channel is the PAN band stacked onto the LMS bands; confirm
    against the caller. An earlier note quoted a paper about batch
    normalization, but no normalization layer exists here.
    """

    def __init__(self, spectral_num=4, channel=48):
        super(APNN, self).__init__()

        # No padding is used, so each convolution shrinks height and width by
        # kernel_size - 1 (8 + 4 + 4 = 16 pixels over the whole network).
        self.conv1 = nn.Conv2d(in_channels=spectral_num + 1, out_channels=channel, kernel_size=9, stride=1,
                               bias=True)
        self.conv2 = nn.Conv2d(in_channels=channel, out_channels=32, kernel_size=5, stride=1,
                               bias=True)
        self.conv3 = nn.Conv2d(in_channels=32, out_channels=spectral_num, kernel_size=5, stride=1,
                               bias=True)

        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Apply conv1 -> ReLU -> conv2 -> ReLU -> conv3 to ``x``."""
        rs = self.relu(self.conv1(x))
        rs = self.relu(self.conv2(rs))

        # The final convolution is linear (no activation).
        output = self.conv3(rs)

        return output


# ----------------- End-Main-Part ------------------------------------
def variance_scaling_initializer(tensor):
    """Fill ``tensor`` in place with small truncated-normal noise and return it.

    Despite the name, the values written do not depend on fan-in/fan-out:
    every element is drawn from a zero-mean normal distribution with a fixed
    standard deviation of 0.001. Earlier revisions also computed a
    fan-based ``stddev`` and a rescaled copy of the tensor, but both results
    were discarded, so that dead code (and the unused ``scipy`` import it
    came with) has been removed. The numbers produced are unchanged.

    Args:
        tensor: ``torch.Tensor`` to overwrite; any shape is accepted.

    Returns:
        The same ``tensor`` object, modified in place.
    """

    def truncated_normal_(tensor, mean=0, std=1):
        with torch.no_grad():
            size = tensor.shape
            # Draw four candidates per element and keep the first one that
            # lies inside (-2, 2); if none qualifies, index 0 is used as is.
            tmp = tensor.new_empty(size + (4,)).normal_()
            valid = (tmp < 2) & (tmp > -2)
            ind = valid.max(-1, keepdim=True)[1]
            tensor.data.copy_(tmp.gather(-1, ind).squeeze(-1))
            tensor.data.mul_(std).add_(mean)
            return tensor

    truncated_normal_(tensor, 0.0, 0.001)

    return tensor


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/models/APNN/variance_sacling_initializer.py
================================================
import torch
import torch.nn as nn
import math


def truncated_normal_(tensor, mean=0.0, std=1.0):
    """Overwrite ``tensor`` in place with approximately truncated-normal samples.

    Four standard-normal candidates are drawn per element and the first one
    inside the open interval (-2, 2) is kept (index 0 is used when none
    qualifies). The kept values are then scaled by ``std`` and shifted by
    ``mean``. Returns the same ``tensor`` object.
    """
    with torch.no_grad():
        candidate_shape = tensor.shape + (4,)
        candidates = tensor.new_empty(candidate_shape).normal_()
        in_range = (candidates > -2) & (candidates < 2)
        # ``max`` over a boolean mask yields the position of a True entry.
        chosen = in_range.max(-1, keepdim=True).indices
        picked = candidates.gather(-1, chosen).squeeze(-1)
        tensor.data.copy_(picked)
        tensor.data.mul_(std).add_(mean)
        return tensor


def variance_scaling_initializer(tensor):
    """Re-initialise a 4-D weight tensor in place with variance-scaled noise.

    The tensor is viewed with its axes reversed (``permute(3, 2, 1, 0)``) and
    filled through ``truncated_normal_`` with mean 0 and standard deviation
    ``sqrt(1.3 * 2.0 / fan_in)``, where ``fan_in`` is the product of every
    axis of the original tensor except the first. The standard deviation and
    the resulting min/max are printed, as before.

    Args:
        tensor: 4-D ``torch.Tensor`` (``permute`` with four indices fails on
            any other rank).

    Returns:
        The same ``tensor`` object, modified in place.
    """

    def calculate_fan(shape, factor=2.0, mode='FAN_IN', uniform=False):
        # ``shape`` is the axis-reversed shape, e.g. 64 9 3 3 -> 3 3 9 64.
        if shape:
            fan_in = float(shape[-2]) if len(shape) > 1 else float(shape[-1])
            fan_out = float(shape[-1])
        else:
            fan_in = 1.0
            fan_out = 1.0
        for dim in shape[:-2]:
            fan_in *= float(dim)
            fan_out *= float(dim)
        if mode == 'FAN_IN':
            # Count only number of input connections.
            n = fan_in
        elif mode == 'FAN_OUT':
            # Count only number of output connections.
            n = fan_out
        elif mode == 'FAN_AVG':
            # Average number of inputs and output connections.
            n = (fan_in + fan_out) / 2.0
        else:
            # Previously an unknown mode surfaced later as an unbound ``n``.
            raise ValueError("mode must be 'FAN_IN', 'FAN_OUT' or 'FAN_AVG', got {!r}".format(mode))
        if uniform:
            # ``NotImplemented`` is a constant, not an exception class;
            # raising it produced a confusing TypeError.
            raise NotImplementedError("uniform variance scaling is not implemented")
        # To get stddev = math.sqrt(factor / n) need to adjust for truncated.
        trunc_stddev = math.sqrt(1.3 * factor / n)
        return fan_in, fan_out, trunc_stddev

    def variance_scaling(x):
        # The permuted tensor is a view, so the in-place fill below writes
        # straight into the caller's tensor.
        x = x.permute(3, 2, 1, 0)
        fan_in, fan_out, trunc_stddev = calculate_fan(x.shape)
        print(trunc_stddev)
        truncated_normal_(x, 0.0, trunc_stddev)
        print(x.min(), x.max())
        return x

    variance_scaling(tensor)

    return tensor


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/models/APNN/wald_utilities.py
================================================
import numpy as np
import torch
import torch.nn as nn
import math
import scipy.ndimage.filters as ft

def fspecial_gauss(size, sigma):
    """Return a normalised 2-D Gaussian kernel (mimics MATLAB's 'fspecial' gaussian).

    Args:
        size: pair ``(rows, cols)`` giving the kernel extent.
        sigma: standard deviation of the Gaussian.

    Returns:
        ``numpy`` array whose entries sum to 1 (unless every entry is zero).
    """
    half_rows, half_cols = ((extent - 1.) / 2. for extent in size)
    yy, xx = np.ogrid[-half_rows:half_rows + 1, -half_cols:half_cols + 1]
    kernel = np.exp(-(xx * xx + yy * yy) / (2. * sigma * sigma))
    # Entries that are negligible relative to the peak are forced to zero.
    negligible = kernel < np.finfo(kernel.dtype).eps * kernel.max()
    kernel[negligible] = 0
    total = kernel.sum()
    if total != 0:
        kernel /= total
    return kernel
def fir_filter_wind(Hd, w):
    """
    Design an FIR filter with the window method.

    Hd:     desired frequency response (2D)
    w:      window, multiplied element-wise (with broadcasting) onto the
            impulse response

    The windowed response is clipped from below at 0 and normalised so that
    its entries sum to 1. The result keeps the complex dtype of the inverse FFT.
    """
    centred_response = np.rot90(np.fft.fftshift(np.rot90(Hd, 2)), 2)
    impulse = np.rot90(np.fft.fftshift(np.fft.ifft2(centred_response)), 2)
    windowed = impulse * w
    windowed = np.clip(windowed, a_min=0, a_max=np.max(windowed))
    return windowed / np.sum(windowed)

def NyquistFilterGenerator(Gnyq, ratio, N):
    """Build one N x N filter kernel per entry of ``Gnyq``.

    Args:
        Gnyq: list or ndarray with one gain value per band.
        ratio: resolution ratio; the cut-off used is ``1 / ratio``.
        N: side length of each kernel.

    Returns:
        ndarray of shape ``(N, N, nbands)``; slice ``[:, :, j]`` is the real
        part of the windowed filter designed for ``Gnyq[j]``.
    """
    assert isinstance(Gnyq, (np.ndarray, list)), 'Error: GNyq must be a list or a ndarray'
    gains = np.asarray(Gnyq) if isinstance(Gnyq, list) else Gnyq
    band_count = gains.shape[0]

    kernels = np.zeros((N, N, band_count))
    cutoff = 1 / np.double(ratio)
    for band in range(band_count):
        # Gaussian width chosen from the band's gain and the cut-off.
        numerator = ((N - 1) * (cutoff / 2)) ** 2
        alpha = np.sqrt(numerator / (-2 * np.log(gains[band])))
        gaussian = fspecial_gauss((N, N), alpha)
        desired = gaussian / np.max(gaussian)
        window = np.kaiser(N, 0.5)
        kernels[:, :, band] = np.real(fir_filter_wind(desired, window))
    return kernels

def MTF(ratio, sensor, N=41):
    """Return the per-band MTF filter kernels for a multispectral sensor.

    Args:
        ratio: resolution ratio forwarded to ``NyquistFilterGenerator``.
        sensor: one of 'QB', 'Ikonos'/'IKONOS', 'GeoEye1', 'WV4', 'WV2', 'WV3'.
        N: side length of each kernel (default 41).

    Returns:
        The array produced by ``NyquistFilterGenerator`` for the sensor's
        per-band gain values.

    Raises:
        ValueError: if ``sensor`` is not recognised. Previously this case
            fell through and failed with an ``UnboundLocalError``.
    """
    if sensor == 'QB':
        GNyq = np.asarray([0.34, 0.32, 0.30, 0.22])  # Bands Order: B,G,R,NIR
    elif sensor in ('Ikonos', 'IKONOS'):
        GNyq = np.asarray([0.26, 0.28, 0.29, 0.28])  # Bands Order: B,G,R,NIR
    elif sensor in ('GeoEye1', 'WV4'):
        GNyq = np.asarray([0.23, 0.23, 0.23, 0.23])  # Bands Order: B, G, R, NIR
    elif sensor == 'WV2':
        # Seven bands at 0.35 followed by one at 0.27 (np.append flattens).
        GNyq = np.append(0.35 * np.ones((1, 7)), 0.27)
    elif sensor == 'WV3':
        GNyq = [0.325, 0.355, 0.360, 0.350, 0.365, 0.360, 0.335, 0.315]
    else:
        raise ValueError(
            "Unknown sensor {!r}; expected one of 'QB', 'Ikonos', 'IKONOS', "
            "'GeoEye1', 'WV4', 'WV2', 'WV3'".format(sensor))

    h = NyquistFilterGenerator(GNyq, ratio, N)
    return h


def MTF_PAN(ratio, sensor, N=41):
    """Return the single-band MTF filter kernel for a panchromatic sensor.

    Unrecognised sensors fall back to a gain of 0.15. The chosen gain is
    passed to ``NyquistFilterGenerator`` together with ``ratio`` and ``N``.
    """
    gain_table = (
        (('QB',), 0.15),
        (('Ikonos', 'IKONOS'), 0.17),
        (('GeoEye1', 'WV4'), 0.16),
        (('WV2',), 0.11),
        (('WV3',), 0.14),
    )
    for names, gain in gain_table:
        if any(sensor == name for name in names):
            break
    else:
        gain = 0.15  # default used for any other sensor
    GNyq = np.array([gain])
    return NyquistFilterGenerator(GNyq, ratio, N)


def interp23tap(img, ratio):
    """Upsample ``img`` by ``ratio`` with a 23-tap interpolation filter.

    The image is doubled in size ``log2(ratio)`` times. Each stage places the
    current samples on a zero-filled grid of twice the size and filters that
    grid along both spatial axes with wrap-around boundaries.

    Args:
        img: array of shape ``(rows, cols, bands)``.
        ratio: upsampling factor; must be a power of two.

    Returns:
        Array of shape ``(rows * ratio, cols * ratio, bands)``. For
        ``ratio == 1`` the input is returned unchanged.
    """
    # Imported locally so the function does not rely on the deprecated
    # ``scipy.ndimage.filters`` namespace.
    from scipy import ndimage

    assert((2**(round(math.log(ratio, 2)))) == ratio), 'Error: Only resize factors power of 2'

    r, c, b = img.shape

    CDF23 = np.asarray([0.5, 0.305334091185, 0, -0.072698593239, 0, 0.021809577942, 0, -0.005192756653, 0, 0.000807762146, 0, -0.000060081482])
    CDF23 = [element * 2 for element in CDF23]
    # Symmetric 23-tap kernel stored as a column vector.
    BaseCoeff = np.expand_dims(np.concatenate([np.flip(CDF23[1:]), CDF23]), axis=-1)

    # One doubling stage per factor of two. The former ``int(ratio / 2)``
    # happened to be right for ratios 1, 2 and 4 but ran too many stages
    # from ratio 8 upwards (ratio 8 produced a 16x result).
    stages = int(round(math.log(ratio, 2)))

    for z in range(stages):

        I1LRU = np.zeros(((2 ** (z+1)) * r, (2 ** (z+1)) * c, b))

        # The first stage places samples on odd indices, later stages on even ones.
        if z == 0:
            I1LRU[1::2, 1::2, :] = img
        else:
            I1LRU[::2, ::2, :] = img

        for i in range(b):
            # Filtering the transposed band with a column kernel, twice,
            # covers both spatial axes and restores the orientation.
            temp = ndimage.convolve(np.transpose(I1LRU[:, :, i]), BaseCoeff, mode='wrap')
            I1LRU[:, :, i] = ndimage.convolve(np.transpose(temp), BaseCoeff, mode='wrap')

        img = I1LRU

    return img

def interp23tap_GPU(img, ratio):
    """Upsample ``img`` by ``ratio`` with a 23-tap filter, using torch convolutions.

    Same scheme as ``interp23tap``: ``log2(ratio)`` doubling stages, each of
    which zero-stuffs the samples onto a grid of twice the size and filters
    that grid along both spatial axes with circular padding.

    Changes from the earlier version:
      * the stage count is ``log2(ratio)`` instead of ``int(ratio / 2)``,
        which ran too many stages from ratio 8 upwards;
      * the band axis is kept for single-band input (``np.squeeze`` used to
        drop it, after which the final ``moveaxis`` transposed the image);
      * filtering uses ``nn.functional`` directly, so no throw-away
        ``nn.Conv2d`` is created and the global RNG is left untouched.

    Args:
        img: numpy array of shape ``(rows, cols, bands)``.
        ratio: upsampling factor; must be a power of two.

    Returns:
        numpy array of shape ``(rows * ratio, cols * ratio, bands)``.
    """
    assert((2**(round(math.log(ratio, 2)))) == ratio), 'Error: Only resize factors power of 2'

    r, c, b = img.shape

    CDF23 = np.asarray([0.5, 0.305334091185, 0, -0.072698593239, 0, 0.021809577942, 0, -0.005192756653, 0, 0.000807762146, 0, -0.000060081482])
    CDF23 = [element * 2 for element in CDF23]
    taps = np.concatenate([np.flip(CDF23[1:]), CDF23])
    half = (taps.shape[0] - 1) // 2
    # Depthwise weight of shape (bands, 1, 23, 1): the same column kernel per band.
    weight = torch.from_numpy(taps).reshape(1, 1, -1, 1).repeat(b, 1, 1, 1)

    def _filter_along_rows(x):
        # Circular padding on the height axis only, then a depthwise convolution.
        x = nn.functional.pad(x, (0, 0, half, half), mode='circular')
        return nn.functional.conv2d(x, weight, groups=b)

    img = img.astype(np.float32)
    img = np.moveaxis(img, -1, 0)

    for z in range(int(round(math.log(ratio, 2)))):

        I1LRU = np.zeros((b, (2 ** (z+1)) * r, (2 ** (z+1)) * c))

        # The first stage places samples on odd indices, later stages on even ones.
        if z == 0:
            I1LRU[:, 1::2, 1::2] = img
        else:
            I1LRU[:, ::2, ::2] = img

        x = torch.from_numpy(I1LRU).unsqueeze(0)
        # Transpose, filter, transpose back, filter: covers both spatial axes.
        x = _filter_along_rows(torch.transpose(x, 2, 3))
        x = _filter_along_rows(torch.transpose(x, 2, 3))
        # Drop only the batch axis so single-band images keep their band axis.
        img = x[0].numpy()

    img = np.moveaxis(img, 0, -1)

    return img

def wald_protocol(ms,pan,ratio,sensor, channels=8):
    """Produce reduced-resolution MS and PAN inputs from full-resolution data.

    Steps, as implemented:
      1. ``ms`` is blurred band by band with the kernels returned by
         ``MTF(ratio, sensor)`` (replicate padding keeps the spatial size).
      2. The blurred MS is downsampled by ``ratio`` with bicubic
         interpolation and upsampled back by ``interp23tap_GPU``.
      3. ``pan`` is downsampled by ``ratio`` with bicubic interpolation.

    The downsampling factor and padding used to be hard-coded to 0.25 and
    20, which only matched ``ratio == 4`` with a 41-tap kernel; any other
    ratio failed on a shape mismatch. Both are now derived from ``ratio``
    and from the kernel size, with identical results for ``ratio == 4``.

    Args:
        ms: 4-D tensor ``(batch, channels, H, W)``.
        pan: 4-D tensor passed to bicubic interpolation.
        ratio: resolution ratio (power of two, as required by
            ``interp23tap_GPU``).
        sensor: sensor name understood by ``MTF``.
        channels: number of MS bands; must match the kernels from ``MTF``.

    Returns:
        ``(ms_lr, pan_lr)``: ``ms_lr`` has the shape of ``ms`` and is created
        with ``torch.zeros`` on the default device; ``pan_lr`` is ``pan``
        reduced by ``ratio``.
    """
    mtf_kernel = MTF(ratio, sensor)

    # (N, N, bands) -> (bands, 1, N, N): one depthwise kernel per band.
    MTF_kern = np.moveaxis(mtf_kernel, -1, 0)
    MTF_kern = np.expand_dims(MTF_kern, axis = 1)
    MTF_kern = torch.from_numpy(MTF_kern).type(torch.float32)

    # Depthwise blur with replicate padding sized to keep H and W unchanged.
    pad = (MTF_kern.shape[-1] - 1) // 2
    ms_padded = nn.functional.pad(ms, (pad, pad, pad, pad), mode='replicate')
    ms_down = nn.functional.conv2d(ms_padded, MTF_kern, groups=channels)

    scale = 1.0 / ratio
    ms_wald_ = nn.functional.interpolate(ms_down, scale_factor=scale, mode='bicubic')
    ms_lr = torch.zeros(ms.shape)
    for i in range(ms_wald_.shape[0]):
        # (C, h, w) -> (h, w, C) numpy copy for the numpy-based interpolator.
        temp = ms_wald_[i].permute(1, 2, 0).detach().cpu().numpy().copy()
        ms_lr[i, :, :, :] = torch.from_numpy(interp23tap_GPU(temp, ratio)).permute((2, 0, 1))
    pan_lr = nn.functional.interpolate(pan, scale_factor=scale, mode='bicubic')

    return ms_lr, pan_lr


def wald_protocol_v2(ms, pan, ratio, sensor, channels=8):
    """Return the MTF-blurred PAN image downsampled by a factor of 0.25.

    Only the PAN branch is executed and returned. The MS branch
    (``genMTF_MS``) is defined but never called; the trailing comment on the
    return statement suggests it was once meant to return ``ms_lr`` as well.
    """

    def _depthwise_mtf_filter(mtf_kernel, n_channels):
        # (N, N, bands) -> (bands, 1, N, N) float32 weights for a depthwise conv.
        weights = np.expand_dims(np.moveaxis(mtf_kernel, -1, 0), axis=1)
        weights = torch.from_numpy(weights).type(torch.float32)

        layer = nn.Conv2d(in_channels=n_channels,
                          out_channels=n_channels,
                          kernel_size=weights.shape,
                          groups=n_channels,
                          padding=20,
                          padding_mode='replicate',
                          bias=False)
        # The freshly initialised weights are replaced by the fixed MTF kernel.
        layer.weight.data = weights
        layer.weight.requires_grad = False
        return layer

    def genMTF_MS():
        blur = _depthwise_mtf_filter(MTF(ratio, sensor), channels)
        reduced = nn.functional.interpolate(blur(ms), scale_factor=0.25, mode='bicubic')
        restored = torch.zeros(ms.shape)
        for idx in range(reduced.shape[0]):
            sample = torch.squeeze(torch.squeeze(reduced[idx, :, :, :]).permute((1, 2, 0)))
            sample = np.copy(np.asarray(sample.detach().cpu()))
            restored[idx, :, :, :] = torch.from_numpy(interp23tap_GPU(sample, ratio)).permute((2, 0, 1))
        return restored

    def genMTF_PAN():
        blur = _depthwise_mtf_filter(MTF_PAN(ratio, sensor), 1)
        return nn.functional.interpolate(blur(pan), scale_factor=0.25, mode='bicubic')

    return genMTF_PAN()  # ms_lr, pan_lr

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/models/BDPN/bdpn_main.py
================================================
# GPL License
# Copyright (C) 2021 , UESTC
# All Rights Reserved
# @Author  : Xiao Wu, Ran Ran, LiangJian Deng
# @reference:
import torch
import torch.nn as nn
import torch.optim as optim
from .model_bdpn import BDPN

class SetCriterion(nn.Module):
    """Evaluates a dictionary of loss callables and collects their results.

    Each entry of ``losses`` is called once per forward pass. A result that
    is a ``dict`` is merged into the returned dictionary; any other result
    is stored under the loss's own key.
    """

    def __init__(self, losses, weight_dict):
        """Create the criterion.

        Parameters:
            losses: dict mapping a name to a loss callable.
            weight_dict: dict mapping loss names to their relative weight.
                It is stored on the instance but not applied in ``forward``.
        """
        super().__init__()
        self.weight_dict = weight_dict
        self.losses = losses
        # Reused across calls: results of later calls overwrite earlier ones.
        self.loss_dicts = {}

    def forward(self, outputs, targets, *args, **kwargs):
        """Compute every configured loss.

        Parameters:
            outputs: model output, passed as first argument to each loss.
            targets: reference data, passed as second argument to each loss.
            *args: extra positional arguments, forwarded to every loss
                except the one registered under the key ``'Loss'``.
            **kwargs: accepted but not used.

        Returns:
            The dictionary of loss values accumulated on the instance.
        """
        for k, loss in self.losses.items():
            # Each loss is evaluated exactly once. Previously it was called
            # a second time just to store the value, doubling the work.
            if k == 'Loss':
                result = loss(outputs, targets)
            else:
                result = loss(outputs, targets, *args)

            if isinstance(result, dict):
                self.loss_dicts.update(result)
            else:
                self.loss_dicts.update({k: result})

        return self.loss_dicts

from UDL.pansharpening.models import PanSharpeningModel
class build_bdpn(PanSharpeningModel, name='BDPN'):
    """Builder that assembles the BDPN model, criterion, optimizer and scheduler."""

    def __call__(self, cfg):
        """Build the training components from ``cfg``.

        Reads ``cfg.reg``, ``cfg.dataset`` (a mapping whose values are
        searched for the substring "wv") and ``cfg.lr``.

        Returns:
            ``(model, criterion, optimizer, scheduler)``.
        """

        # important for Pansharpening models, which are from tensorflow code
        self.reg = cfg.reg

        # 8 bands when any dataset entry mentions "wv", otherwise 4.
        if any(["wv" in v for v in cfg.dataset.values()]):
            spectral_num = 8
        else:
            spectral_num = 4

        # ``reduction='mean'`` is the supported spelling of the deprecated
        # ``size_average=True`` and computes the same value.
        loss = nn.MSELoss(reduction='mean').cuda()  ## Define the Loss function
        weight_dict = {'loss': 1}
        losses = {'loss': loss}
        criterion = SetCriterion(losses, weight_dict)
        model = BDPN(spectral_num, criterion).cuda()
        optimizer = optim.Adam(model.parameters(), lr=cfg.lr, betas=(0.9, 0.999), weight_decay=1e-5)  ## optimizer 1: Adam
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=100,
                                                       gamma=0.8)  # lr = lr * gamma every 100 scheduler steps

        return model, criterion, optimizer, scheduler


###################################################################
# ------------------- Main Function (Run first) -------------------
###################################################################


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/models/BDPN/loss_utils.py
================================================
import torch
import math
import numpy as np
import torch.nn.functional as F
from pytorch_msssim import ssim, ms_ssim, SSIM, MS_SSIM

# Usage notes for pytorch_msssim:
# X, Y: (N,3,H,W) batches of RGB images with values ranging from 0 to 255.
# ssim_val = ssim(X, Y, data_range=255, size_average=False)        # returns (N,)
# ms_ssim_val = ms_ssim(X, Y, data_range=255, size_average=False)  # returns (N,)
# Or set 'size_average=True' to get a scalar value as loss:
# ssim_loss = ssim(X, Y, data_range=255, size_average=True)        # returns a scalar value
# ms_ssim_loss = ms_ssim(X, Y, data_range=255, size_average=True)  # returns a scalar value
# Or reuse windows with SSIM & MS_SSIM:
# ssim_module = SSIM(win_size=11, win_sigma=1.5, data_range=255, size_average=True, channel=3)
# ms_ssim_module = MS_SSIM(win_size=11, win_sigma=1.5, data_range=255, size_average=True, channel=3)
# ssim_loss = ssim_module(X, Y)
# ms_ssim_loss = ms_ssim_module(X, Y)


# def compute_charbonnier_loss(tensor1, tensor2, is_mean=True):
#     epsilon = 1e-6
#     if is_mean:
#         loss = tf.reduce_mean(tf.reduce_mean(tf.sqrt(tf.square(tf.subtract(tensor1,tensor2))+epsilon), [1, 2, 3]))
#     else:
#         loss = tf.reduce_mean(tf.reduce_sum(tf.sqrt(tf.square(tf.subtract(tensor1,tensor2))+epsilon), [1, 2, 3]))
#
#     return loss

def compute_charbonnier_loss(tensor1, tensor2, is_mean=True):
    """Charbonnier loss: ``sqrt((a - b)^2 + 1e-6)`` reduced to a scalar.

    Args:
        tensor1, tensor2: 4-D tensors of matching shape.
        is_mean: when true, average over dims 1-3 of every sample; otherwise
            sum over them. The per-sample values are then averaged.

    Returns:
        Scalar tensor.
    """
    eps = 1e-6
    residual = torch.sqrt(torch.square(tensor1 - tensor2) + eps)
    reduce_per_sample = torch.mean if is_mean else torch.sum
    per_sample = reduce_per_sample(residual, [2, 3, 1])
    return torch.mean(per_sample)


# def compute_ergas_loss(tensor1, tensor2):
#     epsilon = 1e-8
#     rmse = tf.sqrt(tf.reduce_mean(tf.square(tf.subtract(tensor1,tensor2)),[1,2])+epsilon)
#     mean = tf.reduce_mean(tensor2, [1, 2])
#     mean = tf.exp(mean)
#     loss = tf.sqrt(tf.reduce_mean(tf.square(tf.divide(rmse,mean)))+epsilon)
#     return loss

def compute_ergas_loss(tensor1, tensor2):
    """ERGAS-style loss between two 4-D tensors.

    For every sample and channel the RMSE over dims 2 and 3 is divided by
    ``exp(mean(tensor2))`` over the same dims; the result is the root of the
    mean squared ratio. A small epsilon (1e-8) is added inside both roots.
    """
    eps = 1e-8
    squared_error = torch.square(tensor1 - tensor2)
    rmse = torch.sqrt(squared_error.mean([2, 3]) + eps)
    reference_level = torch.exp(tensor2.mean([2, 3]))
    ratio = rmse / reference_level
    return torch.sqrt(torch.square(ratio).mean() + eps)

# def compute_spetral_shift_loss(tensor1, tensor2):
#     epsilon = 1e-8
#     size = (int(int(tensor1.get_shape()[1])/4), int(int(tensor1.get_shape()[2])/4))
#     tensor_lr1 = tf.image.resize_images(tensor1, size)
#     tensor_lr2 = tf.image.resize_images(tensor2, size)
#     loss = compute_ergas_loss(tensor_lr1, tensor_lr2)
#     #tf.reduce_mean(tf.sqrt(tf.reduce_mean(tf.square(tf.subtract(tensor_lr1,tensor_lr2)),[1,2])+epsilon))
#     return loss

def compute_spetral_shift_loss(tensor1, tensor2):
    """ERGAS loss between quarter-resolution versions of the two inputs.

    Both tensors are resized to one quarter of ``tensor1``'s height and
    width with ``F.interpolate`` (default mode) and then compared with
    ``compute_ergas_loss``.

    Args:
        tensor1, tensor2: 4-D tensors ``(N, C, H, W)``.

    Returns:
        Scalar tensor returned by ``compute_ergas_loss``.
    """
    # ``get_shape()`` is TensorFlow API and does not exist on torch tensors;
    # read the spatial size from ``.shape`` instead.
    height, width = tensor1.shape[2], tensor1.shape[3]
    size = (int(height) // 4, int(width) // 4)
    tensor_lr1 = F.interpolate(tensor1, size)
    tensor_lr2 = F.interpolate(tensor2, size)
    loss = compute_ergas_loss(tensor_lr1, tensor_lr2)
    return loss

# def compute_ssim_loss(tensor1, tensor2):
#     ssim = tf.image.ssim_multiscale(tensor1, tensor2, np.float32(2.0))
#     loss = 1 - tf.reduce_mean(ssim)
#     return loss

def compute_ssim_loss(tensor1, tensor2, channel = 8):
    """Return ``1 - MS-SSIM(tensor1, tensor2)``.

    Args:
        tensor1, tensor2: image batches to compare.
        channel: number of image channels, forwarded to ``MS_SSIM``.

    Returns:
        Scalar tensor.
    """
    ssim_module = MS_SSIM(win_size=11, win_sigma=1.5, data_range=1, size_average=True, channel=channel)
    # The module has to be applied to the inputs. Previously the module
    # object itself was handed to ``torch.mean``, which raised.
    ssim = ssim_module(tensor1, tensor2)
    loss = 1 - torch.mean(ssim)
    return loss


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/models/BDPN/main_train_wv3.py
================================================
# GPL License
# Copyright (C) 2021 , UESTC
# All Rights Reserved
# @Author  : Xiao Wu, Ran Ran, LiangJian Deng
# @reference:
import os
import torch
import torch.backends.cudnn as cudnn
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from data_wv3 import Dataset_Pro
from model_wv3 import BDPN
from torchstat import stat
import numpy as np
from tensorboardX import SummaryWriter
import shutil
from loss_utils import compute_charbonnier_loss, compute_ergas_loss

# Restrict CUDA to the first GPU for this script.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

###################################################################
# ------------------- Pre-Define Part----------------------
###################################################################
# ============= 1) Pre-Define =================== #
# Seed the CPU and all CUDA generators.
SEED = 10
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
# cudnn.benchmark = True  ### automatically search for the best algorithm
cudnn.deterministic = True

# ============= 2) HYPER PARAMS(Pre-Defined) ==========#
lr = 0.0001            # initial learning rate for the optimizer
epochs = 1000          # last epoch number reached by train()
ckpt = 50              # a checkpoint is saved every `ckpt` epochs
batch_size = 8         # batch size of both data loaders
lambda_v = 1.0         # weight of the half-resolution loss; (1 - lambda_v) weights the full-resolution loss
lambda_init = 0.05     # amount subtracted from lambda_v per decay step
lambda_declay = 5      # number of epochs per decay step (applied while epoch <= 100)
model_path = "Weights/wv3/.pth"  # optional pretrained weights, loaded only if the file exists

# ============= 3) Load Model + Loss + Optimizer + Learn_rate_update ==========#
model = BDPN().cuda()
if os.path.isfile(model_path):
    model.load_state_dict(torch.load(model_path))   ## Load the pretrained weights
    print('PANnet is Successfully Loaded from %s' % (model_path))

# Print a per-layer statistics report for the two model inputs.
stat(model, input_size=[(8, 16, 16), (1, 64, 64)])
#criterion = nn.MSELoss(size_average=True).cuda()

#optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=1e-7)  # optimizer 2
optimizer = optim.Adam(model.parameters(), lr=lr, betas=(0.9, 0.999), weight_decay=1e-5)   # optimizer 1
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=100,
                                               gamma=0.8)  # lr = lr * gamma every step_size (= 100) epochs

# ============= 4) Tensorboard_show + Save_model ==========#
#if os.path.exists('train_logs'):  # for tensorboard: copy dir of train_logs  ## Tensorboard_show: case 1
#   shutil.rmtree('train_logs')  # ---> console (see tensorboard): tensorboard --logdir = dir of train_logs

writer = SummaryWriter('./train_logs')    ## Tensorboard_show: case 2

def save_checkpoint(model, epoch):
    """Save ``model.state_dict()`` to ``Weights/<epoch>.pth``.

    The ``Weights`` directory is created when missing; previously the save
    failed on a fresh checkout where the directory did not exist.
    """
    os.makedirs('Weights', exist_ok=True)
    model_out_path = 'Weights' + '/' + "{}.pth".format(epoch)
    torch.save(model.state_dict(), model_out_path)

###################################################################
# ------------------- Main Train (Run second)----------------------
###################################################################
def train(training_data_loader, validate_data_loader,start_epoch=0):
    """Run the training/validation loop using the module-level model and optimizer.

    For every epoch the function trains on all batches, steps the learning
    rate scheduler, logs the mean training loss, saves a checkpoint every
    ``ckpt`` epochs and evaluates on the validation loader. The mean
    validation loss is logged every 10 epochs.

    The loss is ``lambda_v * L(sr_down, gt_down) + (1 - lambda_v) * L(sr, gt)``
    with ``L`` the Charbonnier loss. During the first 100 epochs ``lambda_v``
    is lowered by ``lambda_init`` every ``lambda_declay`` epochs; the global
    ``lambda_v`` is updated in place.

    Args:
        training_data_loader: iterable of batches ``(gt, ms, pan, ...)``.
        validate_data_loader: iterable of batches with the same layout.
        start_epoch: number of epochs already completed (default 0).
    """
    global lambda_v
    print('Start training...')

    for epoch in range(start_epoch, epochs, 1):

        epoch += 1  # epochs are reported 1-based
        epoch_train_loss, epoch_val_loss = [], []

        if epoch <= 100:
            lambda_v = 1.0 - lambda_init*(epoch//lambda_declay)  # decrease lambda_v for every lambda_declay epochs

        # ============Epoch Train=============== #
        model.train()

        for iteration, batch in enumerate(training_data_loader, 1):
            gt, ms, pan = batch[0].cuda(), batch[1].cuda(), batch[2].cuda()

            optimizer.zero_grad()

            sr, sr_down = model(ms, pan)  # full-resolution and half-resolution outputs

            gt_down = F.interpolate(gt, scale_factor=0.5, mode='nearest')   # nearest down 2
            loss1 = compute_charbonnier_loss(sr_down, gt_down)  # half-resolution loss
            loss2 = compute_charbonnier_loss(sr, gt)  # full-resolution loss

            loss = lambda_v*loss1 + (1.0 - lambda_v)*loss2   # total loss
            epoch_train_loss.append(loss.item())  # collect all batch losses of this epoch

            loss.backward()
            optimizer.step()

        lr_scheduler.step()  # learning-rate schedule advances once per epoch

        t_loss = np.nanmean(np.array(epoch_train_loss))  # mean of all batch losses = epoch loss
        writer.add_scalar('mse_loss/t_loss', t_loss, epoch)  # write to tensorboard
        # Current epoch first, total second (the two were swapped before).
        print('Epoch: {}/{} training loss (lr={}, lam_v={}): {:.7f}'.format(epoch, epochs, lr_scheduler.get_last_lr(), lambda_v, t_loss))  # print loss for each epoch

        if epoch % ckpt == 0:  # save the model every ckpt epochs
            save_checkpoint(model, epoch)

        # ============Epoch Validate=============== #
        model.eval()
        with torch.no_grad():
            for iteration, batch in enumerate(validate_data_loader, 1):
                gt, ms, pan = batch[0].cuda(), batch[1].cuda(), batch[2].cuda()

                sr, sr_down = model(ms, pan)

                gt_down = F.interpolate(gt, scale_factor=0.5, mode='nearest')  # nearest down 2
                loss1 = compute_charbonnier_loss(sr_down, gt_down)
                loss2 = compute_charbonnier_loss(sr, gt)

                loss = lambda_v * loss1 + (1.0 - lambda_v) * loss2

                epoch_val_loss.append(loss.item())

        if epoch % 10 == 0:
            v_loss = np.nanmean(np.array(epoch_val_loss))
            writer.add_scalar('val/v_loss', v_loss, epoch)
            print('             validate loss: {:.7f}'.format(v_loss))

    writer.close()  # close tensorboard

###################################################################
# ------------------- Main Function (Run first) -------------------
###################################################################
# Script entry point: build the training and validation loaders, then train.
if __name__ == "__main__":
    train_set = Dataset_Pro('./training_data/train_wv3_10000.h5')  # create the training dataset
    training_data_loader = DataLoader(dataset=train_set, num_workers=0, batch_size=batch_size, shuffle=True,
                                      pin_memory=True, drop_last=True)  # shuffled batches; an incomplete last batch is dropped

    validate_set = Dataset_Pro('./training_data/valid_wv3_10000.h5')  # create the validation dataset
    validate_data_loader = DataLoader(dataset=validate_set, num_workers=0, batch_size=batch_size, shuffle=False,
                                      pin_memory=True, drop_last=True)  # ordered batches; an incomplete last batch is dropped

    train(training_data_loader, validate_data_loader)  # run the training loop defined above


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/models/BDPN/model_bdpn.py
================================================
# GPL License
# Copyright (C) 2021 , UESTC
# All Rights Reserved
# @Author  : Xiao Wu, Ran Ran, LiangJian Deng
# @reference:

import torch
import torch.nn as nn
import numpy as np
import math
import torch.nn.init as int
import sys

# print(sys.path)
import torch
import torch.nn as nn
import math
from UDL.Basis.variance_sacling_initializer import variance_scaling_initializer

# -------------Initialization----------------------------------------
def init_weights(*modules):
    """Re-initialise the Conv2d / BatchNorm2d / Linear layers found inside ``modules``.

    Every argument is walked with ``.modules()``, so containers are handled
    recursively. Layers of any other type are left untouched.

    * ``nn.Conv2d``      -> ``variance_scaling_initializer`` on the weight, bias set to 0
    * ``nn.BatchNorm2d`` -> weight set to 1, bias set to 0
    * ``nn.Linear``      -> Kaiming-normal (fan_in, relu) weight, bias set to 0
    """
    for root in modules:
        for layer in root.modules():
            if isinstance(layer, nn.BatchNorm2d):
                nn.init.constant_(layer.weight, 1.0)
                nn.init.constant_(layer.bias, 0.0)
                continue

            if isinstance(layer, nn.Conv2d):
                variance_scaling_initializer(layer.weight)
            elif isinstance(layer, nn.Linear):
                nn.init.kaiming_normal_(layer.weight, mode='fan_in', nonlinearity='relu')
            else:
                # Nothing to initialise for this layer type.
                continue

            # Conv2d and Linear share the same bias treatment.
            if layer.bias is not None:
                nn.init.constant_(layer.bias, 0.0)

# ----------------------------------------------------
class Resblock(nn.Module):
    """Residual unit ``x + conv21(prelu(conv20(x)))`` built from 3x3, shape-preserving convolutions.

    Args:
        channel: number of input/output feature channels. Defaults to 64, the
            width that used to be hard-coded, so ``Resblock()`` is unchanged.
    """

    def __init__(self, channel=64):
        super(Resblock, self).__init__()

        self.conv20 = nn.Conv2d(in_channels=channel, out_channels=channel, kernel_size=3, stride=1, padding=1,
                                bias=True)
        self.conv21 = nn.Conv2d(in_channels=channel, out_channels=channel, kernel_size=3, stride=1, padding=1,
                                bias=True)
        self.prelu = nn.PReLU(num_parameters=1, init=0.2)

    def forward(self, x):
        """Return ``x`` plus its two-convolution residual; the shape is preserved."""
        rs1 = self.prelu(self.conv20(x))  # N x channel x H x W
        rs1 = self.conv21(rs1)  # N x channel x H x W
        rs = torch.add(x, rs1)  # N x channel x H x W

        return rs

# -----------------------------------------------------
class BDPN(nn.Module):
    """Two-stage network that fuses a 1-channel PAN image with a low-resolution MS image.

    PAN features are extracted at full and half resolution (ten residual
    blocks each); the MS image is upsampled twice by pixel shuffle and the PAN
    details are added after each upsampling step.

    Args:
        spectral_num: number of MS bands (channels of the output).
        criterion: callable used by ``train_step``; called as
            ``criterion(sr, gt, *args, **kwargs)`` and expected to return a
            mapping that contains the key ``'loss'``.
        channel: feature width of the PAN branch. It is now forwarded to the
            residual blocks as well; previously they were fixed at 64 and any
            other value failed in the backbone with a channel mismatch.
    """

    def __init__(self, spectral_num, criterion, channel=64):
        super(BDPN, self).__init__()

        channel1 = channel
        channel2 = 4 * spectral_num  # PixelShuffle(2) divides the channel count by 4
        self.criterion = criterion
        # ConvTranspose2d: output = (input - 1)*stride + outpading - 2*padding + kernelsize
        # Conv2d: padding = kernel_size//2
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=channel1, kernel_size=3, stride=1, padding=1,
                               bias=True)

        # Residual blocks stay registered as res1..res10 / rres1..rres10 (same
        # order as before) so existing state_dict keys remain valid.
        for idx in range(1, 11):
            setattr(self, 'res{}'.format(idx), Resblock(channel1))
        for idx in range(1, 11):
            setattr(self, 'rres{}'.format(idx), Resblock(channel1))

        self.conv3 = nn.Conv2d(in_channels=channel1, out_channels=spectral_num, kernel_size=3, stride=1, padding=1,
                               bias=True)
        self.conv4 = nn.Conv2d(in_channels=spectral_num, out_channels=channel2, kernel_size=3, stride=1, padding=1,
                               bias=True)
        self.conv5 = nn.Conv2d(in_channels=spectral_num, out_channels=channel2, kernel_size=3, stride=1, padding=1,
                               bias=True)

        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.pixshuf = nn.PixelShuffle(upscale_factor=2)  # out = ps(img)
        # Not used in forward(); kept because its parameter is part of the state_dict.
        self.prelu = nn.PReLU(num_parameters=1, init=0.2)

        # Stage-1 backbone (full resolution): 10 residual blocks.
        self.backbone = nn.Sequential(*[getattr(self, 'res{}'.format(idx)) for idx in range(1, 11)])
        # Stage-2 backbone (half resolution): 10 residual blocks.
        self.backbone2 = nn.Sequential(*[getattr(self, 'rres{}'.format(idx)) for idx in range(1, 11)])

        # State initialization, important! maxpool/pixshuf carry no weights and
        # are therefore no longer passed (init_weights skipped them anyway).
        init_weights(self.backbone, self.backbone2, self.conv1, self.conv3, self.conv4, self.conv5)

    def forward(self, x, y):
        """Fuse MS and PAN inputs.

        Args:
            x: MS image, N x spectral_num x h x w.
            y: PAN image, N x 1 x 4h x 4w (``conv1`` takes one channel; the two
               pixel-shuffle steps upsample ``x`` by 4 in total).

        Returns:
            ``(output, ms_feature_level1)``: the fused image at PAN resolution
            (N x spectral_num x 4h x 4w) and the intermediate fused image at
            half PAN resolution (N x spectral_num x 2h x 2w).
        """
        # ========A): pan feature (extraction)===========
        # --------pan feature (stage 1:)------------
        pan_feature = self.conv1(y)  # N x channel x 4h x 4w
        rs = self.backbone(pan_feature)  # N x channel x 4h x 4w

        pan_feature1 = torch.add(pan_feature, rs)  # N x channel x 4h x 4w
        pan_feature_level1 = self.conv3(pan_feature1)  # N x spectral_num x 4h x 4w
        pan_feature1_out = self.maxpool(pan_feature1)  # N x channel x 2h x 2w

        # --------pan feature (stage 2:)------------
        rs = self.backbone2(pan_feature1_out)  # N x channel x 2h x 2w

        pan_feature2 = torch.add(pan_feature1_out, rs)  # N x channel x 2h x 2w
        # NOTE(review): conv3 is shared by both stages; left as-is because
        # changing it would alter the saved parameter layout.
        pan_feature_level2 = self.conv3(pan_feature2)  # N x spectral_num x 2h x 2w

        # ========B): ms feature (extraction)===========
        # --------ms feature (stage 1:)------------
        ms_feature1 = self.conv4(x)  # N x 4*spectral_num x h x w
        ms_feature_up1 = self.pixshuf(ms_feature1)  # N x spectral_num x 2h x 2w
        ms_feature_level1 = torch.add(pan_feature_level2, ms_feature_up1)  # N x spectral_num x 2h x 2w

        # --------ms feature (stage 2:)------------
        ms_feature2 = self.conv5(ms_feature_level1)  # N x 4*spectral_num x 2h x 2w
        ms_feature_up2 = self.pixshuf(ms_feature2)  # N x spectral_num x 4h x 4w
        output = torch.add(pan_feature_level1, ms_feature_up2)  # N x spectral_num x 4h x 4w

        return output, ms_feature_level1

    def train_step(self, data, *args, **kwargs):
        """Run one training forward pass and compute the loss.

        Args:
            data: mapping with tensors under ``'gt'``, ``'lms'``, ``'ms'`` and
                ``'pan'``; all four are moved to the current CUDA device.
            *args, **kwargs: forwarded unchanged to ``self.criterion``.

        Returns:
            dict with ``'loss'`` (value of the criterion's ``'loss'`` entry)
            and ``'log_vars'`` (``{'loss': <same value>}``).
        """
        log_vars = {}
        gt, lms, ms, pan = data['gt'].cuda(), data['lms'].cuda(), \
                                data['ms'].cuda(), data['pan'].cuda()
        sr, _ = self(ms, pan)

        loss = self.criterion(sr, gt, *args, **kwargs)

        # return sr, loss
        log_vars.update(loss=loss['loss'])
        return {'loss': loss['loss'], 'log_vars': log_vars}

    def val_step(self, data, *args, **kwargs):
        """Run a validation forward pass and return ``(prediction, ground_truth)``.

        ``data`` has the same layout as in ``train_step``; extra arguments are ignored.
        """
        gt, lms, ms, pan = data['gt'].cuda(), data['lms'].cuda(), \
                                data['ms'].cuda(), data['pan'].cuda()
        sr, _ = self(ms, pan)

        return sr, gt


if __name__ == '__main__':
    # CPU smoke test: push random 8-band inputs through BDPN and print the output shape.
    lms = torch.randn([1, 8, 64, 64])  # not consumed by BDPN; kept so the random draws stay in the same order
    pan = torch.randn([1, 1, 64, 64])
    ms = torch.randn([1, 8, 16, 16])
    model = BDPN(8, None)
    outputs = model(ms, pan)  # (fused image, intermediate half-resolution image)
    print(outputs[0].shape)

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/models/DRPNN/drpnn_main.py
================================================
# GPL License
# Copyright (C) 2021 , UESTC
# All Rights Reserved
# @Author  : Xiao Wu, Ran Ran, LiangJian Deng
# @reference:
import torch
import torch.nn as nn
import torch.optim as optim
from .model_drpnn import DRPNN

class SetCriterion(nn.Module):
    """Evaluate a named collection of loss callables and gather the results in one dict.

    Args:
        losses: mapping ``name -> callable``. Each callable is invoked as
            ``loss(outputs, targets)`` (plus extra positional arguments for
            every entry other than the primary one, see ``forward``).
        weight_dict: mapping ``name -> relative weight``. It is only stored on
            the instance; ``forward`` does not apply it.
    """

    # Names treated as the primary loss, which receives (outputs, targets) only.
    # 'Loss' is the spelling this class originally tested for; 'loss' is the
    # key the builders in this package register.
    _PRIMARY_KEYS = ('loss', 'Loss')

    def __init__(self, losses, weight_dict):
        super().__init__()
        self.weight_dict = weight_dict
        self.losses = losses
        # Results persist between calls; entries are overwritten on each forward().
        self.loss_dicts = {}

    def forward(self, outputs, targets, *args, **kwargs):
        """Compute every configured loss exactly once.

        Args:
            outputs: model prediction, passed to each loss as first argument.
            targets: reference, passed to each loss as second argument.
            *args: extra positional arguments, forwarded only to losses whose
                name is not a primary key.
            **kwargs: accepted for interface compatibility; not used.

        Returns:
            ``self.loss_dicts``. A loss that returns a dict is merged into it;
            any other return value is stored under the loss's name.
        """
        for k, loss in self.losses.items():
            extra = () if k in self._PRIMARY_KEYS else args
            # Evaluate once and reuse the value (it used to be recomputed for
            # the update, doubling the forward cost of every loss).
            value = loss(outputs, targets, *extra)
            if isinstance(value, dict):
                self.loss_dicts.update(value)
            else:
                self.loss_dicts.update({k: value})

        return self.loss_dicts

from UDL.pansharpening.models import PanSharpeningModel
class build_drpnn(PanSharpeningModel, name='DRPNN'):
    """Builder that assembles DRPNN together with its criterion, optimizer and LR scheduler.

    NOTE(review): ``name='DRPNN'`` is handed to ``PanSharpeningModel``'s
    subclass hook, presumably as a registry key -- confirm in
    ``UDL.pansharpening.models``.
    """

    def __call__(self, cfg):
        """Create the training objects for DRPNN.

        Args:
            cfg: configuration object; ``cfg.reg``, ``cfg.dataset`` (a mapping)
                and ``cfg.lr`` are read here.

        Returns:
            ``(model, criterion, optimizer, scheduler)``; model and loss are
            placed on the current CUDA device.
        """
        # important for Pansharpening models, which are from tensorflow code
        self.reg = cfg.reg

        # 8 bands when any dataset entry contains "wv", otherwise 4.
        # assumes the values of cfg.dataset are strings -- TODO confirm
        if any("wv" in v for v in cfg.dataset.values()):
            spectral_num = 8
        else:
            spectral_num = 4
        # reduction='mean' is the supported spelling of the deprecated size_average=True.
        loss = nn.MSELoss(reduction='mean').cuda()  ## Define the Loss function
        weight_dict = {'loss': 1}
        losses = {'loss': loss}
        criterion = SetCriterion(losses, weight_dict)
        model = DRPNN(spectral_num, criterion).cuda()
        optimizer = optim.Adam(model.parameters(), lr=cfg.lr, weight_decay=0)  ## optimizer 1: Adam
        # Halve the learning rate every 30 scheduler steps.
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=30,
                                                    gamma=0.5)

        return model, criterion, optimizer, scheduler


###################################################################
# ------------------- Main Function (Run first) -------------------
###################################################################


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/models/DRPNN/model_drpnn.py
================================================
# GPL License
# Copyright (C) 2021 , UESTC
# All Rights Reserved
# @Author  : Xiao Wu, Ran Ran, LiangJian Deng
# @reference:
import torch
import torch.nn as nn
import numpy as np
import math
import torch.nn.init as int

import torch
import torch.nn as nn
import math
from UDL.Basis.variance_sacling_initializer import variance_scaling_initializer
from UDL.pansharpening.models import PanSharpeningModel

# -------------Initialization----------------------------------------

class Repeatblock(nn.Module):
    """One backbone stage of DRPNN: a 7x7, shape-preserving convolution followed by ReLU.

    Args:
        channel: number of input/output feature channels. Defaults to 32, the
            width that used to be hard-coded, so ``Repeatblock()`` is unchanged.
    """

    def __init__(self, channel=32):
        super(Repeatblock, self).__init__()

        self.conv2 = nn.Conv2d(in_channels=channel, out_channels=channel, kernel_size=7, stride=1, padding=3,
                               bias=True)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Return ``relu(conv2(x))``; the shape is preserved."""
        rs = self.relu(self.conv2(x))

        return rs

class DRPNN(nn.Module):
    """Residual CNN operating on the channel-wise concatenation of an MS image and a PAN image.

    Args:
        spectral_num: number of MS bands (channels of the output).
        criterion: callable used by ``train_step``; called as
            ``criterion(sr, gt, *args, **kwargs)`` and expected to return a
            mapping that contains the key ``'loss'``.
        channel: feature width of the hidden layers. It is now forwarded to the
            backbone blocks as well; previously they were fixed at 32 and any
            other value failed with a channel mismatch.
    """

    def __init__(self, spectral_num, criterion, channel=32):
        super(DRPNN, self).__init__()

        self.criterion = criterion
        # ConvTranspose2d: output = (input - 1)*stride + outpading - 2*padding + kernelsize
        self.conv1 = nn.Conv2d(in_channels=spectral_num+1, out_channels=channel, kernel_size=7, stride=1, padding=3,
                                  bias=True)

        self.conv2 = nn.Conv2d(in_channels=channel, out_channels=spectral_num+1, kernel_size=7, stride=1, padding=3,
                                  bias=True)
        self.conv3 = nn.Conv2d(in_channels=spectral_num+1, out_channels=spectral_num, kernel_size=7, stride=1, padding=3,
                                  bias=True)
        self.relu = nn.ReLU(inplace=True)

        # Backbone: 8 conv+ReLU blocks at the hidden width.
        self.backbone = nn.Sequential(*[Repeatblock(channel) for _ in range(8)])

    def forward(self, x, y):
        """Predict the fused image.

        Args:
            x: MS image already at PAN resolution, N x spectral_num x H x W.
            y: PAN image, N x 1 x H x W (``conv1`` expects ``spectral_num + 1``
               channels after concatenation).

        Returns:
            Tensor of shape N x spectral_num x H x W.
        """
        stacked = torch.cat([x, y], 1)  # N x (spectral_num+1) x H x W
        rs = self.relu(self.conv1(stacked))  # N x channel x H x W

        rs = self.backbone(rs)  # backbone!  N x channel x H x W

        out_res = self.conv2(rs)  # N x (spectral_num+1) x H x W
        output1 = torch.add(stacked, out_res)  # skip connection over the whole trunk
        output = self.conv3(output1)  # N x spectral_num x H x W

        return output

    def train_step(self, data, *args, **kwargs):
        """Run one training forward pass and compute the loss.

        Args:
            data: mapping with tensors under ``'gt'``, ``'lms'``, ``'ms'`` and
                ``'pan'``; all four are moved to the current CUDA device.
            *args, **kwargs: forwarded unchanged to ``self.criterion``.

        Returns:
            dict with ``'loss'`` (value of the criterion's ``'loss'`` entry)
            and ``'log_vars'`` (``{'loss': <same value>}``).
        """
        log_vars = {}
        gt, lms, ms, pan = data['gt'].cuda(), data['lms'].cuda(), \
                                data['ms'].cuda(), data['pan'].cuda()
        sr = self(lms, pan)

        loss = self.criterion(sr, gt, *args, **kwargs)

        # return sr, loss
        log_vars.update(loss=loss['loss'])
        return {'loss': loss['loss'], 'log_vars': log_vars}

    def val_step(self, data, *args, **kwargs):
        """Run a validation forward pass and return ``(prediction, ground_truth)``.

        ``data`` has the same layout as in ``train_step``; extra arguments are ignored.
        """
        gt, lms, ms, pan = data['gt'].cuda(), data['lms'].cuda(), \
                                data['ms'].cuda(), data['pan'].cuda()
        sr = self(lms, pan)

        return sr, gt

# ----------------- End-Main-Part ------------------------------------


if __name__ == '__main__':
    # CPU smoke test: random 8-band MS plus a single-band PAN through DRPNN.
    lms = torch.randn([1, 8, 64, 64])
    # PAN must have exactly one channel: conv1 expects spectral_num + 1 = 9
    # channels after concatenation (an 8-channel PAN gave 16 and crashed).
    pan = torch.randn([1, 1, 64, 64])
    model = DRPNN(8, None)
    x = model(lms, pan)
    print(x.shape)

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/models/DiCNN/dicnn_main.py
================================================
# GPL License
# Copyright (C) 2021 , UESTC
# All Rights Reserved
# @Author  : Xiao Wu, LiangJian Deng
# @reference:
import torch
import torch.nn as nn
import torch.optim as optim
from .model_dicnn import DiCNN

class SetCriterion(nn.Module):
    """Evaluate a named collection of loss callables and gather the results in one dict.

    Args:
        losses: mapping ``name -> callable``. Each callable is invoked as
            ``loss(outputs, targets)`` (plus extra positional arguments for
            every entry other than the primary one, see ``forward``).
        weight_dict: mapping ``name -> relative weight``. It is only stored on
            the instance; ``forward`` does not apply it.
    """

    # Names treated as the primary loss, which receives (outputs, targets) only.
    # 'Loss' is the spelling this class originally tested for; 'loss' is the
    # key the builders in this package register.
    _PRIMARY_KEYS = ('loss', 'Loss')

    def __init__(self, losses, weight_dict):
        super().__init__()
        self.weight_dict = weight_dict
        self.losses = losses
        # Results persist between calls; entries are overwritten on each forward().
        self.loss_dicts = {}

    def forward(self, outputs, targets, *args, **kwargs):
        """Compute every configured loss exactly once.

        Args:
            outputs: model prediction, passed to each loss as first argument.
            targets: reference, passed to each loss as second argument.
            *args: extra positional arguments, forwarded only to losses whose
                name is not a primary key.
            **kwargs: accepted for interface compatibility; not used.

        Returns:
            ``self.loss_dicts``. A loss that returns a dict is merged into it;
            any other return value is stored under the loss's name.
        """
        for k, loss in self.losses.items():
            extra = () if k in self._PRIMARY_KEYS else args
            # Evaluate once and reuse the value (it used to be recomputed for
            # the update, doubling the forward cost of every loss).
            value = loss(outputs, targets, *extra)
            if isinstance(value, dict):
                self.loss_dicts.update(value)
            else:
                self.loss_dicts.update({k: value})

        return self.loss_dicts

from UDL.pansharpening.models import PanSharpeningModel
class build_dicnn(PanSharpeningModel, name='DiCNN1'):
    """Builder that assembles DiCNN together with its criterion, optimizer and LR scheduler.

    NOTE(review): ``name='DiCNN1'`` is handed to ``PanSharpeningModel``'s
    subclass hook, presumably as a registry key -- confirm in
    ``UDL.pansharpening.models``.
    """

    def __call__(self, cfg):
        """Create the training objects for DiCNN.

        Args:
            cfg: configuration object; ``cfg.reg``, ``cfg.dataset`` (a mapping)
                and ``cfg.lr`` are read here.

        Returns:
            ``(model, criterion, optimizer, scheduler)``; model and loss are
            placed on the current CUDA device.
        """
        # important for Pansharpening models, which are from tensorflow code
        self.reg = cfg.reg

        # 8 bands when any dataset entry contains "wv", otherwise 4.
        # assumes the values of cfg.dataset are strings -- TODO confirm
        if any("wv" in v for v in cfg.dataset.values()):
            spectral_num = 8
        else:
            spectral_num = 4
        # reduction='mean' is the supported spelling of the deprecated size_average=True.
        loss = nn.MSELoss(reduction='mean').cuda()  ## Define the Loss function
        weight_dict = {'loss': 1}
        losses = {'loss': loss}
        criterion = SetCriterion(losses, weight_dict)
        model = DiCNN(spectral_num, criterion).cuda()
        optimizer = optim.Adam(model.parameters(), lr=cfg.lr, weight_decay=0)  ## optimizer 1: Adam
        # Halve the learning rate every 1500 scheduler steps.
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=1500,
                                                    gamma=0.5)

        return model, criterion, optimizer, scheduler


###################################################################
# ------------------- Main Function (Run first) -------------------
###################################################################


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/models/DiCNN/model_dicnn.py
================================================
# GPL License
# Copyright (C) 2021 , UESTC
# All Rights Reserved
# @Author  : Xiao Wu, LiangJian Deng
# @reference:
import torch
import torch.nn as nn
import math
from UDL.Basis.variance_sacling_initializer import variance_scaling_initializer
from UDL.pansharpening.models import PanSharpeningModel

# -------------Initialization----------------------------------------
def init_weights(*modules):
    """Re-initialise the Conv2d / BatchNorm2d / Linear layers found inside ``modules``.

    Each argument is traversed with ``.modules()``; other layer types are skipped.
    Conv2d weights use ``variance_scaling_initializer`` (a message is printed
    per layer), Linear weights use Kaiming-normal (fan_in, relu), BatchNorm2d
    is reset to weight 1 / bias 0, and existing Conv2d/Linear biases are zeroed.
    """
    def _zero_bias(layer):
        # Layers built with bias=False expose bias as None.
        if layer.bias is not None:
            nn.init.constant_(layer.bias, 0.0)

    for root in modules:
        for layer in root.modules():
            if isinstance(layer, nn.Conv2d):
                print("nn.Conv2D is initialized by variance_scaling_initializer")
                variance_scaling_initializer(layer.weight)
                _zero_bias(layer)
            elif isinstance(layer, nn.Linear):
                nn.init.kaiming_normal_(layer.weight, mode='fan_in', nonlinearity='relu')
                _zero_bias(layer)
            elif isinstance(layer, nn.BatchNorm2d):
                nn.init.constant_(layer.weight, 1.0)
                nn.init.constant_(layer.bias, 0.0)

class DiCNN(nn.Module):
    """Three-layer CNN that predicts a residual which is added to the upsampled MS image.

    Args:
        spectral_num: number of MS bands (channels of the output).
        criterion: callable used by ``train_step``; called as
            ``criterion(sr, gt, *args, **kwargs)`` and expected to return a
            mapping that contains the key ``'loss'``.
        channel: feature width of the hidden layers.
        reg: stored as ``self.reg``; not used inside this class.
    """

    def __init__(self, spectral_num, criterion, channel=64, reg=True):
        super(DiCNN, self).__init__()
        self.criterion = criterion
        self.reg = reg
        # ConvTranspose2d: output = (input - 1)*stride + outpading - 2*padding + kernelsize
        self.conv1 = nn.Conv2d(in_channels=spectral_num + 1, out_channels=channel, kernel_size=3, stride=1, padding=1,
                               bias=True)
        self.conv2 = nn.Conv2d(in_channels=channel, out_channels=channel, kernel_size=3, stride=1, padding=1,
                               bias=True)
        self.conv3 = nn.Conv2d(in_channels=channel, out_channels=spectral_num, kernel_size=3, stride=1, padding=1,
                               bias=True)

        self.relu = nn.ReLU(inplace=True)

        # init_weights() already walks module.modules(). Wrapping it in
        # self.apply() visited every conv twice (once on its own and once via
        # the parent), so each layer was initialised and logged twice.
        init_weights(self)

    def forward(self, x, y):
        """Predict the fused image.

        Args:
            x: MS image already at PAN resolution, N x spectral_num x H x W.
            y: PAN image, N x 1 x H x W (``conv1`` expects ``spectral_num + 1``
               channels after concatenation).

        Returns:
            ``x`` plus the predicted residual, N x spectral_num x H x W.
        """
        input1 = torch.cat([x, y], 1)  # N x (spectral_num+1) x H x W

        rs = self.relu(self.conv1(input1))  # N x channel x H x W
        rs = self.relu(self.conv2(rs))  # N x channel x H x W
        out = self.conv3(rs)  # N x spectral_num x H x W
        output = x + out

        return output

    def train_step(self, data, *args, **kwargs):
        """Run one training forward pass and compute the loss.

        Args:
            data: mapping with tensors under ``'gt'``, ``'lms'``, ``'ms'`` and
                ``'pan'``; all four are moved to the current CUDA device.
            *args, **kwargs: forwarded unchanged to ``self.criterion``.

        Returns:
            dict with ``'loss'`` (value of the criterion's ``'loss'`` entry)
            and ``'log_vars'`` (``{'loss': <same value>}``).
        """
        log_vars = {}
        gt, lms, ms, pan = data['gt'].cuda(), data['lms'].cuda(), \
                                data['ms'].cuda(), data['pan'].cuda()
        sr = self(lms, pan)

        loss = self.criterion(sr, gt, *args, **kwargs)

        # return sr, loss
        log_vars.update(loss=loss['loss'])
        return {'loss': loss['loss'], 'log_vars': log_vars}

    def val_step(self, data, *args, **kwargs):
        """Run a validation forward pass and return ``(prediction, ground_truth)``.

        ``data`` has the same layout as in ``train_step``; extra arguments are ignored.
        """
        gt, lms, ms, pan = data['gt'].cuda(), data['lms'].cuda(), \
                                data['ms'].cuda(), data['pan'].cuda()
        sr = self(lms, pan)

        return sr, gt


if __name__ == '__main__':
    # CPU smoke test: random 8-band MS plus a single-band PAN through DiCNN.
    lms = torch.randn([1, 8, 64, 64])
    # PAN must have exactly one channel: conv1 expects spectral_num + 1 = 9
    # channels after concatenation (an 8-channel PAN gave 16 and crashed).
    pan = torch.randn([1, 1, 64, 64])
    model = DiCNN(8, None)
    x = model(lms, pan)
    print(x.shape)

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/models/FusionNet/fusionnet_main.py
================================================
import os
import torch
import torch.backends.cudnn as cudnn
import torch.nn as nn
import torch.optim as optim
from .model_fusionnet import FusionNet
import numpy as np


class SetCriterion(nn.Module):
    """Evaluate a named collection of loss callables and gather the results in one dict.

    Args:
        losses: mapping ``name -> callable``. The entry named ``'loss'`` is
            invoked as ``loss(outputs, targets)``; every other entry also
            receives the extra positional arguments given to ``forward``.
        weight_dict: mapping ``name -> relative weight``. It is only stored on
            the instance; ``forward`` does not apply it.
    """

    def __init__(self, losses, weight_dict):
        super().__init__()
        self.weight_dict = weight_dict
        self.losses = losses
        # Results persist between calls; entries are overwritten on each forward().
        self.loss_dicts = {}

    def forward(self, outputs, targets, *args, **kwargs):
        """Compute every configured loss exactly once.

        Args:
            outputs: model prediction, passed to each loss as first argument.
            targets: reference, passed to each loss as second argument.
            *args: extra positional arguments, forwarded to every loss except
                the one named ``'loss'``.
            **kwargs: accepted for interface compatibility; not used.

        Returns:
            ``self.loss_dicts``. A loss that returns a dict is merged into it;
            any other return value is stored under the loss's name.
        """
        # lms = kwargs.get('lms')
        # outputs = outputs + lms  # outputs: hp_sr
        for k, loss in self.losses.items():
            extra = () if k == 'loss' else args
            # Evaluate once and reuse the value (it used to be recomputed for
            # the update, doubling the forward cost of every loss).
            value = loss(outputs, targets, *extra)
            if isinstance(value, dict):
                self.loss_dicts.update(value)
            else:
                self.loss_dicts.update({k: value})

        return self.loss_dicts

from UDL.pansharpening.models import PanSharpeningModel
class build_fusionnet(PanSharpeningModel, name='FusionNet'):
    """Builder that assembles FusionNet together with its criterion and optimizer.

    NOTE(review): ``name='FusionNet'`` is handed to ``PanSharpeningModel``'s
    subclass hook, presumably as a registry key -- confirm in
    ``UDL.pansharpening.models``.
    """

    def __call__(self, args):
        """Create the training objects for FusionNet.

        Args:
            args: configuration object; ``args.dataset`` (a mapping) and
                ``args.lr`` are read here.

        Returns:
            ``(model, criterion, optimizer, scheduler)`` where ``scheduler`` is
            always ``None``; model and loss are placed on the current CUDA device.
        """
        scheduler = None  # no learning-rate schedule is configured for FusionNet
        # 8 bands when any dataset entry contains "wv", otherwise 4.
        # assumes the values of args.dataset are strings -- TODO confirm
        if any("wv" in v for v in args.dataset.values()):
            spectral_num = 8
        else:
            spectral_num = 4

        # reduction='mean' is the supported spelling of the deprecated size_average=True.
        loss = nn.MSELoss(reduction='mean').cuda()  ## Define the Loss function
        weight_dict = {'loss': 1}
        losses = {'loss': loss}
        criterion = SetCriterion(losses, weight_dict)
        model = FusionNet(spectral_num, criterion).cuda()
        optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=0)   ## optimizer 1: Adam

        return model, criterion, optimizer, scheduler


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/models/FusionNet/model_fusionnet.py
================================================
# This is a pytorch version for the work of FusionNet
# YW Jin, X Wu, LJ Deng(UESTC);
# 2020-09;

import torch
import torch.nn as nn
import numpy as np
import math
import torch.nn.init as int
from UDL.Basis.variance_sacling_initializer import variance_scaling_initializer


class loss_with_l2_regularization(nn.Module):
    """Add an L2 penalty over convolution weights to an already computed loss."""

    def __init__(self):
        super(loss_with_l2_regularization, self).__init__()

    def forward(self, criterion, model, weight_decay=1e-5, flag=False):
        """Return ``criterion + sum_k weight_decay * ||w_k||^2 / 2``.

        Args:
            criterion: the loss value to regularise (a tensor, despite the name).
            model: module whose ``named_parameters()`` are scanned; a parameter
                is penalised when its name contains both ``'conv'`` and
                ``'weight'``.
            weight_decay: multiplier of each penalty term.
            flag: when true, print every penalty term with its parameter name.

        Returns:
            The regularised loss. If no parameter matches, ``criterion + 0``.
        """
        regularizations = []
        for k, v in model.named_parameters():
            if 'conv' in k and 'weight' in k:
                # Use the parameter itself rather than ``v.data``: ``.data`` is
                # detached from autograd, so the penalty only shifted the
                # reported loss and never produced a gradient on the weights.
                penality = weight_decay * ((v ** 2).sum() / 2)
                regularizations.append(penality)
                if flag:
                    print("{} : {}".format(k, penality))

        loss = criterion + sum(regularizations)
        return loss


# -------------Initialization----------------------------------------
def init_weights(*modules):
    """Re-initialise the Conv2d / BatchNorm2d / Linear layers found inside ``modules``.

    Each argument is traversed with ``.modules()``; other layer types are skipped.

    * ``nn.Conv2d``      -> announced on stdout, weight via
      ``variance_scaling_initializer``, bias set to 0
    * ``nn.BatchNorm2d`` -> weight set to 1, bias set to 0
    * ``nn.Linear``      -> Kaiming-normal (fan_in, relu) weight, bias set to 0
    """
    every_layer = (layer for root in modules for layer in root.modules())
    for layer in every_layer:
        is_conv = isinstance(layer, nn.Conv2d)
        is_linear = isinstance(layer, nn.Linear)

        if is_conv:
            print("initial nn.Conv2d with var_scale_new: ", layer)
            variance_scaling_initializer(layer.weight)
        elif is_linear:
            nn.init.kaiming_normal_(layer.weight, mode='fan_in', nonlinearity='relu')
        elif isinstance(layer, nn.BatchNorm2d):
            nn.init.constant_(layer.weight, 1.0)
            nn.init.constant_(layer.bias, 0.0)

        # Conv2d and Linear biases are zeroed when the layer has one.
        if (is_conv or is_linear) and layer.bias is not None:
            nn.init.constant_(layer.bias, 0.0)


# -------------ResNet Block (One)----------------------------------------
class Resblock(nn.Module):
    """Residual unit ``x + conv21(relu(conv20(x)))`` built from 3x3, shape-preserving convolutions.

    Args:
        channel: number of input/output feature channels. Defaults to 32, the
            width that used to be hard-coded, so ``Resblock()`` is unchanged.
    """

    def __init__(self, channel=32):
        super(Resblock, self).__init__()

        self.conv20 = nn.Conv2d(in_channels=channel, out_channels=channel, kernel_size=3, stride=1, padding=1,
                                bias=True)
        self.conv21 = nn.Conv2d(in_channels=channel, out_channels=channel, kernel_size=3, stride=1, padding=1,
                                bias=True)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Return ``x`` plus its two-convolution residual; the shape is preserved."""
        rs1 = self.relu(self.conv20(x))  # N x channel x H x W
        rs1 = self.conv21(rs1)  # N x channel x H x W
        rs = torch.add(x, rs1)  # N x channel x H x W
        return rs

class FusionNet(nn.Module):
    """ResNet-style network that predicts a detail image from ``PAN - MS``.

    ``forward`` returns only the predicted detail; ``train_step`` and
    ``val_step`` add it to the upsampled MS image to obtain the fused result.

    Args:
        spectral_num: number of MS bands (channels of the output).
        criterion: callable used by ``train_step``; called as
            ``criterion(sr, gt, *args, **kwargs)`` and expected to return a
            mapping that contains the key ``'loss'``.
        channel: feature width of the hidden layers. It is now forwarded to the
            residual blocks as well; previously they were fixed at 32 and any
            other value failed with a channel mismatch.
    """

    def __init__(self, spectral_num, criterion, channel=32):
        super(FusionNet, self).__init__()
        # ConvTranspose2d: output = (input - 1)*stride + outpading - 2*padding + kernelsize
        self.spectral_num = spectral_num
        self.criterion = criterion

        self.conv1 = nn.Conv2d(in_channels=spectral_num, out_channels=channel, kernel_size=3, stride=1, padding=1,
                               bias=True)
        self.res1 = Resblock(channel)
        self.res2 = Resblock(channel)
        self.res3 = Resblock(channel)
        self.res4 = Resblock(channel)

        self.conv3 = nn.Conv2d(in_channels=channel, out_channels=spectral_num, kernel_size=3, stride=1, padding=1,
                               bias=True)

        self.relu = nn.ReLU(inplace=True)

        # Backbone: the 4 residual blocks applied in sequence.
        self.backbone = nn.Sequential(
            self.res1,
            self.res2,
            self.res3,
            self.res4
        )

        # init_weights(self.backbone, self.conv1, self.conv3)   # state initialization, important!
        # self.apply(init_weights)

    def forward(self, x, y):
        """Predict the detail image.

        Args:
            x: MS image already at PAN resolution, N x spectral_num x H x W.
            y: PAN image, N x 1 x H x W; it is tiled ``spectral_num`` times
               along the channel axis before ``x`` is subtracted.

        Returns:
            Detail tensor of shape N x spectral_num x H x W (``x`` is *not* added here).
        """
        pan_concat = y.repeat(1, self.spectral_num, 1, 1)  # N x spectral_num x H x W
        detail_in = torch.sub(pan_concat, x)  # N x spectral_num x H x W
        rs = self.relu(self.conv1(detail_in))  # N x channel x H x W

        rs = self.backbone(rs)  # ResNet's backbone!
        output = self.conv3(rs)  # N x spectral_num x H x W

        return output  # lms + outs

    def train_step(self, data, *args, **kwargs):
        """Run one training forward pass and compute the loss.

        Args:
            data: mapping with tensors under ``'gt'``, ``'lms'``, ``'ms'`` and
                ``'pan'``; all four are moved to the current CUDA device.
            *args, **kwargs: forwarded unchanged to ``self.criterion``.

        Returns:
            dict with ``'loss'`` (the loss tensor) and ``'log_vars'``
            (``{'pan2ms': float, 'loss': float}``, both the same scalar).
        """
        log_vars = {}
        gt, lms, ms, pan = data['gt'].cuda(), data['lms'].cuda(), \
                           data['ms'].cuda(), data['pan'].cuda()
        res = self(lms, pan)
        sr = lms + res  # output:= lms + hp_sr
        loss = self.criterion(sr, gt, *args, **kwargs)['loss']
        # Read the scalar once; each .item() call forces a device-to-host sync.
        loss_value = loss.item()
        log_vars.update(pan2ms=loss_value, loss=loss_value)
        metrics = {'loss': loss, 'log_vars': log_vars}
        return metrics

    def val_step(self, data, *args, **kwargs):
        """Run a validation forward pass and return ``(prediction, ground_truth)``.

        ``data`` has the same layout as in ``train_step``; extra arguments are ignored.
        """
        # gt, lms, ms, pan = data
        gt, lms, ms, pan = data['gt'].cuda(), data['lms'].cuda(), \
                           data['ms'].cuda(), data['pan'].cuda()
        res = self(lms, pan)
        sr = lms + res  # output:= lms + hp_sr

        return sr, gt

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/models/FusionNet/run_fusionnet.py
================================================
from UDL.Basis.config import Config
from UDL.pansharpening.common.main_pansharpening import main
from UDL.Basis.auxiliary import set_random_seed
from UDL.pansharpening.models.FusionNet.option_fusionnet import cfg as args
from UDL.pansharpening.models.FusionNet.fusionnet_main import build_fusionnet as builder

if __name__ == '__main__':
    # Script entry point: run the shared pansharpening main() with the FusionNet options.
    # cfg = Config.fromfile("../pansharpening/DCFNet/option_DCFNet.py")
    # Seed before anything else is built (set_random_seed lives in UDL.Basis.auxiliary).
    set_random_seed(args.seed)
    # print(cfg.builder)
    # Attach the FusionNet builder to the options; presumably main() calls it
    # to obtain the model, criterion, optimizer and scheduler -- confirm in main_pansharpening.
    args.builder = builder
    main(args)

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/models/MSDCNN/model_msdcnn.py
================================================
# GPL License
# Copyright (C) 2021 , UESTC
# All Rights Reserved
# @Author  : Xiao Wu, Ran Ran, LiangJian Deng
# @reference:
import torch
import torch.nn as nn
import numpy as np
import math
import torch.nn.init as int
# import sys
# sys.path.append('/home/office-401-remote/桌面/Machine Learning/RanRan')
# print(sys.path)
import torch
import torch.nn as nn
import math
from UDL.Basis.variance_sacling_initializer import variance_scaling_initializer
from UDL.pansharpening.models import PanSharpeningModel

class MSDCNN(nn.Module):
    """Two-branch convolutional network operating on a channel-wise stack of two inputs.

    A deep branch (conv1 -> three parallel kernels -> conv3 -> three parallel
    kernels -> conv5) and a shallow three-layer branch both consume the same
    stacked input; their outputs are summed and passed through a ReLU. All
    convolutions are padded so the spatial size is preserved.
    """

    def __init__(self, spectral_num, criterion, channel=64):
        """
        Args:
            spectral_num: number of output channels; the stacked input is
                expected to carry ``spectral_num + 1`` channels.
            criterion: callable used by ``train_step`` to compute the loss.
            channel: accepted for signature compatibility; not used.
        """
        super(MSDCNN, self).__init__()

        self.criterion = criterion

        in_ch = spectral_num + 1
        out_ch = spectral_num

        def same_conv(c_in, c_out, k):
            # stride-1 convolution padded by k // 2 so H and W are unchanged
            return nn.Conv2d(in_channels=c_in, out_channels=c_out, kernel_size=k,
                             stride=1, padding=k // 2, bias=True)

        # Deep branch. Layers are created in a fixed order so parameter
        # registration (and random initialisation order) stays stable.
        self.conv1 = same_conv(in_ch, 60, 7)

        self.conv2_1 = same_conv(60, 20, 3)
        self.conv2_2 = same_conv(60, 20, 5)
        self.conv2_3 = same_conv(60, 20, 7)

        self.conv3 = same_conv(60, 30, 3)

        self.conv4_1 = same_conv(30, 10, 3)
        self.conv4_2 = same_conv(30, 10, 5)
        self.conv4_3 = same_conv(30, 10, 7)

        self.conv5 = same_conv(30, out_ch, 5)

        # Shallow branch.
        self.shallow1 = same_conv(in_ch, 64, 9)
        self.shallow2 = same_conv(64, 32, 1)
        self.shallow3 = same_conv(32, out_ch, 5)

        self.relu = nn.ReLU(inplace=True)

    def forward(self, x, y):  # x: lms; y: pan
        """Stack ``x`` and ``y`` along dim 1 and return the rectified sum of both branches."""
        stacked = torch.cat([x, y], 1)

        # ---- deep branch ----
        feat = self.relu(self.conv1(stacked))  # 60 channels
        multi = torch.cat([self.conv2_1(feat), self.conv2_2(feat), self.conv2_3(feat)], 1)  # 3 x 20 channels
        feat = self.relu(torch.add(multi, feat))  # residual sum, 60 channels

        feat = self.relu(self.conv3(feat))  # 30 channels
        multi = torch.cat([self.conv4_1(feat), self.conv4_2(feat), self.conv4_3(feat)], 1)  # 3 x 10 channels
        feat = self.relu(torch.add(multi, feat))  # residual sum, 30 channels

        deep = self.conv5(feat)  # spectral_num channels

        # ---- shallow branch ----
        shallow = self.relu(self.shallow1(stacked))  # 64 channels
        shallow = self.relu(self.shallow2(shallow))  # 32 channels
        shallow = self.shallow3(shallow)  # spectral_num channels

        return self.relu(torch.add(deep, shallow))

    def train_step(self, data, *args, **kwargs):
        """Run one training batch.

        ``data`` must provide 'gt', 'lms', 'ms' and 'pan'; each is moved to the
        GPU with ``.cuda()``. Extra arguments go to ``self.criterion``, whose
        result must be subscriptable by 'loss'.

        Returns a dict with 'loss' and 'log_vars' (which holds the same loss
        value under the key 'loss').
        """
        gt, lms, _ms, pan = (data[key].cuda() for key in ('gt', 'lms', 'ms', 'pan'))
        losses = self.criterion(self(lms, pan), gt, *args, **kwargs)

        # return sr, loss
        return {'loss': losses['loss'], 'log_vars': {'loss': losses['loss']}}

    def val_step(self, data, *args, **kwargs):
        """Return ``(prediction, gt)`` for one validation batch (inputs moved with ``.cuda()``)."""
        gt, lms, _ms, pan = (data[key].cuda() for key in ('gt', 'lms', 'ms', 'pan'))
        return self(lms, pan), gt


if __name__ == '__main__':
    # Smoke test: push random tensors through MSDCNN and print the output shape.
    # forward() concatenates its two inputs along dim 1, so both must share the
    # same spatial size, and it returns a single tensor.
    lms = torch.randn([1, 8, 64, 64])
    pan = torch.randn([1, 1, 64, 64])
    model = MSDCNN(8, None)
    x = model(lms, pan)
    print(x.shape)

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/models/MSDCNN/msdcnn_main.py
================================================
# GPL License
# Copyright (C) 2021 , UESTC
# All Rights Reserved
# @Author  : Xiao Wu, Ran Ran, LiangJian Deng
# @reference:
import torch
import torch.nn as nn
import torch.optim as optim
from .model_msdcnn import MSDCNN

class SetCriterion(nn.Module):
    """Evaluate a collection of named loss callables and gather their results.

    Each entry of ``losses`` is called on ``(outputs, targets)`` and its result
    is merged into one dictionary that is returned to the caller.
    """

    def __init__(self, losses, weight_dict):
        """Create the criterion.

        Parameters:
            losses: mapping from a loss name to a callable
                ``loss(outputs, targets, *args)``.
            weight_dict: mapping from loss names to relative weights. It is
                stored on the instance but not applied by this class.
        """
        super().__init__()
        self.weight_dict = weight_dict
        self.losses = losses
        # Accumulates results; the same dict object is reused across calls.
        self.loss_dicts = {}

    def forward(self, outputs, targets, *args, **kwargs):
        """Compute every configured loss exactly once.

        Parameters:
            outputs: model predictions, passed as first argument to each loss.
            targets: reference values, passed as second argument to each loss.
            *args: extra positional arguments, forwarded to every loss except
                the one registered under the key 'Loss'.
            **kwargs: accepted for interface compatibility; not forwarded.

        Returns:
            ``self.loss_dicts``. A loss that returns a dict has its items merged
            in; any other return value is stored under the loss's own name.
        """
        for k, loss in self.losses.items():
            # The entry named 'Loss' only ever receives (outputs, targets).
            if k == 'Loss':
                result = loss(outputs, targets)
            else:
                result = loss(outputs, targets, *args)

            # Reuse the value computed above instead of evaluating the loss a
            # second time.
            if isinstance(result, dict):
                self.loss_dicts.update(result)
            else:
                self.loss_dicts[k] = result

        return self.loss_dicts

from UDL.pansharpening.models import PanSharpeningModel
class build_msdcnn(PanSharpeningModel, name='MSDCNN'):
    """Builder that assembles the MSDCNN model with its loss, optimizer and scheduler.

    The ``name='MSDCNN'`` class keyword is consumed by PanSharpeningModel
    (presumably to register this builder under that name; confirm in UDL.AutoDL).
    """

    def __call__(self, cfg):
        """Build everything needed to train MSDCNN.

        Args:
            cfg: configuration object providing ``reg``, ``lr`` and ``dataset``
                (a mapping whose values are dataset names).

        Returns:
            tuple ``(model, criterion, optimizer, scheduler)``; model and loss
            are moved to the GPU with ``.cuda()``.
        """
        # important for Pansharpening models, which are from tensorflow code
        self.reg = cfg.reg

        # Dataset names containing "wv" select 8 bands, everything else 4.
        if any("wv" in v for v in cfg.dataset.values()):
            spectral_num = 8
        else:
            spectral_num = 4

        # reduction='mean' is the supported spelling of the deprecated
        # size_average=True and yields the same value.
        loss = nn.MSELoss(reduction='mean').cuda()  ## Define the Loss function
        weight_dict = {'loss': 1}
        losses = {'loss': loss}
        criterion = SetCriterion(losses, weight_dict)
        model = MSDCNN(spectral_num, criterion).cuda()
        optimizer = optim.Adam(model.parameters(), lr=cfg.lr, weight_decay=1e-5)  ## optimizer 1: Adam
        # Halve the learning rate every 40 scheduler steps.
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=40,
                                                    gamma=0.5)  # <=> lr = opt.lr * (0.5 ** (epoch // opt.step))

        return model, criterion, optimizer, scheduler


###################################################################
# ------------------- Main Function (Run first) -------------------
###################################################################


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/models/PNN/model_pnn.py
================================================
import torch
import torch.nn as nn
from torch.nn import functional as F
import math
# from UDL.Basis.variance_sacling_initializer import variance_scaling_initializer

class PNN(nn.Module):
    """Three-layer convolutional network without padding.

    Because no convolution is padded, every layer shrinks height and width by
    ``kernel_size - 1``; with kernels 9, 5 and 5 the output is 16 pixels
    smaller than the input in each spatial dimension. ``train_step`` crops the
    target and ``val_step`` pads the input by the class attribute ``blk`` to
    compensate.
    """

    def __init__(self, spectral_num, criterion, channel=64):
        """
        Args:
            spectral_num: number of output channels; the input to ``forward``
                must carry ``spectral_num + 1`` channels.
            criterion: callable used by ``train_step`` to compute the loss.
            channel: number of feature maps produced by the first layer.
        """
        super(PNN, self).__init__()

        self.criterion = criterion

        self.conv1 = nn.Conv2d(spectral_num + 1, channel, kernel_size=9, stride=1, bias=True)
        self.conv2 = nn.Conv2d(channel, 32, kernel_size=5, stride=1, bias=True)
        self.conv3 = nn.Conv2d(32, spectral_num, kernel_size=5, stride=1, bias=True)

        self.relu = nn.ReLU(inplace=True)

        # init_weights(self.conv1, self.conv2, self.conv3)

    def forward(self, x):  # x = cat(lms,pan)
        """Apply conv1 -> ReLU -> conv2 -> ReLU -> conv3 to ``x``."""
        hidden = self.conv1(x)
        hidden = self.relu(hidden)
        hidden = self.conv2(hidden)
        hidden = self.relu(hidden)
        return self.conv3(hidden)

    def train_step(self, data, *args, **kwargs):
        """Run one training batch.

        ``data`` must provide 'gt', 'lms', 'ms' and 'pan' (each moved with
        ``.cuda()``). The target is cropped by ``self.blk`` on every side so
        that it matches the smaller network output.

        Returns a dict with 'loss' and 'log_vars' (the same loss value under
        the key 'loss').
        """
        gt, lms, _ms, pan = (data[key].cuda() for key in ('gt', 'lms', 'ms', 'pan'))
        margin = self.blk

        cropped_gt = gt[:, :, margin:-margin, margin:-margin]
        sr = self(torch.cat([lms, pan], dim=1))

        losses = self.criterion(sr, cropped_gt, *args, **kwargs)

        # return sr, loss
        return {'loss': losses['loss'], 'log_vars': {'loss': losses['loss']}}

    def val_step(self, data, *args, **kwargs):
        """Return ``(prediction, gt)`` for one validation batch.

        The stacked input is replicate-padded by ``self.blk`` on all four
        sides before it is fed to the network.
        """
        margin = self.blk
        gt, lms, _ms, pan = (data[key].cuda() for key in ('gt', 'lms', 'ms', 'pan'))
        stacked = torch.cat([lms, pan], dim=1)
        padded = F.pad(stacked, (margin,) * 4, mode='replicate')
        return self(padded), gt

    @classmethod
    def set_blk(cls, blk):
        """Store the border width ``blk`` on the class (shared by all instances)."""
        cls.blk = blk

# ----------------- End-Main-Part ------------------------------------


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/models/PNN/pnn_main.py
================================================
import torch.nn as nn
import torch.optim as optim
from .model_pnn import PNN
import numpy as np

class SetCriterion(nn.Module):
    """Evaluate a collection of named loss callables and gather their results.

    Each entry of ``losses`` is called on ``(outputs, targets)`` and its result
    is merged into one dictionary that is returned to the caller.
    """

    def __init__(self, losses, weight_dict):
        """Create the criterion.

        Parameters:
            losses: mapping from a loss name to a callable
                ``loss(outputs, targets, *args)``.
            weight_dict: mapping from loss names to relative weights. It is
                stored on the instance but not applied by this class.
        """
        super().__init__()
        self.weight_dict = weight_dict
        self.losses = losses
        # Accumulates results; the same dict object is reused across calls.
        self.loss_dicts = {}

    def forward(self, outputs, targets, *args, **kwargs):
        """Compute every configured loss exactly once.

        Parameters:
            outputs: model predictions, passed as first argument to each loss.
            targets: reference values, passed as second argument to each loss.
            *args: extra positional arguments, forwarded to every loss except
                the one registered under the key 'Loss'.
            **kwargs: accepted for interface compatibility; not forwarded.

        Returns:
            ``self.loss_dicts``. A loss that returns a dict has its items merged
            in; any other return value is stored under the loss's own name.
        """
        for k, loss in self.losses.items():
            # The entry named 'Loss' only ever receives (outputs, targets).
            if k == 'Loss':
                result = loss(outputs, targets)
            else:
                result = loss(outputs, targets, *args)

            # Reuse the value computed above instead of evaluating the loss a
            # second time.
            if isinstance(result, dict):
                self.loss_dicts.update(result)
            else:
                self.loss_dicts[k] = result

        return self.loss_dicts


from UDL.pansharpening.models import PanSharpeningModel
class build_pnn(PanSharpeningModel, name='PNN'):
    """Builder that assembles the PNN model with its loss and optimizer.

    The ``name='PNN'`` class keyword is consumed by PanSharpeningModel
    (presumably to register this builder under that name; confirm in UDL.AutoDL).
    """

    def __call__(self, cfg):
        """Build everything needed to train PNN.

        Args:
            cfg: configuration object providing ``reg`` and ``dataset`` (a
                mapping whose values are dataset names). ``cfg.lr`` is
                overwritten with the learning rate computed here.

        Returns:
            tuple ``(model, criterion, optimizer, scheduler)`` with
            ``scheduler`` always ``None``; model and loss are moved to the GPU
            with ``.cuda()``.
        """
        # important for Pansharpening models, which are from tensorflow code
        self.reg = cfg.reg

        scheduler = None

        # Dataset names containing "wv" select 8 bands, everything else 4.
        if any("wv" in v for v in cfg.dataset.values()):
            spectral_num = 8
        else:
            spectral_num = 4
        # PNN ignores the configured learning rate and derives its own.
        lr = 0.0001 * 17 * 17 * spectral_num
        cfg.lr = lr
        print(f"PNN adopted another lr: {lr} in \"build_pnn in pnn_main.py\" ")

        # reduction='mean' is the supported spelling of the deprecated
        # size_average=True and yields the same value.
        loss = nn.MSELoss(reduction='mean').cuda()  ## Define the Loss function
        weight_dict = {'loss': 1}
        losses = {'loss': loss}
        criterion = SetCriterion(losses, weight_dict)
        model = PNN(spectral_num, criterion).cuda()

        # NOTE(review): this builder previously assembled per-layer parameter
        # groups (conv3 at lr / 10, the rest at lr) but never handed them to
        # the optimizer. All parameters are trained with the same lr, exactly
        # as before; confirm whether the reduced conv3 rate was intended.
        optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9)  ## optimizer 2: SGD

        # Sum of (kernel_size - 1) over the conv weights, plus one. The
        # network is unpadded, so half of this is the border lost per side.
        net_scope = 1 + sum(layer.shape[-1] - 1
                            for name, layer in model.named_parameters()
                            if 'conv' in name and 'bias' not in name)
        blk = net_scope // 2  # 8 (plain Python int, safe for slicing and padding)
        model.set_blk(blk)

        return model, criterion, optimizer, scheduler

###################################################################
# ------------------- Main Function (Run first) -------------------
###################################################################


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/models/PanNet/model_pannet.py
================================================
# GPL License
# Copyright (C) 2021 , UESTC
# All Rights Reserved
# @Author  : Xiao Wu, LiangJian Deng
# @reference:
import torch
import torch.nn as nn
import numpy as np
import math
import torch.nn.init as int
from UDL.Basis.variance_sacling_initializer import variance_scaling_initializer
from UDL.pansharpening.models import PanSharpeningModel

# -------------Initialization----------------------------------------
def init_weights(*modules):
    """Initialise the Conv2d, BatchNorm2d and Linear layers found in ``modules``.

    Every given module is traversed with ``.modules()`` (so the module itself
    and all of its descendants are visited):

    * ``nn.Conv2d``: weight via ``variance_scaling_initializer``, bias zeroed
      when present; a message is printed for each such layer.
    * ``nn.BatchNorm2d``: weight set to 1 and bias set to 0.
    * ``nn.Linear``: weight via Kaiming-normal (fan_in, relu), bias zeroed
      when present.

    Other layer types are left untouched.
    """

    def _zero_bias(layer):
        # Layers built with bias=False carry bias = None.
        if layer.bias is not None:
            nn.init.constant_(layer.bias, 0.0)

    for root in modules:
        for layer in root.modules():
            if isinstance(layer, nn.Conv2d):
                print("nn.Conv2D is initialized by variance_scaling_initializer")
                variance_scaling_initializer(layer.weight)  # method 1: initialization
                # nn.init.kaiming_normal_(layer.weight, mode='fan_in', nonlinearity='relu')  # method 2: initialization
                _zero_bias(layer)
            elif isinstance(layer, nn.BatchNorm2d):
                nn.init.constant_(layer.weight, 1.0)
                nn.init.constant_(layer.bias, 0.0)
            elif isinstance(layer, nn.Linear):
                # variance_scaling_initializer(layer.weight)
                nn.init.kaiming_normal_(layer.weight, mode='fan_in', nonlinearity='relu')
                _zero_bias(layer)


# -------------ResNet Block (One)----------------------------------------
class Resblock(nn.Module):
    """Residual block with two 3x3 convolutions on 32 channels.

    Computes ``x + conv21(relu(conv20(x)))``; padding of 1 keeps the spatial
    size unchanged.
    """

    def __init__(self):
        super(Resblock, self).__init__()

        width = 32  # fixed number of input and output channels
        self.conv20 = nn.Conv2d(width, width, kernel_size=3, stride=1, padding=1, bias=True)
        self.conv21 = nn.Conv2d(width, width, kernel_size=3, stride=1, padding=1, bias=True)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Return ``x`` plus the output of the two-convolution residual path."""
        residual = self.conv20(x)
        residual = self.relu(residual)
        residual = self.conv21(residual)
        return torch.add(x, residual)


# -----------------------------------------------------
class PanNet(nn.Module):
    """Residual network that upsamples its first input and fuses it with the second.

    ``forward(x, y)`` upsamples ``x`` by a factor of 4 with a transposed
    convolution, concatenates the result with ``y`` along the channel axis and
    feeds it through conv1 -> four Resblocks -> conv3.
    """

    def __init__(self, spectral_num, criterion, channel=32, reg=True):
        """
        Args:
            spectral_num: number of channels of the first input and of the
                output; the second input contributes one more channel.
            criterion: callable used by ``train_step`` to compute the loss.
            channel: feature width of conv1/conv3. Resblock is fixed at 32
                channels, so values other than 32 do not fit the backbone.
            reg: stored on the instance; not used inside this class.
        """
        super(PanNet, self).__init__()
        self.criterion = criterion
        self.reg = reg

        # ConvTranspose2d: output = (input - 1)*stride + outpading - 2*padding + kernelsize
        # With kernel 8, stride 4 and padding 2 this gives output = 4 * input.
        self.deconv = nn.ConvTranspose2d(in_channels=spectral_num, out_channels=spectral_num, kernel_size=8, stride=4,
                                         padding=2, bias=True)
        self.conv1 = nn.Conv2d(in_channels=spectral_num + 1, out_channels=channel, kernel_size=3, stride=1, padding=1,
                               bias=True)
        self.res1 = Resblock()
        self.res2 = Resblock()
        self.res3 = Resblock()
        self.res4 = Resblock()

        self.conv3 = nn.Conv2d(in_channels=channel, out_channels=spectral_num, kernel_size=3, stride=1, padding=1,
                               bias=True)

        self.relu = nn.ReLU(inplace=True)

        self.backbone = nn.Sequential(  # method 2: 4 resnet repeated blocks
            self.res1,
            self.res2,
            self.res3,
            self.res4
        )

        # Initialise every layer exactly once. init_weights() already walks
        # `.modules()` of what it is given, so `self.apply(init_weights)` would
        # visit nested layers again for every ancestor (and once more through
        # `backbone`), repeating the initialisation and its log message.
        init_weights(self)
        # init_weights(self.backbone, self.deconv, self.conv1, self.conv3)  # state initialization, important!

    def forward(self, x, y):  # x= hp of ms; y = hp of pan
        """Upsample ``x``, concatenate it with ``y`` and return the network output."""
        output_deconv = self.deconv(x)
        input = torch.cat([output_deconv, y], 1)  # Bsx9x64x64
        rs = self.relu(self.conv1(input))  # Bsx32x64x64

        rs = self.backbone(rs)  # ResNet's backbone!

        output = self.conv3(rs)  # Bsx8x64x64
        return output

    def train_step(self, data, *args, **kwargs):
        """Run one training batch.

        ``data`` must provide 'gt', 'lms', 'ms_hp' and 'pan_hp' (each moved
        with ``.cuda()``). The network output is added to ``lms`` before the
        loss is computed; extra arguments go to ``self.criterion``.

        Returns a dict with 'loss' and 'log_vars' (the same loss value under
        the key 'loss').
        """
        log_vars = {}
        gt, lms, ms_hp, pan_hp = data['gt'].cuda(), data['lms'].cuda(), \
                                data['ms_hp'].cuda(), data['pan_hp'].cuda()
        hp_sr = self(ms_hp, pan_hp)
        sr = lms + hp_sr  # output:= lms + hp_sr
        loss = self.criterion(sr, gt, *args, **kwargs)
        # return sr, loss
        log_vars.update(loss=loss['loss'])
        return {'loss': loss['loss'], 'log_vars': log_vars}

    def val_step(self, data, *args, **kwargs):
        """Return ``(prediction, gt)`` for one validation batch.

        NOTE(review): unlike train_step, the network inputs are read from the
        keys 'ms' and 'pan' rather than 'ms_hp' and 'pan_hp'. Presumably the
        validation loader stores the high-pass data under those names; verify
        against the dataset code.
        """
        # gt, lms, ms, pan = data
        gt, lms, ms_hp, pan_hp = data['gt'].cuda(), data['lms'].cuda(), \
                                data['ms'].cuda(), data['pan'].cuda()
        hp_sr = self(ms_hp, pan_hp)
        sr = lms + hp_sr  # output:= lms + hp_sr
        return sr, gt


# ----------------- End-Main-Part ------------------------------------


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/models/PanNet/pannet_main.py
================================================
'''
[Flops]: ConvTranspose2d is not supported!
[Memory]: ConvTranspose2d is not supported!
===============================================================================================================================================
Total params: 83,024
-----------------------------------------------------------------------------------------------------------------------------------------------
Total memory: 7.25MB
Total MAdd: 646.84MMAdd
Total Flops: 323.91MFlops
Total MemR+W: 14.57MB
'''
import torch.nn as nn
import torch.optim as optim
from .model_pannet import PanNet

class SetCriterion(nn.Module):
    """Evaluate a collection of named loss callables and gather their results.

    Each entry of ``losses`` is called on ``(outputs, targets)`` and its result
    is merged into one dictionary that is returned to the caller.
    """

    def __init__(self, losses, weight_dict):
        """Create the criterion.

        Parameters:
            losses: mapping from a loss name to a callable
                ``loss(outputs, targets, *args)``.
            weight_dict: mapping from loss names to relative weights. It is
                stored on the instance but not applied by this class.
        """
        super().__init__()
        self.weight_dict = weight_dict
        self.losses = losses
        # Accumulates results; the same dict object is reused across calls.
        self.loss_dicts = {}

    def forward(self, outputs, targets, *args, **kwargs):
        """Compute every configured loss exactly once.

        Parameters:
            outputs: model predictions, passed as first argument to each loss.
            targets: reference values, passed as second argument to each loss.
            *args: extra positional arguments, forwarded to every loss except
                the one registered under the key 'Loss'.
            **kwargs: accepted for interface compatibility; not forwarded.

        Returns:
            ``self.loss_dicts``. A loss that returns a dict has its items merged
            in; any other return value is stored under the loss's own name.
        """
        for k, loss in self.losses.items():
            # The entry named 'Loss' only ever receives (outputs, targets).
            if k == 'Loss':
                result = loss(outputs, targets)
            else:
                result = loss(outputs, targets, *args)

            # Reuse the value computed above instead of evaluating the loss a
            # second time.
            if isinstance(result, dict):
                self.loss_dicts.update(result)
            else:
                self.loss_dicts[k] = result

        return self.loss_dicts

from UDL.pansharpening.models import PanSharpeningModel
class build_pannet(PanSharpeningModel, name='PanNet'):
    """Builder that assembles the PanNet model with its loss and optimizer.

    The ``name='PanNet'`` class keyword is consumed by PanSharpeningModel
    (presumably to register this builder under that name; confirm in UDL.AutoDL).
    """

    def __call__(self, cfg):
        """Build everything needed to train PanNet.

        Args:
            cfg: configuration object providing ``reg``, ``lr`` and ``dataset``
                (a mapping whose values are dataset names).

        Returns:
            tuple ``(model, criterion, optimizer, scheduler)`` with
            ``scheduler`` always ``None``; model and loss are moved to the GPU
            with ``.cuda()``.

        Raises:
            ValueError: if any configured dataset name lacks the substring 'hp'.
        """
        # PanNet only accepts datasets whose names all contain 'hp'.
        if not all('hp' in name for name in cfg.dataset.values()):
            raise ValueError(f"{cfg.dataset} is wrong for PanNet, you need high-pass filter dataset.")

        # important for Pansharpening models, which are from tensorflow code
        self.reg = cfg.reg

        scheduler = None

        # Dataset names containing "wv" select 8 bands, everything else 4.
        if any("wv" in v for v in cfg.dataset.values()):
            spectral_num = 8
        else:
            spectral_num = 4
        # reduction='mean' is the supported spelling of the deprecated
        # size_average=True and yields the same value.
        loss = nn.MSELoss(reduction='mean').cuda()  ## Define the Loss function
        weight_dict = {'loss': 1}
        losses = {'loss': loss}
        criterion = SetCriterion(losses, weight_dict)
        model = PanNet(spectral_num, criterion).cuda()
        optimizer = optim.Adam(model.parameters(), lr=cfg.lr, weight_decay=0)   ## optimizer 1: Adam

        return model, criterion, optimizer, scheduler


###################################################################
# ------------------- Main Function (Run first) -------------------
###################################################################


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/models/__init__.py
================================================
from UDL.AutoDL import PanSharpeningModel
from .DiCNN.dicnn_main import build_dicnn, DiCNN
from .FusionNet.fusionnet_main import build_fusionnet, FusionNet
from .PNN.pnn_main import build_pnn, PNN
from .PanNet.pannet_main import build_pannet, PanNet
from .DRPNN.drpnn_main import build_drpnn, DRPNN
from .BDPN.bdpn_main import build_bdpn, BDPN
from .MSDCNN.msdcnn_main import build_msdcnn, MSDCNN

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/run_pansharpening.py
================================================
# GPL License
# Copyright (C) UESTC
# All Rights Reserved
# @Author  : Xiao Wu, LiangJian Deng
# @reference:

import sys
sys.path.append('../..')
from UDL.AutoDL import TaskDispatcher
from UDL.AutoDL.trainer import main

if __name__ == '__main__':
    # Training entry point: request the configuration for the FusionNet
    # architecture of the 'pansharpening' task from TaskDispatcher.
    cfg = TaskDispatcher.new(task='pansharpening', mode='entrypoint', arch='FusionNet')
    # Print the keys of TaskDispatcher._task for reference.
    print(TaskDispatcher._task.keys())
    main(cfg)

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/pansharpening/run_test_pansharpening.py
================================================
# GPL License
# Copyright (C) UESTC
# All Rights Reserved
# @Author  : Xiao Wu, LiangJian Deng
# @reference:

import sys
sys.path.append('../..')
from UDL.AutoDL import TaskDispatcher
from UDL.AutoDL.trainer import main

if __name__ == '__main__':
    # Test entry point: request the configuration for the MSDCNN architecture
    # of the 'pansharpening' task from TaskDispatcher.
    cfg = TaskDispatcher.new(task='pansharpening', mode='entrypoint', arch='MSDCNN')
    # cfg.resume_from = "../pretrained-model/WV3/pannet.pth"
    # Switch the run to evaluation with a workflow of a single 'val' stage.
    cfg.eval = True
    cfg.workflow = [('val', 1)]
    # Print the keys of TaskDispatcher._task for reference.
    print(TaskDispatcher._task.keys())
    main(cfg)


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/pretrained-model/QB/readme.txt
================================================
none

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/pretrained-model/WV2/readme.txt
================================================
none

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/pretrained-model/WV4/readme.txt
================================================
none

================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/readme.md
================================================
test


================================================
FILE: 01-DL-toolbox(Pytorch)/UDL/results/readme.txt
================================================


================================================
FILE: 01-DL-toolbox(Pytorch)/readme.md
================================================
# DL toolbox
"DL toolbox" for Remote Sensing Pansharpening

[English]([https://github.com/XiaoXiao-Woo/PanCollection/edit/dev/README.md](https://github.com/liangjiandeng/DLPan-Toolbox/edit/main/01-DL-toolbox(Pytorch)/readme.md)) | [简体中文](https://github.com.md)

This repository is the official PyTorch implementation of our IEEE GRSM paper “Machine Learning in Pansharpening: A Benchmark, from Shallow to Deep Networks”, 2022 ([paper](https://github.com/liangjiandeng/liangjiandeng.github.io/tree/master/papers/2022/review-grsm2022.pdf) | [homepage](https://github.com/liangjiandeng/DLPan-Toolbox)).


## Features


## Requirements
* Python3.7+, Pytorch>=1.6.0
* NVIDIA GPU + CUDA
* Run `python setup.py develop`

Note: Our project is based on MMCV, but you do not need to install it separately at the moment.

## Quick Start
**Step0. Set your Python environment.**

> git clone https://github.com/liangjiandeng/DLPan-Toolbox.git
>
> cd "DLPan-Toolbox/01-DL-toolbox(Pytorch)"

Then, 

> python setup.py develop

**Step1. Put datasets and set path**
* Put the datasets (WorldView-3, QuickBird, GaoFen2, WorldView2) into `UDL/Data/pansharpening`; see the following path structure.

```
|-$ROOT/Data
├── pansharpening
│   ├── training_data
│   │   ├── train_wv3.h5
│   │   ├── ...
│   ├── validation_data
│   │   │   ├── valid_wv3.h5
│   │   │   ├── ...
│   ├── test_data
│   │   ├── WV3
│   │   │   ├── NY1_WV3_RR.mat
│   │   │   ├── ...
│   │   │   ├── ...
```

* Check and revise your dataset path in `01-DL-toolbox(Pytorch)/UDL/Basis/option.py` (line 100 or line 102, may not need to revise); Or, you can print the output of `run_pansharpening.py`, then set __cfg.data_dir__ (also line 100 or line 102) to your dataset path.


**Step2. How to train?**

> open `01-DL-toolbox(Pytorch)/UDL/pansharpening`

> run `python run_pansharpening.py` for training

> if you want to change the network, you could: 

1) change arch='BDPN' in the following code to another network's name, e.g., arch='xxx';

	```python
	import sys
	sys.path.append('../..')
	from UDL.AutoDL import TaskDispatcher
	from UDL.AutoDL.trainer import main

	if __name__ == '__main__':
	    cfg = TaskDispatcher.new(task='pansharpening', mode='entrypoint', arch='BDPN')
	    print(TaskDispatcher._task.keys())
	    main(cfg)
	```
2) revise the corresponding setting in `pansharpening/configs/option_bdpn.py`, e.g., hyperparameters, validation data

	```python
	cfg.eval = False

	cfg.workflow = [('train', 50), ('val', 1)]

	cfg.dataset = {'train': 'wv3', 'val': 'valid_wv3.h5'}
	```
	

**Step3. How to test?**

> open `01-DL-toolbox(Pytorch)/UDL/pansharpening`

> run `run_test_pansharpening.py` for testing

> Note you need to ensure `cfg.eval = True` or `cfg.workflow = [('val', 1)]` in the following `run_test_pansharpening.py` to run
	  

```python
import sys
sys.path.append('../..')
from UDL.AutoDL import TaskDispatcher
from UDL.AutoDL.trainer import main

if __name__ == '__main__':
    cfg = TaskDispatcher.new(task='pansharpening', mode='entrypoint', arch='MSDCNN')
    cfg.eval = True
    cfg.workflow = [('val', 1)]
    print(TaskDispatcher._task.keys())
    main(cfg)
```

> How to get test outcome using the pretrained models?

1) find the given one example (i.e., `NY1_WV3_RR.mat`) in the path `UDL/Data/pansharpening/test_data`; 

2) load the pretrained model by setting __model_path__ = "your_model_path" in `pansharpening/configs/option_bdpn.py` (line 15); or set __cfg.resume_from__ = "your_model_path" (line 31).

3) run `run_test_pansharpening.py`, then you may find the test results in the folder of `UDL/results`


## FAQ
**Q1.** How to customize your new network/model in this framework?

> 1) Construct your model, loss, optimizer, scheduler in `UDL/pansharpening/models/modelName/modelName_main.py` (you need to create your modelName in `modelName_main.py`, i.e., the similar operation as other methods in the path).

> 2) Update `UDL/pansharpening/models/__init__.py` 

> 3) Add `option_modelName.py` to `UDL/pansharpening/configs/`, and configure your hyperparameters in this file (see other methods' configuration in `UDL/pansharpening/configs` for easy usage).

> 4) train your model and infer your results, see __step2__ and __step3__ for details.

> 5) to save your model earlier, add or change `cfg.save_freq_print` and `cfg.save_top_k` in `UDL/pansharpening/configs/option_modelName.py`. By default, saving starts at epoch 5 and a model is saved every 10 epochs.


**Q2.** How to customize your datasets?

You need to update: `UDL/pansharpening/common/psdata.py` (revise/add lines 24-29 to customize your datasets).


**Q3.**  How to customize training settings, such as saving models, recording logs, etc.?

You need to update: `UDL/mmcv/mmcv/runner/hooks` (generally, it does not need to revise if you do not require more complicated training settings).


**Q4.**  How to know more details of runner about how to train/test in `UDL/AutoDL/trainer.py`?

Please see `UDL/mmcv/mmcv/runner/epoch_based_runner.py`.


**Note:** Don't put any files into the folder of AutoDL. 


## Citation
* If you use this toolbox, please kindly cite our paper:

```bibtex
@ARTICLE{deng2022grsm,
author={L.-J. Deng and G. Vivone and M. E. Paoletti and G. Scarpa and J. He and Y. Zhang and J. Chanussot and A. Plaza},
journal={IEEE Geoscience and Remote Sensing Magazine},
title={Machine Learning in Pansharpening: A Benchmark, from Shallow to Deep Networks},
year={2022},
pages={},
}
```


* Also, the codes of traditional methods are from the "pansharpening toolbox for distribution", thus please cite the corresponding paper:
```bibtex
@ARTICLE{vivone2021grsm,
  author={Vivone, Gemine and Dalla Mura, Mauro and Garzelli, Andrea and Restaino, Rocco and Scarpa, Giuseppe and Ulfarsson, Magnus O. and   Alparone, Luciano and Chanussot, Jocelyn},
  journal={IEEE Geoscience and Remote Sensing Magazine}, 
  title={A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting Pansharpening With Classical and Emerging Pansharpening Methods}, 
  year={2021},
  volume={9},
  number={1},
  pages={53-81},
  doi={10.1109/MGRS.2020.3019315}
}
```


## Acknowledgement
- [MMCV](https://github.com/open-mmlab/mmcv): OpenMMLab foundational library for computer vision.
- We appreciate the great contribution of [Xiao Wu](https://xiaoxiao-woo.github.io/) who is a graduate student in [UESTC](https://www.uestc.edu.cn/) to this toolbox.

## Contribution
We appreciate all contributions to improving '01-DL-toolbox(Pytorch)'. Looking forward to your contribution to DLPan-Toolbox.


## License & Copyright
This project is open sourced under GNU General Public License v3.0.


================================================
FILE: 01-DL-toolbox(Pytorch)/setup.py
================================================
from setuptools import setup, find_packages

# Package metadata for the `udl` distribution, consumed by setuptools/pip.
setup(
    name='udl',
    version='0.1',
    description="unified pytorch framework for vision task",
    author="XiaoXiao-Woo",
    author_email="wxwsx1997@gmail.com",
    url='https://github.com/XiaoXiao-Woo/PanCollection',
    license='GPLv3',
    # Trove classifiers must match the official list verbatim; the previous
    # 'Programming Language :: Python :: 3.7+' is not a registered classifier
    # and is rejected on upload to PyPI. The minimum version is already
    # enforced by `python_requires` below.
    classifiers=[
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.7',
    ],
    python_requires='>=3.7',
    # Every package directory (one containing __init__.py) under this folder.
    packages=find_packages(),
    # Runtime dependencies; versions are intentionally left unpinned as in
    # the original file.
    install_requires=[
        "psutil",
        "opencv-python",
        "numpy",
        "matplotlib",
        "tensorboard",
        "addict",
        "yapf",
        "imageio",
        "colorlog",
        "scipy",
        "timm"
    ],
)

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/1_TestData/Datasets Testing/Download link for WV3-NewYork test data.txt
================================================
This folders contain the testing examples, including:

1) "Datasets Testing": A full-resolution WV3-NewYork example + A reduced-resolution WV3-NewYork example

2) "QB", "WV2", "WV3" and "WV4":  Save the test datasets for different sensors

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/1_TestData/QB/readme.txt
================================================
none

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/1_TestData/WV2/readme.txt
================================================
none

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/1_TestData/WV3/readme.txt
================================================
none

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/1_TestData/WV4/readme.txt
================================================
none

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/1_TestData/readme.txt
================================================
This folder contains the testing examples, including:

1) "Datasets Testing": A full-resolution WV3-NewYork example + A reduced-resolution WV3-NewYork example

2) "QB", "WV2", "WV3" and "WV4":  Save the test datasets for different sensors

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/2_DL_Result/QB/readme.txt
================================================
none

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/2_DL_Result/WV2/readme.txt
================================================
none

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/2_DL_Result/WV3/APNN/readme.txt
================================================
none

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/2_DL_Result/WV3/BDPN/readme.txt
================================================
none

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/2_DL_Result/WV3/DRPNN/readme.txt
================================================
none

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/2_DL_Result/WV3/DiCNN1/readme.txt
================================================
none

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/2_DL_Result/WV3/Download link for the 8 DL methods on WV3 dataset.txt
================================================
This folders contain the testing examples, including:

1) "Datasets Testing": A full-resolution WV3-NewYork example + A reduced-resolution WV3-NewYork example

2) "QB", "WV2", "WV3" and "WV4":  Save the test datasets for different sensors

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/2_DL_Result/WV3/FusionNet/readme.txt
================================================
none

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/2_DL_Result/WV3/MSDCNN/readme.txt
================================================
none

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/2_DL_Result/WV3/PNN/readme.txt
================================================
none

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/2_DL_Result/WV3/PanNet/readme.txt
================================================
none

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/2_DL_Result/WV4/readme.txt
================================================
none

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/2_DL_Result/readme.txt
================================================
This folder contains the outcomes of the 8 DL methods on the QB, WV2, WV3 and WV4 sensors.

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/3_EPS/QB/readme.txt
================================================
none

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/3_EPS/WV2/readme.txt
================================================
none

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/3_EPS/WV3/readme.txt
================================================
none

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/3_EPS/WV4/readme.txt
================================================
none

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/3_EPS/readme.txt
================================================
This folder stores the visual output in .eps format, which can be used in your LaTeX editing.

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/AWLP/AWLP.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description: 
%           AWLP fuses the upsampled MultiSpectral (MS) and PANchromatic (PAN) images by 
%           exploiting the Additive Wavelet Luminance Proportional (AWLP) algorithm.
% 
% Interface:
%           I_Fus_AWLP = AWLP(I_MS,I_PAN,ratio)
%
% Inputs:
%           I_MS:       MS image upsampled at PAN scale;
%           I_PAN:      PAN image;
%           ratio:      Scale ratio between MS and PAN. Pre-condition: Integer value.
%
% Outputs:
%           I_Fus_AWLP: AWLP pansharpened image.
% 
% References:
%           [Otazu05]       X. Otazu, M. Gonzalez-Audicana, O. Fors, and J. Nunez, "Introduction of sensor spectral response into image fusion methods.
%                           Application to wavelet-based methods", IEEE Transactions on Geoscience and Remote Sensing, vol. 43, no. 10, pp. 2376-2385,
%                           October 2005.
%           [Vivone15]      G. Vivone, L. Alparone, J. Chanussot, M. Dalla Mura, A. Garzelli, G. Licciardi, R. Restaino, and L. Wald, "A Critical Comparison Among Pansharpening Algorithms", 
%                           IEEE Transactions on Geoscience and Remote Sensing, vol. 53, no. 5, pp. 2565-2586, May 2015.
%           [Alparone17]    L. Alparone, A. Garzelli, and G. Vivone, "Intersensor statistical matching for pansharpening: Theoretical issues and practical solutions",
%                           IEEE Transactions on Geoscience and Remote Sensing, vol. 55, no. 8, pp. 4682-4695, 2017.
%           [Vivone20]      G. Vivone, M. Dalla Mura, A. Garzelli, R. Restaino, G. Scarpa, M.O. Ulfarsson, L. Alparone, and J. Chanussot, "A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting pansharpening with classical and emerging pansharpening methods", 
%                           IEEE Geoscience and Remote Sensing Magazine, doi: 10.1109/MGRS.2020.3019315.
% % % % % % % % % % % % % 
% 
% Version: 1
% 
% % % % % % % % % % % % % 
% 
% Copyright (C) 2019
% 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function I_Fus_AWLP = AWLP(I_MS,I_PAN,ratio)
% AWLP  Additive Wavelet Luminance Proportional pansharpening.
%   For every MS band, a detail image is obtained as the (band-equalized)
%   PAN minus its reconstruction from a wavelet decomposition in which all
%   cells of WT.dec except the first have been zeroed. The detail image is
%   weighted by the band's share of the band-averaged MS intensity and
%   added to the MS band.
%
%   I_MS   MS image upsampled at PAN scale (rows x cols x bands).
%   I_PAN  PAN image (rows x cols).
%   ratio  Scale ratio between MS and PAN; sets the number of levels.

[nRows,nCols,nBands] = size(I_MS);
I_Fus_AWLP = zeros(nRows,nCols,nBands,'double');

% Band-averaged MS intensity.
meanIntensity = sum(I_MS,3)/nBands;

% Proportional injection weights: band / mean intensity (eps avoids 0/0).
bandWeight = zeros(size(I_MS));
for b = 1:nBands
    bandWeight(:,:,b) = I_MS(:,:,b)./(meanIntensity+eps);
end

% One PAN copy per MS band so each copy can be equalized to its band.
I_PAN = repmat(I_PAN,[1 1 nBands]);

% Equalization: the gain uses the standard deviation of a degraded PAN
% (down- then up-sampled by `ratio`) rather than that of the full-scale
% PAN; the mean is moved onto the MS band mean.
panDegraded = imresize(imresize(I_PAN,1/ratio),ratio);
for b = 1:nBands
    msBand  = I_MS(:,:,b);
    panBand = I_PAN(:,:,b);
    I_PAN(:,:,b) = (panBand - mean2(panBand)).*(std2(msBand)./std2(panDegraded(:,:,b))) + mean2(msBand);
end

% Analysis / synthesis filter bank built from the [1 4 6 4 1]/16 kernel,
% each filter scaled by sqrt(2).
lowpass = [1 4 6 4 1]/16;
impulse = [0 0 1 0 0];
WF = {sqrt(2)*lowpass, ...
      sqrt(2)*(impulse-lowpass), ...
      sqrt(2)*lowpass, ...
      sqrt(2)*(impulse+lowpass)};

nLevels = ceil(log2(ratio));

for b = 1:nBands
    WT = ndwt2_working(I_PAN(:,:,b),nLevels,WF);
    % Keep only the first cell of the decomposition (presumably the
    % approximation - confirm against ndwt2_working), zero the rest.
    for c = 2:numel(WT.dec)
        WT.dec{c} = zeros(size(WT.dec{c}));
    end
    % Details = PAN minus the reconstruction of what was kept.
    details = I_PAN(:,:,b) - indwt2_working(WT,'c');
    % Note: an older variant (as in [Otazu05]) instead summed the cropped
    % planes WT.dec{2:end} to form the details.
    I_Fus_AWLP(:,:,b) = details .* bandWeight(:,:,b) + I_MS(:,:,b);
end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Avg_RR_Assessment.tex
================================================


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/BDSD/BDSD.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description: 
%           BDSD fuses the upsampled MultiSpectral (MS) and PANchromatic (PAN) images by 
%           exploiting the Band-Dependent Spatial-Detail (BDSD) algorithm. 
% 
% Interface:
%           I_Fus_BDSD = BDSD(I_MS,I_PAN,ratio,S,sensor)
%
% Inputs:
%           I_MS:           MS image upsampled at PAN scale;
%           I_PAN:          PAN image;
%           ratio:          Scale ratio between MS and PAN. Pre-condition: Integer value;
%           S:              Local estimation on SxS distinct blocks (typically 128x128); 
%           sensor:         String for type of sensor (e.g. 'WV2', 'IKONOS').
%
% Output:
%           I_Fus_BDSD:     BDSD pansharpened image.
% 
% References:
%           [Garzelli08]    A. Garzelli, F. Nencini, and L. Capobianco, "Optimal MMSE pan sharpening of very high resolution multispectral images", 
%                           IEEE Transactions on Geoscience and Remote Sensing, vol. 46, no. 1, pp. 228-236, January 2008.
%           [Vivone15]      G. Vivone, L. Alparone, J. Chanussot, M. Dalla Mura, A. Garzelli, G. Licciardi, R. Restaino, and L. Wald, "A Critical Comparison Among Pansharpening Algorithms", 
%                           IEEE Transactions on Geoscience and Remote Sensing, vol. 53, no. 5, pp. 2565-2586, May 2015.
%           [Vivone20]      G. Vivone, M. Dalla Mura, A. Garzelli, R. Restaino, G. Scarpa, M.O. Ulfarsson, L. Alparone, and J. Chanussot, "A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting pansharpening with classical and emerging pansharpening methods", 
%                           IEEE Geoscience and Remote Sensing Magazine, doi: 10.1109/MGRS.2020.3019315.
% % % % % % % % % % % % % 
% 
% Version: 1
% 
% % % % % % % % % % % % % 
% 
% Copyright (C) 2019
% 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function I_Fus_BDSD = BDSD(I_MS,I_PAN,ratio,S,sensor)
% BDSD  Band-Dependent Spatial-Detail fusion, estimated block by block.
%   Injection coefficients are estimated on distinct (S/ratio)x(S/ratio)
%   blocks of the reduced-resolution data and then applied to the matching
%   SxS blocks of the full-resolution MS/PAN pair.

%%%
% Input validation (performed only when S > 1)
%%%
if (S > 1)
    [panRows,panCols] = size(I_PAN);

    % First failing check wins, in the same order as the checks are listed.
    problem = '';
    if rem(S,2)
        problem = 'block size for local estimation must be even';
    elseif rem(S,ratio)
        problem = 'block size must be multiple of ratio';
    elseif rem(panRows,S) || rem(panCols,S)
        problem = 'x and y dims of pan must be multiple of the block size';
    end

    if ~isempty(problem)
        fprintf(1,'\n\n ');
        error(problem)
    end
end

I_PAN = double(I_PAN);
I_MS = double(I_MS);

%%%
% Reduced-resolution data
%%%

% PAN: filter with MTF_PAN, then keep every ratio-th sample from index 3.
panFiltered = MTF_PAN(I_PAN,sensor,ratio);
panLow = panFiltered(3:ratio:end,3:ratio:end);

% MS: resize to the original MS scale, then filter with MTF.
msLow = imresize(I_MS,1/ratio);
msLowFiltered = MTF(msLow,sensor,ratio);

%%%
% Coefficient estimation, one (S/ratio)x(S/ratio) block at a time
%%%
lowStack = cat(3,msLowFiltered,msLow,panLow);
estimator = @(blk) estimate_gamma_cube(blk.data,S,ratio);
coeffMap = blockproc(lowStack,[S/ratio S/ratio],estimator);

%%%
% Injection at full resolution, one SxS block at a time
%%%
% Each estimator output is SxS, so coeffMap lines up with the full-scale grid.
fullStack = cat(3,I_MS,I_PAN,coeffMap);
injector = @(blk) compH_inject(blk.data,S);

I_Fus_BDSD = blockproc(fullStack,[S S],injector);

%%%_______________________________________________________________
%%%
function gamma = estimate_gamma_cube(in3,S,ratio)
% ESTIMATE_GAMMA_CUBE  Least-squares injection coefficients for one block.
%   in3 stacks, along dim 3: Nb filtered MS bands, Nb reference MS bands
%   and one PAN plane. The result is (Nb+1)xNb, zero-padded to SxS so that
%   it can be tiled next to the full-resolution block it belongs to.
nBands = (size(in3,3)-1)/2;
msFiltered = in3(:,:,1:nBands);
msReference = in3(:,:,nBands+1:2*nBands);
panPlane = in3(:,:,2*nBands+1);

% Regressors: one column per filtered MS band plus the PAN column.
Hd = zeros(S*S/ratio/ratio,nBands+1);
Hd(:,1:nBands) = reshape(msFiltered,[],nBands);
Hd(:,nBands+1) = panPlane(:);

% Normal-equation solution operator.
B = (Hd'*Hd)\Hd';

% Regress, band by band, the difference reference - filtered.
residual = reshape(msReference,[],nBands) - reshape(msFiltered,[],nBands);
gamma = zeros(nBands+1,nBands);
for k = 1:nBands
    gamma(:,k) = B * residual(:,k);
end

% Pad with zeros (bottom/right) up to SxS.
gamma = padarray(gamma,[S-nBands-1 S-nBands],0,'post');


%%%_______________________________________________________________
%%%
function ms_en = compH_inject(in3,S)
% COMPH_INJECT  Apply the estimated coefficients to one SxS block.
%   in3 stacks, along dim 3: the MS bands, the PAN plane and a plane whose
%   top-left (Nb+1)xNb corner holds the coefficients for this block.
nBands = size(in3,3)-2;
msBlock = in3(:,:,1:nBands);
panBlock = in3(:,:,nBands+1);
coeffPlane = in3(:,:,nBands+2);

[nRows,nCols,nBands] = size(msBlock);

% Regressors: one column per MS band plus the PAN column.
H = zeros(S*S,nBands+1);
H(:,1:nBands) = reshape(msBlock,[],nBands);
H(:,nBands+1) = panBlock(:);

% Strip the zero padding from the coefficient plane.
g = coeffPlane(1:nBands+1,1:nBands);

% Each enhanced band is the band itself plus H times its coefficients.
ms_en = zeros(nRows,nCols,nBands);
for k = 1:nBands
    band = msBlock(:,:,k);
    ms_en(:,:,k) = reshape(band(:) + H * g(:,k),nRows,nCols);
end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/BDSD/BDSD_PC.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description: 
%           BDSD_PC fuses the upsampled MultiSpectral (MS) and PANchromatic (PAN) images by 
%           exploiting the Band-Dependent Spatial-Detail (BDSD) model solving an optimization constrained problem. 
% 
% Interface:
%           I_Fus_BDSD = BDSD_PC(I_MS,I_PAN,ratio,sensor)
%
% Inputs:
%           I_MS:           MS image upsampled at PAN scale;
%           I_PAN:          PAN image;
%           ratio:          Scale ratio between MS and PAN. Pre-condition: Integer value;
%           sensor:         String for type of sensor (e.g. 'WV2', 'IKONOS').
%
% Output:
%           I_Fus_BDSD:     BDSD_PC pansharpened image.
% 
% Reference:
%           [Vivone19]      G. Vivone, Robust Band-Dependent Spatial-Detail Approaches for Panchromatic Sharpening, 
%                           IEEE Transactions on Geoscience and Remote Sensing, 2019.
%           [Vivone20]      G. Vivone, M. Dalla Mura, A. Garzelli, R. Restaino, G. Scarpa, M.O. Ulfarsson, L. Alparone, and J. Chanussot, "A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting pansharpening with classical and emerging pansharpening methods", 
%                           IEEE Geoscience and Remote Sensing Magazine, doi: 10.1109/MGRS.2020.3019315.
% % % % % % % % % % % % % 
% 
% Version: 1
% 
% % % % % % % % % % % % % 
% 
% Copyright (C) 2019
% 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function I_Fus_BDSD = BDSD_PC(I_MS,I_PAN,ratio,sensor)
% BDSD_PC  BDSD fusion with sign-constrained (lsqlin) coefficients.
%   For every band, the coefficients are the constrained least-squares fit
%   of (reduced MS - filtered reduced MS) on [reduced PAN, filtered reduced
%   MS bands]; they are then applied to [PAN, MS bands] at full scale.
%
%   I_MS    MS image upsampled at PAN scale (rows x cols x bands).
%   I_PAN   PAN image.
%   ratio   Scale ratio between MS and PAN.
%   sensor  Sensor name forwarded to MTF / MTF_PAN.

I_MS = double(I_MS);
I_PAN = double(I_PAN);

nRows  = size(I_MS,1);
nCols  = size(I_MS,2);
nBands = size(I_MS,3);

opts1 = optimset('display','off');

% Reduced-resolution data.
I_GT = imresize(I_MS,1/ratio);%,'nearest');
I_MS_LR = MTF(I_GT,sensor,ratio);
I_PAN_LR = imresize(MTF_PAN(I_PAN,sensor,ratio),1/ratio,'nearest');

% The regressors and the constraints do not depend on the band, so they
% are built once here instead of once per loop iteration (the full-scale
% matrix in particular has rows*cols rows).
H_LR = [I_PAN_LR(:), reshape(I_MS_LR,[size(I_MS_LR,1)*size(I_MS_LR,2), size(I_MS_LR,3)])];
H_FR = [I_PAN(:), reshape(I_MS,[nRows*nCols, nBands])];

% A*x <= 0 with A = diag(-1,1,...,1): the PAN coefficient is constrained
% to be >= 0 and every MS coefficient to be <= 0.
A = eye(nBands+1);
A(1,1) = -1;
b = zeros(nBands+1,1);

I_Fus_BDSD = zeros(size(I_MS));
gamma = zeros(nBands+1,nBands);
for ii = 1 : nBands
    h1 = I_GT(:,:,ii);
    h2 = I_MS_LR(:,:,ii);

    gamma(:,ii) = lsqlin(H_LR,h1(:)-h2(:),A,b,[],[],[],[],[],opts1);
    I_Fus_BDSD(:,:,ii) = I_MS(:,:,ii) + reshape(H_FR*gamma(:,ii),[nRows nCols]);
end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/BDSD/C_BDSD.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description: 
%           C_BDSD fuses the upsampled MultiSpectral (MS) and PANchromatic (PAN) images  
%           through the Clustered Band-Dependent Spatial-Detail (C-BDSD) algorithm. 
% 
% Interface:
%           I_Fus_C_BDSD = C_BDSD(I_MS,I_PAN,ratio,sensor,K)
%
% Inputs:
%           I_MS:           MS image upsampled at PAN scale;
%           I_PAN:          PAN image;
%           ratio:          Scale ratio between MS and PAN. Pre-condition: Integer value;
%           sensor:         String for type of sensor (e.g. 'WV2', 'IKONOS').
%           K:              Number of clusters (K>1) (Optional: default value K=30); 
%
% Outputs:
%           I_Fus_C_BDSD:   C_BDSD pansharpened image.
% 
% Reference:
%           [Garzelli15]    A. Garzelli, Pansharpening of Multispectral Images Based on Nonlocal Parameter Optimization, 
%                           IEEE Transactions on Geoscience and Remote Sensing, vol. 53, no. 4, pp. 2096-2107, April 2015.
%           [Vivone20]      G. Vivone, M. Dalla Mura, A. Garzelli, R. Restaino, G. Scarpa, M.O. Ulfarsson, L. Alparone, and J. Chanussot, "A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting pansharpening with classical and emerging pansharpening methods", 
%                           IEEE Geoscience and Remote Sensing Magazine, doi: 10.1109/MGRS.2020.3019315.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function I_Fus_C_BDSD = C_BDSD(I_MS,I_PAN,ratio,sensor,K)
% C_BDSD  Clustered Band-Dependent Spatial-Detail fusion.
%   Pixels are grouped by k-means on two PAN features (local standard
%   deviation over a 51x51 window and intensity, both scaled by their
%   maximum). Injection parameters are estimated per cluster at reduced
%   resolution and applied to the pixels of the same cluster at full
%   resolution.
%
%   I_MS    MS image upsampled at PAN scale (N x M x Nb).
%   I_PAN   PAN image (N x M); N and M are expected to be multiples of ratio.
%   ratio   Scale ratio between MS and PAN. The reference sampling phase is
%           hard-coded to (3,3), as in the rest of this file.
%   sensor  Sensor name forwarded to MTF / MTF_PAN.
%   K       Number of clusters, K > 1 (optional, default 30).
%
%   Raises an error when fewer than four arguments are given or K < 2.

%%%
% Control of input parameters and initialization
%%%
if nargin < 4
    fprintf(1,'\nI_Fus_C_BDSD = C_BDSD(I_MS,I_PAN,ratio,sensor,K)\n\n');
    error('At least four input arguments required')
end
if nargin < 5
    K = 30;
elseif K < 2
    % Previously this printed a message and returned with the output
    % unassigned, which surfaced as an unrelated "output not assigned" error.
    error('Required number of clusters K>1.')
end

[N,M,Nb] = size(I_MS);

I_MS = double(I_MS);
I_PAN = double(I_PAN);

%%%
% Reduced resolution
%%%

pan_LP = MTF_PAN(I_PAN,sensor,ratio);
pan_LP_d = pan_LP(3:ratio:end,3:ratio:end);

ms_orig = imresize(I_MS,1/ratio);
ms_LP_d = MTF(ms_orig,sensor,ratio);


% CLUSTER MAPS AT FULL RESOLUTION AND REDUCED RESOLUTION
%
Sa = stdfilt(I_PAN,ones(51));
Sa = Sa/max(Sa(:));
Sb = I_PAN;
Sb = Sb/max(Sb(:));

opts = statset('TolX',1e-5);

% features(:,:,:,p) holds the two features sampled on the p-th of the
% ratio*ratio polyphase grids of the full-resolution image.
features = zeros(N/ratio,M/ratio,2,ratio*ratio);
for i = 1:ratio
    for j = 1:ratio
        features(:,:,1,(i-1)*ratio+j) = Sa(1+(i-1):ratio:end,1+(j-1):ratio:end);
        features(:,:,2,(i-1)*ratio+j) = Sb(1+(i-1):ratio:end,1+(j-1):ratio:end);
    end
end
C_stack = zeros(N/ratio,M/ratio,ratio*ratio);

% Phase (3,3) is the grid on which pan_LP_d was sampled above; cluster
% centers are learned there.
refPhase = (3-1)*ratio+3;
f = features(:,:,:,refPhase);

% Silence warnings for the duration of this call only; the caller's
% warning state is restored when the function exits (normally or on error).
warnState = warning('off','all');
restoreWarnings = onCleanup(@() warning(warnState)); %#ok<NASGU>

[aux, centers] = kmeans(reshape(f,[N/ratio*M/ratio,2]),K,'replicates',2,'start','cluster','options',opts);
C = reshape(aux,[N/ratio M/ratio]);
C_stack(:,:,refPhase) = C;

% Label every other phase with a single k-means iteration started from the
% reference centers. The phase is identified by its linear index: a test
% on the product i*j would also skip (1,9) and (9,1) for ratio >= 9.
for i = 1:ratio
    for j = 1:ratio
        phase = (i-1)*ratio+j;
        if phase ~= refPhase
            f = features(:,:,:,phase);
            aux = kmeans(reshape(f,[N/ratio*M/ratio,2]),K,'start',centers,'MaxIter',1);
            C_stack(:,:,phase) = reshape(aux,[N/ratio M/ratio]);
        end
    end
end

% Interleave the per-phase maps into one full-resolution cluster map.
C4 = zeros(size(I_PAN));
for i = 1:ratio
    for j = 1:ratio
        C4(i:ratio:end,j:ratio:end) = C_stack(:,:,(i-1)*ratio+j);
    end
end

% ESTIMATE PARAMETERS AT REDUCED RESOLUTION AND INJECT (CLUSTER BY CLUSTER)
%
g = zeros(K,Nb);
alpha = zeros(Nb,Nb,K);
offset = zeros(Nb,K);

% Estimate for K=1 (all pixels), used as fallback below.
[~,g_global,alpha_global,offset_global] = parm_est(ms_LP_d(:,:,:),pan_LP_d,ms_orig,find(C>0));

% Each pixel belongs to exactly one cluster and bdsd_injection returns
% zeros elsewhere, so accumulating the per-cluster images gives the same
% result as summing an N x M x Nb x K stack without allocating it.
I_Fus_C_BDSD = zeros(N,M,Nb);

for j=1:K
    [~,g(j,:),alpha(:,:,j),offset(:,j)] = parm_est(ms_LP_d(:,:,:),pan_LP_d,ms_orig,find(C==j));
    % Fall back to the global estimate when THIS cluster has a negative
    % gain. (The former test looked at the whole table g, so one negative
    % global gain forced the fallback on every later cluster.)
    if any(g(j,:) < 0)
        g(j,:) = g_global;
        alpha(:,:,j) = alpha_global;
        offset(:,j) = offset_global;
    end
    members = find(C4==j);
    H = H_comp(I_PAN,I_MS,members);
    I_Fus_C_BDSD = I_Fus_C_BDSD + bdsd_injection(I_PAN,I_MS,H,g(j,:),squeeze(alpha(:,:,j)),offset(:,j),members);
end


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


function [gamma,g,alpha,offset] = parm_est(hs_LP_d,pan_LP_d,hs_orig,ind)
% PARM_EST  Least-squares injection parameters on a subset of pixels.
%   hs_LP_d   filtered reduced-resolution MS bands (rows x cols x Nb).
%   pan_LP_d  reduced-resolution PAN plane (rows x cols).
%   hs_orig   reference reduced-resolution MS bands (rows x cols x Nb).
%   ind       column vector of linear pixel indices to use.
%
%   gamma   (Nb+2) x Nb coefficients of the regression of
%           (hs_orig - hs_LP_d) on [hs_LP_d bands, 1, pan_LP_d].
%   g       1 x Nb, the PAN coefficient of each band (last row of gamma).
%   alpha   Nb x Nb, minus the MS coefficients divided by the PAN one.
%   offset  Nb x 1, the constant coefficient divided by the PAN one.
%
%   The former outer loop over the bands repeated exactly the same
%   computation Nb times, and the solution operator was rebuilt for every
%   band; both are now evaluated once. Results are unchanged.

Nb = size(hs_orig,3);

% Regressors: filtered MS bands, a constant column, and PAN.
Hd = zeros(size(ind,1),Nb+2);
for k=1:Nb
    bfull = hs_LP_d(:,:,k);
    Hd(:,k) = bfull(ind);
end
Hd(:,Nb+1) = ones(size(ind));
Hd(:,Nb+2) = pan_LP_d(ind);

% Normal-equation solution operator, shared by all bands.
Z = (Hd'*Hd)\Hd';

gamma = zeros((Nb+2),Nb);
for k=1:Nb
    bfull = hs_orig(:,:,k);
    b = bfull(ind);
    bdfull = hs_LP_d(:,:,k);
    bd = bdfull(ind);
    gamma(:,k) = Z *(b(:)-bd(:));
end

g = gamma(Nb+2,:);

alpha = zeros(Nb);
offset = zeros(Nb,1);
for k = 1:Nb
    alpha(:,k) = -gamma(1:Nb,k)/gamma(Nb+2,k);
    offset(k) = gamma(Nb+1,k)/gamma(Nb+2,k);
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

function hs_en = bdsd_injection(pan,msexp,H,g,alpha,offset,ind)
% BDSD_INJECTION  Inject PAN details into the pixels listed in ind.
%   For band k the synthetic intensity is H(:,1:Nb)*alpha(:,k) - offset(k)
%   and the enhanced pixel is  ms + g(k) * (pan - intensity).
%   Pixels not listed in ind are returned as zero.

[nRows,nCols,nBands] = size(msexp);

% PAN samples of the selected pixels.
panSel = pan(ind);

Intensity = zeros(length(ind),nBands);
hs_en = zeros(nRows,nCols,nBands);
for k = 1:nBands
    Intensity(:,k) = H(:,1:nBands) * alpha(:,k) - offset(k);

    band = msexp(:,:,k);
    bandSel = band(ind);
    enhanced = bandSel(:) + (panSel - Intensity(:,k)) * g(k);

    % Write the enhanced samples into an otherwise all-zero plane.
    plane = hs_en(:,:,k);
    plane(ind) = enhanced;
    hs_en(:,:,k) = reshape(plane,nRows,nCols);
end


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


function H = H_comp(pan,hs,ind)
% H_COMP  Regressor matrix for the pixels listed in ind.
%   Row r holds, for pixel ind(r): the Nb MS band values, a constant 1,
%   and the PAN value.

nBands = size(hs,3);
H = zeros(length(ind),nBands+2);

% Flatten every band into a column, then pick the requested pixels.
bandsAsColumns = reshape(hs,[],nBands);
H(:,1:nBands) = bandsAsColumns(ind,:);

H(:,nBands+1) = 1;
H(:,nBands+2) = pan(ind);

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/BT-H/BroveyRegHazeMin.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description: 
%           Brovey data fusion with haze correction
% 
% Interface:
%           I_Fus_Brovey_Reg = BroveyRegHazeMin(I_MS,I_PAN,ratio)
%
% Inputs:
%           I_MS:           MS image upsampled at PAN scale;
%           I_PAN:          PAN image;
%           ratio:          Scale ratio between MS and PAN. Pre-condition: Integer value.
%
% Outputs:
%           I_Fus_Brovey_Reg:  Pansharpened image.
% 
% References:
%           [Lolli17]       S. Lolli, L. Alparone, A. Garzelli, and G. Vivone, "Haze correction for contrast-based multispectral pansharpening",
%                           IEEE Geoscience and Remote Sensing Letters, vol. 14, no. 12, pp. 2255-2259, 2017.
%           [Vivone20]      G. Vivone, M. Dalla Mura, A. Garzelli, R. Restaino, G. Scarpa, M.O. Ulfarsson, L. Alparone, and J. Chanussot, "A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting pansharpening with classical and emerging pansharpening methods", 
%                           IEEE Geoscience and Remote Sensing Magazine, doi: 10.1109/MGRS.2020.3019315.
% % % % % % % % % % % % % 
% 
% Version: 1
% 
% % % % % % % % % % % % % 
% 
% Copyright (C) 2019
% 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function I_Fus_Brovey_Reg = BroveyRegHazeMin(I_MS,I_PAN,ratio)
% BROVEYREGHAZEMIN  Brovey (multiplicative) fusion with haze correction.
%   A per-band haze level is subtracted from the MS image before the
%   multiplicative injection and added back afterwards.

nBands = size(I_MS,3);
hazeLevel = zeros(1,1,nBands);

if nBands == 4
    % 4-band data: a fixed fraction of the 1st percentile of each band.
    % Bands 1..4 are treated as B, G, R, NIR.
    prc = 1;
    hazeFraction = [0.95 0.45 0.40 0.05];
    for b = 1:4
        band = I_MS(:,:,b);
        hazeLevel(1,1,b) = hazeFraction(b) * prctile(band(:),prc);
    end
else
    % Any other band count: the minimum of each band.
    for b = 1:nBands
        band = I_MS(:,:,b);
        hazeLevel(1,1,b) = min(band(:));
    end
end

% Haze level replicated to the image size.
L = repmat(hazeLevel,[size(I_MS,1) size(I_MS,2)]);

imageLR = double(I_MS);
imageHR = double(I_PAN);

% Low-pass PAN and the band weights regressed against it.
imageHR_LR = LPfilterGauss(imageHR,ratio);
bandWeights = estimation_alpha(imageLR,imageHR_LR,'global');
alpha = reshape(bandWeights,1,1,[]);

% Intensity: weighted sum of the haze-corrected MS bands.
I = sum((imageLR - L) .* repmat(alpha,[size(I_MS,1) size(I_MS,2) 1]),3);

% Match PAN to the intensity (mean and standard deviation), using the
% statistics of the low-pass PAN.
imageHR = (imageHR - mean2(imageHR_LR)).*(std2(I)./std2(imageHR_LR)) + mean2(I);

% Haze-corrected MS, with negative values set to zero.
I_MS_L = imageLR - L;
I_MS_L(I_MS_L < 0) = 0;

% Multiplicative injection, then the haze level is added back.
I_Fus_Brovey_Reg = I_MS_L .* repmat(imageHR./(I+eps),[1 1 size(imageLR,3)]) + L;

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Demo_Full_Resolution.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%For FUll-Resolution%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%  1) This is a test demo that shows all full-resolution results of the traditional and DL methods.
%     Here, we take the WV3 test dataset as an example. Readers can change the corresponding directory 
%     and settings to test other/their own datasets
%  2) The codes of traditional methods are from the "pansharpening toolbox for distribution",
%     thus please cite the paper:
%     [1] G. Vivone, et al., A new benchmark based on recent advances in multispectral pansharpening: Revisiting
%         pansharpening with classical and emerging pansharpening methods, IEEE Geosci. Remote Sens. Mag., 
%         9(1): 53-81, 2021
%  3) Also, if you use this toolbox, please cite our paper:
%     [2] L.-J. Deng, et al., Machine Learning in Pansharpening: A Benchmark, from Shallow to Deep Networks, 
%         IEEE Geosci. Remote Sens. Mag., 2022

%  LJ Deng (UESTC), 2020-02-27

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Note: the full-resolution test dataset is too large to upload to
% GitHub, thus we provide cloud links so that readers can download the
% files needed to run this demo, including:

% i) Download link for full-resolution WV3-NewYork example (named "NY1_WV3_FR.mat"):
%     http:********   (put into the folder of "1_TestData/Datasets Testing")

% ii) Download link of DL's results for full-resolution WV3-NewYork example:
%     http:********   (put into the folder of "'2_DL_Result/WV3")

% Once you have above datasets, you can run this demo successfully, then
% understand how this demo run!

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

clear; close all;
%% =======load directors========
% Tools
addpath([pwd,'/Tools']);

% Select algorithms to run
algorithms = {'EXP','BT-H','BDSD-PC','C-GSA','SR-D',...
    'MTF-GLP-HPM-R','MTF-GLP-FS','TV','PanNet','DRPNN','MSDCNN','BDPN','DiCNN','PNN','APNN','FusionNet'};

% director to save EPS figures for latex editing; if other dataset, please
% change the director correspondingly
data_name = '3_EPS/WV3/wv3_os_ny';  

%% ==========Read Data and sensors' info====================
%% read the test dataset; if use your test dataset, please update in this folder
file_test = '1_TestData/Datasets Testing/NY1_WV3_FR.mat';

% get I_MS_LR, I_MS, I_PAN and sensors' info; 
load(file_test)   

% (Note: If there is no sensor's info in your dataset, 
% please find and update these info in the following commented lines):

%------ following are sensor's info for WV3 (an example for WV3)----
%     sensor = 'WV3';
%     Qblocks_size = 32;
%     bicubic = 0;% Interpolator
%     flag_cut_bounds = 1;% Cut Final Image
%     dim_cut = 21;% Cut Final Image
%     thvalues = 0;% Threshold values out of dynamic range
%     printEPS = 0;% Print Eps
%     ratio = 4;% Resize Factor
%     L = 11;% Radiometric Resolution

%% Initialization of the Matrix of Results
NumIndexes = 3;
MatrixResults = zeros(numel(algorithms),NumIndexes);
alg = 0;
flagQNR = 0; %% Flag QNR/HQNR, 1: QNR otherwise HQNR

% zoom-in interesting two regions of figure; you may change them
% according to your requirment
location1                = [500 700 100 300];  %default: data6: [10 50 1 60]; data7:[140 180 5 60]
location2                = [200 380 1000 1250];  %default: data6: [190 240 5 60]; data7:[190 235 120 150]

clear print

%% show I_MS_LR, I_GT, PAN Imgs:
if size(I_MS,3) == 4
    showImage4LR(I_MS_LR,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio);
else
    showImage8LR(I_MS_LR,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio);
end

% (Note: If you only want to show pan image without region zoom-in, use showPan;
% otherwise, use showPan_zoomin)

%showPan(I_PAN,printEPS,2,flag_cut_bounds,dim_cut);
showPan_zoomin(I_PAN,printEPS,2,flag_cut_bounds,dim_cut, location1, location2);

% Note: eps figure is saved in "data_name" for latex editing
print('-depsc', strcat(data_name, '_pan', '.eps')) 

%% ======EXP ===================
if ismember('EXP',algorithms)
    alg = alg + 1;
    [D_lambda_EXP,D_S_EXP,QNRI_EXP] = indexes_evaluation_FS(I_MS,I_MS_LR,I_PAN,L,thvalues,I_MS,sensor,ratio,flagQNR);
    MatrixResults(alg,:) = [D_lambda_EXP,D_S_EXP,QNRI_EXP];
    MatrixImage(:,:,:,alg) = I_MS;
    
    % (Note: You may use following "showImage8LR" without region zoom-in; otherwise, you can
    % use "showImage8_zoomin" for zoom-in visualization.) 
    
    %showImage8LR(I_MS,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio);
    showImage8_zoomin(I_MS,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L, location1, location2);
    print('-depsc', strcat(data_name, '_exp.eps')) 
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%% CS-based Methods %%%%%%%%%%%%%%%%%%%%%%%%%%
%% ====== 1) BT-H Method ======
if ismember('BT-H',algorithms)
    alg = alg + 1;
    
    % The method is called from inside its own folder. The try/catch restores
    % the demo folder when the method throws; without it a failed run leaves
    % MATLAB inside BT-H and the next run of the demo cannot find its folders.
    cd BT-H
    try
        t2=tic;
        I_BT_H = BroveyRegHazeMin(I_MS,I_PAN,ratio);
        time_BT_H = toc(t2);
        fprintf('Elaboration time BT-H: %.2f [sec]\n',time_BT_H);
    catch ME
        cd ..
        rethrow(ME);
    end
    cd ..
    
    %%% Quality indexes computation (one row of MatrixResults per method)
    [D_lambda_BT_H,D_S_BT_H,QNRI_BT_H] = indexes_evaluation_FS(I_BT_H,I_MS_LR,I_PAN,L,thvalues,I_MS,sensor,ratio,flagQNR);
    MatrixResults(alg,:) = [D_lambda_BT_H,D_S_BT_H,QNRI_BT_H];
    MatrixImage(:,:,:,alg) = I_BT_H;
    
    % Show the result with the two zoom-in regions and save it as EPS
    %showImage8LR(I_BT_H,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio);
    showImage8_zoomin(I_BT_H,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L, location1, location2);
    print('-depsc', strcat(data_name, '_bth.eps'))
end

%% ====== 2) BDSD-PC Method ======
if ismember('BDSD-PC',algorithms)
    alg = alg + 1;
    
    % Run from inside the BDSD folder; the try/catch makes sure the demo
    % folder is restored even when BDSD_PC throws an error.
    cd BDSD
    try
        t2=tic;
        I_BDSD_PC = BDSD_PC(I_MS,I_PAN,ratio,sensor);
        time_BDSD_PC = toc(t2);
        fprintf('Elaboration time BDSD-PC: %.2f [sec]\n',time_BDSD_PC);
    catch ME
        cd ..
        rethrow(ME);
    end
    cd ..
    
    %%% Quality indexes computation (one row of MatrixResults per method)
    [D_lambda_BDSD_PC,D_S_BDSD_PC,QNRI_BDSD_PC] = indexes_evaluation_FS(I_BDSD_PC,I_MS_LR,I_PAN,L,thvalues,I_MS,sensor,ratio,flagQNR);
    MatrixResults(alg,:) = [D_lambda_BDSD_PC,D_S_BDSD_PC,QNRI_BDSD_PC];
    MatrixImage(:,:,:,alg) = I_BDSD_PC;
    
    % Show the result with the two zoom-in regions and save it as EPS
    %showImage8LR(I_BDSD_PC,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio);
    showImage8_zoomin(I_BDSD_PC,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L, location1, location2);
    print('-depsc', strcat(data_name, '_bdsd_pc.eps'))
end

%% ====== 3) C-GSA Method ======
if ismember('C-GSA',algorithms)
    alg = alg + 1;
    
    PS_algorithm = 'GSA'; % Pansharpening algorithm
    n_segm = 5; % Number of segments
    
    % Run from inside the GS folder; the try/catch makes sure the demo folder
    % is restored even when one of the GS routines throws an error.
    cd GS
    try
        t2=tic;
        I_C_GSA = GS_Segm(I_MS,I_PAN,gen_LP_image(PS_algorithm,I_MS,I_PAN,I_MS_LR,ratio,sensor), k_means_clustering(I_MS,n_segm));
        time_C_GSA = toc(t2);
        % The message now names the method that was actually timed (C-GSA)
        fprintf('Elaboration time C-GSA: %.2f [sec]\n',time_C_GSA);
    catch ME
        cd ..
        rethrow(ME);
    end
    cd ..
    
    %%% Quality indexes computation
    [D_lambda_C_GSA,D_S_C_GSA,QNRI_C_GSA] = indexes_evaluation_FS(I_C_GSA,I_MS_LR,I_PAN,L,thvalues,I_MS,sensor,ratio,flagQNR);
    MatrixResults(alg,:) = [D_lambda_C_GSA,D_S_C_GSA,QNRI_C_GSA];
    MatrixImage(:,:,:,alg) = I_C_GSA;
    
    % Show the result with the two zoom-in regions and save it as EPS
    %showImage8LR(I_C_GSA,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio);
    showImage8_zoomin(I_C_GSA,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L, location1, location2);
    print('-depsc', strcat(data_name, '_c_gsa.eps'))
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%% MRA-based Methods %%%%%%%%%%%%%%%%%%%%%%%%%%
%% ====== 1) SR-D Method ======
if ismember('SR-D',algorithms)
    alg = alg + 1;
    
    %%%%%%%%%%%%%%%%%%%%%%%%%% Parameters setting %%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    TS = 7; % Tiling (dimensions of the patches are TS x TS)
    ol = 4; % Overlap (in pixels) between contiguous tile
    n_atoms = 10; % Max number of representation atoms (default value = 10)
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    
    % Run from inside the SR-D folder; the try/catch makes sure the demo
    % folder is restored even when CS throws an error.
    cd SR-D
    try
        t2=tic;
        I_SR_D = CS(I_MS,I_PAN,I_MS_LR,ratio,sensor,TS,ol,n_atoms);
        time_SR_D = toc(t2);
        fprintf('Elaboration time SR_D: %.2f [sec]\n',time_SR_D);
    catch ME
        cd ..
        rethrow(ME);
    end
    cd ..
    
    %%% Quality indexes computation (one row of MatrixResults per method)
    [D_lambda_SR_D,D_S_SR_D,QNRI_SR_D] = indexes_evaluation_FS(I_SR_D,I_MS_LR,I_PAN,L,thvalues,I_MS,sensor,ratio,flagQNR);
    MatrixResults(alg,:) = [D_lambda_SR_D,D_S_SR_D,QNRI_SR_D];
    MatrixImage(:,:,:,alg) = I_SR_D;
    
    % Show the result with the two zoom-in regions and save it as EPS
    %showImage8LR(I_SR_D,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio);
    showImage8_zoomin(I_SR_D,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L, location1, location2);
    print('-depsc', strcat(data_name, '_sr_d.eps'))
end

%% ====== 2) MTF-GLP-HPM-R Method ======
if ismember('MTF-GLP-HPM-R',algorithms)
    alg = alg + 1;
    
    % Run from inside the GLP folder; the try/catch makes sure the demo
    % folder is restored even when MTF_GLP_HPM_R throws an error.
    cd GLP
    try
        t2=tic;
        I_MTF_GLP_HPM_R = MTF_GLP_HPM_R(I_MS,I_PAN,sensor,ratio);
        time_MTF_GLP_HPM_R = toc(t2);
        fprintf('Elaboration time MTF-GLP-HPM-R: %.2f [sec]\n',time_MTF_GLP_HPM_R);
    catch ME
        cd ..
        rethrow(ME);
    end
    cd ..
    
    %%% Quality indexes computation (one row of MatrixResults per method)
    [D_lambda_MTF_GLP_HPM_R,D_S_MTF_GLP_HPM_R,QNRI_MTF_GLP_HPM_R] = indexes_evaluation_FS(I_MTF_GLP_HPM_R,I_MS_LR,I_PAN,L,thvalues,I_MS,sensor,ratio,flagQNR);
    MatrixResults(alg,:) = [D_lambda_MTF_GLP_HPM_R,D_S_MTF_GLP_HPM_R,QNRI_MTF_GLP_HPM_R];
    MatrixImage(:,:,:,alg) = I_MTF_GLP_HPM_R;
    
    % Show the result with the two zoom-in regions and save it as EPS
    %showImage8LR(I_MTF_GLP_HPM_R,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio);
    showImage8_zoomin(I_MTF_GLP_HPM_R,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L, location1, location2);
    print('-depsc', strcat(data_name, '_mtfglp_hpm_r.eps'))
end

%% ====== 3) MTF-GLP-FS Method ======
if ismember('MTF-GLP-FS',algorithms)
    alg = alg + 1;
    
    % Run from inside the GLP folder; the try/catch makes sure the demo
    % folder is restored even when MTF_GLP_FS throws an error.
    cd GLP
    try
        t2=tic;
        I_MTF_GLP_FS = MTF_GLP_FS(I_MS,I_PAN,sensor,ratio);
        time_MTF_GLP_FS = toc(t2);
        fprintf('Elaboration time MTF-GLP-FS: %.2f [sec]\n',time_MTF_GLP_FS);
    catch ME
        cd ..
        rethrow(ME);
    end
    cd ..
    
    %%% Quality indexes computation
    [D_lambda_MTF_GLP_FS,D_S_MTF_GLP_FS,QNRI_MTF_GLP_FS] = indexes_evaluation_FS(I_MTF_GLP_FS,I_MS_LR,I_PAN,L,thvalues,I_MS,sensor,ratio,flagQNR);
    MatrixResults(alg,:) = [D_lambda_MTF_GLP_FS,D_S_MTF_GLP_FS,QNRI_MTF_GLP_FS];
    MatrixImage(:,:,:,alg) = I_MTF_GLP_FS;
    
    % Show the result with the two zoom-in regions and save it as EPS
    %showImage8LR(I_MTF_GLP_FS,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio);
    showImage8_zoomin(I_MTF_GLP_FS,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L, location1, location2);
    print('-depsc', strcat(data_name, '_mtfglpfs.eps'))
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%% VO-based Methods %%%%%%%%%%%%%%%%%%%%%%%%%%
%% ====== 1) TV Method ======
if ismember('TV',algorithms)
    alg = alg + 1;
    
    %%%%%%%%%%%%%%%%%%%%%%%%%% Parameters setting %%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % Sensor-specific parameters passed to TV_pansharpen below.
    switch sensor
        case 'IKONOS'
            w=[0.1091    0.2127    0.2928    0.3854];
            c = 8;
            alpha=1.064;
            maxiter=10;
            lambda = 0.47106;
        case {'GeoEye1','WV4'}
            w=[0.1552, 0.3959, 0.2902, 0.1587];
            c = 8;
            alpha=0.75;
            maxiter=50;
            lambda = 157.8954;
        case 'WV3'
            w=[0.0657    0.1012    0.1537    0.1473    0.1245    0.1545    0.1338    0.1192];
            c = 8;
            alpha=0.75;
            maxiter=50;
            lambda = 1.0000e-03;
        otherwise
            % Without this branch an unlisted sensor leaves w/c/alpha/maxiter/lambda
            % unset and the call below fails with an unrelated-looking error.
            error('Demo:TV:unsupportedSensor', ...
                'No TV parameters are defined for sensor "%s" (supported: IKONOS, GeoEye1, WV4, WV3).', sensor);
    end
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    
    % Run from inside the TV folder; the try/catch makes sure the demo folder
    % is restored even when TV_pansharpen throws an error.
    cd TV
    try
        t2 = tic;
        I_TV = TV_pansharpen(I_MS_LR,I_PAN,alpha,lambda,c,maxiter,w);
        time_TV = toc(t2);
        fprintf('Elaboration time TV: %.2f [sec]\n',time_TV);
    catch ME
        cd ..
        rethrow(ME);
    end
    cd ..
    
    %%% Quality indexes computation
    [D_lambda_TV,D_S_TV,QNRI_TV] = indexes_evaluation_FS(I_TV,I_MS_LR,I_PAN,L,thvalues,I_MS,sensor,ratio,flagQNR);
    MatrixResults(alg,:) = [D_lambda_TV,D_S_TV,QNRI_TV];
    MatrixImage(:,:,:,alg) = I_TV;
    
    % Show the result with the two zoom-in regions and save it as EPS
    %showImage8LR(I_TV,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio);
    showImage8_zoomin(I_TV,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L, location1, location2);
    print('-depsc', strcat(data_name, '_tv.eps'))
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%% DL-based Methods %%%%%%%%%%%%%%%%%%%%%%%%%%
%% ====== 1) PanNet Method ======
% If you use another sensor's data, please update the following directory and
% DL result. Note that the DL results here are obtained from our "01-DL toolbox (Pytorch)" folder, please check it.
% The same applies to the other DL methods below.

% (Note: val_bit = 2047 for 11-bit WV3, WV4 and QB data; val_bit = 1023 for 10-bit GF2 data)
% val_bit stays outside the "if" because the following DL sections use it too.
val_bit  = 2047;

if ismember('PanNet',algorithms)
    % Load the stored PanNet result only when the method is selected, so a
    % missing result file cannot abort a run that skips PanNet.
    file_pannet = 'pannet_wv3_os_ny';
    load(strcat('2_DL_Result/WV3/PanNet/', file_pannet, '.mat')) % expected to define pannet_wv3_os_ny
    I_pannet = val_bit*double(pannet_wv3_os_ny);
    
    alg = alg + 1;
    %%% Quality indexes computation
    [D_lambda_pannet,D_S_pannet,QNRI_pannet] = indexes_evaluation_FS(I_pannet,I_MS_LR,I_PAN,L,thvalues,I_MS,sensor,ratio,flagQNR);
    MatrixResults(alg,:) = [D_lambda_pannet,D_S_pannet,QNRI_pannet];
    MatrixImage(:,:,:,alg) = I_pannet;
    
    %showImage8LR(I_pannet,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio);
    showImage8_zoomin(I_pannet,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L, location1, location2);
    print('-depsc', strcat(data_name, '_pannet.eps'))
end

%% ====== 2) DRPNN Method ======
if ismember('DRPNN',algorithms)
    % Load the stored DRPNN result only when the method is selected, so a
    % missing result file cannot abort a run that skips DRPNN.
    file_drpnn = 'drpnn_wv3_os_ny';
    load(strcat('2_DL_Result/WV3/DRPNN/', file_drpnn, '.mat')) % expected to define drpnn_wv3_os_ny
    I_drpnn    = val_bit*double(drpnn_wv3_os_ny); % val_bit is set in the PanNet section
    
    alg = alg + 1;
    %%% Quality indexes computation
    [D_lambda_drpnn,D_S_drpnn,QNRI_drpnn] = indexes_evaluation_FS(I_drpnn,I_MS_LR,I_PAN,L,thvalues,I_MS,sensor,ratio,flagQNR);
    MatrixResults(alg,:) = [D_lambda_drpnn,D_S_drpnn,QNRI_drpnn];
    MatrixImage(:,:,:,alg) = I_drpnn;
    
    %showImage8LR(I_drpnn,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio);
    showImage8_zoomin(I_drpnn,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L, location1, location2);
    print('-depsc', strcat(data_name, '_drpnn.eps'))
end

%% ====== 3) MSDCNN Method ======
if ismember('MSDCNN',algorithms)
    % Load the stored MSDCNN result only when the method is selected, so a
    % missing result file cannot abort a run that skips MSDCNN.
    file_msdcnn = 'msdcnn_wv3_os_ny';
    load(strcat('2_DL_Result/WV3/MSDCNN/', file_msdcnn, '.mat')) % expected to define msdcnn_wv3_os_ny
    I_msdcnn = val_bit*double(msdcnn_wv3_os_ny); % val_bit is set in the PanNet section
    
    alg = alg + 1;
    %%% Quality indexes computation
    [D_lambda_msdcnn,D_S_msdcnn,QNRI_msdcnn] = indexes_evaluation_FS(I_msdcnn,I_MS_LR,I_PAN,L,thvalues,I_MS,sensor,ratio,flagQNR);
    MatrixResults(alg,:) = [D_lambda_msdcnn,D_S_msdcnn,QNRI_msdcnn];
    MatrixImage(:,:,:,alg) = I_msdcnn;
    
    %showImage8LR(I_msdcnn,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio);
    showImage8_zoomin(I_msdcnn,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L, location1, location2);
    print('-depsc', strcat(data_name, '_msdcnn.eps'))
end

%% ====== 4) BDPN Method ======
if ismember('BDPN',algorithms)
    % Load the stored BDPN result only when the method is selected, so a
    % missing result file cannot abort a run that skips BDPN.
    file_bdpn  = 'bdpn_wv3_os_ny';
    load(strcat('2_DL_Result/WV3/BDPN/', file_bdpn , '.mat')) % expected to define bdpn_wv3_os_ny
    I_bdpn  = val_bit*double(bdpn_wv3_os_ny); % val_bit is set in the PanNet section
    
    alg = alg + 1;
    %%% Quality indexes computation
    [D_lambda_bdpn,D_S_bdpn,QNRI_bdpn] = indexes_evaluation_FS(I_bdpn,I_MS_LR,I_PAN,L,thvalues,I_MS,sensor,ratio,flagQNR);
    MatrixResults(alg,:) = [D_lambda_bdpn,D_S_bdpn,QNRI_bdpn];
    MatrixImage(:,:,:,alg) = I_bdpn;
    
    %showImage8LR(I_bdpn,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio);
    showImage8_zoomin(I_bdpn,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L, location1, location2);
    print('-depsc', strcat(data_name, '_bdpn.eps'))
end

%% ====== 5) DiCNN Method ======
if ismember('DiCNN',algorithms)
    % Load the stored DiCNN result only when the method is selected, so a
    % missing result file cannot abort a run that skips DiCNN.
    file_dicnn = 'dicnn_wv3_os_ny';
    load(strcat('2_DL_Result/WV3/DiCNN/', file_dicnn, '.mat')) % expected to define dicnn_wv3_os_ny
    I_dicnn = val_bit*double(dicnn_wv3_os_ny); % val_bit is set in the PanNet section
    
    alg = alg + 1;
    %%% Quality indexes computation
    [D_lambda_dicnn,D_S_dicnn,QNRI_dicnn] = indexes_evaluation_FS(I_dicnn,I_MS_LR,I_PAN,L,thvalues,I_MS,sensor,ratio,flagQNR);
    MatrixResults(alg,:) = [D_lambda_dicnn,D_S_dicnn,QNRI_dicnn];
    MatrixImage(:,:,:,alg) = I_dicnn;
    
    %showImage8LR(I_dicnn,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio);
    showImage8_zoomin(I_dicnn,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L, location1, location2);
    print('-depsc', strcat(data_name, '_dicnn.eps'))
end

%% ====== 6) PNN Method ======
if ismember('PNN',algorithms)
    % Load the stored PNN result only when the method is selected, so a
    % missing result file cannot abort a run that skips PNN.
    file_pnn = 'pnn_wv3_os_ny';
    load(strcat('2_DL_Result/WV3/PNN/', file_pnn, '.mat')) % expected to define pnn_wv3_os_ny
    I_pnn = val_bit*double(pnn_wv3_os_ny); % val_bit is set in the PanNet section
    
    alg = alg + 1;
    %%% Quality indexes computation
    [D_lambda_pnn,D_S_pnn,QNRI_pnn] = indexes_evaluation_FS(I_pnn,I_MS_LR,I_PAN,L,thvalues,I_MS,sensor,ratio,flagQNR);
    MatrixResults(alg,:) = [D_lambda_pnn,D_S_pnn,QNRI_pnn];
    MatrixImage(:,:,:,alg) = I_pnn;
    
    %showImage8LR(I_pnn,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio);
    showImage8_zoomin(I_pnn,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L, location1, location2);
    print('-depsc', strcat(data_name, '_pnn.eps'))
end

%% ====== 7) APNN Method ======  (not true APNN, just a replacement!!)
if ismember('APNN',algorithms)
    % Load the stored APNN result only when the method is selected, so a
    % missing result file cannot abort a run that skips APNN.
    file_apnn = 'apnn_wv3_os_ny';
    load(strcat('2_DL_Result/WV3/APNN/', file_apnn, '.mat')) % expected to define apnn_wv3_os_ny
    I_apnn = val_bit*double(apnn_wv3_os_ny);  % not right answer, just a replacement! (val_bit is set in the PanNet section)
    
    alg = alg + 1;
    %%% Quality indexes computation
    [D_lambda_apnn,D_S_apnn,QNRI_apnn] = indexes_evaluation_FS(I_apnn,I_MS_LR,I_PAN,L,thvalues,I_MS,sensor,ratio,flagQNR);
    MatrixResults(alg,:) = [D_lambda_apnn,D_S_apnn,QNRI_apnn];
    MatrixImage(:,:,:,alg) = I_apnn;
    
    %showImage8LR(I_apnn,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio);
    showImage8_zoomin(I_apnn,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L, location1, location2);
    print('-depsc', strcat(data_name, '_apnn.eps'))
end

%% ====== 8) FusionNet Method ======
if ismember('FusionNet',algorithms)
    % Load the stored FusionNet result only when the method is selected, so a
    % missing result file cannot abort a run that skips FusionNet.
    file_fusionnet = 'fusionnet_wv3_os_ny';
    load(strcat('2_DL_Result/WV3/FusionNet/', file_fusionnet, '.mat')) % expected to define fusionnet_wv3_os_ny
    I_fusionnet = val_bit*double(fusionnet_wv3_os_ny); % val_bit is set in the PanNet section
    
    alg = alg + 1;
    %%% Quality indexes computation
    [D_lambda_fusionnet,D_S_fusionnet,QNRI_fusionnet] = indexes_evaluation_FS(I_fusionnet,I_MS_LR,I_PAN,L,thvalues,I_MS,sensor,ratio,flagQNR);
    MatrixResults(alg,:) = [D_lambda_fusionnet,D_S_fusionnet,QNRI_fusionnet];
    MatrixImage(:,:,:,alg) = I_fusionnet;
    
    %showImage8LR(I_fusionnet,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio);
    showImage8_zoomin(I_fusionnet,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L, location1, location2);
    print('-depsc', strcat(data_name, '_fusionnet.eps'))
end

%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% %%%%%%%%%%% Show and Save Results %%%%%%%%%%%%%%%%%%%%%%%%%%
%% Print in LATEX
% Write MatrixResults to FR_Assessment.tex; only the label of the third
% column depends on flagQNR (QNR when flagQNR is 1, HQNR otherwise).
if flagQNR ~= 1
    matrix2latex(MatrixResults,'FR_Assessment.tex', 'rowLabels',algorithms,'columnLabels',{'DL','DS','HQNR'},'alignment','c','format', '%.4f');
else
    matrix2latex(MatrixResults,'FR_Assessment.tex', 'rowLabels',algorithms,'columnLabels',{'DL','DS','QNR'},'alignment','c','format', '%.4f');
end

%% View All
% Band indexes handed to showImagesAll: [3,2,1] for 4-band data,
% [5,3,2] for everything else.
vect_index_RGB = [5,3,2];
if size(I_MS,3) == 4
    vect_index_RGB = [3,2,1];
end

% One title per entry of "algorithms"
titleImages = algorithms;
figure
showImagesAll(MatrixImage,titleImages,vect_index_RGB,flag_cut_bounds,dim_cut,0);

%% ======Display the final performance =======
% Each row of MatrixResults was filled above as [D_lambda, D_S, QNR index],
% so the header lists those three columns (the previous header named the
% reduced-resolution indexes Q/Q_avg/SAM/ERGAS/SCC, which are not computed here).
fprintf('\n')
disp('#######################################################')
disp(['Display the performance for:'])
disp('#######################################################')
if flagQNR == 1
    disp(' |===D_lambda===|=====D_S=====|=====QNR=====')
else
    disp(' |===D_lambda===|=====D_S=====|=====HQNR=====')
end
MatrixResults

%% %%%%%%%%%%% End %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Demo_Reduced_Resolution.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%For Reduced-Resolution%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%  1) This is a test demo to show all reduced-resolution results of traditional and DL methods
%     Here, we take the WV3 test dataset as an example. Readers can change the corresponding directory
%     and settings to test other/your datasets
%  2) The codes of traditional methods are from the "pansharpening toolbox for distribution",
%     thus please cite the paper:
%     [1] G. Vivone, et al., A new benchmark based on recent advances in multispectral pansharpening: Revisiting
%         pansharpening with classical and emerging pansharpening methods, IEEE Geosci. Remote Sens. Mag., 
%         9(1): 53-81, 2021
%  3) Also, if you use this toolbox, please cite our paper:
%     [2] L.-J. Deng, et al., Machine Learning in Pansharpening: A Benchmark, from Shallow to Deep Networks, 
%         IEEE Geosci. Remote Sens. Mag., 2022

%  LJ Deng (UESTC), 2020-02-27

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Note: the test datasets of reduced-resolution are still too huge to upload to
% GitHub, thus we provide cloud links to readers to download them to
% successfully run this demo, including:

% i) Download link for reduced-resolution WV3-NewYork example (named "NY1_WV3_RR.mat"):
%     http:********   (put into the folder of "1_TestData/Datasets Testing")

% ii) Download link of DL's results for reduced-resolution WV3-NewYork example:
%     http:********   (put into the folder of "2_DL_Result/WV3")

% Once you have the above datasets, you can run this demo successfully and
% then see how it works!

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

clear; close all;
%% =======load directories========
% Tools
addpath([pwd,'/Tools']);

% Select algorithms to run
algorithms = {'GT','EXP','BT-H','BDSD-PC','C-GSA','SR-D',...
    'MTF-GLP-HPM-R','MTF-GLP-FS','TV','PanNet','DRPNN','MSDCNN','BDPN','DiCNN1','PNN','APNN','FusionNet'};
% Directory and file prefix used to save the EPS figures for latex editing;
% for another dataset, change "satellite" and "mat_name" accordingly
satellite = 'WV3';
mat_name = 'NY1_WV3_RR';
data_name = strcat('3_EPS/', satellite, '/', mat_name);  

%% ==========Read Data and sensors' info====================
%% read the test dataset; if you use your own test dataset, put it in this folder
% The file name is derived from "mat_name" so that the loaded dataset, the EPS
% prefix and the DL result files can never refer to different datasets.
file_test = ['1_TestData/Datasets Testing/', mat_name, '.mat'];

% get I_MS_LR, I_MS, I_PAN and sensors' info; 
load(file_test)  

% (Note: If there is no sensor's info in your dataset, 
% please find and update these info in the following commented lines):

%------ following are sensor's info for WV3 (an example for WV3)----
%     sensor = 'WV3';
%     Qblocks_size = 32;
%     bicubic = 0;% Interpolator
%     flag_cut_bounds = 1;% Cut Final Image
%     dim_cut = 21;% Cut Final Image
%     thvalues = 0;% Threshold values out of dynamic range
%     printEPS = 0;% Print Eps
%     ratio = 4;% Resize Factor
%     L = 11;% Radiometric Resolution

%% Initialization of the Matrix of Results
% One row per entry of "algorithms", one column per quality index.
NumIndexes = 5;
alg = 0;  % row counter, incremented by every method section that runs
MatrixResults = zeros(length(algorithms),NumIndexes);

% The two regions handed to the *_zoomin display functions; change them
% according to your requirements
location1                = [50 70 10 30];  %default: data6: [10 50 1 60]; data7:[140 180 5 60]
location2                = [20 38 10 50];  %default: data6: [190 240 5 60]; data7:[190 235 120 150]

% NOTE(review): presumably guards against a loaded variable named "print"
% shadowing the print function used below - confirm
clear print

%% show I_MS_LR, I_GT, PAN Imgs:
% 4-band data goes to showImage4LR, everything else to showImage8LR
if size(I_MS,3) ~= 4
    showImage8LR(I_MS_LR,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio);
else
    showImage4LR(I_MS_LR,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio);
end

% (Note: "showPan" shows the PAN image without region zoom-in;
% "showPan_zoomin" adds the zoom-in regions location1/location2.)

%showPan(I_PAN,printEPS,2,flag_cut_bounds,dim_cut);
showPan_zoomin(I_PAN,printEPS,2,flag_cut_bounds,dim_cut, location1, location2);

% The current figure is written as color EPS next to "data_name" (for latex editing)
print('-depsc', strcat(data_name, '_pan.eps'))

%% ======GT ===================
% GT entry: I_GT is evaluated against itself and stored as this method's result.
if any(strcmp('GT',algorithms))
    alg = alg + 1;
    [Q_avg_GT, SAM_GT, ERGAS_GT, SCC_GT_GT, Q_GT] = indexes_evaluation(I_GT,I_GT,ratio,L,Qblocks_size,flag_cut_bounds,dim_cut,thvalues);
    % Row layout used by every method: [Q, Q_avg, SAM, ERGAS, SCC]
    MatrixResults(alg,:) = [Q_GT,Q_avg_GT,SAM_GT,ERGAS_GT,SCC_GT_GT];
    MatrixImage(:,:,:,alg) = I_GT;
    
    % (Note: "showImage8LR" shows the image without region zoom-in;
    % "showImage8_zoomin" adds the zoom-in regions location1/location2.)
    
    %showImage8LR(I_GT,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio);
    showImage8_zoomin(I_GT,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L, location1, location2);
    print('-depsc', strcat(data_name, '_gt.eps'))
end

%% ======EXP ===================
% EXP entry: I_MS itself is evaluated against I_GT and stored as this method's result.
if any(strcmp('EXP',algorithms))
    alg = alg + 1;
    [Q_avg_EXP, SAM_EXP, ERGAS_EXP, SCC_GT_EXP, Q_EXP] = indexes_evaluation(I_MS,I_GT,ratio,L,Qblocks_size,flag_cut_bounds,dim_cut,thvalues);
    % Row layout: [Q, Q_avg, SAM, ERGAS, SCC]
    MatrixResults(alg,:) = [Q_EXP,Q_avg_EXP,SAM_EXP,ERGAS_EXP,SCC_GT_EXP];
    MatrixImage(:,:,:,alg) = I_MS;
    
    % Zoom-in view of the image, saved as EPS
    %showImage8LR(I_MS,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio);
    showImage8_zoomin(I_MS,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L, location1, location2);
    print('-depsc', strcat(data_name, '_exp.eps'))
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%% CS-based Methods %%%%%%%%%%%%%%%%%%%%%%%%%%
%% ====== 1) BT-H Method ======
if ismember('BT-H',algorithms)
    alg = alg + 1;
    
    % The method is called from inside its own folder. The try/catch restores
    % the demo folder when the method throws; without it a failed run leaves
    % MATLAB inside BT-H and the next run of the demo cannot find its folders.
    cd BT-H
    try
        t2=tic;
        I_BT_H = BroveyRegHazeMin(I_MS,I_PAN,ratio);
        time_BT_H = toc(t2);
        fprintf('Elaboration time BT-H: %.2f [sec]\n',time_BT_H);
    catch ME
        cd ..
        rethrow(ME);
    end
    cd ..
    
    %%% Quality indexes computation (row layout: [Q, Q_avg, SAM, ERGAS, SCC])
    [Q_avg_BT_H, SAM_BT_H, ERGAS_BT_H, SCC_GT_BT_H, Q_BT_H] = indexes_evaluation(I_BT_H,I_GT,ratio,L,Qblocks_size,flag_cut_bounds,dim_cut,thvalues);
    MatrixResults(alg,:) = [Q_BT_H,Q_avg_BT_H,SAM_BT_H,ERGAS_BT_H,SCC_GT_BT_H];
    MatrixImage(:,:,:,alg) = I_BT_H;

    % Show the result with the two zoom-in regions and save it as EPS
    %showImage8LR(I_BT_H,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio);
    showImage8_zoomin(I_BT_H,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L, location1, location2);
    print('-depsc', strcat(data_name, '_bth.eps'))
end

%% ====== 2) BDSD-PC Method ======
if ismember('BDSD-PC',algorithms)
    alg = alg + 1;
    
    % Run from inside the BDSD folder; the try/catch makes sure the demo
    % folder is restored even when BDSD_PC throws an error.
    cd BDSD
    try
        t2=tic;
        I_BDSD_PC = BDSD_PC(I_MS,I_PAN,ratio,sensor);
        time_BDSD_PC = toc(t2);
        fprintf('Elaboration time BDSD-PC: %.2f [sec]\n',time_BDSD_PC);
    catch ME
        cd ..
        rethrow(ME);
    end
    cd ..
    
    %%% Quality indexes computation (row layout: [Q, Q_avg, SAM, ERGAS, SCC])
    [Q_avg_BDSD_PC, SAM_BDSD_PC, ERGAS_BDSD_PC, SCC_GT_BDSD_PC, Q_BDSD_PC] = indexes_evaluation(I_BDSD_PC,I_GT,ratio,L,Qblocks_size,flag_cut_bounds,dim_cut,thvalues);
    
    MatrixResults(alg,:) = [Q_BDSD_PC,Q_avg_BDSD_PC,SAM_BDSD_PC,ERGAS_BDSD_PC,SCC_GT_BDSD_PC];
    MatrixImage(:,:,:,alg) = I_BDSD_PC;
    
    % Show the result with the two zoom-in regions and save it as EPS
    %showImage8LR(I_BDSD_PC,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio);
    showImage8_zoomin(I_BDSD_PC,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L, location1, location2);
    print('-depsc', strcat(data_name, '_bdsd_pc.eps'))
end

%% ====== 3) C-GSA Method ======
if ismember('C-GSA',algorithms)
    alg = alg + 1;
    
    PS_algorithm = 'GSA'; % Pansharpening algorithm
    n_segm = 5; % Number of segments
    
    % Run from inside the GS folder; the try/catch makes sure the demo folder
    % is restored even when one of the GS routines throws an error.
    cd GS
    try
        t2=tic;
        I_C_GSA = GS_Segm(I_MS,I_PAN,gen_LP_image(PS_algorithm,I_MS,I_PAN,I_MS_LR,ratio,sensor), k_means_clustering(I_MS,n_segm));
        time_C_GSA = toc(t2);
        % The message now names the method that was actually timed (C-GSA)
        fprintf('Elaboration time C-GSA: %.2f [sec]\n',time_C_GSA);
    catch ME
        cd ..
        rethrow(ME);
    end
    cd ..
    
    %%% Quality indexes computation (row layout: [Q, Q_avg, SAM, ERGAS, SCC])
    [Q_avg_C_GSA, SAM_C_GSA, ERGAS_C_GSA, SCC_GT_C_GSA, Q_C_GSA] = indexes_evaluation(I_C_GSA,I_GT,ratio,L,Qblocks_size,flag_cut_bounds,dim_cut,thvalues);
    MatrixResults(alg,:) = [Q_C_GSA,Q_avg_C_GSA,SAM_C_GSA,ERGAS_C_GSA,SCC_GT_C_GSA];
    MatrixImage(:,:,:,alg) = I_C_GSA;

    % Show the result with the two zoom-in regions and save it as EPS
    %showImage8LR(I_C_GSA,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio);
    showImage8_zoomin(I_C_GSA,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L, location1, location2);
    print('-depsc', strcat(data_name, '_C_gsa.eps'))
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%% MRA-based Methods %%%%%%%%%%%%%%%%%%%%%%%%%%
%% ====== 1) SR-D Method ======
if ismember('SR-D',algorithms)
    alg = alg + 1;
    
    %%%%%%%%%%%%%%%%%%%%%%%%%% Parameters setting %%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    TS = 7; % Tiling (dimensions of the patches are TS x TS)
    ol = 4; % Overlap (in pixels) between contiguous tile
    n_atoms = 10; % Max number of representation atoms (default value = 10)
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    
    % Run from inside the SR-D folder; the try/catch makes sure the demo
    % folder is restored even when CS throws an error.
    cd SR-D
    try
        t2=tic;
        I_SR_D = CS(I_MS,I_PAN,I_MS_LR,ratio,sensor,TS,ol,n_atoms);
        time_SR_D = toc(t2);
        fprintf('Elaboration time SR_D: %.2f [sec]\n',time_SR_D);
    catch ME
        cd ..
        rethrow(ME);
    end
    cd ..
    
    %%% Quality indexes computation (row layout: [Q, Q_avg, SAM, ERGAS, SCC])
    [Q_avg_SR_D, SAM_SR_D, ERGAS_SR_D, SCC_GT_SR_D, Q_SR_D] = indexes_evaluation(I_SR_D,I_GT,ratio,L,Qblocks_size,flag_cut_bounds,dim_cut,thvalues);
    MatrixResults(alg,:) = [Q_SR_D,Q_avg_SR_D,SAM_SR_D,ERGAS_SR_D,SCC_GT_SR_D];
    MatrixImage(:,:,:,alg) = I_SR_D;

    % Show the result with the two zoom-in regions and save it as EPS
    %showImage8LR(I_SR_D,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio);
    showImage8_zoomin(I_SR_D,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L, location1, location2);
    print('-depsc', strcat(data_name, '_SR_D.eps'))
end


%% ====== 2) MTF-GLP-HPM-R Method ======
if ismember('MTF-GLP-HPM-R',algorithms)
    alg = alg + 1;
    
    % Run from inside the GLP folder; the try/catch makes sure the demo
    % folder is restored even when MTF_GLP_HPM_R throws an error.
    cd GLP
    try
        t2=tic;
        I_MTF_GLP_HPM_R = MTF_GLP_HPM_R(I_MS,I_PAN,sensor,ratio);
        time_MTF_GLP_HPM_R = toc(t2);
        fprintf('Elaboration time MTF-GLP-HPM-R: %.2f [sec]\n',time_MTF_GLP_HPM_R);
    catch ME
        cd ..
        rethrow(ME);
    end
    cd ..
    
    %%% Quality indexes computation (row layout: [Q, Q_avg, SAM, ERGAS, SCC])
    [Q_avg_MTF_GLP_HPM_R, SAM_MTF_GLP_HPM_R, ERGAS_MTF_GLP_HPM_R, SCC_GT_MTF_GLP_HPM_R, Q_MTF_GLP_HPM_R] = indexes_evaluation(I_MTF_GLP_HPM_R,I_GT,ratio,L,Qblocks_size,flag_cut_bounds,dim_cut,thvalues);
    MatrixResults(alg,:) = [Q_MTF_GLP_HPM_R,Q_avg_MTF_GLP_HPM_R,SAM_MTF_GLP_HPM_R,ERGAS_MTF_GLP_HPM_R,SCC_GT_MTF_GLP_HPM_R];
    MatrixImage(:,:,:,alg) = I_MTF_GLP_HPM_R;
    
    % Show the result with the two zoom-in regions and save it as EPS
    %showImage8LR(I_MTF_GLP_HPM_R,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio);
    showImage8_zoomin(I_MTF_GLP_HPM_R,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L, location1, location2);
    print('-depsc', strcat(data_name, '_mtfglp_hpm_r.eps'))
end

%% ====== 3) MTF-GLP-FS Method ======
if ismember('MTF-GLP-FS',algorithms)
    alg = alg + 1;
    
    % Run from inside the GLP folder; the try/catch makes sure the demo
    % folder is restored even when MTF_GLP_FS throws an error.
    cd GLP
    try
        t2=tic;
        I_MTF_GLP_FS = MTF_GLP_FS(I_MS,I_PAN,sensor,ratio);
        time_MTF_GLP_FS = toc(t2);
        fprintf('Elaboration time MTF-GLP-FS: %.2f [sec]\n',time_MTF_GLP_FS);
    catch ME
        cd ..
        rethrow(ME);
    end
    cd ..
    
    %%% Quality indexes computation (row layout: [Q, Q_avg, SAM, ERGAS, SCC])
    [Q_avg_MTF_GLP_FS, SAM_MTF_GLP_FS, ERGAS_MTF_GLP_FS, SCC_GT_MTF_GLP_FS, Q_MTF_GLP_FS] = indexes_evaluation(I_MTF_GLP_FS,I_GT,ratio,L,Qblocks_size,flag_cut_bounds,dim_cut,thvalues);
    MatrixResults(alg,:) = [Q_MTF_GLP_FS,Q_avg_MTF_GLP_FS,SAM_MTF_GLP_FS,ERGAS_MTF_GLP_FS,SCC_GT_MTF_GLP_FS];
    MatrixImage(:,:,:,alg) = I_MTF_GLP_FS;
 
    % Show the result with the two zoom-in regions and save it as EPS
    %showImage8LR(I_MTF_GLP_FS,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio);
    showImage8_zoomin(I_MTF_GLP_FS,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L, location1, location2);
    print('-depsc', strcat(data_name, '_mtfglpfs.eps'))
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%% VO-based Methods %%%%%%%%%%%%%%%%%%%%%%%%%%
%% ====== 1) TV Method ======
if ismember('TV',algorithms)
    alg = alg + 1;
    
    %%%%%%%%%%%%%%%%%%%%%%%%%% Parameters setting %%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % Sensor-specific parameters passed to TV_pansharpen below.
    switch sensor
        case 'IKONOS'
            w=[0.1091    0.2127    0.2928    0.3854];
            c = 8;
            alpha=1.064;
            maxiter=10;
            lambda = 0.47106;
        case {'GeoEye1','WV4'}
            w=[0.1552, 0.3959, 0.2902, 0.1587];
            c = 8;
            alpha=0.75;
            maxiter=50;
            lambda = 157.8954;
        case 'WV3'
            w=[0.0657    0.1012    0.1537    0.1473    0.1245    0.1545    0.1338    0.1192];
            c = 8;
            alpha=0.75;
            maxiter=50;
            lambda = 1.0000e-03;
        otherwise
            % Without this branch an unlisted sensor leaves w/c/alpha/maxiter/lambda
            % unset and the call below fails with an unrelated-looking error.
            error('Demo:TV:unsupportedSensor', ...
                'No TV parameters are defined for sensor "%s" (supported: IKONOS, GeoEye1, WV4, WV3).', sensor);
    end
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    
    % Run from inside the TV folder; the try/catch makes sure the demo folder
    % is restored even when TV_pansharpen throws an error.
    cd TV
    try
        t2 = tic;
        I_TV = TV_pansharpen(I_MS_LR,I_PAN,alpha,lambda,c,maxiter,w);
        time_TV = toc(t2);
        fprintf('Elaboration time TV: %.2f [sec]\n',time_TV);
    catch ME
        cd ..
        rethrow(ME);
    end
    cd ..
    
    %%% Quality indexes computation (row layout: [Q, Q_avg, SAM, ERGAS, SCC])
    [Q_avg_TV, SAM_TV, ERGAS_TV, SCC_GT_TV, Q_TV] = indexes_evaluation(I_TV,I_GT,ratio,L,Qblocks_size,flag_cut_bounds,dim_cut,thvalues);
    MatrixResults(alg,:) = [Q_TV,Q_avg_TV,SAM_TV,ERGAS_TV,SCC_GT_TV];
    MatrixImage(:,:,:,alg) = I_TV;

    % Show the result with the two zoom-in regions and save it as EPS
    %showImage8LR(I_TV,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio);
    showImage8_zoomin(I_TV,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L, location1, location2);
    print('-depsc', strcat(data_name, '_tv.eps'))
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%% DL-based Methods %%%%%%%%%%%%%%%%%%%%%%%%%%
%% ====== 1) PanNet Method ======
% If you use another sensor's data, please update the following directory and
% DL result. Note that the DL results here are obtained from our "01-DL toolbox (Pytorch)" folder, please check it.
% The same applies to the other DL methods below.

% (Note: val_bit = 2047 for 11-bit WV3, WV4 and QB data; val_bit = 1023 for 10-bit GF2 data)
val_bit  = 2047;

if any(strcmp('PanNet',algorithms))
%     file_pannet = 'output';
    % The stored result is read from the variable "sr" of the loaded file
    load(['2_DL_Result/', satellite, '/PanNet/output_', mat_name, '.mat']) 
    I_pannet = double(sr);
    alg = alg + 1;
    % Row layout: [Q, Q_avg, SAM, ERGAS, SCC]
    [Q_avg_pannet, SAM_pannet, ERGAS_pannet, SCC_pannet, Q_pannet] = indexes_evaluation(I_pannet,I_GT,ratio,L,Qblocks_size,flag_cut_bounds,dim_cut,thvalues);
    MatrixResults(alg,:) = [Q_pannet,Q_avg_pannet,SAM_pannet,ERGAS_pannet,SCC_pannet];
    MatrixImage(:,:,:,alg) = I_pannet;
    
    %showImage8LR(I_pannet,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio);
    showImage8_zoomin(I_pannet,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L, location1, location2);
    print('-depsc', strcat(data_name, '_pannet.eps'))
end

%% ====== 2) DRPNN Method ======
if any(strcmp('DRPNN',algorithms))
%     file_drpnn = 'drpnn_wv3_rs_ny';
    % The stored result is read from the variable "sr" of the loaded file
    load(['2_DL_Result/', satellite, '/DRPNN/output_', mat_name, '.mat']) 
    I_drpnn = double(sr);
    alg = alg + 1;
    % Row layout: [Q, Q_avg, SAM, ERGAS, SCC]
    [Q_avg_drpnn, SAM_drpnn, ERGAS_drpnn, SCC_drpnn, Q_drpnn] = indexes_evaluation(I_drpnn,I_GT,ratio,L,Qblocks_size,flag_cut_bounds,dim_cut,thvalues);
    MatrixResults(alg,:) = [Q_drpnn,Q_avg_drpnn,SAM_drpnn,ERGAS_drpnn,SCC_drpnn];
    MatrixImage(:,:,:,alg) = I_drpnn;
    
    %showImage8LR(I_drpnn,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio);
    showImage8_zoomin(I_drpnn,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L, location1, location2);
    print('-depsc', strcat(data_name, '_drpnn.eps'))
end

%% ====== 3) MSDCNN Method ======

if any(strcmp('MSDCNN',algorithms))
%     file_msdcnn = 'msdcnn_wv3_rs_ny';
    % The stored result is read from the variable "sr" of the loaded file
    load(['2_DL_Result/', satellite, '/MSDCNN/output_', mat_name, '.mat']) 
    I_msdcnn = double(sr);
    alg = alg + 1;
    % Row layout: [Q, Q_avg, SAM, ERGAS, SCC]
    [Q_avg_msdcnn, SAM_msdcnn, ERGAS_msdcnn, SCC_msdcnn, Q_msdcnn] = indexes_evaluation(I_msdcnn,I_GT,ratio,L,Qblocks_size,flag_cut_bounds,dim_cut,thvalues);
    MatrixResults(alg,:) = [Q_msdcnn,Q_avg_msdcnn,SAM_msdcnn,ERGAS_msdcnn,SCC_msdcnn];
    MatrixImage(:,:,:,alg) = I_msdcnn;
    
    %showImage8LR(I_msdcnn,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio);
    showImage8_zoomin(I_msdcnn,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L, location1, location2);
    print('-depsc', strcat(data_name, '_msdcnn.eps'))
end

%% ====== 4) BDPN Method ======
if any(strcmp('BDPN',algorithms))
%     file_bdpn  = 'bdpn_wv3_rs_ny';
    % The stored result is read from the variable "sr" of the loaded file
    load(['2_DL_Result/', satellite, '/BDPN/output_', mat_name, '.mat']) 
    I_bdpn  = double(sr);
    alg = alg + 1;
    % Row layout: [Q, Q_avg, SAM, ERGAS, SCC]
    [Q_avg_bdpn, SAM_bdpn, ERGAS_bdpn, SCC_bdpn, Q_bdpn] = indexes_evaluation(I_bdpn,I_GT,ratio,L,Qblocks_size,flag_cut_bounds,dim_cut,thvalues);
    MatrixResults(alg,:) = [Q_bdpn,Q_avg_bdpn,SAM_bdpn,ERGAS_bdpn,SCC_bdpn];
    MatrixImage(:,:,:,alg) = I_bdpn;
    
    %showImage8LR(I_bdpn,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio);
    showImage8_zoomin(I_bdpn,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L, location1, location2);
    print('-depsc', strcat(data_name, '_bdpn.eps'))
end

%% ====== 5) DiCNN Method ======
% Selected with the label 'DiCNN1'; the result is read from the DiCNN1 folder.

if any(strcmp('DiCNN1',algorithms))
%     file_dicnn = 'dicnn_wv3_rs_ny';
    % The stored result is read from the variable "sr" of the loaded file
    load(['2_DL_Result/', satellite, '/DiCNN1/output_', mat_name, '.mat']) 
    I_dicnn = double(sr);
    alg = alg + 1;
    % Row layout: [Q, Q_avg, SAM, ERGAS, SCC]
    [Q_avg_dicnn, SAM_dicnn, ERGAS_dicnn, SCC_dicnn, Q_dicnn] = indexes_evaluation(I_dicnn,I_GT,ratio,L,Qblocks_size,flag_cut_bounds,dim_cut,thvalues);
    MatrixResults(alg,:) = [Q_dicnn,Q_avg_dicnn,SAM_dicnn,ERGAS_dicnn,SCC_dicnn];
    MatrixImage(:,:,:,alg) = I_dicnn;
    
    %showImage8LR(I_dicnn,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio);
    showImage8_zoomin(I_dicnn,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L, location1, location2);
    print('-depsc', strcat(data_name, '_dicnn.eps'))
end

%% ====== 6) PNN Method ======
if any(strcmp('PNN',algorithms))
%     file_pnn = 'pnn_wv3_rs_ny';
    % The stored result is read from the variable "sr" of the loaded file
    load(['2_DL_Result/', satellite, '/PNN/output_', mat_name, '.mat']) 
    I_pnn = double(sr);
    alg = alg + 1;
    % Row layout: [Q, Q_avg, SAM, ERGAS, SCC]
    [Q_avg_pnn, SAM_pnn, ERGAS_pnn, SCC_pnn, Q_pnn] = indexes_evaluation(I_pnn,I_GT,ratio,L,Qblocks_size,flag_cut_bounds,dim_cut,thvalues);
    MatrixResults(alg,:) = [Q_pnn,Q_avg_pnn,SAM_pnn,ERGAS_pnn,SCC_pnn];
    MatrixImage(:,:,:,alg) = I_pnn;
    
    %showImage8LR(I_pnn,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio);
    showImage8_zoomin(I_pnn,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L, location1, location2);
    print('-depsc', strcat(data_name, '_pnn.eps'))
end

%% ====== 7) APNN Method ======
if ismember('APNN',algorithms)
    % Evaluate the stored APNN result only when it is listed in `algorithms`.
    % The MAT-file is expected to provide the variable `sr` that is read below.
%     file_apnn = 'apnn_wv3_rs_ny';
    load(strcat('2_DL_Result/', satellite, '/APNN/', 'output_', mat_name, '.mat')) 
    I_apnn = double(sr);
    % Advance to the next row of MatrixResults / next slice of MatrixImage.
    alg = alg + 1;
    [Q_avg_apnn, SAM_apnn, ERGAS_apnn, SCC_apnn, Q_apnn] = indexes_evaluation(I_apnn,I_GT,ratio,L,Qblocks_size,flag_cut_bounds,dim_cut,thvalues);
    % Stored column order is [Q, Q_avg, SAM, ERGAS, SCC] (differs from the return order above).
    MatrixResults(alg,:) = [Q_apnn,Q_avg_apnn,SAM_apnn,ERGAS_apnn,SCC_apnn];
    MatrixImage(:,:,:,alg) = I_apnn;
    
    %showImage8LR(I_apnn,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio);
    showImage8_zoomin(I_apnn,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L, location1, location2);
    % Save the current figure as a color EPS file named <data_name>_apnn.eps.
    print('-depsc', strcat(data_name, '_apnn.eps'))
end

%% ====== 8) FusionNet Method ======

if ismember('FusionNet',algorithms)
    % Evaluate the stored FusionNet result only when it is listed in `algorithms`.
    % The MAT-file is expected to provide the variable `sr` that is read below.
%     file_fusionnet = 'fusionnet_wv3_rs_ny';
%     load(strcat('2_DL_Result/', satellite ,'/FusionNet/', 'output_',mat_name, '.mat'));
    % NOTE(review): unlike the DiCNN1/PNN/APNN sections, the file name below is
    % hard-coded ('fusionnet_wv3_rs_ny') instead of being built from mat_name,
    % so the same file is loaded regardless of the selected sample - confirm
    % this is intended and not a debugging leftover.
    load(strcat('2_DL_Result/', satellite ,'/FusionNet/', 'fusionnet_wv3_rs_ny', '.mat'));
    % NOTE(review): only this section rescales `sr` by val_bit; presumably the
    % stored output is normalized - verify against the file contents.
    I_fusionnet = val_bit * double(sr);
    % Advance to the next row of MatrixResults / next slice of MatrixImage.
    alg = alg + 1;
    [Q_avg_fusionnet, SAM_fusionnet, ERGAS_fusionnet, SCC_fusionnet, Q_fusionnet] = indexes_evaluation(I_fusionnet,I_GT,ratio,L,Qblocks_size,flag_cut_bounds,dim_cut,thvalues);
    % Stored column order is [Q, Q_avg, SAM, ERGAS, SCC] (differs from the return order above).
    MatrixResults(alg,:) = [Q_fusionnet,Q_avg_fusionnet,SAM_fusionnet,ERGAS_fusionnet,SCC_fusionnet];
    MatrixImage(:,:,:,alg) = I_fusionnet;
    
    %showImage8LR(I_fusionnet,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L,ratio);
    showImage8_zoomin(I_fusionnet,printEPS,1,flag_cut_bounds,dim_cut,thvalues,L, location1, location2);
    % Save the current figure as a color EPS file named <data_name>_fusionnet.eps.
    print('-depsc', strcat(data_name, '_fusionnet.eps'))
end

%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% %%%%%%%%%%% Show and Save Results %%%%%%%%%%%%%%%%%%%%%%%%%%
%% Print in LATEX
% Export columns 1, 3 and 4 of MatrixResults (Q, SAM, ERGAS in the stored
% column order) to 'RR_Assessment.tex', one row per entry of `algorithms`.
matrix2latex(MatrixResults(:,[1,3,4]),'RR_Assessment.tex', 'rowLabels',algorithms,'columnLabels',[{'Q2n'},{'SAM'},{'ERGAS'}],'alignment','c','format', '%.4f');

%% View All
% Pick the three band indices passed to showImagesAll: [3,2,1] for 4-band
% data, [5,3,2] otherwise.
if size(I_GT,3) == 4
    vect_index_RGB = [3,2,1];
else
    vect_index_RGB = [5,3,2];
end

titleImages = algorithms;
figure, showImagesAll(MatrixImage,titleImages,vect_index_RGB,flag_cut_bounds,dim_cut,0);

%% ======Display the final average performance =======
fprintf('\n')
disp('#######################################################')
disp(['Display the performance for:'])
disp('#######################################################')
disp(' |====Q====|===Q_avg===|=====SAM=====|======ERGAS=======|=======SCC=======')
% No trailing semicolon on purpose: this echoes the whole table to the command window.
MatrixResults

%% %%%%%%%%%%% End %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/FE-HPM/FE.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description: 
%           FE estimates the detail extraction filter via deconvolution.  
% 
% Interface:
%           PSF_l = FE(I_MS,I_PAN,ratio,tap,lambda,mu,th,num_iter,filtername)
%
% Inputs:
%           I_MS:               MS image upsampled at PAN scale;
%           I_PAN:              PAN image;
%           ratio:              Scale ratio between MS and PAN. Pre-condition: Integer value.
%           tap:                Filter support;
%           lambda:             Coefficient for weighting the energy regularization term;
%           mu:                 Coefficient for weighting the derivative regularization terms;
%           th:                 Threshold on the kernel (it cuts to 0 values below threshold);
%           num_iter:           Max number of iteration (at least 3; not sensitive);    
%           filtername:         Kind of derivative (default: 'Basic')       
%
% Output:
%           PSF_l:              Estimated point spread function.
% 
% Reference:
%           [Vivone15]      G. Vivone, M. Simoes, M. Dalla Mura, R. Restaino, J. Bioucas-Dias, G. A. Licciardi, and J. Chanussot, "Pansharpening based on semiblind deconvolution", 
%                           IEEE Transactions on Geoscience and Remote Sensing, vol. 53, no. 4, pp. 1997-2010, 2015.
%           [Vivone20]      G. Vivone, M. Dalla Mura, A. Garzelli, R. Restaino, G. Scarpa, M.O. Ulfarsson, L. Alparone, and J. Chanussot, "A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting pansharpening with classical and emerging pansharpening methods", 
%                           IEEE Geoscience and Remote Sensing Magazine, doi: 10.1109/MGRS.2020.3019315.
% % % % % % % % % % % % % 
% 
% Version: 1
% 
% % % % % % % % % % % % % 
% 
% Copyright (C) 2019
% 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function PSF_l = FE(I_MS,I_PAN,ratio,tap,lambda,mu,th,num_iter,filtername)
% FE  Estimate the detail-extraction filter (PSF) by regularized deconvolution.
%
%   PSF_l = FE(I_MS,I_PAN,ratio,tap,lambda,mu,th,num_iter)
%   PSF_l = FE(I_MS,I_PAN,ratio,tap,lambda,mu,th,num_iter,filtername)
%
%   filtername selects the derivative kernels used in the regularizer
%   ('Naive2', 'Naive3', 'Basic', 'Prewitt' or 'Sobel'). It is optional:
%   when omitted or empty, 'Basic' is used. Any unrecognized name also
%   falls back to the 'Basic' kernels.
%
%   The returned PSF_l is a tap-by-tap kernel normalized to unit sum.

%%% Honor the documented default for the derivative kernel
if nargin < 9 || isempty(filtername)
    filtername = 'Basic';
end

%%% Without at least one iteration PSF_l would never be assigned
if isempty(num_iter) || num_iter(1) < 1
    error('FE:invalidNumIter','num_iter must be at least 1.');
end

%%% Half support; odd supports keep one extra sample when cropping
if rem(tap,2) == 0
    sum_tap = 0;
else
    sum_tap = 1;
end
halfTap = floor(tap/2);

[R_SIZE,C_SIZE] = size(I_PAN);

%%% Vertical (gv) and horizontal (gh) derivative kernels
switch filtername
    case 'Naive2'
        gv = [-1; 1];
        gh = [-1 1];
    case 'Naive3'
        gv = [-1; 0; 1];
        gh = [-1 0 1];
    case 'Prewitt'
        gv = [-1 -1 -1; 0 0 0; 1 1 1];
        gh = [-1 0 1; -1 0 1; -1 0 1];
    case 'Sobel'
        gv = [-1 -2 -1; 0 0 0; 1 2 1];
        gh = [-1 0 1; -2 0 2; -1 0 1];
    otherwise
        % 'Basic' and every unrecognized name
        gv = [-1 -1; 1 1];
        gh = [-1 1; -1 1];
end

%%% Squared magnitude of the derivative filters in the frequency domain
gvf = fft2(gv,R_SIZE,C_SIZE);
ghf = fft2(gh,R_SIZE,C_SIZE);
gf2sum = conj(gvf) .* gvf + conj(ghf) .* ghf;

H_E = double(I_PAN);

%%% Loop invariants: MS stack augmented with a constant band (bias term of
%%% the linear combination) and the averaging kernel used by edgetaper
nRows = size(I_MS,1);
nCols = size(I_MS,2);
MS_aug = cat(3,I_MS,ones(nRows,nCols));
taperPSF = ones(halfTap,halfTap)./((halfTap)^2);

for jj = 1 : num_iter
    
    %%% Filter PAN to estimate alpha set (initial guess on the first pass,
    %%% the previously estimated PSF afterwards)
    if jj == 1
        PAN_LP = LPfilter(H_E,ratio);
    else
        PAN_LP = imfilter(H_E,PSF_l,'replicate');
    end
    
    %%% Estimate alpha
    alpha(1,1,:) = estimation_alpha(MS_aug,PAN_LP,'global');
    
    %%% Equivalent low-resolution PAN as weighted sum of the augmented MS bands
    It_E = sum(MS_aug .* repmat(alpha,[nRows nCols 1]),3); 

    %%% Edge taper (H_E is tapered again at every iteration, as before)
    H_E = edgetaper(H_E,taperPSF);
    It_E = edgetaper(It_E,taperPSF);

    %%% Filter Estimation:
    %%% PSF = IFFT( conj(F_H) .* F_It ./ (|F_H|^2 + lambda + mu*(|Gv|^2 + |Gh|^2)) )
    F_H = fft2(H_E);
    PSF = real(fftshift(ifft2(conj(F_H) .* fft2(It_E) ./ (abs(F_H).^2 + lambda + mu * gf2sum))));
    
    %%% Thresholding
    PSF(PSF < th) = 0;
    
    %%% Cut using the support dimension, centered on the kernel peak
    [~, maxIndex] = max(PSF(:));
    [rm, cm] = ind2sub(size(PSF), maxIndex);
    PSF_l = PSF(rm - halfTap : rm + halfTap - 1 + sum_tap, cm - halfTap : cm + halfTap - 1 + sum_tap);
    PSF_l = PSF_l ./ sum(PSF_l(:));
end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/FE-HPM/FE_HPM.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description: 
%           FE_HPM fuses the upsampled MultiSpectral (MS) and PANchromatic (PAN) images by 
%           exploiting the high pass modulation injection model and the estimated filter via deconvolution. 
% 
% Interface:
%           [I_Fus,PSF_l] = FE_HPM(I_MS,I_PAN,ratio,tap,lambda,mu,th,num_iter,filtername)
%
% Inputs:
%           I_MS:               MS image upsampled at PAN scale;
%           I_PAN:              PAN image;
%           ratio:              Scale ratio between MS and PAN. Pre-condition: Integer value.
%           tap:                Filter support;
%           lambda:             Coefficient for weighting the energy regularization term;
%           mu:                 Coefficient for weighting the derivative regularization terms;
%           th:                 Threshold on the kernel (it cuts to 0 values below threshold);
%           num_iter:           Max number of iteration (at least 3; not sensitive);    
%           filtername:         Kind of derivative (default: 'Basic')       
%
% Outputs:
%           I_Fus:              Pansharpened image;
%           PSF_l:              Estimated point spread function.
% 
% Reference:
%           [Vivone15]      G. Vivone, M. Simoes, M. Dalla Mura, R. Restaino, J. Bioucas-Dias, G. A. Licciardi, and J. Chanussot, "Pansharpening based on semiblind deconvolution", 
%                           IEEE Transactions on Geoscience and Remote Sensing, vol. 53, no. 4, pp. 1997-2010, 2015.
%           [Vivone20]      G. Vivone, M. Dalla Mura, A. Garzelli, R. Restaino, G. Scarpa, M.O. Ulfarsson, L. Alparone, and J. Chanussot, "A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting pansharpening with classical and emerging pansharpening methods", 
%                           IEEE Geoscience and Remote Sensing Magazine, doi: 10.1109/MGRS.2020.3019315.
% % % % % % % % % % % % % 
% 
% Version: 1
% 
% % % % % % % % % % % % % 
% 
% Copyright (C) 2019
% 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [I_Fus,PSF_l] = FE_HPM(I_MS,I_PAN,ratio,tap,lambda,mu,th,num_iter,filtername)
% FE_HPM  High-pass-modulation fusion driven by the filter estimated in FE.
%   Returns the fused image and the PSF estimated by FE.

I_MS = double(I_MS);
numCh = size(I_MS,3);

%%% One PAN copy per MS band, matched to that band's mean and std
panEq = repmat(double(I_PAN),[1 1 numCh]);
for b = 1 : numCh
    panBand = panEq(:,:,b);
    msBand = I_MS(:,:,b);
    panEq(:,:,b) = (panBand - mean2(panBand)).*(std2(msBand)./std2(panBand)) + mean2(msBand);
end

%%% The PSF is estimated from the original (non-equalized) PAN
PSF_l = FE(I_MS,I_PAN,ratio,tap,lambda,mu,th,num_iter,filtername);

%%% Low-pass PAN: filter with the estimated PSF, decimate, interpolate back
panLow = zeros(size(panEq));
for b = 1 : numCh
    panLow(:,:,b) = imfilter(panEq(:,:,b),PSF_l,'replicate');
    decimated = imresize(panLow(:,:,b),1/ratio,'nearest');
    panLow(:,:,b) = interp23tap(decimated,ratio);
end

%%% Multiplicative injection; eps guards against division by zero
I_Fus = I_MS .* (panEq ./ (panLow + eps));

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/FR_Assessment.tex
================================================


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/GLP/GS2_GLP.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description: 
%           GS2_GLP fuses the upsampled MultiSpectral (MS) and PANchromatic (PAN) images by 
%           exploiting the Gram-Schmidt (GS) mode 2 algorithm with Generalized Laplacian Pyramid (GLP) decomposition.
% 
% Interface:
%           I_Fus_GS2_GLP = GS2_GLP(I_MS,I_PAN,ratio,sensor)
%
% Inputs:
%           I_MS:           MS image upsampled at PAN scale;
%           I_PAN:          PAN image;
%           ratio:          Scale ratio between MS and PAN. Pre-condition: Integer value;
%           sensor:         String for type of sensor (e.g. 'WV2','IKONOS').
%
% Outputs:
%           I_Fus_GS2_GLP:  GS2_GLP pansharpened image.
% 
% References:
%           [Aiazzi06]      B. Aiazzi, L. Alparone, S. Baronti, A. Garzelli, and M. Selva, MTF-tailored multiscale fusion of high-resolution MS and Pan imagery,
%                           Photogrammetric Engineering and Remote Sensing, vol. 72, no. 5, pp. 591-596, May 2006.
%           [Alparone07]    L. Alparone, L. Wald, J. Chanussot, C. Thomas, P. Gamba, and L. M. Bruce, Comparison of pansharpening algorithms: Outcome
%                           of the 2006 GRS-S Data Fusion Contest, IEEE Transactions on Geoscience and Remote Sensing, vol. 45, no. 10, pp. 3012-3021,
%                           October 2007.
%           [Vivone15]      G. Vivone, L. Alparone, J. Chanussot, M. Dalla Mura, A. Garzelli, G. Licciardi, R. Restaino, and L. Wald, A Critical Comparison Among Pansharpening Algorithms, 
%                           IEEE Transactions on Geoscience and Remote Sensing, vol. 53, no. 5, pp. 2565-2586, May 2015.
%           [Vivone20]      G. Vivone, M. Dalla Mura, A. Garzelli, R. Restaino, G. Scarpa, M.O. Ulfarsson, L. Alparone, and J. Chanussot, "A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting pansharpening with classical and emerging pansharpening methods", 
%                           IEEE Geoscience and Remote Sensing Magazine, doi: 10.1109/MGRS.2020.3019315.
% % % % % % % % % % % % % 
% 
% Version: 1
% 
% % % % % % % % % % % % % 
% 
% Copyright (C) 2019
% 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function I_Fus_GS2_GLP = GS2_GLP(I_MS,I_PAN,ratio,sensor)
% GS2_GLP  Additive detail injection with per-band regression gains.
%   Details come from PAN minus its MTF-filtered, decimated and
%   re-interpolated version; each band's gain is cov(low-pass PAN, MS band)
%   divided by var(low-pass PAN).

msImg = double(I_MS);
numCh = size(I_MS,3);

%%% One PAN copy per MS band
panStack = repmat(double(I_PAN),[1 1 size(msImg,3)]);

%%% Per-band MTF kernels
mtfKernels = genMTF(ratio, sensor, numCh);

%%% Low-pass PAN: MTF filtering, decimation, interpolation back
panLow = zeros(size(I_MS));
for b = 1 : numCh
    panLow(:,:,b) = imfilter(panStack(:,:,b),real(mtfKernels(:,:,b)),'replicate');
    decimated = imresize(panLow(:,:,b),1/ratio,'nearest');
    panLow(:,:,b) = interp23tap(decimated,ratio);
end

%%% Injection gains
gains = ones(1,numCh);
for b = 1 : numCh
    msBand = msImg(:,:,b);
    lowBand = panLow(:,:,b);
    covMat = cov(lowBand(:),msBand(:));
    gains(b) = covMat(1,2)/var(lowBand(:));
end

%%% Spatial details
details = panStack - panLow;

%%% Fusion
I_Fus_GS2_GLP = zeros(size(msImg));
for b = 1 : size(msImg,3)
    I_Fus_GS2_GLP(:,:,b) = msImg(:,:,b) + details(:,:,b) .* gains(b);
end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/GLP/MTF_GLP.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description: 
%           MTF_GLP fuses the upsampled MultiSpectral (MS) and PANchromatic (PAN) images by 
%           exploiting the Modulation Transfer Function - Generalized Laplacian Pyramid (MTF-GLP) algorithm. 
% 
% Interface:
%           I_Fus_MTF_GLP = MTF_GLP(I_MS,I_PAN,sensor,ratio)
%
% Inputs:
%           I_MS:               MS image upsampled at PAN scale;
%           I_PAN:              PAN image;
%           sensor:             String for type of sensor (e.g. 'WV2','IKONOS');
%           ratio:              Scale ratio between MS and PAN. Pre-condition: Integer value.
%
% Outputs:
%           I_Fus_MTF_GLP:      MTF_GLP pansharpened image.
% 
% References:
%           [Aiazzi02]          B. Aiazzi, L. Alparone, S. Baronti, and A. Garzelli, Context-driven fusion of high spatial and spectral resolution images based on
%                               oversampled multiresolution analysis, IEEE Transactions on Geoscience and Remote Sensing, vol. 40, no. 10, pp. 2300-2312, October
%                               2002.
%           [Aiazzi06]          B. Aiazzi, L. Alparone, S. Baronti, A. Garzelli, and M. Selva, MTF-tailored multiscale fusion of high-resolution MS and Pan imagery,
%                               Photogrammetric Engineering and Remote Sensing, vol. 72, no. 5, pp. 591-596, May 2006.
%           [Vivone14a]         G. Vivone, R. Restaino, M. Dalla Mura, G. Licciardi, and J. Chanussot, Contrast and error-based fusion schemes for multispectral
%                               image pansharpening, IEEE Geoscience and Remote Sensing Letters, vol. 11, no. 5, pp. 930-934, May 2014.
%           [Vivone15a]         G. Vivone, L. Alparone, J. Chanussot, M. Dalla Mura, A. Garzelli, G. Licciardi, R. Restaino, and L. Wald, A Critical Comparison Among Pansharpening Algorithms, 
%                               IEEE Transactions on Geoscience and Remote Sensing, vol. 53, no. 5, pp. 2565-2586, May 2015.
%           [Alparone17]        L. Alparone, A. Garzelli, and G. Vivone, "Intersensor statistical matching for pansharpening: Theoretical issues and practical solutions",
%                               IEEE Transactions on Geoscience and Remote Sensing, vol. 55, no. 8, pp. 4682-4695, 2017.
%           [Vivone20]          G. Vivone, M. Dalla Mura, A. Garzelli, R. Restaino, G. Scarpa, M.O. Ulfarsson, L. Alparone, and J. Chanussot, "A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting pansharpening with classical and emerging pansharpening methods", 
%                               IEEE Geoscience and Remote Sensing Magazine, doi: 10.1109/MGRS.2020.3019315.
% % % % % % % % % % % % % 
% 
% Version: 2
% 
% % % % % % % % % % % % % 
% 
% Copyright (C) 2019
% 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function I_Fus_MTF_GLP = MTF_GLP(I_MS,I_PAN,sensor,ratio)
% MTF_GLP  Additive fusion: MS plus (equalized PAN minus its low-pass version).

I_MS = double(I_MS);
numCh = size(I_MS,3);

%%% Equalization: one PAN copy per band; the scale factor uses the std of
%%% the low-pass filtered PAN (LPfilterGauss), the offset the MS band mean
panEq = repmat(double(I_PAN),[1 1 numCh]);
for b = 1 : numCh
    panBand = panEq(:,:,b);
    msBand = I_MS(:,:,b);
    panEq(:,:,b) = (panBand - mean2(panBand)).*(std2(msBand)./std2(LPfilterGauss(panBand,ratio))) + mean2(msBand);
end

%%% Per-band MTF kernels
mtfKernels = genMTF(ratio, sensor, numCh);

%%% Low-pass PAN: MTF filtering, decimation, interpolation back
panLow = zeros(size(I_MS));
for b = 1 : numCh
    panLow(:,:,b) = imfilter(panEq(:,:,b),real(mtfKernels(:,:,b)),'replicate');
    decimated = imresize(panLow(:,:,b),1/ratio,'nearest');
    panLow(:,:,b) = interp23tap(decimated,ratio);
end

%%% Unit-gain detail injection
I_Fus_MTF_GLP = I_MS + panEq - panLow;

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/GLP/MTF_GLP_FS.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description: 
%           MTF_GLP_FS fuses the upsampled MultiSpectral (MS) and PANchromatic (PAN) images by 
%           exploiting the Modulation Transfer Function - Generalized Laplacian Pyramid (MTF-GLP) and a new Full Resolution Regression-based injection model. 
% 
% Interface:
%           I_Fus_MTF_GLP_FS = MTF_GLP_FS(I_MS,I_PAN,sensor,ratio)
%
% Inputs:
%           I_MS:               MS image upsampled at PAN scale;
%           I_PAN:              PAN image;
%           sensor:             String for type of sensor (e.g. 'WV2','IKONOS');
%           ratio:              Scale ratio between MS and PAN. Pre-condition: Integer value.
%
% Outputs:
%           I_Fus_MTF_GLP_FS:	Pansharpened image.
% 
% Reference:
%           [Vivone18]          G. Vivone, R. Restaino,and J. Chanussot, "Full scale regression-based injection coefficients for panchromatic sharpening," 
%                               IEEE Transactions on Image Processing, vol. 27, no. 7, pp. 3418-3431, Jul. 2018.
%           [Vivone20]          G. Vivone, M. Dalla Mura, A. Garzelli, R. Restaino, G. Scarpa, M.O. Ulfarsson, L. Alparone, and J. Chanussot, "A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting pansharpening with classical and emerging pansharpening methods", 
%                               IEEE Geoscience and Remote Sensing Magazine, doi: 10.1109/MGRS.2020.3019315.
% % % % % % % % % % % % % 
% 
% Version: 1
% 
% % % % % % % % % % % % % 
% 
% Copyright (C) 2019
% 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function I_Fus_MTF_GLP_FS = MTF_GLP_FS(I_MS,I_PAN,sensor,ratio)
% MTF_GLP_FS  Additive fusion with a full-scale regression gain per band.
%   For every band the gain is cov(MS band, PAN) / cov(low-pass PAN, PAN).

panImg = double(I_PAN);
I_MS = double(I_MS);
numCh = size(I_MS,3);

mtfKernels = genMTF(ratio, sensor, numCh);

I_Fus_MTF_GLP_FS = zeros(size(I_MS));
for b = 1 : numCh
    %%% Band-specific low-pass PAN: MTF filter, decimate, interpolate back
    panLow = imfilter(panImg,real(mtfKernels(:,:,b)),'replicate');
    decimated = imresize(panLow,1/ratio,'nearest');
    panLow = interp23tap(decimated,ratio);
    
    %%% Injection coefficient for band b
    msBand = I_MS(:,:,b);
    covMsPan = cov(msBand(:), panImg(:));
    covLowPan = cov(panLow(:), panImg(:));
    gainFS = covMsPan(1,2)./covLowPan(1,2);
    
    %%% Fusion rule
    I_Fus_MTF_GLP_FS(:,:,b) = I_MS(:,:,b) + gainFS .* (panImg - panLow);
end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/GLP/MTF_GLP_HPM.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description: 
%           MTF_GLP_HPM fuses the upsampled MultiSpectral (MS) and PANchromatic (PAN) images by 
%           exploiting the Modulation Transfer Function - Generalized Laplacian Pyramid (MTF-GLP) with High Pass Modulation (HPM) injection model algorithm. 
% 
% Interface:
%           I_Fus_MTF_GLP_HPM = MTF_GLP_HPM(I_MS,I_PAN,sensor,ratio)
%
% Inputs:
%           I_MS:               MS image upsampled at PAN scale;
%           I_PAN:              PAN image;
%           sensor:             String for type of sensor (e.g. 'WV2','IKONOS');
%           ratio:              Scale ratio between MS and PAN. Pre-condition: Integer value.
%
% Outputs:
%           I_Fus_MTF_GLP_HPM:  MTF_GLP_HPM pansharpened image.
% 
% References:
%           [Aiazzi03]          B. Aiazzi, L. Alparone, S. Baronti, A. Garzelli, and M. Selva, An MTF-based spectral distortion minimizing model for Pan-sharpening
%                               of very high resolution multispectral images of urban areas, in Proceedings of URBAN 2003: 2nd GRSS/ISPRS Joint Workshop on
%                               Remote Sensing and Data Fusion over Urban Areas, 2003, pp. 90-94.
%           [Aiazzi06]          B. Aiazzi, L. Alparone, S. Baronti, A. Garzelli, and M. Selva, MTF-tailored multiscale fusion of high-resolution MS and Pan imagery,
%                               Photogrammetric Engineering and Remote Sensing, vol. 72, no. 5, pp. 591-596, May 2006.
%           [Vivone14a]         G. Vivone, R. Restaino, M. Dalla Mura, G. Licciardi, and J. Chanussot, Contrast and error-based fusion schemes for multispectral
%                               image pansharpening, IEEE Geoscience and Remote Sensing Letters, vol. 11, no. 5, pp. 930-934, May 2014.
%           [Vivone15]          G. Vivone, L. Alparone, J. Chanussot, M. Dalla Mura, A. Garzelli, G. Licciardi, R. Restaino, and L. Wald, A Critical Comparison Among Pansharpening Algorithms, 
%                               IEEE Transactions on Geoscience and Remote Sensing, vol. 53, no. 5, pp. 2565-2586, May 2015.
%           [Alparone17]        L. Alparone, A. Garzelli, and G. Vivone, "Intersensor statistical matching for pansharpening: Theoretical issues and practical solutions",
%                               IEEE Transactions on Geoscience and Remote Sensing, vol. 55, no. 8, pp. 4682-4695, 2017.
%           [Vivone20]          G. Vivone, M. Dalla Mura, A. Garzelli, R. Restaino, G. Scarpa, M.O. Ulfarsson, L. Alparone, and J. Chanussot, "A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting pansharpening with classical and emerging pansharpening methods", 
%                               IEEE Geoscience and Remote Sensing Magazine, doi: 10.1109/MGRS.2020.3019315.
% % % % % % % % % % % % % 
% 
% Version: 1
% 
% % % % % % % % % % % % % 
% 
% Copyright (C) 2019
% 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function I_Fus_MTF_GLP_HPM = MTF_GLP_HPM(I_MS,I_PAN,sensor,ratio)
% MTF_GLP_HPM  Multiplicative (high-pass modulation) fusion: each MS band is
%   scaled by the ratio between the equalized PAN and its low-pass version.

I_MS = double(I_MS);
numCh = size(I_MS,3);

%%% Equalization: one PAN copy per band; the scale factor uses the std of
%%% the low-pass filtered PAN (LPfilterGauss), the offset the MS band mean
panEq = repmat(double(I_PAN),[1 1 numCh]);
for b = 1 : numCh
    panBand = panEq(:,:,b);
    msBand = I_MS(:,:,b);
    panEq(:,:,b) = (panBand - mean2(panBand)).*(std2(msBand)./std2(LPfilterGauss(panBand,ratio))) + mean2(msBand);
end

%%% Per-band MTF kernels
mtfKernels = genMTF(ratio, sensor, numCh);

%%% Low-pass PAN: MTF filtering, decimation, interpolation back
panLow = zeros(size(I_MS));
for b = 1 : numCh
    panLow(:,:,b) = imfilter(panEq(:,:,b),real(mtfKernels(:,:,b)),'replicate');
    decimated = imresize(panLow(:,:,b),1/ratio,'nearest');
    panLow(:,:,b) = interp23tap(decimated,ratio);
end

%%% Modulation; eps guards against division by zero
I_Fus_MTF_GLP_HPM = I_MS .* (panEq ./ (panLow + eps));

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/GLP/MTF_GLP_HPM_Haze_min.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description: 
%           Gaussian Laplacian Pyramid with high pass modulation injection model haze corrected.
% 
% Interface:
%           I_Fus_MTF_GLP_HPM = MTF_GLP_HPM_Haze_min(I_MS,I_PAN,sensor,ratio,decimation)
%
% Inputs:
%           I_MS:           MS image upsampled at PAN scale;
%           I_PAN:          PAN image;
%           sensor:         String for type of sensor (e.g. 'WV2','IKONOS');
%           ratio:          Scale ratio between MS and PAN. Pre-condition: Integer value;
%           decimation:     Flag decimation (1: decimated PAN_LP).
%
% Outputs:
%           I_Fus_MTF_GLP_HPM:  Pansharpened image.
% 
% References:
%           [Lolli17]       S. Lolli, L. Alparone, A. Garzelli, and G. Vivone, "Haze correction for contrast-based multispectral pansharpening",
%                           IEEE Geoscience and Remote Sensing Letters, vol. 14, no. 12, pp. 2255-2259, 2017.
%           [Garzelli18]    A. Garzelli, B. Aiazzi, L. Alparone, S. Lolli, and G. Vivone, 
%                           "Multispectral Pansharpening with Radiative Transfer-Based Detail-Injection Modeling for Preserving Changes in Vegetation Cover",
%                           MDPI Remote Sensing, vol. 10, no. 8, pp. 1 - 18, 2018.
%           [Vivone20]      G. Vivone, M. Dalla Mura, A. Garzelli, R. Restaino, G. Scarpa, M.O. Ulfarsson, L. Alparone, and J. Chanussot, "A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting pansharpening with classical and emerging pansharpening methods", 
%                           IEEE Geoscience and Remote Sensing Magazine, doi: 10.1109/MGRS.2020.3019315.
% % % % % % % % % % % % % 
% 
% Version: 1
% 
% % % % % % % % % % % % % 
% 
% Copyright (C) 2019
% 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function I_Fus_MTF_GLP_HPM = MTF_GLP_HPM_Haze_min(I_MS,I_PAN,sensor,ratio,decimation)
% MTF_GLP_HPM_Haze_min  High-pass modulation applied after subtracting a
%   per-band offset from MS and a matching offset from PAN; the MS offset
%   is added back at the end.

numCh = size(I_MS,3);
nRows = size(I_MS,1);
nCols = size(I_MS,2);

%%% Per-band offsets: scaled 1st percentile for 4-band data, band minimum otherwise
if numCh == 4
    prc = 1;
    bandWeights = [0.95 0.45 0.40 0.05];
    minMS = zeros(1,1,4);
    for b = 1 : 4
        band = I_MS(:,:,b);
        minMS(1,1,b) = bandWeights(b) * prctile(band(:),prc);
    end
else
    minMS = zeros(1,1,numCh);
    for b = 1 : numCh
       minMS(1,1,b) = min(min(I_MS(:,:,b)));  
    end
end

%%% PAN offset: the regression weights (constant term first) applied to [1; MS offsets]
I_PAN_LR = LPfilterGauss(I_PAN,ratio);
w = estimation_alpha(cat(3,ones(size(I_PAN_LR)),I_MS),I_PAN_LR,'global');
wp = w' * [1;squeeze(minMS)]; 

%%% Offsets expanded to image size
L = repmat(minMS, [nRows nCols]);
Lp = wp .* ones([nRows nCols]);

I_MS = double(I_MS);

%%% Replicate PAN once per band (no histogram matching is performed here)
panStack = repmat(double(I_PAN),[1 1 size(I_MS,3)]);

%%% Low-pass PAN, optionally decimated and interpolated back
PAN_LP = MTF(panStack,sensor,ratio);
if decimation
    for b = 1 : size(I_MS,3)
        decimated = imresize(PAN_LP(:,:,b),1/ratio,'nearest');
        PAN_LP(:,:,b) = interp23tap(decimated,ratio);
    end
end

%%% Modulation ratio on offset-corrected PAN; eps guards the division
P_PL = (panStack - Lp) ./ (PAN_LP - Lp + eps);

%%% Modulate the offset-corrected MS, then restore the offset
MS_L = I_MS - L;
I_Fus_MTF_GLP_HPM = MS_L .* P_PL + L;

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/GLP/MTF_GLP_HPM_R.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description: 
%           A Regression-Based High-Pass Modulation Pansharpening Approach (Global Version) 
% 
% Interface:
%           I_Fus_MTF_GLP_HPM_R = MTF_GLP_HPM_R(I_MS,I_PAN,sensor,ratio)
%
% Inputs:
%           I_MS:                   MS image upsampled at PAN scale;
%           I_PAN:                  PAN image;
%           sensor:                 String for type of sensor (e.g. 'WV2','IKONOS');
%           ratio:                  Scale ratio between MS and PAN. Pre-condition: Integer value.
%
% Outputs:
%           I_Fus_MTF_GLP_HPM_R:    Pansharpened image.
% 
% Reference:
%           [Vivone18]              G. Vivone, R. Restaino, and J. Chanussot, "A regression-based high-pass modulation pansharpening approach," 
%                                   IEEE Transactions on Geoscience and Remote Sensing, vol. 56, no. 2, pp. 984-996, Feb. 2018.
%           [Vivone20]              G. Vivone, M. Dalla Mura, A. Garzelli, R. Restaino, G. Scarpa, M.O. Ulfarsson, L. Alparone, and J. Chanussot, "A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting pansharpening with classical and emerging pansharpening methods", 
%                                   IEEE Geoscience and Remote Sensing Magazine, doi: 10.1109/MGRS.2020.3019315.
% % % % % % % % % % % % % 
% 
% Version: 1
% 
% % % % % % % % % % % % % 
% 
% Copyright (C) 2019
% 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function I_Fus_MTF_GLP_HPM_R = MTF_GLP_HPM_R(I_MS,I_PAN,sensor,ratio)
% MTF_GLP_HPM_R  High-pass modulation with a regression-derived offset.
%   For each band the gain g = cov(MS, low-pass PAN) / var(low-pass PAN)
%   gives the offset cb = mean(MS)/g - mean(PAN), which is added to both
%   PAN and low-pass PAN before taking their ratio.

panImg = double(I_PAN);
I_MS = double(I_MS);
numCh = size(I_MS,3);

mtfKernels = genMTF(ratio, sensor, numCh);

I_Fus_MTF_GLP_HPM_R = zeros(size(I_MS));
for b = 1 : numCh
    %%% Band-specific low-pass PAN: MTF filter, decimate, interpolate back
    panLow = imfilter(panImg,real(mtfKernels(:,:,b)),'replicate');
    decimated = imresize(panLow,1/ratio,'nearest');
    panLow = interp23tap(decimated,ratio);
    
    %%% Regression coefficients
    msBand = I_MS(:,:,b);
    covMat = cov(msBand(:),panLow(:));
    gain = covMat(1,2)./covMat(2,2);
    offset = mean(msBand(:))./gain - mean(panImg(:));
        
    %%% Fusion rule; eps guards the division
    I_Fus_MTF_GLP_HPM_R(:,:,b) = I_MS(:,:,b) .* (panImg + offset) ./ (panLow + offset + eps);
end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/GS/GS.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description: 
%           GS fuses the upsampled MultiSpectral (MS) and PANchromatic (PAN) images by 
%           exploiting the Gram-Schmidt (GS) transformation.
% 
% Interface:
%           I_Fus_GS = GS(I_MS,I_PAN)
%
% Inputs:
%           I_MS:       MS image upsampled at PAN scale;
%           I_PAN:      PAN image.
%
% Outputs:
%           I_Fus_GS:   GS pansharpened image.
% 
% References:
%           [Laben00]   C. A. Laben and B. V. Brower, Process for enhancing the spatial resolution of multispectral imagery using pan-sharpening, Eastman
%                       Kodak Company, Tech. Rep. US Patent # 6,011,875, 2000.
%           [Aiazzi07]  B. Aiazzi, S. Baronti, and M. Selva, Improving component substitution Pansharpening through multivariate regression of MS+Pan
%                       data, IEEE Transactions on Geoscience and Remote Sensing, vol. 45, no. 10, pp. 3230-3239, October 2007.
%           [Vivone15]  G. Vivone, L. Alparone, J. Chanussot, M. Dalla Mura, A. Garzelli, G. Licciardi, R. Restaino, and L. Wald, A Critical Comparison Among Pansharpening Algorithms, 
%                       IEEE Transactions on Geoscience and Remote Sensing, vol. 53, no. 5, pp. 2565-2586, May 2015.
%           [Vivone20]  G. Vivone, M. Dalla Mura, A. Garzelli, R. Restaino, G. Scarpa, M.O. Ulfarsson, L. Alparone, and J. Chanussot, "A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting pansharpening with classical and emerging pansharpening methods", 
%                       IEEE Geoscience and Remote Sensing Magazine, doi: 10.1109/MGRS.2020.3019315.
% % % % % % % % % % % % % 
% 
% Version: 1
% 
% % % % % % % % % % % % % 
% 
% Copyright (C) 2019
% 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function I_Fus_GS = GS(I_MS,I_PAN)
% Gram-Schmidt component substitution.
%   The intensity component is the plain average of the MS bands. The PAN
%   image is matched to that intensity in mean and standard deviation, and
%   the difference (matched PAN - intensity) is added to every zero-mean MS
%   band, scaled by cov(intensity,band)/var(intensity). Finally every fused
%   band is shifted so that its mean equals the mean of its input band.

bandCount = size(I_MS,3);
planeSize = [size(I_MS,1) size(I_MS,2)];

msUp = double(I_MS);
pan  = double(I_PAN);

%%% Zero-mean copy of each MS band
msZeroMean = zeros(size(I_MS));
for b = 1 : bandCount
    msZeroMean(:,:,b) = msUp(:,:,b) - mean2(msUp(:,:,b));
end

%%% Zero-mean intensity (band average)
intensity0 = mean(msUp,3);
intensity0 = intensity0 - mean2(intensity0);

%%% Match PAN to the intensity (first and second order statistics)
pan = (pan - mean2(pan)) .* (std2(intensity0)./std2(pan)) + mean2(intensity0);

%%% Spatial detail to be injected
detail    = pan - intensity0;
detailCol = detail(:);

%%% Per-band gain, injection and mean equalization
I_Fus_GS = zeros([planeSize bandCount]);
for b = 1 : bandCount
    band0 = msZeroMean(:,:,b);
    covIB = cov(intensity0(:),band0(:));
    gain  = covIB(1,2)/var(intensity0(:));

    fused = reshape(band0(:) + detailCol .* gain,planeSize);

    % Restore the mean of the original (upsampled) band
    I_Fus_GS(:,:,b) = fused - mean2(fused) + mean2(msUp(:,:,b));
end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/GS/GSA.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description: 
%           GSA fuses the upsampled MultiSpectral (MS) and PANchromatic (PAN) images by 
%           exploiting the Gram-Schmidt Adaptive (GSA) algorithm.
% 
% Interface:
%           I_Fus_GSA = GSA(I_MS,I_PAN,I_MS_LR,ratio)
%
% Inputs:
%           I_MS:       MS image upsampled at PAN scale;
%           I_PAN:      PAN image;
%           I_MS_LR:    MS image;
%           ratio:      Scale ratio between MS and PAN. Pre-condition: Integer value.
%
% Outputs:
%           I_Fus_GSA:  GSA pansharpened image.
% 
% References:
%           [Aiazzi07]  B. Aiazzi, S. Baronti, and M. Selva, "Improving component substitution Pansharpening through multivariate regression of MS+Pan
%                       data", IEEE Transactions on Geoscience and Remote Sensing, vol. 45, no. 10, pp. 3230-3239, October 2007.
%           [Vivone15]  G. Vivone, L. Alparone, J. Chanussot, M. Dalla Mura, A. Garzelli, G. Licciardi, R. Restaino, and L. Wald, "A Critical Comparison Among Pansharpening Algorithms", 
%                       IEEE Transactions on Geoscience and Remote Sensing, vol. 53, no. 5, pp. 2565-2586, May 2015.
%           [Vivone20]  G. Vivone, M. Dalla Mura, A. Garzelli, R. Restaino, G. Scarpa, M.O. Ulfarsson, L. Alparone, and J. Chanussot, "A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting pansharpening with classical and emerging pansharpening methods", 
%                       IEEE Geoscience and Remote Sensing Magazine, doi: 10.1109/MGRS.2020.3019315.
% % % % % % % % % % % % % 
% 
% Version: 1
% 
% % % % % % % % % % % % % 
% 
% Copyright (C) 2019
% 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

function I_Fus_GSA = GSA(I_MS,I_PAN,I_MS_LR,ratio)
% Gram-Schmidt Adaptive component substitution.
%   Same injection scheme as plain GS, but the intensity is a weighted sum
%   of the zero-mean MS bands (plus a constant plane). The weights come from
%   estimation_alpha, called with the zero-mean low-resolution MS bands and
%   the zero-mean PAN after LPfilterPlusDec(.,ratio).

bandCount = size(I_MS,3);
planeSize = [size(I_MS,1) size(I_MS,2)];

msUp  = double(I_MS);
pan   = double(I_PAN);
msLow = double(I_MS_LR);

%%% Zero-mean copy of each upsampled MS band
msUp0 = zeros(size(I_MS));
for b = 1 : bandCount
    msUp0(:,:,b) = msUp(:,:,b) - mean2(msUp(:,:,b));
end

%%% Zero-mean copy of each low-resolution MS band
msLow0 = zeros(size(I_MS_LR));
for b = 1 : size(I_MS_LR,3)
    msLow0(:,:,b) = msLow(:,:,b) - mean2(msLow(:,:,b));
end

%%% Intensity: weights estimated at low resolution, applied at PAN scale
pan0    = pan - mean2(pan);
panLow0 = LPfilterPlusDec(pan0,ratio);
weights(1,1,:) = estimation_alpha(cat(3,msLow0,ones(size(I_MS_LR,1),size(I_MS_LR,2))),panLow0,'global');
intensity = sum(cat(3,msUp0,ones(planeSize)) .* repmat(weights,[planeSize 1]),3);

%%% Zero-mean intensity
intensity0 = intensity - mean2(intensity);

%%% Spatial detail to be injected (zero-mean PAN minus intensity)
detail    = pan0 - intensity0;
detailCol = detail(:);

%%% Per-band gain, injection and mean equalization
I_Fus_GSA = zeros([planeSize bandCount]);
for b = 1 : bandCount
    band0 = msUp0(:,:,b);
    covIB = cov(intensity0(:),band0(:));
    gain  = covIB(1,2)/var(intensity0(:));

    fused = reshape(band0(:) + detailCol .* gain,planeSize);

    % Restore the mean of the original (upsampled) band
    I_Fus_GSA(:,:,b) = fused - mean2(fused) + mean2(msUp(:,:,b));
end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/GS/GS_Segm.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description: 
%           GS_Segm fuses the upsampled MultiSpectral (MS) and PANchromatic (PAN) images by 
%           exploiting the segmentation-based version of the Gram-Schmidt algorithm.
% 
% Interface:
%           PanSharpenedImage = GS_Segm(I_MS,I_PAN,I_LR_input,S)
%
% Inputs:
%           I_MS:       MS image upsampled at PAN scale
%           I_PAN:      PAN image
%           I_LR_input: Low Resolution PAN Image 
%           S:          Segmentation
%
% Outputs:
%           PanSharpenedImage:  Pansharpened image
% 
% Reference:
%           [Restaino17]    R. Restaino, M. Dalla Mura, G. Vivone, J. Chanussot, "Context-Adaptive Pansharpening Based on Image Segmentation", 
%                           IEEE Transactions on Geoscience and Remote Sensing, vol. 55, no. 2, pp. 753-766, February 2017.
%           [Vivone20]      G. Vivone, M. Dalla Mura, A. Garzelli, R. Restaino, G. Scarpa, M.O. Ulfarsson, L. Alparone, and J. Chanussot, "A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting pansharpening with classical and emerging pansharpening methods", 
%                           IEEE Geoscience and Remote Sensing Magazine, doi: 10.1109/MGRS.2020.3019315.
% % % % % % % % % % % % % 
% 
% Version: 1
% 
% % % % % % % % % % % % % 
% 
% Copyright (C) 2019
% 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function PanSharpenedImage = GS_Segm(I_MS,I_PAN,I_LR_input,S)
% Segmentation-based Gram-Schmidt injection.
%   For every band and every label found in S, the injection gain is
%   cov(LR,MS)/var(LR) computed over the pixels carrying that label, where
%   LR is the low-resolution PAN band. The fused image is
%   gain .* (PAN - LR) + MS.
%
%   A segment whose low-resolution PAN values have zero variance (a single
%   pixel, or a perfectly flat region) gives no usable regression: the
%   plain ratio would be 0/0 = NaN and would propagate NaN into the output.
%   Such segments receive gain 0, i.e. the MS values pass through unchanged.

I_MS = double(I_MS);
nBands = size(I_MS,3);

% Replicate PAN (and a single-band LR image) once per MS band
I_PAN = repmat(double(I_PAN), [1, 1, nBands]);
I_LR_input = double(I_LR_input);
if size(I_LR_input, 3) == 1
    I_LR_input = repmat(I_LR_input, [1, 1, nBands]);
end
if size(I_LR_input, 3) ~= size(I_PAN, 3)
    error('I_LP should have the same number of bands as PAN');
end

DetailsHRPan = I_PAN - I_LR_input;

Coeff = zeros(size(I_MS));
labels = unique(S);

for ii = 1 : nBands
    MS_Band = I_MS(:,:,ii);
    I_LR_Band = I_LR_input(:,:,ii);
    Coeff_Band = zeros(size(I_LR_Band));
    for il = 1 : numel(labels)
        idx = S==labels(il);
        lrVals = I_LR_Band(idx);
        lrVar = var(lrVals);
        % Only regress when the segment actually varies; otherwise keep
        % the zero gain set at allocation time.
        if lrVar > 0
            c = cov(lrVals,MS_Band(idx));
            Coeff_Band(idx) = c(1,2)/lrVar;
        end
    end
    Coeff(:,:,ii) = Coeff_Band;
end

PanSharpenedImage = Coeff .* DetailsHRPan + I_MS;

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/MF/MF_HG_Pansharpen.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description: 
%           Morphological Pyramid Decomposition using Half-Gradient. 
% 
% Interface:
%           I_Fus_MF_HG = MF_HG_Pansharpen(I_MS,I_PAN,ratio)
%
% Inputs:
%           I_MS:               MS image upsampled at PAN scale;
%           I_PAN:              PAN image;
%           ratio:              Scale ratio between MS and PAN. Pre-condition: Integer value.
%
% Outputs:
%           I_Fus_MF_HG:        Morphological Half Gradient (HG) pansharpened image.
% 
% Reference:
%           [Restaino16]        R. Restaino, G. Vivone, M. Dalla Mura, and J. Chanussot, Fusion of Multispectral and Panchromatic Images Based on Morphological Operators, 
%                               IEEE Transactions on Image Processing, vol. 25, no. 6, pp. 2882-2895, Jun. 2016.
%           [Vivone20]          G. Vivone, M. Dalla Mura, A. Garzelli, R. Restaino, G. Scarpa, M.O. Ulfarsson, L. Alparone, and J. Chanussot, "A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting pansharpening with classical and emerging pansharpening methods", 
%                               IEEE Geoscience and Remote Sensing Magazine, doi: 10.1109/MGRS.2020.3019315.
% % % % % % % % % % % % % 
% 
% Version: 1
% 
% % % % % % % % % % % % % 
% 
% Copyright (C) 2019
% 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function I_Fus_MF_HG = MF_HG_Pansharpen(I_MS,I_PAN,ratio)
% Pansharpening by morphological half-gradient pyramid.
%   PAN is replicated once per MS band and matched to each band in mean and
%   standard deviation. Pyr_Dec then builds a pyramid of that stack, and the
%   MS image is multiplied by the ratio between the first and the last
%   pyramid level.

msUp     = double(I_MS);
nBands   = size(msUp,3);
panStack = repmat(double(I_PAN),[1 1 nBands]);

% Equalization: one PAN copy per MS band, matched in mean and std
for b = 1 : nBands
    panBand = panStack(:,:,b);
    msBand  = msUp(:,:,b);
    panStack(:,:,b) = (panBand - mean2(panBand)).*(std2(msBand)./std2(panBand)) + mean2(msBand);
end

% Pyramid settings
structElem = [0 1 0; 1 1 1; 0 1 0];   % 3x3 cross structuring element
interpName = 'bilinear';              % interpolation used by Pyr_Dec
nLevels    = ceil(log2(ratio))+1;     % number of pyramid levels

% Pyramid of the equalized PAN stack
pyramid = Pyr_Dec(panStack,structElem,nLevels,interpName);

% Fusion: modulate MS by (full-resolution PAN) / (coarsest level);
% eps keeps the denominator away from an exact zero
coarsest = pyramid(:,:,:,nLevels);
I_Fus_MF_HG = msUp .* (pyramid(:,:,:,1)./(coarsest+eps));

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/MF/Pyr_Dec.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description: 
%           Morphological Pyramid Decomposition using Half-Gradient. 
% 
% Interface:
%           P = Pyr_Dec(Im,textse,lev,int_meth)
%
% Inputs:
%           Im:                 Image to decompose;
%           textse:             Structuring Element;
%           lev:                Number of decomposition levels;
%           int_meth:           Interpolation method.
%
% Outputs:
%           P:                  Morphological Pyramid using Half-Gradient.
% 
% References:
%           [Vivone14]          G. Vivone, R. Restaino, M. Dalla Mura, G. Licciardi, and J. Chanussot, "Contrast and error-based fusion schemes for multispectral
%                               image pansharpening", IEEE Geoscience and Remote Sensing Letters, vol. 11, no. 5, pp. 930-934, May 2014.
%           [Vivone15]          G. Vivone, L. Alparone, J. Chanussot, M. Dalla Mura, A. Garzelli, G. Licciardi, R. Restaino, and L. Wald, A Critical Comparison Among Pansharpening Algorithms, 
%                               IEEE Transactions on Geoscience and Remote Sensing, vol. 53, no. 5, pp. 2565-2586, May 2015.
%           [Restaino16]        R. Restaino, G. Vivone, M. Dalla Mura, and J. Chanussot, Fusion of Multispectral and Panchromatic Images Based on Morphological Operators, 
%                               IEEE Transactions on Image Processing, vol. 25, no. 6, pp. 2882-2895, Jun. 2016.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

function  P = Pyr_Dec(Im,textse,lev,int_meth)
% Morphological half-gradient pyramid.
%   P(:,:,:,1) is Im itself. Every further level ii is obtained by
%     1) smoothing the previous coarse image with the half-gradient operator
%        (equal to the average of its dilation and erosion by textse),
%     2) decimating by 2 in both directions,
%     3) resizing the result back to the size of Im, one pyramid level at a
%        time, with interpolation method int_meth.
%   If a level contains non-finite values, every level of P is set to Im
%   and the decomposition stops.
%
%   The size of each decimated level is recorded as [rows cols]. An earlier
%   version stored the row count twice, so on non-square images with three
%   or more levels the intermediate resize went to the wrong width.

P(:,:,:,1) = Im;
Sizes = [size(Im,1), size(Im,2)];   % one [rows cols] line per level
coarse = Im;

for ii = 2 : lev

    fine = coarse;

    %  Half Gradient
    PD = imdilate(fine,textse);
    PE = imerode(fine,textse);
    rho_minus = fine - PE;
    rho_plus  = PD - fine;
    D  = rho_minus - rho_plus;
    PS = fine - 0.5*D;
    % PS = 0.5*(PD+PE); %equivalently

    % Downsampling: the first decimation keeps the even samples, the
    % following ones keep the odd samples
    if ii == 2
        coarse = PS(2:2:end,2:2:end,:);
    else
        coarse = PS(1:2:end,1:2:end,:);
    end
    Sizes(ii,:) = [size(coarse,1) size(coarse,2)];

    % Expand back to the original size, level by level (imresize only
    % touches the first two dimensions, so all bands are done at once)
    expanded = coarse;
    for ir = ii:-1:2
        expanded = imresize(expanded,Sizes(ir-1,:),int_meth);
    end

    if ~all(isfinite(expanded(:)))
        % Degenerate data: fall back to a flat pyramid and stop
        P(:,:,:,1:lev) = repmat(P(:,:,:,1),1,1,1,lev);
        break
    else
        P(:,:,:,ii) = expanded;
    end

end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PRACS/PRACS.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description: 
%           PRACS fuses the upsampled MultiSpectral (MS) and PANchromatic (PAN) images by 
%           exploiting the Partial Replacement Adaptive CS (PRACS) algorithm. 
% 
% Interface:
%           I_Fus_PRACS = PRACS(I_MS,I_PAN,ratio)
%
% Inputs:
%           I_MS:           MS image upsampled at PAN scale;
%           I_PAN:          PAN image;
%           ratio:          Scale ratio between MS and PAN. Pre-condition: Integer value.
%
% Outputs:
%           I_Fus_PRACS:    PRACS pansharpened image.
% 
% References:
%           [Choi11]        J. Choi, K. Yu, and Y. Kim, "A new adaptive component-substitution-based satellite image fusion by using partial replacement", IEEE
%                           Transactions on Geoscience and Remote Sensing, vol. 49, no. 1, pp. 295-309, January 2011.
%           [Vivone15]      G. Vivone, L. Alparone, J. Chanussot, M. Dalla Mura, A. Garzelli, G. Licciardi, R. Restaino, and L. Wald, "A Critical Comparison Among Pansharpening Algorithms", 
%                           IEEE Transactions on Geoscience and Remote Sensing, vol. 53, no. 5, pp. 2565-2586, May 2015.
%           [Vivone20]      G. Vivone, M. Dalla Mura, A. Garzelli, R. Restaino, G. Scarpa, M.O. Ulfarsson, L. Alparone, and J. Chanussot, "A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting pansharpening with classical and emerging pansharpening methods", 
%                           IEEE Geoscience and Remote Sensing Magazine, doi: 10.1109/MGRS.2020.3019315.
% % % % % % % % % % % % % 
% 
% Version: 1
% 
% % % % % % % % % % % % % 
% 
% Copyright (C) 2019
% 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function I_Fus_PRACS = PRACS(I_MS,I_PAN,ratio,beta)
% Partial Replacement Adaptive Component Substitution.
%
%   I_Fus_PRACS = PRACS(I_MS,I_PAN,ratio)
%   I_Fus_PRACS = PRACS(I_MS,I_PAN,ratio,beta)
%
%   I_MS   MS image upsampled at PAN scale (N x M x L)
%   I_PAN  PAN image
%   ratio  scale ratio between MS and PAN
%   beta   (optional) global multiplier applied to the per-band injection
%          weights. Default 0.95, the value the original code used for
%          11-bit data; the original code listed 1.95 as the alternative
%          for 8-bit data.
%
%   Steps: histogram-match every MS band to PAN; regress a low-pass PAN on
%   the matched bands to get an initial intensity; blend PAN and each
%   matched band according to their correlation with that intensity
%   (partial replacement); regress the low-pass version of each blend on
%   the matched bands to get a band-dependent intensity; inject the
%   zero-mean difference (blend - band intensity), scaled by a global
%   weight and a per-pixel local instability factor.

if nargin < 4 || isempty(beta)
    beta = 0.95; % for 11-bit data
end

I_MS = double(I_MS);
I_PAN = double(I_PAN);
[N,M,L] = size(I_MS);

%%% Histogram matching of each MS band to Pan
msexp_hm = zeros(N,M,L);
for k = 1:L
    b = I_MS(:,:,k);
    b = (b - mean2(b) + mean2(I_PAN)/std2(I_PAN)*std2(b)) * std2(I_PAN)/std2(b);
    b(b<0) = 0;
    msexp_hm(:,:,k) = b;
end

%%% Computing low-resolution Pan by bicubic decimation/interpolation
pan_l = imresize(imresize(I_PAN,1/ratio),ratio);

%%% Regression matrix: offset column followed by one column per matched
%%% band. It is used unchanged by both regressions below.
bb = [ones(N*M,1),reshape(msexp_hm,[N*M,L])];

%%% Regression of Pan_low vs MS (with offset) and initial intensity
alpha_pan = regress(pan_l(:),bb);
I_l = reshape(bb * alpha_pan,[N,M]);

%%% Partial Replacement
I_h = zeros(N,M,L);
cc  = zeros(1,L);
for k = 1:L
    b = msexp_hm(:,:,k);
    cc(k) = corr2(I_l(:),b(:));
    I_h(:,:,k) = reshape(cc(k)*I_PAN(:)+(1-cc(k))*b(:),[N,M]);
end

%%% Band-dependent intensity
%%% For each band, compute low-resolution I_h by bicubic decimation/interpolation
I_h_low = zeros(N,M,L);
for k = 1:L
    I_h_low(:,:,k) = imresize(imresize(I_h(:,:,k),1/ratio),ratio);
end

%%% Regression of I_h_low_k vs MS (with offset) and resulting intensities
alpha_band = zeros(L+1,L);
I_l_prime = zeros(N,M,L);
for k = 1:L
    lowBand = I_h_low(:,:,k);
    alpha_band(:,k) = regress(lowBand(:),bb);
    I_l_prime(:,:,k) = reshape(bb * alpha_band(:,k),[N,M]);
end

%%% Computing detail images (zero-mean difference)
delta = zeros(N,M,L);
for k = 1:L
    delta(:,:,k) = I_h(:,:,k)-I_l_prime(:,:,k)-(mean2(I_h(:,:,k))-mean2(I_l_prime(:,:,k)));
end

%%% Computing mean of std. devs.
bandStd = zeros(1,L);
for k = 1:L
    bandStd(k) = std2(I_MS(:,:,k));
end
meanStd = mean(bandStd);

%%% Computing weights
w = zeros(1,L);
for k = 1:L
    intensityBand = I_l_prime(:,:,k);
    b = I_MS(:,:,k);
    w(k) = beta .* corr2(intensityBand(:),b(:))*std(b(:))/meanStd;
end

%%% Computing local instability adjustment parameter
L_I = zeros(N,M,L);
for k = 1:L
    b = I_MS(:,:,k);
    intensityBand = I_l_prime(:,:,k);
    instab = 1-abs(1-corr2(I_l(:),b(:))*b(:)./intensityBand(:));
    L_I(:,:,k) = reshape(instab,[N,M]);
end

%%% Computing pansharpened image
I_Fus_PRACS = zeros(N,M,L);
for k = 1:L
    injected = w(k) * L_I(:,:,k) .* delta(:,:,k);
    I_Fus_PRACS(:,:,k) = I_MS(:,:,k) + injected;
end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/PWMBF.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description: 
%       Model-based fusion using PCA and wavelets.
% 
% Interface:
%       Z = PWMBF(Pan,Low,ratio,r,wavelet,degrade)
% 
% Inputs:
%         Pan : Panchromatic image;
%          Low: Low spatial resolution MS image;
%        ratio: Scale ratio between Pan and Low;
%            r: Number of principal components;
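%      wavelet: flag; if true the estimation is carried out on the coefficients
%               of a redundant wavelet transform (requires the rwt toolbox),
%               otherwise directly in the image domain;
%      degrade: flag; if true Pan and Low are first downsampled by ratio.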
% 
% Output:
%    Z:     Pansharpened image;
% 
% References:
%           [Palsson15]     F. Palsson, J.R. Sveinsson, M.O. Ulfarsson, J.A. Benediktsson, "Model-based fusion of multi-and hyperspectral images using PCA and wavelets", 
%                           IEEE Transactions on Geoscience and Remote Sensing, vol. 53, no. 5, pp. 2652-2663, May 2015.
%           [Vivone20]      G. Vivone, M. Dalla Mura, A. Garzelli, R. Restaino, G. Scarpa, M.O. Ulfarsson, L. Alparone, and J. Chanussot, "A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting pansharpening with classical and emerging pansharpening methods", 
%                           IEEE Geoscience and Remote Sensing Magazine, doi: 10.1109/MGRS.2020.3019315.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function Z = PWMBF(Pan,Low,ratio,r,wavelet,degrade)
% Model-based fusion using PCA and (optionally) wavelets.
%
%   Pan      high-resolution image, N x N x Q (must be square)
%   Low      low-resolution MS image
%   ratio    scale ratio between Pan and Low
%   r        number of principal components to sharpen (r <= bands of Low)
%   wavelet  true: estimate on redundant-wavelet coefficients (needs the
%            rwt functions daubcqf/mrdwt/mirdwt); false: estimate directly
%            in the image domain (rwt is not needed on this path)
%   degrade  true: Pan and Low are first downsampled by ratio
%
%   The upsampled MS image is decomposed by SVD; the first r components are
%   re-estimated from Pan through a linear conditional-mean model whose
%   statistics are learnt between the components and a blurred Pan.
%
%   The working size N is always read from the image actually processed.
%   An earlier version used N/4 after degrading, which is only right for
%   ratio == 4.

addpath(sprintf('%s/rwt/bin',pwd))

% Wavelet parameters
L=4;
type='rwt';

Low=double(Low);
Pan=double(Pan);

Q=size(Pan,3);
nb=size(Low,3);

if(r>nb)
    error('Number of PCs greater than number of bands');
end

X=Pan;
Ylow=Low;

if(degrade)
    X=imresize(Pan,1/ratio);
    Ylow=imresize(Low,1/ratio);
end

% Side length of the (square) working image
N=size(X,1);
if size(X,2)~=N
    % Everything below reshapes images to N^2 rows
    error('PWMBF:nonSquareInput','Pan must be a square image.');
end

% Upsample Y
Y=imresize(Ylow,ratio,'bicubic');

% Degrade X
Xtilde=imresize(imresize(X,1/ratio,'bilinear'),ratio,'bicubic');

% One column per band
X=reshape(X,[N^2 Q]);
Xtilde=reshape(Xtilde,[N^2 Q]);
Y=reshape(Y,[N^2 nb]);

% PCA transform
[F, D, R]=svd(Y,'econ');
G=F*D;
U=R;

if wavelet
    wfilter=daubcqf(4,'min');

    x=compute_PhiTX(Xtilde,L,wfilter,type);
    x0=compute_PhiTX(X,L,wfilter,type);
    y=compute_PhiTX(G(:,1:r),L,wfilter,type);
    yl=y(1:N^2,:);              % approximation band, kept as is
    zh=zeros(3*L*N^2,r);        % re-estimated detail bands
    for j=1:3*L
        rows=j*N^2+1:(j+1)*N^2;

        % Quantities that depend on the detail band only
        xh=x(rows,:);
        xh0=x0(rows,:);
        Cxx=xh'*xh/N^2;
        inv_Cxx=inv(Cxx);

        for p=1:r
            yh=y(rows,p);
            Cyy=yh'*yh/N^2;
            Cyx=yh'*xh/N^2;
            % Noise variance from the median absolute deviation
            Cn=diag(mad(abs(yh))/0.6745).^2;
            Cy_x=Cyy-Cyx*inv_Cxx*Cyx';
            CyxiCxx=Cyx*inv_Cxx;
            mu_zx=xh*CyxiCxx';
            mu_zx0=xh0*CyxiCxx';
            ymu=yh-mu_zx;
            CC=Cy_x*inv(Cy_x+Cn);
            zh(rows-N^2,p)=mu_zx0+ymu*CC;
        end
    end
    z=[yl;zh];
    B=compute_PhiX(z,L,wfilter,type);
else
    Cn=0;
    yh=G(:,1:r);
    xh=Xtilde;
    xh0=X;
    Cyy=yh'*yh/N^2;
    Cyx=yh'*xh/N^2;
    Cxx=xh'*xh/N^2;
    inv_Cxx=inv(Cxx);
    Cy_x=Cyy-Cyx*inv_Cxx*Cyx';
    CyxiCxx=Cyx*inv_Cxx;
    mu_zx=xh*CyxiCxx';
    mu_zx0=xh0*CyxiCxx';
    ymu=yh-mu_zx;
    CC=Cy_x/(Cy_x+Cn);
    B=mu_zx0+ymu*CC;
end

% Put the sharpened components back and invert the PCA
G(:,1:r)=B;
Zhat=G*U';

Z=reshape(Zhat,[N N nb]);

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/compute_PhiTX.m
================================================
function PhiTX=compute_PhiTX(X,L,h,type)
% Forward transform of every column of X.
%   Each column of X (M samples) is reshaped to a sqrt(M)-by-sqrt(M) image,
%   transformed with the method selected by type, and the coefficients are
%   stacked into the matching column of PhiTX.
%     X     M x T matrix, one vectorised square image per column
%     L     number of decomposition levels
%     h     filter handed to the 'dwt2', 'dwt' and 'rwt' transforms
%     type  'dwt2' | 'dwt' | 'rwt' | 'swt' | 'iso' | 'cwt' | 'cplxdt'
%           (case-insensitive); anything else raises an error.
[M,T]=size(X);
side=sqrt(M);

switch lower(type)
    case 'dwt2'
        PhiTX=zeros(size(X));
        for c=1:T
            coef=FWT2_PO(reshape(X(:,c),[side side]),log2(side)-L,h);
            PhiTX(:,c)=coef(:);
        end
    case 'dwt'
        PhiTX=zeros(size(X));
        for c=1:T
            coef=mdwt(reshape(X(:,c),[side side]),h,L);
            PhiTX(:,c)=coef(:);
        end
    case 'rwt'
        PhiTX=zeros((3*L+1)*M,T);
        for c=1:T
            [xl, xh, L]=mrdwt(reshape(X(:,c),[side side]),h,L);
            coef=[xl xh];
            PhiTX(:,c)=coef(:)/2;
        end
    case 'swt'
        PhiTX=zeros((3*L+1)*M,T);
        for c=1:T
            coef=myswt2(reshape(X(:,c),[side side]),L,'db4');
            PhiTX(:,c)=coef(:);
        end
    case 'iso'
        PhiTX=zeros((L+1)*M,T);
        for c=1:T
            coef=cell2mat(atrousdec(reshape(X(:,c),[side side]),'maxflat',L));
            PhiTX(:,c)=coef(:);
        end
    case 'cwt'
        J=L;
        [Faf, Fsf] = FSfarras; % 1st stage anal. & synth. filters
        [af, sf] = dualfilt1;
        for c=1:T
            w=dualtree2D(reshape(X(:,c),[side side]),J,Faf,af);
            W=[];
            % Per tree: the three subbands of every level, then the
            % low-pass residual
            for t=1:2
                for j=1:J
                    for s=1:3
                        W=[W; w{j}{t}{s}(:)];
                    end
                end
                W=[W; w{J+1}{t}(:)];
            end
            PhiTX(:,c)=W;
        end
    case 'cplxdt'
        J=L;
        [Faf, Fsf] = FSfarras; % 1st stage anal. & synth. filters
        [af, sf] = dualfilt1;
        for c=1:T
            w=cplxdual2D(reshape(X(:,c),[side side]),J,Faf,af);
            W=[];
            % Per part: for every level both trees with three subbands
            % each, then the two low-pass residuals
            for t=1:2
                for j=1:J
                    for l=1:2
                        for s=1:3
                            W=[W; w{j}{t}{l}{s}(:)];
                        end
                    end
                end
                for l=1:2
                    W=[W; w{J+1}{t}{l}(:)];
                end
            end
            PhiTX(:,c)=W;
        end
    otherwise
        error(['Unknown method ' type]);
end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/compute_PhiX.m
================================================
function PhiX=compute_PhiX(X,L,h,type)
% Inverse transform of every column of X.
%   Each column of X holds the stacked coefficients of one square image, in
%   the layout produced by compute_PhiTX for the same type. The column is
%   split back into subbands, inverted with the method selected by type, and
%   the reconstructed image is vectorised into the matching column of PhiX.
%     X     N x r coefficient matrix
%     L     number of decomposition levels
%     h     filter handed to the 'dwt', 'dwt2' and 'rwt' transforms
%     type  'dwt' | 'dwt2' | 'rwt' | 'swt' | 'iso' | 'cwt' | 'cplxdt'
%           (case-insensitive); anything else raises an error.
[N,r]=size(X);

switch lower(type)
    case 'dwt'
        side=sqrt(N);
        PhiX=zeros(size(X));
        for c=1:r
            rec=midwt(reshape(X(:,c),[side side]),h,L);
            PhiX(:,c)=rec(:);
        end
    case 'dwt2'
        side=sqrt(N);
        PhiX=zeros(size(X));
        for c=1:r
            rec=IWT2_PO(reshape(X(:,c),[side side]),log2(side)-L,h);
            PhiX(:,c)=rec(:);
        end
    case 'rwt'
        M=N/(3*L+1);
        side=sqrt(M);
        PhiX=zeros(M,r);
        for c=1:r
            % First M coefficients: low-pass band; the rest: detail bands
            lowBand=reshape(X(1:M,c),[side side]);
            highBands=reshape(X(M+1:end,c),[side 3*L*side]);
            rec=mirdwt(lowBand,highBands,h,L);
            PhiX(:,c)=rec(:)*2;
        end
    case 'swt'
        M=N/(3*L+1);
        side=sqrt(M);
        PhiX=zeros(M,r);
        for c=1:r
            rec=iswt2(reshape(X(:,c),[side side 3*L+1]),'db4');
            PhiX(:,c)=rec(:);
        end
    case 'iso'
        M=N/(L+1);
        side=sqrt(M);
        PhiX=zeros(M,r);
        for c=1:r
            xc=reshape(X(:,c),[side (L+1)*side]);
            xc=mat2cell(xc,[side],repmat(side,[1 L+1]));
            rec=atrousrec(xc,'maxflat');
            PhiX(:,c)=rec(:);
        end
    case 'cwt'
        J=L;
        [Faf, Fsf] = FSfarras; % 1st stage anal. & synth. filters
        [af, sf] = dualfilt1;

        PhiX=zeros(size(X,1)/2,size(X,2));
        n=sqrt(size(X,1)/2);
        lowSide=n/2^J;           % side of the low-pass residual
        lowLen=(n/2^(J))^2;      % samples in the low-pass residual
        for c=1:r
            W=X(:,c);
            j_offset=0;
            for j=1:J
                sb=n/2^j;        % side of a level-j subband
                blk=(n/2^j)^2;   % samples in a level-j subband
                for s=1:3
                    first=j_offset+1+(s-1)*blk;
                    last=j_offset+s*blk;
                    % Tree 2 is stored n^2 samples after tree 1
                    w2{j}{1}{s}=reshape(W(first:last),[sb sb]);
                    w2{j}{2}{s}=reshape(W(n^2+first:n^2+last),[sb sb]);
                end
                j_offset=j_offset+3*blk;
            end
            w2{J+1}{1}=reshape(W(n^2-lowLen+1:n^2),[lowSide lowSide]);
            w2{J+1}{2}=reshape(W(2*n^2-lowLen+1:2*n^2),[lowSide lowSide]);
            rec=idualtree2D(w2,J,Fsf,sf);
            PhiX(:,c)=rec(:);
        end
    case 'cplxdt'
        J=L;
        [Faf, Fsf] = FSfarras; % 1st stage anal. & synth. filters
        [af, sf] = dualfilt1;

        PhiX=zeros(size(X,1)/4,size(X,2));
        n=sqrt(size(X,1)/4);
        lowSide=n/2^J;           % side of a low-pass residual
        lowLen=(n/2^(J))^2;      % samples in a low-pass residual
        for c=1:r
            W=X(:,c);
            j_offset=0;
            for j=1:J
                sb=n/2^j;        % side of a level-j subband
                blk=(n/2^j)^2;   % samples in a level-j subband
                l_offset=0;
                for l=1:2
                    for s=1:3
                        first=j_offset+l_offset+1+(s-1)*blk;
                        last=j_offset+l_offset+s*blk;
                        % The second part is stored 2*n^2 samples later
                        w2{j}{1}{l}{s}=reshape(W(first:last),[sb sb]);
                        w2{j}{2}{l}{s}=reshape(W(2*n^2+first:2*n^2+last),[sb sb]);
                    end
                    l_offset=l_offset+3*blk;
                end
                j_offset=j_offset+6*blk;
            end
            w2{J+1}{1}{1}=reshape(W(2*n^2-2*lowLen+1:2*n^2-lowLen),[lowSide lowSide]);
            w2{J+1}{1}{2}=reshape(W(2*n^2-lowLen+1:2*n^2),[lowSide lowSide]);
            w2{J+1}{2}{1}=reshape(W(4*n^2-2*lowLen+1:4*n^2-lowLen),[lowSide lowSide]);
            w2{J+1}{2}{2}=reshape(W(4*n^2-lowLen+1:4*n^2),[lowSide lowSide]);
            rec=icplxdual2D(w2,J,Fsf,sf);
            PhiX(:,c)=rec(:);
        end
    otherwise
        error(['Unknown method ' type]);
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/readme
================================================
test


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/AUTHORS
================================================
The primary authors of Rice Wavelet Toolbox are and/or have been:
 * Richard Baraniuk
 * Hyeokho Choi
 * Ramesh Neelamani
 * Vinay Ribeiro
 * Rebecca Hindman
 * Justin Romberg
 * Haitao Guo
 * Felix Fernandes
 * Brent Hendricks
 * Ramesh Gopinath
 * Markus Lang
 * Jan Erik Odegard
 * Dong Wei 
 * Joshua Jackson


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/CMakeLists.txt
================================================
# Top-level CMake script of the "rwt" project: it only descends into the
# sub-directories below and, on Unix, defines a "distclean" target.
cmake_minimum_required (VERSION 2.6)
project (rwt)
# Sub-directories that carry their own CMakeLists.txt
subdirs(lib/src)
subdirs(doc)
subdirs(python)

# MATLAB MEX discovery is currently disabled
#set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/")
#FIND_PACKAGE(MatlabMex REQUIRED)

# This section based on http://www.cmake.org/pipermail/cmake/2003-June/003953.html
IF (UNIX)
  # "distclean" prints a message; the actual removal is attached to the
  # target by the custom command further down
  ADD_CUSTOM_TARGET (distclean @echo cleaning for source distribution)
  # Files and directories removed by "distclean": CMake caches and generated
  # makefiles (up to two directory levels deep), build products, generated
  # documentation, generated Python wrapper files and editor backups
  SET(DISTCLEANED
   CMakeFiles
   cmake.depends
   cmake.check_depends
   CMakeCache.txt
   cmake.check_cache
   Makefile
   *.cmake
   */CMakeCache.txt
   */CMakeFiles
   */Makefile
   */*.cmake
   */*/CMakeCache.txt
   */*/CMakeFiles
   */*/*.cmake
   */*/Makefile
   lib/src/*.a
   doc/Doxyfile
   doc/html
   doc/latex
   core core.*
   gmon.out
   */*.mex*
   */*.o
   lib/src/*.o
   python/rwtPYTHON_wrap.cxx
   python/rwt.py
   python/rwt.pyc
   python/_rwt.so
   *~
  )
  
  # Runs "rm -Rf CMakeTmp <list above>" as part of the distclean target
  ADD_CUSTOM_COMMAND(
    DEPENDS clean
    COMMENT "distribution clean"
    COMMAND rm
    ARGS    -Rf CMakeTmp ${DISTCLEANED}
    TARGET  distclean
  )
ENDIF(UNIX)


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/HACKING
================================================
PHREAK
Look, you wanna be elite? You gotta do a
righteous hack. None of this accidental shit.

CEREAL
Oh yeah, you want a seriously righteous hack,
you score one of those Gibsons man. You know,
supercomputers they use to like, do physics,
and look for oil and stuff?

-Hackers (1995)

================================================================================
= INTRODUCTION
================================================================================

There are a number of ways in which wavelet toolbox might be expanded or
improved. Arbitrary dimension handling or just 3d, non-orthogonal wavelets,
support for other environments, and so on. If you are seeking to implement these
or any other changes, this document will be your launching point. All of this 
applies to the state of the code as of the 3.0 release - if this sentence 
hasn't been updated but the code has then you may assume that some of the rest 
of this text may be outdated.

================================================================================
= DEVELOPER DOCUMENTATION
================================================================================

You will likely want to begin by building the documentation files. You'll need
CMake, Doxygen, and GraphViz. Hopefully you are on Linux, OSX, or some other 
Unix flavor. If you are using Windows as your primary development platform, take 
this time to stop and think about your life and where it's going. Ok, so to 
build the documentation run:
  cmake .
  make doc
After this you should see HTML documentation in doc/html and a pdf with the
same content at doc/latex/refman.pdf

If you haven't used Doxygen before, here's what you need to know: put a ! after
the opening /* in a comment to include that comment in the generated docs, look
at the top of some existing functions to see how function parameters are shown,
and note that you can use latex formulas.

================================================================================
= UNIT TESTS
================================================================================

Before changing anything you should make your way over to the tests directory
from the MATLAB prompt and run:
 runtests
Witness how all of these tests pass. Take care that this is still true after
any changes you make. Now of course you have an IQ that must be measured using
2-byte integers and have never once introduced a bug into computer software,
but these tests are important so that mortals can come along later and know
whether a change they made broke some function of the software or not. 

There are also Python tests at python/test_rwt.py and these are mostly the same
as the MATLAB tests. If you are feeling adventurous you might look into unifying
these into a single script that generates both sets of tests. You should run
both sets of tests before publishing any commit that could conceivably affect
them. If you don't know every line of code in both platforms intimately then you
should take the safe route and run both test scripts, and possibly the Octave
tests as well. Unfortunately Octave lacks a lot of things that would be needed
to run MATLAB xUnit.

================================================================================
= TOUR OF THE C CODE
================================================================================

As of version 3.0, all MATLAB-specific C code has been isolated to a few places.
The files in the mex/ directory are MATLAB MEX wrappers for the transforms and
these files are intended to be as short as possible. All the initialization
code common to the different transforms is found in lib/src/init.c and some of
that code is also shared with Python.

The real magic of making things work across different environments is in 
lib/inc/rwt_platform.h - in particular the mat() macro abstracts away memory 
addressing so you don't have to worry about row major order and column major 
order. The rwt_printf, mat_offset, offset_row, and offset_col macros will be
very useful if you need to change any of the code that uses the mat() macro.

To understand the code for the transforms themselves, start with lib/src/dwt.c
which is the best documented of the transforms. The rest of them are written
and structured in a very similar fashion.

The flow of the code is as follows. One of the transforms is called from MATLAB.
This invokes one of the wrappers from the mex directory. The function here calls
rwt_matlab_init in lib/src/init.c which calls other init functions. From here 
the mex wrapper calls the transform in lib/src. For example, the mdwt function
for the discrete wavelet transform calls dwt() in the lib/src/dwt.c file. This
function has a few helpers in the same file. It allocates memory necessary for
the transform in dwt_allocate(), calculates the high and low pass coefficients
in dwt_coefficients(), performs the convolution in dwt_convolution, and frees
the allocated memory in the dwt_free() function.

In the case of Python, a python wrapper function in python/rwt.i calls some of
the same initialization code in lib/src/init.c then decides if the input is 1D
or 2D and calls a matching C wrapper function, also found in the python/rwt.i
file. Finally, this wrapper function calls the transform function found in the
lib/src directory.

================================================================================
= PYTHON / NUMPY
================================================================================

The HardTh, SoftTh, daubcqf, denoise, and makesig functions are implemented twice
- once in MATLAB and once in Python. This was simpler than rewriting them in C.
If you change any of these you will be glad to see that MATLAB and numpy are 
extremely similar.

Here follows the differences you may need to know. MATLAB indexes start at 1 and
numpy starts at 0. For three part indexes the order changes - a(b:c:d) in MATLAB
code corresponds to a[b-1:d:c] in Python/numpy. You must use the ddof=1 argument
to the std() function in Python. The size() function in MATLAB returns 2 numbers
for 1D inputs and the same function in Python returns 1 number. MATLAB assumes
additional return values beyond the 1st should be dropped if not assigned to
a separate variable - Python does not.

A quick look over the SWIG/numpy documentation might lead you to think that you
could use OUTPUT_ARRAY or INPLACE_FARRAY or some other macro to change how the
python bindings work to be more reasonable. You are probably wrong. Probably.

================================================================================
= THE BUILD SYSTEM
================================================================================

The CMake build system was selected for its license similarity to Wavelet 
Toolbox itself, though this is not strictly necessary. CMake also allows for
sophisticated results with relatively little work. Anything you want to do will
likely require some searching and playing. You may be tempted to switch to some
other more common build system, but this would probably only make things worse.


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/INSTALL
================================================
================================================================================
=  MATLAB Installation Instructions                                            =
================================================================================

1. Make sure you have the latest source code. See the GitHub page at
   https://github.com/ricedsp/rwt
   If you have the command line git tool installed you should be able to run:
   git clone https://github.com/ricedsp/rwt.git

2. Properly set up your system to create MEX-files. Refer to the MATLAB 
   documentation section "Build MEX-Files" at 
   http://www.mathworks.com/help/matlab/matlab_external/building-mex-files.html

3. Run MATLAB and change to the "bin" subdirectory containing the .m files

4. Compile the toolbox by executing the Matlab command: compile

5. Add the toolbox "bin" subdirectory to your Matlab path.

================================================================================
=  Octave Installation Instructions                                            =
================================================================================

Octave installation is similar to the procedure for MATLAB above. On Linux you
will need the octave-dev (Debian/Ubuntu) or octave-devel (RedHat, etc.) package
installed.

================================================================================
=  Python Installation Instructions                                            =
================================================================================

Python installation requires SWIG version 2.0.11 or greater and CMake. Also you
should have numpy and scipy installed. To install the python bindings, execute
the following commands:
 cd python
 cmake .
 sudo make install

On OSX, CMake is available from Macports http://www.macports.org/
For Redhat Enterprise Linux, Scientific Linux, CentOS, etc. there is a package
available on RepoForge http://repoforge.org/use/


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/LICENSE
================================================
Copyright (c) 2000 RICE UNIVERSITY. All rights reserved.

This software is distributed and licensed to you on a non-exclusive 
basis, free-of-charge. Redistribution and use in source and binary forms, 
with or without modification, are permitted provided that the following 
conditions are met:

1. Redistribution of source code must retain the above copyright notice, 
   this list of conditions and the following disclaimer.
2. Redistribution in binary form must reproduce the above copyright notice, 
   this list of conditions and the following disclaimer in the 
   documentation and/or other materials provided with the distribution.
3. Neither the name of the University nor the names of its contributors 
   may be used to endorse or promote products derived from this software 
   without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY WILLIAM MARSH RICE UNIVERSITY, HOUSTON, TEXAS, 
AND CONTRIBUTORS AS IS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, 
BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RICE UNIVERSITY 
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 
OR BUSINESS INTERRUPTIONS) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 
OTHERWISE), PRODUCT LIABILITY, OR OTHERWISE ARISING IN ANY WAY OUT OF THE 
USE OF THIS SOFTWARE,  EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

For information on commercial licenses, contact Rice University's Office of 
Technology Transfer at techtran@rice.edu or (713) 348-6173


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/bin/HardTh.m
================================================
function  x = HardTh(y,thld)
%    x = HardTh(y,thld);
%
%    HARDTH applies hard thresholding to the signal y: samples whose
%    magnitude does not exceed thld are multiplied by zero, every other
%    sample is multiplied by one.
%
%    Input:
%       y    : 1D or 2D signal to be thresholded
%       thld : threshold value
%
%    Output:
%       x : hard thresholded output, x = (abs(y) > thld).*y
%
%    Running the example: copy the example lines into a new script
%    (ex.m, say), strip the leading % from each line, then type 'ex'
%    at the prompt.
%
%    Example:
%       y = makesig('WernerSorrows',8);
%       thld = 1;
%       x = HardTh(y,thld)
%       x = 1.5545 5.3175 0 1.6956  -1.2678 0 1.7332 0
%
%    See also: SoftTh
%
%Author: Haitao Guo  <harry@jazz.rice.edu>

% Mask of the samples strictly above the threshold in magnitude.
survives = abs(y) > thld;
% Multiply instead of indexing so the result keeps the shape of y.
x = survives .* y;


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/bin/SoftTh.m
================================================
function  x = SoftTh(y,thld)
%    x = SoftTh(y,thld);
%
%    SOFTTH applies soft thresholding to the signal y: samples whose
%    magnitude is below thld are zeroed and the magnitude of every other
%    sample is reduced by thld, keeping its sign.
%
%    Input:
%       y    : 1D or 2D signal to be thresholded
%       thld : Threshold value
%
%    Output:
%       x : Soft thresholded output (x = sign(y)(|y|-thld)_+)
%
%    Running the example: copy the example lines into a new script
%    (ex.m, say), strip the leading % from each line, then type 'ex'
%    at the prompt.
%
%    Example:
%       y = makesig('Doppler',8);
%       thld = 0.2;
%       x = SoftTh(y,thld)
%       x = 0 0 0 -0.0703 0 0.2001 0.0483 0
%
%    See also: HardTh
%
%    Reference:
%       "De-noising via Soft-Thresholding" Tech. Rept. Statistics,
%       Stanford, 1992. D.L. Donoho.
%
%Author: Haitao Guo  <harry@jazz.rice.edu>

magnitude = abs(y);
% Samples at or above the threshold are the only ones that survive.
passes = magnitude >= thld;
% Amount left once the threshold has been subtracted from the magnitude.
shrunk = magnitude - thld;
x = sign(y) .* passes .* shrunk;


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/bin/compile.m
================================================
%    COMPILE compiles the c files and generates mex files.
%
%    Every path used below is relative to the current directory, so run
%    this script from the directory that contains it.
%
%    Octave : builds omdwt.mex, omidwt.mex, omrdwt.mex and omirdwt.mex in
%             the current directory with mkoctfile.
%    MATLAB : builds the four MEX-files into ../bin with mex, using
%             -largeArrayDims when the platform string returned by
%             computer() ends in '64' and -compatibleArrayDims otherwise.

% One entry per transform: ../mex/m<name>.c wraps ../lib/src/<name>.c.
transforms = {'dwt', 'idwt', 'rdwt', 'irdwt'};
% Sources and include path shared by every build.
shared_args = {'../lib/src/init.c', '../lib/src/platform.c', '-I../lib/inc'};

if exist('OCTAVE_VERSION', 'builtin')
  for k = 1:numel(transforms)
    name = transforms{k};
    mkoctfile('--mex', '-v', '-DOCTAVE_MEX_FILE', ...
              ['../mex/m' name '.c'], ['../lib/src/' name '.c'], ...
              shared_args{:}, '-o', ['om' name '.mex']);
  end
else
  arch = computer();
  if (arch(length(arch)-1:length(arch)) == '64')
    dims_flag = '-largeArrayDims';
  else
    dims_flag = '-compatibleArrayDims';
  end
  for k = 1:numel(transforms)
    name = transforms{k};
    mex('-v', dims_flag, ...
        ['../mex/m' name '.c'], ['../lib/src/' name '.c'], ...
        shared_args{:}, '-outdir', '../bin');
  end
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/bin/daubcqf.m
================================================
function [h_0,h_1] = daubcqf(N,TYPE)
%    [h_0,h_1] = daubcqf(N,TYPE); 
%
%    Function computes the Daubechies' scaling and wavelet filters
%    (normalized to sqrt(2)).
%
%    Input: 
%       N    : Length of filter (must be even and at least 2)
%       TYPE : Optional parameter that distinguishes the minimum phase,
%              maximum phase and mid-phase solutions ('min', 'max', or
%              'mid'). If no argument is specified, or the value is not
%              one of 'max' / 'mid', the minimum phase solution is used.
%
%    Output: 
%       h_0 : Daubechies' scaling filter (phase selected by TYPE)
%       h_1 : Daubechies' wavelet filter (h_0 reversed, with every
%             odd-indexed tap negated)
%
%    Errors:
%       - N is odd
%       - N is smaller than 2
%       - the computed filter's energy sum(h_0.^2) differs from 1 by more
%         than 1e-4 (numerical instability for this N)
%
%    Example:
%       N = 4;
%       TYPE = 'min';
%       [h_0,h_1] = daubcqf(N,TYPE)
%       h_0 = 0.4830 0.8365 0.2241 -0.1294
%       h_1 = 0.1294 0.2241 -0.8365 0.4830
%
%    Reference: "Orthonormal Bases of Compactly Supported Wavelets",
%                CPAM, Oct.89 
%
%Author: Ramesh Gopinath  <ramesh@dsp.rice.edu>

if(nargin < 2),
  TYPE = 'min';
end;
if(rem(N,2) ~= 0),
  error('No Daubechies filter exists for ODD length');
end;
if(N < 2),
  % Without this guard N <= 0 fell through and returned a length-2 filter.
  error('Filter length N must be at least 2');
end;
K = N/2;
a = 1;
p = 1;
q = 1;
h_0 = [1 1];
for j  = 1:K-1,
  a = -a * 0.25 * (j + K - 1)/j;
  h_0 = [0 h_0] + [h_0 0];      % multiply h_0 by (z + 1)
  p = [0 -p] + [p 0];           % multiply p by (z - 1) ...
  p = [0 -p] + [p 0];           % ... twice per iteration
  q = [0 q 0] + a*p;
end;
q = sort(roots(q));
qt = q(1:K-1);                  % default: the K-1 roots that sort first
% strcmp instead of ==: elementwise comparison of char arrays errors as
% soon as TYPE does not have exactly three characters.
if strcmp(TYPE,'mid'),
  if rem(K,2)==1,  
    qt = q([1:4:N-2 2:4:N-2]);
  else
    qt = q([1 4:4:K-1 5:4:K-1 N-3:-4:K N-4:-4:K]);
  end;
end;
h_0 = conv(h_0,real(poly(qt)));
h_0 = sqrt(2)*h_0/sum(h_0); 	%Normalize to sqrt(2);
if strcmp(TYPE,'max'),
  h_0 = fliplr(h_0);
end;
% The deviation from unit energy must be taken before abs(); the former
% abs(sum(h_0 .^ 2))-1 > 1e-4 could never trigger for energies below 1.
if(abs(sum(h_0 .^ 2) - 1) > 1e-4) 
  error('Numerically unstable for this value of "N".');
end;
h_1 = rot90(h_0,2);
h_1(1:2:N)=-h_1(1:2:N);


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/bin/denoise.m
================================================
function [xd,xn,option] = denoise(x,h,type,option)
%    [xd,xn,option] = denoise(x,h,type,option); 
%
%    DENOISE is a generic wavelet based denoiser. The signal x is
%    transformed with the 2-band wavelet system given by the filter h,
%    the coefficients are thresholded and the result is transformed back.
%    Either the discrete wavelet transform (DWT, via mdwt/midwt) or the
%    undecimated / shift invariant DWT (UDWT, via mrdwt/mirdwt) is used.
%
%    Input:  
%       x         : 1D or 2D signal to be denoised
%       h         : Scaling filter to be applied
%       type      : Type of transform (Default: type = 0)
%                   0 --> Discrete wavelet transform (DWT)
%                   1 --> Undecimated DWT (UDWT)
%       option    : Parameter vector; missing values are filled in by
%                   setopt from these defaults:
%                   type = 0 --> option = [0 3.0 0 2 0 0]
%                   type = 1 --> option = [0 3.6 0 1 0 0]
%       option(1) : Whether to threshold low-pass part
%                   0 --> Don't threshold low pass component 
%                   1 --> Threshold low pass component
%       option(2) : Threshold multiplier, c. The threshold is computed
%                   as thld = c*(noise estimate), see option(3).
%                   Defaults: c = 3.0 (DWT), c = 3.6 (UDWT)
%       option(3) : Type of noise estimator
%                   0 --> MAD: median(abs(coefficients))/.67
%                   1 --> STD: std(coefficients)
%       option(4) : Type of thresholding
%                   2 --> Soft thresholding
%                   1 --> Hard thresholding
%       option(5) : Number of levels, L, in wavelet decomposition. The
%                   value 0 selects L = floor(log2(n)), where n is the
%                   signal length (1D) or the smaller dimension (2D).
%       option(6) : Actual threshold to use. Any value other than 0 is
%                   used directly, so option(2) and option(3) are then
%                   ignored.
%
%    Output: 
%       xd     : Estimate of noise free signal 
%       xn     : The estimated noise signal (x-xd)
%       option : The parameters actually used, laid out like the input
%                vector, with option(6) set to the threshold applied and
%                one added element option(7) = type.
%
%    Running the examples: copy the example lines into a new script
%    (ex.m, say), strip the leading % from each line, then type 'ex'
%    at the prompt.
%
%    Example 1: 
%       h = daubcqf(6); [s,N] = makesig('Doppler'); n = randn(1,N);
%       x = s + n/10;     % (approximately 10dB SNR)
%       figure;plot(x);hold on;plot(s,'r');
%
%       %Denoise x with the default method based on the DWT
%       [xd,xn,opt1] = denoise(x,h);
%       figure;plot(xd);hold on;plot(s,'r');
%
%       %Denoise x using the undecimated (LSI) wavelet transform
%       [yd,yn,opt2] = denoise(x,h,1);
%       figure;plot(yd);hold on;plot(s,'r');
%
%    Example 2: (on an image)  
%       h = daubcqf(6);  load lena; 
%       noisyLena = lena + 25 * randn(size(lena));
%       figure; colormap(gray); imagesc(lena); title('Original Image');
%       figure; colormap(gray); imagesc(noisyLena); title('Noisy Image'); 
%       %Denoise lena with the default method based on the DWT
%       [denoisedLena,xn,opt1] = denoise(noisyLena,h);
%       figure; colormap(gray); imagesc(denoisedLena); title('denoised Image');
%
%    See also: mdwt, midwt, mrdwt, mirdwt, SoftTh, HardTh, setopt
%
%Author: Jan Erik Odegard  <odegard@ece.rice.edu>

% ---- argument defaults ---------------------------------------------------
if nargin < 2
  error('You need to provide at least 2 inputs: x and h');
end
if nargin < 4
  option = [];
end
if nargin < 3 || isempty(type)
  type = 0;
end
if type == 0
  default_opt = [0 3.0 0 2 0 0];
elseif type == 1
  default_opt = [0 3.6 0 1 0 0];
else
  error(['Unknown denoising method',10,...
         'If it is any good we need to have a serious talk :-)']);
end
option = setopt(option,default_opt);

% ---- signal geometry and decomposition depth -----------------------------
[mx,nx] = size(x);
dim = min(mx,nx);
n = dim;
if dim == 1
  n = max(mx,nx);               % vector input: use its length
end
L = option(5);
if L == 0
  L = floor(log2(n));
end

if type == 0                    % Denoising by DWT
  xd = mdwt(x,h,L);
  if option(6) == 0
    % Noise level is estimated from the lower-right block of coefficients.
    thld = denoise_estimate_thld(xd(floor(mx/2)+1:mx,floor(nx/2)+1:nx),option);
  else
    thld = option(6);
  end
  % Remember the coarsest block so it can be put back untouched.
  if dim == 1
    ix = 1:n/(2^L);
    ykeep = xd(ix);
  else
    ix = 1:mx/(2^L);
    jx = 1:nx/(2^L);
    ykeep = xd(ix,jx);
  end
  xd = denoise_apply_rule(xd,thld,option(4));
  if option(1) == 0
    if dim == 1
      xd(ix) = ykeep;
    else
      xd(ix,jx) = ykeep;
    end
  end
  xd = midwt(xd,h,L);
elseif type == 1                % Denoising by UDWT
  [xl,xh] = mrdwt(x,h,L);
  if dim == 1
    c_offset = 1;
  else
    c_offset = 2*nx + 1;
  end
  if option(6) == 0
    % Noise level is estimated from nx columns of xh starting at c_offset.
    thld = denoise_estimate_thld(xh(:,c_offset:c_offset+nx-1),option);
  else
    thld = option(6);
  end
  xh = denoise_apply_rule(xh,thld,option(4));
  if option(1) == 1
    xl = denoise_apply_rule(xl,thld,option(4));
  end
  xd = mirdwt(xl,xh,h,L);
else                            % Denoising by unknown method
  error(['Unknown denoising method',10,...
         'If it is any good we need to have a serious talk :-)']);
end
option(6) = thld;
option(7) = type;
xn = x - xd; 


function thld = denoise_estimate_thld(coefs,option)
% Threshold = option(2) times the noise estimate selected by option(3).
samples = coefs(:);
if option(3) == 0
  thld = option(2)*median(abs(samples))/.67;
elseif option(3) == 1
  thld = option(2)*std(samples);
else
  error('Unknown threshold estimator, Use either MAD or STD');
end


function coefs = denoise_apply_rule(coefs,thld,rule)
% Apply soft (rule 2) or hard (rule 1) thresholding to coefs.
if rule == 2
  coefs = SoftTh(coefs,thld);
elseif rule == 1
  coefs = HardTh(coefs,thld);
else
  error('Unknown threshold rule. Use either Soft (2) or Hard (1)');
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/bin/makesig.m
================================================
function [x,N] = makesig(SigName,N)
% [x,N] = makesig(SigName,N) Creates artificial test signals identical to the
%     standard test signals proposed and used by D. Donoho and I. Johnstone
%     in WaveLab (a matlab toolbox developed by Donoho et al. at the
%     statistics department at Stanford University).
%
%    Input:  SigName - Name of the desired signal (Default 'AllSig')
%                        'AllSig' (Returns a matrix with all the signals,
%                                  one per row, in the order listed here)
%                        'HeaviSine'
%                        'Bumps'
%                        'Blocks'
%                        'Doppler'
%                        'Ramp'
%                        'Cusp'
%                        'Sing'
%                        'HiSine'
%                        'LoSine'
%                        'LinChirp'
%                        'TwoChirp'
%                        'QuadChirp'
%                        'MishMash'
%                        'WernerSorrows' (Heisenberg)
%                        'Leopold' (Kronecker)
%            N       - Length in samples of the desired signal (Default 512)
%
%    Output: x   - vector/matrix of test signals; empty when SigName is
%                  not one of the names above
%            N   - length of signal returned
%
%    References:
%            WaveLab can be accessed at
%            www_url: http://playfair.stanford.edu/~wavelab/
%            Also see various articles by D.L. Donoho et al. at
%            web_url: http://playfair.stanford.edu/
%
%Author: Jan Erik Odegard  <odegard@ece.rice.edu>
%This m-file is a copy of the  code provided with WaveLab
%customized to be consistent with RWT.

if(nargin < 1)
  SigName = 'AllSig';
end;
if(nargin < 2)
  N = 512;
end;

t = (1:N) ./N;                       % sample positions 1/N, 2/N, ..., 1
everything = strcmp(SigName,'AllSig');
x = [];                              % each selected signal adds one row

if(strcmp(SigName,'HeaviSine') | everything),
  x = [x; 4.*sin(4*pi.*t) - sign(t - .3) - sign(.72 - t)];
end;

if(strcmp(SigName,'Bumps') | everything),
  x = [x; makesig_add_bumps(zeros(size(t)),t)];
end;

if(strcmp(SigName,'Blocks') | everything),
  pos = [ .1 .13 .15 .23 .25 .40 .44 .65  .76 .78 .81];
  hgt = [4 (-5) 3 (-4) 5 (-4.2) 2.1 4.3  (-3.1) 2.1 (-4.2)];
  steps = zeros(size(t));
  for j=1:length(pos)
    steps = steps + (1 + sign(t-pos(j))).*(hgt(j)/2) ;
  end
  x = [x; steps];
end;

if(strcmp(SigName,'Doppler') | everything),
  x = [x; sqrt(t.*(1-t)).*sin((2*pi*1.05) ./(t+.05))];
end;

if(strcmp(SigName,'Ramp') | everything),
  x = [x; t - (t >= .37)];
end;

if(strcmp(SigName,'Cusp') | everything),
  x = [x; sqrt(abs(t - .37))];
end;

if(strcmp(SigName,'Sing') | everything),
  k = floor(N * .37);
  x = [x; 1 ./abs(t - (k+.5)/N)];
end;

if(strcmp(SigName,'HiSine') | everything),
  x = [x; sin( pi * (N * .6902) .* t)];
end;

if(strcmp(SigName,'LoSine') | everything),
  x = [x; sin( pi * (N * .3333) .* t)];
end;

if(strcmp(SigName,'LinChirp') | everything),
  x = [x; sin(pi .* t .* ((N .* .125) .* t))];
end;

if(strcmp(SigName,'TwoChirp') | everything),
  x = [x; sin(pi .* t .* (N .* t)) + sin((pi/3) .* t .* (N .* t))];
end;

if(strcmp(SigName,'QuadChirp') | everything),
  x = [x; sin( (pi/3) .* t .* (N .* t.^2))];
end;

if(strcmp(SigName,'MishMash') | everything),  
  % QuadChirp + LinChirp + HiSine
  mix = sin( (pi/3) .* t .* (N .* t.^2)) ;
  mix = mix +  sin( pi * (N * .6902) .* t);
  mix = mix +  sin(pi .* t .* (N .* .125 .* t));
  x = [x; mix];
end;

if(strcmp(SigName,'WernerSorrows') | everything),
  % Three sinusoidal terms, then the same bumps as 'Bumps' on top.
  mix = sin( pi .* t .* (N/2 .* t.^2)) ;
  mix = mix +  sin( pi * (N * .6902) .* t);
  mix = mix +  sin(pi .* t .* (N .* t));
  x = [x; makesig_add_bumps(mix,t)];
end;

if(strcmp(SigName,'Leopold') | everything),
  x = [x; (t == floor(.37 * N)/N)]; 		% Kronecker
end;


function y = makesig_add_bumps(y,t)
% Adds the eleven bumps shared by 'Bumps' and 'WernerSorrows' to y.
pos = [ .1 .13 .15 .23 .25 .40 .44 .65  .76 .78 .81];
hgt = [ 4  5   3   4  5  4.2 2.1 4.3  3.1 5.1 4.2];
wth = [.005 .005 .006 .01 .01 .03 .01 .01  .005 .008 .005];
for j =1:length(pos)
  y = y + hgt(j)./( 1 + abs((t - pos(j))./wth(j))).^4;
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/bin/mdwt.m
================================================
function [y,L] = mdwt(x,h,L)
%    [y,L] = mdwt(x,h,L);
%
%    Function computes the discrete wavelet transform y for a 1D or 2D input
%    signal x using the scaling filter h.
%
%    This M-file is the Octave entry point: it forwards to the compiled
%    omdwt function. Outside Octave it only raises an error asking for
%    the toolbox to be compiled.
%
%    Input:
%       x : finite length 1D or 2D signal (implicitly periodized)
%       h : scaling filter
%       L : number of levels. In the case of a 1D signal, length(x) must be
%           divisible by 2^L; in the case of a 2D signal, the row and the
%           column dimension must be divisible by 2^L. If no argument is
%           specified, a full DWT is returned for maximal possible L.
%
%    Output:
%       y : the wavelet transform of the signal 
%           (see example to understand the coefficients)
%       L : number of decomposition levels
%
%    1D Example:
%       x = makesig('LinChirp',8);
%       h = daubcqf(4,'min');
%       L = 2;
%       [y,L] = mdwt(x,h,L)
%
%    1D Example's  output and explanation:
%
%       y = [1.1097 0.8767 0.8204 -0.5201 -0.0339 0.1001 0.2201 -0.1401]
%       L = 2
%
%    The coefficients in output y are arranged as follows
%
%       y(1) and y(2) : Scaling coefficients (lowest frequency)
%       y(3) and y(4) : Band pass wavelet coefficients
%       y(5) to y(8)  : Finest scale wavelet coefficients (highest frequency)
%
%    2D Example:
%
%       load test_image        
%       h = daubcqf(4,'min');
%       L = 1;
%       [y,L] = mdwt(test_image,h,L);
%
%    2D Example's  output and explanation:
%
%       The coefficients in y are arranged as follows.
%
%              .------------------.
%              |         |        |
%              |    4    |   2    |
%              |         |        |
%              |   L,L   |   H,L  |
%              |         |        |
%              --------------------
%              |         |        |
%              |    3    |   1    |
%              |         |        |
%              |   L,H   |  H,H   |
%              |         |        |
%              `------------------'
%       
%       where 
%            1 : High pass vertically and high pass horizontally
%            2 : Low pass vertically and high pass horizontally
%            3 : High pass vertically and low  pass horizontally
%            4 : Low pass vertically and Low pass horizontally 
%                (scaling coefficients)
%
%    See also: midwt, mrdwt, mirdwt
%
%Author: Markus Lang  <lang@jazz.rice.edu>
if exist('OCTAVE_VERSION', 'builtin')
  % NOTE(review): presumably promotes logical input to double before it
  % reaches the compiled code - confirm against the MEX wrapper.
  x = x * 1.0;
  % nargin tells whether L was passed. The former exist('L') also matched
  % any file, function or folder named L on the search path.
  if (nargin >= 3)
    [y,L] = omdwt(x,h,L);
  else  
    [y,L] = omdwt(x,h);
  end
else
  error('You must compile wavelet toolbox before use')
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/bin/midwt.m
================================================
function [y,L] = midwt(x,h,L)
%    [x,L] = midwt(y,h,L);
% 
%    Function computes the inverse discrete wavelet transform x for a 1D or
%    2D input signal y using the scaling filter h.
%
%    This M-file is the Octave entry point: it forwards to the compiled
%    omidwt function. Outside Octave it only raises an error asking for
%    the toolbox to be compiled.
%
%    Note: the help text names the transform input y and the reconstructed
%    output x; in the function signature those two names are swapped.
%
%    Input:
%       y : finite length 1D or 2D input signal (implicitly periodized)
%           (see function mdwt to find the structure of y)
%       h : scaling filter
%       L : number of levels. In the case of a 1D signal, length(x) must be
%           divisible by 2^L; in the case of a 2D signal, the row and the
%           column dimension must be divisible by 2^L.  If no argument is
%           specified, a full inverse DWT is returned for maximal possible
%           L.
%
%    Output:
%       x : periodic reconstructed signal
%       L : number of decomposition levels
%
%    1D Example:
%       xin = makesig('LinChirp',8);
%       h = daubcqf(4,'min');
%       L = 1;
%       [y,L] = mdwt(xin,h,L);
%       [x,L] = midwt(y,h,L)
%
%    1D Example's  output:
%
%       x = 0.0491 0.1951 0.4276 0.7071 0.9415 0.9808 0.6716 0.0000
%       L = 1
%
%    See also: mdwt, mrdwt, mirdwt
%
%Author: Markus Lang  <lang@jazz.rice.edu>
if exist('OCTAVE_VERSION', 'builtin')
  % nargin tells whether L was passed. The former exist('L') also matched
  % any file, function or folder named L on the search path.
  if (nargin >= 3)
    [y,L] = omidwt(x,h,L);
  else  
    [y,L] = omidwt(x,h);
  end
else
  error('You must compile wavelet toolbox before use')
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/bin/mirdwt.m
================================================
function [x,L] = mirdwt(yl,yh,h,L)
%    function [x,L] = mirdwt(yl,yh,h,L);
% 
%    Function computes the inverse redundant discrete wavelet
%    transform x  for a 1D or 2D input signal. (Redundant means here
%    that the sub-sampling after each stage of the forward transform
%    has been omitted.) yl contains the lowpass and yh the highpass
%    components as computed, e.g., by mrdwt. In the case of a 2D
%    signal, the ordering in
%    yh is [lh hl hh lh hl ... ] (first letter refers to row, second
%    to column filtering).  
%
%    Input:
%       yl : lowpass component
%       yh : highpass components
%       h  : scaling filter
%       L  : number of levels. In the case of a 1D signal, 
%            length(yl) must  be divisible by 2^L;
%            in the case of a 2D signal, the row and
%            the column dimension must be divisible by 2^L.
%            L may be omitted; the underlying routine is then called
%            without it.
%   
%    Output:
%       x : finite length 1D or 2D signal
%       L : number of levels
%
%  HERE'S AN EASY WAY TO RUN THE EXAMPLES:
%  Cut-and-paste the example you want to run to a new file 
%  called ex.m, for example. Delete out the % at the beginning 
%  of each line in ex.m (Can use search-and-replace in your editor
%  to replace it with a space). Type 'ex' in matlab and hit return.
%
%
%    Example 1:
%    xin = makesig('Leopold',8);
%    h = daubcqf(4,'min');
%    L = 1;
%    [yl,yh,L] = mrdwt(xin,h,L);
%    [x,L] = mirdwt(yl,yh,h,L)
%    x = 0.0000 1.0000 0.0000 -0.0000 0 0 0 -0.0000
%    L = 1
%  
%    Example 2:  
%    load lena;
%    h = daubcqf(4,'min');
%    L = 2;
%    [ll_lev2,yh,L] = mrdwt(lena,h,L); % lena is a 256x256 matrix
%    N = 256;
%    lh_lev1 = yh(:,1:N); 
%    hl_lev1 = yh(:,N+1:2*N); 
%    hh_lev1 = yh(:,2*N+1:3*N);
%    lh_lev2 = yh(:,3*N+1:4*N); 
%    hl_lev2 = yh(:,4*N+1:5*N); 
%    hh_lev2 = yh(:,5*N+1:6*N);
%    figure; colormap(gray); imagesc(lena); title('Original Image');
%    figure; colormap(gray); imagesc(ll_lev2); title('LL Level 2');
%    figure; colormap(gray); imagesc(hh_lev2); title('HH Level 2');
%    figure; colormap(gray); imagesc(hl_lev2); title('HL Level 2');
%    figure; colormap(gray); imagesc(lh_lev2); title('LH Level 2');
%    figure; colormap(gray); imagesc(hh_lev1); title('HH Level 1');
%    figure; colormap(gray); imagesc(hl_lev1); title('HL Level 1');
%    figure; colormap(gray); imagesc(lh_lev1); title('LH Level 1');
%    [lena_Hat,L] = mirdwt(ll_lev2,yh,h,L);
%    figure; colormap(gray); imagesc(lena_Hat); 
%                            title('Reconstructed Image');
%
%    See also: mdwt, midwt, mrdwt
%
%    Warning! min(size(yl))/2^L should be greater than length(h)
%
%Author: Markus Lang  <lang@jazz.rice.edu>
if exist('OCTAVE_VERSION', 'builtin')
  % Running under Octave: forward the call to omirdwt.
  % Multiplying by 1.0 presumably forces plain floating-point arrays
  % before they are handed to the compiled routine -- TODO confirm.
  yl = yl * 1.0;
  yh = yh * 1.0;
  % Test for the *variable* L only. A bare exist('L') is also nonzero when
  % a file or folder named L is visible, which would select the
  % four-argument call although L was never passed in.
  if (exist('L', 'var'))
    [x,L] = omirdwt(yl,yh,h,L);
  else  
    [x,L] = omirdwt(yl,yh,h);
  end
else
  % Outside Octave this M-file provides no implementation; it only reports
  % that the toolbox has to be compiled first.
  error('You must compile wavelet toolbox before use')
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/bin/mrdwt.m
================================================
function [yl,yh,L] = mrdwt(x,h,L)
%    [yl,yh,L] = mrdwt(x,h,L);
% 
%    Function computes the redundant discrete wavelet transform y
%    for a 1D  or 2D input signal. (Redundant means here that the
%    sub-sampling after each stage is omitted.) yl contains the
%    lowpass and yh the highpass components. In the case of a 2D
%    signal, the ordering in yh is 
%    [lh hl hh lh hl ... ] (first letter refers to row, second to
%    column filtering). 
%
%    Input:
%       x : finite length 1D or 2D signal (implicitly periodized)
%       h : scaling filter
%       L : number of levels. In the case of a 1D signal,
%           length(x) must be  divisible by 2^L;
%           in the case of a 2D signal, the row and the
%           column dimension must be divisible by 2^L.
%           If no argument is
%           specified, a full DWT is returned for maximal possible L.
%   
%    Output:
%       yl : lowpass component
%       yh : highpass components
%       L  : number of levels
%
%  HERE'S AN EASY WAY TO RUN THE EXAMPLES:
%  Cut-and-paste the example you want to run to a new file 
%  called ex.m, for example. Delete out the % at the beginning 
%  of each line in ex.m (Can use search-and-replace in your editor
%  to replace it with a space). Type 'ex' in matlab and hit return.
%
%
%    Example 1:
%    x = makesig('Leopold',8);
%    h = daubcqf(4,'min');
%    L = 1;
%    [yl,yh,L] = mrdwt(x,h,L)
%    yl =  0.8365  0.4830 0 0 0 0 -0.1294 0.2241
%    yh = -0.2241 -0.1294 0 0 0 0 -0.4830 0.8365
%    L = 1
%    Example 2:
%    load lena;
%    h = daubcqf(4,'min');
%    L = 2;
%    [ll_lev2,yh,L] = mrdwt(lena,h,L); % lena is a 256x256 matrix
%    N = 256;
%    lh_lev1 = yh(:,1:N); 
%    hl_lev1 = yh(:,N+1:2*N); 
%    hh_lev1 = yh(:,2*N+1:3*N);
%    lh_lev2 = yh(:,3*N+1:4*N); 
%    hl_lev2 = yh(:,4*N+1:5*N); 
%    hh_lev2 = yh(:,5*N+1:6*N);
%    figure; colormap(gray); imagesc(lena); title('Original Image');
%    figure; colormap(gray); imagesc(ll_lev2); title('LL Level 2');
%    figure; colormap(gray); imagesc(hh_lev2); title('HH Level 2');
%    figure; colormap(gray); imagesc(hl_lev2); title('HL Level 2');
%    figure; colormap(gray); imagesc(lh_lev2); title('LH Level 2');
%    figure; colormap(gray); imagesc(hh_lev1); title('HH Level 1');
%    figure; colormap(gray); imagesc(hl_lev1); title('HL Level 1');
%    figure; colormap(gray); imagesc(lh_lev1); title('LH Level 1');
%           
%    See also: mdwt, midwt, mirdwt
%
%    Warning! min(size(x))/2^L should be greater than length(h)
%
%Author: Markus Lang  <lang@jazz.rice.edu>
if exist('OCTAVE_VERSION', 'builtin')
  % Running under Octave: forward the call to omrdwt.
  % Multiplying by 1.0 presumably forces a plain floating-point array
  % before it is handed to the compiled routine -- TODO confirm.
  x = x * 1.0;
  % Test for the *variable* L only. A bare exist('L') is also nonzero when
  % a file or folder named L is visible, which would select the
  % three-argument call although L was never passed in.
  if (exist('L', 'var'))
    [yl,yh,L] = omrdwt(x,h,L);
  else  
    [yl,yh,L] = omrdwt(x,h);
  end
else
  % Outside Octave this M-file provides no implementation; it only reports
  % that the toolbox has to be compiled first.
  error('You must compile wavelet toolbox before use')
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/bin/setopt.m
================================================
function option = setopt(opt_par,default);
%    option = setopt(opt_par,default); 
%
%    SETOPT can modify a default option vector with user specified options.
%    The first length(opt_par) entries of the result are taken from
%    opt_par; every entry that is still zero afterwards (not supplied, or
%    supplied as 0) is replaced by the corresponding entry of default.
%
%    Input: 
%       opt_par : Users desired option vector (a 0 entry means
%                 "use the default")
%       default : Program default option vector
%
%    Output: 
%       option : New option vector, same size as default (when default is
%                a scalar and opt_par is longer, a row vector of
%                length(opt_par))
%
%    Example:
%       opt_par = [1 2 3 4];
%       default = [1 1 1 1];
%       option = setopt(opt_par,default)
%       option = 1     2     3     4
%
%Author: Jan Erik Odegard  <odegard@ece.rice.edu>

if (nargin < 2) 
  error('You need two inputs');
end;
len = length(opt_par);
% More user options than defaults only makes sense for a scalar default,
% which is applied to every zero entry. For any other default this
% combination failed with a size mismatch before; report it explicitly.
if (len > numel(default)) && (numel(default) ~= 1)
  error('opt_par must not have more entries than default');
end;
option = zeros(size(default));
option(1:len) = opt_par(1:len);
% Fill in the defaults by logical mask. The arithmetic form
% option + (option == 0).*default turns user-set entries into NaN
% whenever the matching default is Inf or NaN, because 0*Inf is NaN.
use_default = (option == 0);
if numel(default) == 1
  option(use_default) = default;
else
  option(use_default) = default(use_default);
end;


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/dist/2.01/INSTALL
================================================
#################################################################################
#File Name: INSTALL 
#Last Modification Date:        11/16/95     10:30:38
#Current Version: INSTALL   1.13
#File Creation Date: Wed Aug 25 09:25:06 1993 
#Author: Ramesh Gopinath  <ramesh@dsp.rice.edu>  
#
#Copyright: All software, documentation, and related files in this distribution
#           are Copyright (c) 1993-1995 Rice University
#
#Permission is granted for use and non-profit distribution providing that this
#notice be clearly maintained. The right to distribute any portion for profit
#or as part of any commercial product is specifically reserved for the author.
#
#Change History:
#
#################################################################################
In order to install this distribution of wlet-tools:

1. Uncompress and extract the tar archive in the desired directory. 

   uncompress RWT.tar.Z
   tar xvf RWT.tar

   NOTE: New subdirectories (rice-wlet-tools and rice-atr-tools) will
   be created in the directory where you extract the archive.

2. cd rice-wlet-tools

3. make all

4. make install

5. Append the paths to the mex, mfile and wdemo directories. That is,
	in .cshrc add the following lines at the end:

   setenv RWT_HOME YOUR/LOCAL/PATH/TO
   setenv RWT_PATH $RWT_HOME/rice-wlet-tools/mex:$RWT_HOME/rice-wlet-tools/mfiles:\
	$RWT_HOME/rice-wlet-tools/wdemos:$RWT_HOME/rice-atr-tools/mex:\
	$RWT_HOME/rice-atr-tools/mfiles:$RWT_HOME/rice-atr-tools/sardemo:\
	$RWT_HOME
   setenv MATLABPATH $RWT_PATH':'$MATLABPATH

	where 

	YOUR/LOCAL/PATH/TO

		 is replaced with
	
	the actual path to the directory where rice-wlet-tools and
	rice-atr-tools are located on your system

   NOTE: If you do not have the environment variable MATLABPATH previously defined
   change the line

   setenv MATLABPATH $RWT_PATH':'$MATLABPATH

   to

   setenv MATLABPATH $RWT_PATH


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/dist/2.01/README
================================================
#################################################################################
#File Name: README 
#Last Modification Date:        9/1/94     10:11:28
#Current Version: README   1.5
#File Creation Date: Wed Aug 25 09:25:06 1993 
#Author: Ramesh Gopinath  <ramesh@dsp.rice.edu>  
#
#Copyright: All software, documentation, and related files in this distribution
#           are Copyright (c) 1993  Rice University
#
#Permission is granted for use and non-profit distribution providing that this
#notice be clearly maintained. The right to distribute any portion for profit
#or as part of any commercial product is specifically reserved for the author.
#
#Change History:
#
#################################################################################
This "rice-wlet-tools", version 2.01
Released - <Mon Apr 18 16:51:38 1994>

INSTALLATION: 
To install this distribution of wlet-tools see INSTALL.

SOURCE:
     ftp:    cml.rice.edu (128.42.62.23) /pub/software
     mosaic: URL http://jazz.rice.edu

Associated references can be obtained from directory
/pub/dsp/papers and /pub/reports

EMAIL: 
For bug reports and questions send email to webmaster-dsp@ece.rice.edu

CONDITIONS FOR USE:
       This software is Copyright (C) Rice University 1993.
	You have the right to use, free of charge, with the following terms 
        and conditions:

      (1) You can redistribute this software in source form.  If you
            redistribute this software in compiled form you will include
            the source code.
      (2) You can distribute your own applications that link this software
            if you include the source code for this software.
      (3) You own full rights to any output files you generate with this
            software.
      (4) You can make modifications to this software and use it for
            in-house use only.  Under no circumstances can modified software
            be redistributed.
      (5) If you make any modifications to this software you will send
            the changes by email to webmaster-dsp@ece.rice.edu
      (6) The DSP group at Rice University shall be credited should
            this software be used in in any form or written about in any
            publication.
      (7) This software is provided "as is", without warranty by Rice University.
     	    In no event shall Rice University be liable for any loss or for any
            indirect, special, punitive, exemplary, incidental, or
            consequential damages arising from the use, possession or
            performance of this software.

---------
ALTERNATIVE WAY OF GETTING TO SOFTWARE AND REPORTS
(THIS MIGHT BE DISCONTINUED SINCE IT IS NOT ROBUST):
     It can also be obtained (usually) with the following command on unix systems:

     %telnet dsp.rice.edu 5555 |sed '1,3d' | csh -fbs software
 OR  %telnet 128.42.4.62 5555 |sed '1,3d' | csh -fbs software

You probably want to add

	alias riceget "telnet 128.42.4.62 5555 |sed '1,3d' | csh -fsb"

so that you can access the distribution (which will hopefully be updated periodically)

	%riceget OPTIONS

where options is a list of options. 

	%riceget help

would return all options currently available.


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/dist/2.01/doc/index.html
================================================
  <HTML> <HEAD>
      <TITLE>Rice Wavelet Toolbox Documentation</TITLE>
    </HEAD>
    <BODY>

      <HR SIZE=15>

	<CENTER><H1>Rice Wavelet Toolbox Documentation Version
	2.01</H1></CENTER>

	<HR SIZE=15><P>

	    <TABLE BORDER>

	      <CAPTION> A <B>Y</B> in the <B>M-file</B> and/or the
	      <B>MEX-file</B> column indicates whether the given
	      function is implemented as a matlab M-file, a MEX-file
	      or both. An <B>X</B> in the <B>Depend</B> column
	      indicates that although the function itself is not a
	      MEX-file it depends on subroutines written in C and
	      compiled as MEX-files for significant speedup on 2D
	      problems in particular.</CAPTION>

		<TR><TH ALIGN=left>Function</TH>
		    <TH ALIGN=left>Description </TH>
		    <TH>M-file</TH>
		    <TH>MEX-file</TH>
		    <TH>Depend</TH>
		</TR>	    
		
		<TR>
		  <TD><!-- <A
		  HREF=/cgi-bin/mat-help?denoise+/../../RWT2/>denoise</A>-->
		  denoise </TD>
		  <TD>Nonlinear wavelet denoising </TD>
		  <TD ALIGN=center>Y </TD>
		  <TD ALIGN=center> </TD>
		  <TD ALIGN=center>X </TD>
		</TR>

		<TR>
		  <TD><!--<A
		  HREF=/cgi-bin/mat-help?dwt+/RWT/doc/>dwt</A>--> dwt
		  </TD>
		  <TD>Computes the 1D and 2D discrete wavelet
		  transform </TD>
		  <TD ALIGN=center>Y </TD> 
		  <TD ALIGN=center>Y </TD>
		  <TD ALIGN=center> </TD>
		</TR>

		<TR>
		  <TD><!--<A
		  HREF=/cgi-bin/mat-help?hoelder+/RWT/doc/>hoelder</A>-->
		  hoelder </TD>
		  <TD>Estimate of the Hoelder exponent for a given
		  scaling function </TD>
		  <TD ALIGN=center>Y </TD>
		  <TD ALIGN=center> </TD>
		  <TD ALIGN=center> </TD>
		</TR>

		<TR>
		  <TD><!--<A
		  HREF=/cgi-bin/mat-help?makesig+/RWT/doc/>makesig</A>-->
		  makesig </TD>
		  <TD>Generates the 'Donoho' test signals  </TD>
		  <TD ALIGN=center>Y </TD>
		  <TD ALIGN=center> </TD>
		  <TD ALIGN=center> </TD>
		</TR>
	    </TABLE>
	    
	    <P>

	      <HR SIZE=4>

		The latest version of the Rice Wavelet Toolbox is
		available in <A HREF="/software/rwt.shtml">Version
		2.3</A>

		<!-- <A HREF="/software/RWT/"><IMG ALT="Previous"
		SRC="/icon/back.gif"></A> <A HREF="/"><IMG ALT="DSP"
		SRC="/icon/dsp_home.gif"></A> --> </HR>

	      <HR SIZE=4>
		<!-- <ADDRESS> <A HREF="/~odegard/">
		&lt;webmaster-dsp@rice.edu&gt;</A> Send your feedback
		<A HREF="/~odegard/mailto.html"> here</A>
		</ADDRESS>--> </HR>
    </BODY>
  </HTML>


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/dist/2.3/INSTALL
================================================
Installation instructions
-------------------------

In order to install this distribution of Rice Wavelet Tools version 2.3
released - <Dec 1 2000>

1. Properly set up your system to create MEX-files. Please refer to the
   "Matlab Application Program Guide" to set up your Matlab installation
   and C compiler so that you can compile C MEX-files on your system.
   All reference documentation is available on the MathWorks web page:
   www.mathworks.com

2. Make a toolbox directory and uncompress/extract all the files.
   For example, in the unix environment,

   	gunzip rwt.tar.gz
   	tar xvf rwt.tar

3. Run MATLAB and change to the toolbox directory containing the extracted files.

4. Compile the toolbox by executing the Matlab command 

   	compile

5. Add the toolbox directory to your Matlab path.

6. For further instructions, please refer to the README file.


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/dist/2.3/INSTALL_PRECOMPILED
================================================
Installation instructions
-------------------------

In order to install this distribution of Rice Wavelet Tools version 2.3
released - <Dec 1 2000>

1. Make a toolbox directory and uncompress/extract all the files.
   For example, in the unix environment,

   	gunzip rwt.tar.gz
   	tar xvf rwt.tar

2. Add the toolbox directory to your Matlab path.

3. For further instructions, please refer to the README file.


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/dist/2.3/LICENSE
================================================
This "rice-wlet-tools", version 2.3
Released - <Dec 1 2000>

CONDITIONS FOR USE:
Copyright (c) 2000 RICE UNIVERSITY. All rights reserved.

This software is distributed and licensed to you on a non-exclusive 
basis, free-of-charge. Redistribution and use in source and binary forms, 
with or without modification, are permitted provided that the following 
conditions are met:

1. Redistribution of source code must retain the above copyright notice, 
   this list of conditions and the following disclaimer.
2. Redistribution in binary form must reproduce the above copyright notice, 
   this list of conditions and the following disclaimer in the 
   documentation and/or other materials provided with the distribution.
3. All advertising materials mentioning features or use of this software 
   must display the following acknowledgment: This product includes 
   software developed by Rice University, Houston, Texas and its contributors.
4. Neither the name of the University nor the names of its contributors 
   may be used to endorse or promote products derived from this software 
   without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY WILLIAM MARSH RICE UNIVERSITY, HOUSTON, TEXAS, 
AND CONTRIBUTORS AS IS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, 
BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RICE UNIVERSITY 
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 
OR BUSINESS INTERRUPTIONS) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 
OTHERWISE), PRODUCT LIABILITY, OR OTHERWISE ARISING IN ANY WAY OUT OF THE 
USE OF THIS SOFTWARE,  EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

For information on commercial licenses, contact Rice University's Office of 
Technology Transfer at techtran@rice.edu or (713) 348-6173


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/dist/2.3/README
================================================
Rice Wavelet Tools version 2.3
Released - <Dec 1 2000>

NEWER VERSION:
     A newer version of this toolbox is available at 
     http://www.dsp.rice.edu/software/rwt.shtml

INSTALLATION: 
To install this distribution of Rice Wavelet Tools see the INSTALL file.

SOURCE:
     www.dsp.rice.edu/software/rwt.shtml

EMAIL: 
For bug reports and questions, send email to webmaster-dsp@ece.rice.edu

CONDITIONS FOR USE:
See the LICENSE file

TOOLBOX FUNCTIONS:

 Wavelet Transforms
     mdwt - Discrete orthogonal wavelet transform using the Mallat algorithm (1D and 2D)
     midwt - Inverse discrete orthogonal wavelet transform
     mrdwt - Undecimated (redundant) discrete wavelet transform (1D and 2D)
     mirdwt - Inverse undecimated discrete wavelet transform
     daubcqf - Daubechies filter coefficients

 Wavelet Domain Processing
     denoise - Denoise signals and images by thresholding wavelet coefficients
     HardTh - Hard thresholding
     SoftTh - Soft thresholding

 Other
     makesig - Create Donoho-Johnstone test signals
     compile - Compile the Rice Wavelet Toolbox

Functions omitted in this version of the toolbox can be found in 
version 2.01 at www.dsp.rice.edu/software/RWT2.01/RWT-2.01.tar.Z

This version may not compile with Matlab 6.0 (Release 12) and above. 
This problem has been fixed in version 2.4 at
www.dsp.rice.edu/software/rwt.shtml


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/doc/CMakeLists.txt
================================================
# Add a "doc" target that generates the API documentation with Doxygen.
# Nothing is defined when Doxygen is not installed.
find_package(Doxygen)
if(DOXYGEN_FOUND)
  # Instantiate the Doxyfile template (@VAR@ placeholders only) in the build tree.
  configure_file(${CMAKE_CURRENT_SOURCE_DIR}/Doxyfile.in ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile @ONLY)
  add_custom_target(doc
    ${DOXYGEN_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile
    WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
    COMMENT "Generating API documentation with Doxygen" VERBATIM
  )

  find_package(LATEX)
  find_program(DOXYFILE_MAKE make)
  mark_as_advanced(DOXYFILE_MAKE)

  # Doxygen is started in CMAKE_CURRENT_BINARY_DIR and the Doxyfile leaves
  # OUTPUT_DIRECTORY blank, so its output is written below the binary
  # directory, not the source directory. The LaTeX Makefile therefore has to
  # be run from there (assumes the LaTeX output directory keeps the name
  # "latex" -- confirm against the LATEX_OUTPUT setting of the Doxyfile).
  # Only hook the step up when a make program was found; otherwise the
  # command would expand to "DOXYFILE_MAKE-NOTFOUND".
  if(DOXYFILE_MAKE)
    add_custom_command(TARGET doc
      POST_BUILD
      COMMAND "${DOXYFILE_MAKE}"
      COMMENT "Running LaTeX for Doxygen documentation in ${CMAKE_CURRENT_BINARY_DIR}/latex..."
      WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/latex")
  else()
    message(STATUS "make not found: the LaTeX step of the doc target is skipped")
  endif()

endif(DOXYGEN_FOUND)


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/doc/Doxyfile.in
================================================
# Doxyfile 1.8.3.1

# This file describes the settings to be used by the documentation system
# doxygen (www.doxygen.org) for a project.
#
# All text after a hash (#) is considered a comment and will be ignored.
# The format is:
#       TAG = value [value, ...]
# For lists items can also be appended using:
#       TAG += value [value, ...]
# Values that contain spaces should be placed between quotes (" ").

#---------------------------------------------------------------------------
# Project related configuration options
#---------------------------------------------------------------------------

# This tag specifies the encoding used for all characters in the config file
# that follow. The default is UTF-8 which is also the encoding used for all
# text before the first occurrence of this tag. Doxygen uses libiconv (or the
# iconv built into libc) for the transcoding. See
# http://www.gnu.org/software/libiconv for the list of possible encodings.

DOXYFILE_ENCODING      = UTF-8

# The PROJECT_NAME tag is a single word (or sequence of words) that should
# identify the project. Note that if you do not use Doxywizard you need
# to put quotes around the project name if it contains spaces.

PROJECT_NAME           = "Rice Wavelet Toolbox"

# The PROJECT_NUMBER tag can be used to enter a project or revision number.
# This could be handy for archiving the generated documentation or
# if some version control system is used.

PROJECT_NUMBER         =

# Using the PROJECT_BRIEF tag one can provide an optional one line description
# for a project that appears at the top of each page and should give viewer
# a quick idea about the purpose of the project. Keep the description short.

PROJECT_BRIEF          =

# With the PROJECT_LOGO tag one can specify an logo or icon that is
# included in the documentation. The maximum height of the logo should not
# exceed 55 pixels and the maximum width should not exceed 200 pixels.
# Doxygen will copy the logo to the output directory.

PROJECT_LOGO           =

# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
# base path where the generated documentation will be put.
# If a relative path is entered, it will be relative to the location
# where doxygen was started. If left blank the current directory will be used.

OUTPUT_DIRECTORY       =

# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create
# 4096 sub-directories (in 2 levels) under the output directory of each output
# format and will distribute the generated files over these directories.
# Enabling this option can be useful when feeding doxygen a huge amount of
# source files, where putting all generated files in the same directory would
# otherwise cause performance problems for the file system.

CREATE_SUBDIRS         = NO

# The OUTPUT_LANGUAGE tag is used to specify the language in which all
# documentation generated by doxygen is written. Doxygen will use this
# information to generate all constant output in the proper language.
# The default language is English, other supported languages are:
# Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional,
# Croatian, Czech, Danish, Dutch, Esperanto, Farsi, Finnish, French, German,
# Greek, Hungarian, Italian, Japanese, Japanese-en (Japanese with English
# messages), Korean, Korean-en, Lithuanian, Norwegian, Macedonian, Persian,
# Polish, Portuguese, Romanian, Russian, Serbian, Serbian-Cyrillic, Slovak,
# Slovene, Spanish, Swedish, Ukrainian, and Vietnamese.

OUTPUT_LANGUAGE        = English

# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will
# include brief member descriptions after the members that are listed in
# the file and class documentation (similar to JavaDoc).
# Set to NO to disable this.

BRIEF_MEMBER_DESC      = YES

# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend
# the brief description of a member or function before the detailed description.
# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the
# brief descriptions will be completely suppressed.

REPEAT_BRIEF           = YES

# This tag implements a quasi-intelligent brief description abbreviator
# that is used to form the text in various listings. Each string
# in this list, if found as the leading text of the brief description, will be
# stripped from the text and the result after processing the whole list, is
# used as the annotated text. Otherwise, the brief description is used as-is.
# If left blank, the following values are used ("$name" is automatically
# replaced with the name of the entity): "The $name class" "The $name widget"
# "The $name file" "is" "provides" "specifies" "contains"
# "represents" "a" "an" "the"

ABBREVIATE_BRIEF       =

# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then
# Doxygen will generate a detailed section even if there is only a brief
# description.

ALWAYS_DETAILED_SEC    = NO

# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all
# inherited members of a class in the documentation of that class as if those
# members were ordinary class members. Constructors, destructors and assignment
# operators of the base classes will not be shown.

INLINE_INHERITED_MEMB  = NO

# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full
# path before files name in the file list and in the header files. If set
# to NO the shortest path that makes the file name unique will be used.

FULL_PATH_NAMES        = YES

# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag
# can be used to strip a user-defined part of the path. Stripping is
# only done if one of the specified strings matches the left-hand part of
# the path. The tag can be used to show relative paths in the file list.
# If left blank the directory from which doxygen is run is used as the
# path to strip. Note that you specify absolute paths here, but also
# relative paths, which will be relative from the directory where doxygen is
# started.

STRIP_FROM_PATH        = @CMAKE_CURRENT_SOURCE_DIR@/..

# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of
# the path mentioned in the documentation of a class, which tells
# the reader which header file to include in order to use a class.
# If left blank only the name of the header file containing the class
# definition is used. Otherwise one should specify the include paths that
# are normally passed to the compiler using the -I flag.

STRIP_FROM_INC_PATH    =

# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter
# (but less readable) file names. This can be useful if your file system
# doesn't support long names like on DOS, Mac, or CD-ROM.

SHORT_NAMES            = NO

# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen
# will interpret the first line (until the first dot) of a JavaDoc-style
# comment as the brief description. If set to NO, the JavaDoc
# comments will behave just like regular Qt-style comments
# (thus requiring an explicit @brief command for a brief description.)

JAVADOC_AUTOBRIEF      = NO

# If the QT_AUTOBRIEF tag is set to YES then Doxygen will
# interpret the first line (until the first dot) of a Qt-style
# comment as the brief description. If set to NO, the comments
# will behave just like regular Qt-style comments (thus requiring
# an explicit \brief command for a brief description.)

QT_AUTOBRIEF           = NO

# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen
# treat a multi-line C++ special comment block (i.e. a block of //! or ///
# comments) as a brief description. This used to be the default behaviour.
# The new default is to treat a multi-line C++ comment block as a detailed
# description. Set this tag to YES if you prefer the old behaviour instead.

MULTILINE_CPP_IS_BRIEF = NO

# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented
# member inherits the documentation from any documented member that it
# re-implements.

INHERIT_DOCS           = YES

# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce
# a new page for each member. If set to NO, the documentation of a member will
# be part of the file/class/namespace that contains it.

SEPARATE_MEMBER_PAGES  = NO

# The TAB_SIZE tag can be used to set the number of spaces in a tab.
# Doxygen uses this value to replace tabs by spaces in code fragments.

TAB_SIZE               = 4

# This tag can be used to specify a number of aliases that acts
# as commands in the documentation. An alias has the form "name=value".
# For example adding "sideeffect=\par Side Effects:\n" will allow you to
# put the command \sideeffect (or @sideeffect) in the documentation, which
# will result in a user-defined paragraph with heading "Side Effects:".
# You can put \n's in the value part of an alias to insert newlines.

ALIASES                =

# This tag can be used to specify a number of word-keyword mappings (TCL only).
# A mapping has the form "name=value". For example adding
# "class=itcl::class" will allow you to use the command class in the
# itcl::class meaning.

TCL_SUBST              =

# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C
# sources only. Doxygen will then generate output that is more tailored for C.
# For instance, some of the names that are used will be different. The list
# of all members will be omitted, etc.

OPTIMIZE_OUTPUT_FOR_C  = YES

# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java
# sources only. Doxygen will then generate output that is more tailored for
# Java. For instance, namespaces will be presented as packages, qualified
# scopes will look different, etc.

OPTIMIZE_OUTPUT_JAVA   = NO

# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran
# sources only. Doxygen will then generate output that is more tailored for
# Fortran.

OPTIMIZE_FOR_FORTRAN   = NO

# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL
# sources. Doxygen will then generate output that is tailored for
# VHDL.

OPTIMIZE_OUTPUT_VHDL   = NO

# Doxygen selects the parser to use depending on the extension of the files it
# parses. With this tag you can assign which parser to use for a given
# extension. Doxygen has a built-in mapping, but you can override or extend it
# using this tag. The format is ext=language, where ext is a file extension,
# and language is one of the parsers supported by doxygen: IDL, Java,
# Javascript, CSharp, C, C++, D, PHP, Objective-C, Python, Fortran, VHDL, C,
# C++. For instance to make doxygen treat .inc files as Fortran files (default
# is PHP), and .f files as C (default is Fortran), use: inc=Fortran f=C. Note
# that for custom extensions you also need to set FILE_PATTERNS otherwise the
# files are not read by doxygen.

EXTENSION_MAPPING      =

# If MARKDOWN_SUPPORT is enabled (the default) then doxygen pre-processes all
# comments according to the Markdown format, which allows for more readable
# documentation. See http://daringfireball.net/projects/markdown/ for details.
# The output of markdown processing is further processed by doxygen, so you
# can mix doxygen, HTML, and XML commands with Markdown formatting.
# Disable only in case of backward compatibility issues.

MARKDOWN_SUPPORT       = YES

# When enabled doxygen tries to link words that correspond to documented classes,
# or namespaces to their corresponding documentation. Such a link can be
# prevented in individual cases by putting a % sign in front of the word or
# globally by setting AUTOLINK_SUPPORT to NO.

AUTOLINK_SUPPORT       = YES

# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want
# to include (a tag file for) the STL sources as input, then you should
# set this tag to YES in order to let doxygen match functions declarations and
# definitions whose arguments contain STL classes (e.g. func(std::string); v.s.
# func(std::string) {}). This also makes the inheritance and collaboration
# diagrams that involve STL classes more complete and accurate.

BUILTIN_STL_SUPPORT    = NO

# If you use Microsoft's C++/CLI language, you should set this option to YES to
# enable parsing support.

CPP_CLI_SUPPORT        = NO

# Set the SIP_SUPPORT tag to YES if your project consists of sip sources only.
# Doxygen will parse them like normal C++ but will assume all classes use public
# instead of private inheritance when no explicit protection keyword is present.

SIP_SUPPORT            = NO

# For Microsoft's IDL there are propget and propput attributes to indicate
# getter and setter methods for a property. Setting this option to YES (the
# default) will make doxygen replace the get and set methods by a property in
# the documentation. This will only work if the methods are indeed getting or
# setting a simple type. If this is not the case, or you want to show the
# methods anyway, you should set this option to NO.

IDL_PROPERTY_SUPPORT   = YES

# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC
# tag is set to YES, then doxygen will reuse the documentation of the first
# member in the group (if any) for the other members of the group. By default
# all members of a group must be documented explicitly.

DISTRIBUTE_GROUP_DOC   = NO

# Set the SUBGROUPING tag to YES (the default) to allow class member groups of
# the same type (for instance a group of public functions) to be put as a
# subgroup of that type (e.g. under the Public Functions section). Set it to
# NO to prevent subgrouping. Alternatively, this can be done per class using
# the \nosubgrouping command.

SUBGROUPING            = YES

# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and
# unions are shown inside the group in which they are included (e.g. using
# @ingroup) instead of on a separate page (for HTML and Man pages) or
# section (for LaTeX and RTF).

INLINE_GROUPED_CLASSES = NO

# When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and
# unions with only public data fields will be shown inline in the documentation
# of the scope in which they are defined (i.e. file, namespace, or group
# documentation), provided this scope is documented. If set to NO (the default),
# structs, classes, and unions are shown on a separate page (for HTML and Man
# pages) or section (for LaTeX and RTF).

INLINE_SIMPLE_STRUCTS  = NO

# When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum
# is documented as struct, union, or enum with the name of the typedef. So
# typedef struct TypeS {} TypeT, will appear in the documentation as a struct
# with name TypeT. When disabled the typedef will appear as a member of a file,
# namespace, or class. And the struct will be named TypeS. This can typically
# be useful for C code in case the coding convention dictates that all compound
# types are typedef'ed and only the typedef is referenced, never the tag name.

TYPEDEF_HIDES_STRUCT   = NO

# The SYMBOL_CACHE_SIZE determines the size of the internal cache used to
# determine which symbols to keep in memory and which to flush to disk.
# When the cache is full, less often used symbols will be written to disk.
# For small to medium size projects (<1000 input files) the default value is
# probably good enough. For larger projects a too small cache size can cause
# doxygen to be busy swapping symbols to and from disk most of the time
# causing a significant performance penalty.
# If the system has enough physical memory increasing the cache will improve the
# performance by keeping more symbols in memory. Note that the value works on
# a logarithmic scale so increasing the size by one will roughly double the
# memory usage. The cache size is given by this formula:
# 2^(16+SYMBOL_CACHE_SIZE). The valid range is 0..9, the default is 0,
# corresponding to a cache size of 2^16 = 65536 symbols.

SYMBOL_CACHE_SIZE      = 0

# Similar to the SYMBOL_CACHE_SIZE the size of the symbol lookup cache can be
# set using LOOKUP_CACHE_SIZE. This cache is used to resolve symbols given
# their name and scope. Since this can be an expensive process and often the
# same symbol appears multiple times in the code, doxygen keeps a cache of
# pre-resolved symbols. If the cache is too small doxygen will become slower.
# If the cache is too large, memory is wasted. The cache size is given by this
# formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range is 0..9, the default is 0,
# corresponding to a cache size of 2^16 = 65536 symbols.

LOOKUP_CACHE_SIZE      = 0

#---------------------------------------------------------------------------
# Build related configuration options
#---------------------------------------------------------------------------

# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in
# documentation are documented, even if no documentation was available.
# Private class members and static file members will be hidden unless
# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES.

EXTRACT_ALL            = YES

# If the EXTRACT_PRIVATE tag is set to YES all private members of a class
# will be included in the documentation.

EXTRACT_PRIVATE        = NO

# If the EXTRACT_PACKAGE tag is set to YES all members with package or internal
# scope will be included in the documentation.

EXTRACT_PACKAGE        = NO

# If the EXTRACT_STATIC tag is set to YES all static members of a file
# will be included in the documentation.

EXTRACT_STATIC         = NO

# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs)
# defined locally in source files will be included in the documentation.
# If set to NO only classes defined in header files are included.

EXTRACT_LOCAL_CLASSES  = YES

# This flag is only useful for Objective-C code. When set to YES local
# methods, which are defined in the implementation section but not in
# the interface are included in the documentation.
# If set to NO (the default) only methods in the interface are included.

EXTRACT_LOCAL_METHODS  = NO

# If this flag is set to YES, the members of anonymous namespaces will be
# extracted and appear in the documentation as a namespace called
# 'anonymous_namespace{file}', where file will be replaced with the base
# name of the file that contains the anonymous namespace. By default
# anonymous namespaces are hidden.

EXTRACT_ANON_NSPACES   = NO

# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all
# undocumented members of documented classes, files or namespaces.
# If set to NO (the default) these members will be included in the
# various overviews, but no documentation section is generated.
# This option has no effect if EXTRACT_ALL is enabled.

HIDE_UNDOC_MEMBERS     = NO

# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all
# undocumented classes that are normally visible in the class hierarchy.
# If set to NO (the default) these classes will be included in the various
# overviews. This option has no effect if EXTRACT_ALL is enabled.

HIDE_UNDOC_CLASSES     = NO

# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all
# friend (class|struct|union) declarations.
# If set to NO (the default) these declarations will be included in the
# documentation.

HIDE_FRIEND_COMPOUNDS  = NO

# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any
# documentation blocks found inside the body of a function.
# If set to NO (the default) these blocks will be appended to the
# function's detailed documentation block.

HIDE_IN_BODY_DOCS      = NO

# The INTERNAL_DOCS tag determines if documentation
# that is typed after a \internal command is included. If the tag is set
# to NO (the default) then the documentation will be excluded.
# Set it to YES to include the internal documentation.

INTERNAL_DOCS          = NO

# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate
# file names in lower-case letters. If set to YES upper-case letters are also
# allowed. This is useful if you have classes or files whose names only differ
# in case and if your file system supports case sensitive file names. Windows
# and Mac users are advised to set this option to NO.

CASE_SENSE_NAMES       = NO

# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen
# will show members with their full class and namespace scopes in the
# documentation. If set to YES the scope will be hidden.

HIDE_SCOPE_NAMES       = NO

# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen
# will put a list of the files that are included by a file in the documentation
# of that file.

SHOW_INCLUDE_FILES     = YES

# If the FORCE_LOCAL_INCLUDES tag is set to YES then Doxygen
# will list include files with double quotes in the documentation
# rather than with sharp brackets.

FORCE_LOCAL_INCLUDES   = NO

# If the INLINE_INFO tag is set to YES (the default) then a tag [inline]
# is inserted in the documentation for inline members.

INLINE_INFO            = YES

# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen
# will sort the (detailed) documentation of file and class members
# alphabetically by member name. If set to NO the members will appear in
# declaration order.

SORT_MEMBER_DOCS       = YES

# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the
# brief documentation of file, namespace and class members alphabetically
# by member name. If set to NO (the default) the members will appear in
# declaration order.

SORT_BRIEF_DOCS        = NO

# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen
# will sort the (brief and detailed) documentation of class members so that
# constructors and destructors are listed first. If set to NO (the default)
# the constructors will appear in the respective orders defined by
# SORT_MEMBER_DOCS and SORT_BRIEF_DOCS.
# This tag will be ignored for brief docs if SORT_BRIEF_DOCS is set to NO
# and ignored for detailed docs if SORT_MEMBER_DOCS is set to NO.

SORT_MEMBERS_CTORS_1ST = NO

# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the
# hierarchy of group names into alphabetical order. If set to NO (the default)
# the group names will appear in their defined order.

SORT_GROUP_NAMES       = NO

# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be
# sorted by fully-qualified names, including namespaces. If set to
# NO (the default), the class list will be sorted only by class name,
# not including the namespace part.
# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES.
# Note: This option applies only to the class list, not to the
# alphabetical list.

SORT_BY_SCOPE_NAME     = NO

# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to
# do proper type resolution of all parameters of a function it will reject a
# match between the prototype and the implementation of a member function even
# if there is only one candidate or it is obvious which candidate to choose
# by doing a simple string match. By disabling STRICT_PROTO_MATCHING doxygen
# will still accept a match between prototype and implementation in such cases.

STRICT_PROTO_MATCHING  = NO

# The GENERATE_TODOLIST tag can be used to enable (YES) or
# disable (NO) the todo list. This list is created by putting \todo
# commands in the documentation.

GENERATE_TODOLIST      = YES

# The GENERATE_TESTLIST tag can be used to enable (YES) or
# disable (NO) the test list. This list is created by putting \test
# commands in the documentation.

GENERATE_TESTLIST      = YES

# The GENERATE_BUGLIST tag can be used to enable (YES) or
# disable (NO) the bug list. This list is created by putting \bug
# commands in the documentation.

GENERATE_BUGLIST       = YES

# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or
# disable (NO) the deprecated list. This list is created by putting
# \deprecated commands in the documentation.

GENERATE_DEPRECATEDLIST= YES

# The ENABLED_SECTIONS tag can be used to enable conditional
# documentation sections, marked by \if section-label ... \endif
# and \cond section-label ... \endcond blocks.

ENABLED_SECTIONS       =

# The MAX_INITIALIZER_LINES tag determines the maximum number of lines
# the initial value of a variable or macro consists of for it to appear in
# the documentation. If the initializer consists of more lines than specified
# here it will be hidden. Use a value of 0 to hide initializers completely.
# The appearance of the initializer of individual variables and macros in the
# documentation can be controlled using \showinitializer or \hideinitializer
# command in the documentation regardless of this setting.

MAX_INITIALIZER_LINES  = 30

# Set the SHOW_USED_FILES tag to NO to disable the list of files generated
# at the bottom of the documentation of classes and structs. If set to YES the
# list will mention the files that were used to generate the documentation.

SHOW_USED_FILES        = YES

# Set the SHOW_FILES tag to NO to disable the generation of the Files page.
# This will remove the Files entry from the Quick Index and from the
# Folder Tree View (if specified). The default is YES.

SHOW_FILES             = YES

# Set the SHOW_NAMESPACES tag to NO to disable the generation of the
# Namespaces page.
# This will remove the Namespaces entry from the Quick Index
# and from the Folder Tree View (if specified). The default is YES.

SHOW_NAMESPACES        = YES

# The FILE_VERSION_FILTER tag can be used to specify a program or script that
# doxygen should invoke to get the current version for each file (typically from
# the version control system). Doxygen will invoke the program by executing (via
# popen()) the command <command> <input-file>, where <command> is the value of
# the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file
# provided by doxygen. Whatever the program writes to standard output
# is used as the file version. See the manual for examples.

FILE_VERSION_FILTER    =

# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed
# by doxygen. The layout file controls the global structure of the generated
# output files in an output format independent way. To create the layout file
# that represents doxygen's defaults, run doxygen with the -l option.
# You can optionally specify a file name after the option, if omitted
# DoxygenLayout.xml will be used as the name of the layout file.

LAYOUT_FILE            =

# The CITE_BIB_FILES tag can be used to specify one or more bib files
# containing the references data. This must be a list of .bib files. The
# .bib extension is automatically appended if omitted. Using this command
# requires the bibtex tool to be installed. See also
# http://en.wikipedia.org/wiki/BibTeX for more info. For LaTeX the style
# of the bibliography can be controlled using LATEX_BIB_STYLE. To use this
# feature you need bibtex and perl available in the search path. Do not use
# file names with spaces, bibtex cannot handle them.

CITE_BIB_FILES         =

#---------------------------------------------------------------------------
# configuration options related to warning and progress messages
#---------------------------------------------------------------------------

# The QUIET tag can be used to turn on/off the messages that are generated
# by doxygen. Possible values are YES and NO. If left blank NO is used.

QUIET                  = NO

# The WARNINGS tag can be used to turn on/off the warning messages that are
# generated by doxygen. Possible values are YES and NO. If left blank
# NO is used.

WARNINGS               = YES

# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings
# for undocumented members. If EXTRACT_ALL is set to YES then this flag will
# automatically be disabled.

WARN_IF_UNDOCUMENTED   = YES

# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for
# potential errors in the documentation, such as not documenting some
# parameters in a documented function, or documenting parameters that
# don't exist or using markup commands wrongly.

WARN_IF_DOC_ERROR      = YES

# The WARN_NO_PARAMDOC option can be enabled to get warnings for
# functions that are documented, but have no documentation for their parameters
# or return value. If set to NO (the default) doxygen will only warn about
# wrong or incomplete parameter documentation, but not about the absence of
# documentation.

WARN_NO_PARAMDOC       = NO

# The WARN_FORMAT tag determines the format of the warning messages that
# doxygen can produce. The string should contain the $file, $line, and $text
# tags, which will be replaced by the file and line number from which the
# warning originated and the warning text. Optionally the format may contain
# $version, which will be replaced by the version of the file (if it could
# be obtained via FILE_VERSION_FILTER)

WARN_FORMAT            = "$file:$line: $text"

# The WARN_LOGFILE tag can be used to specify a file to which warning
# and error messages should be written. If left blank the output is written
# to stderr.

WARN_LOGFILE           =

#---------------------------------------------------------------------------
# configuration options related to the input files
#---------------------------------------------------------------------------

# The INPUT tag can be used to specify the files and/or directories that contain
# documented source files. You may enter file names like "myfile.cpp" or
# directories like "/usr/src/myproject". Separate the files or directories
# with spaces.

INPUT                  = @CMAKE_CURRENT_SOURCE_DIR@/../src @CMAKE_CURRENT_SOURCE_DIR@/../lib/src @CMAKE_CURRENT_SOURCE_DIR@/../lib/inc @CMAKE_CURRENT_SOURCE_DIR@/../mex


# This tag can be used to specify the character encoding of the source files
# that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is
# also the default input encoding. Doxygen uses libiconv (or the iconv built
# into libc) for the transcoding. See http://www.gnu.org/software/libiconv for
# the list of possible encodings.

INPUT_ENCODING         = UTF-8

# If the value of the INPUT tag contains directories, you can use the
# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
# and *.h) to filter out the source-files in the directories. If left
# blank the following patterns are tested:
# *.c *.cc *.cxx *.cpp *.c++ *.d *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh
# *.hxx *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.dox *.py
# *.f90 *.f *.for *.vhd *.vhdl

FILE_PATTERNS          =

# The RECURSIVE tag can be used to specify whether or not subdirectories
# should be searched for input files as well. Possible values are YES and NO.
# If left blank NO is used.

RECURSIVE              = NO

# The EXCLUDE tag can be used to specify files and/or directories that should be
# excluded from the INPUT source files. This way you can easily exclude a
# subdirectory from a directory tree whose root is specified with the INPUT tag.
# Note that relative paths are relative to the directory from which doxygen is
# run.

EXCLUDE                =

# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
# directories that are symbolic links (a Unix file system feature) are excluded
# from the input.

EXCLUDE_SYMLINKS       = NO

# If the value of the INPUT tag contains directories, you can use the
# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
# certain files from those directories. Note that the wildcards are matched
# against the file with absolute path, so to exclude all test directories
# for example use the pattern */test/*

EXCLUDE_PATTERNS       =

# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
# (namespaces, classes, functions, etc.) that should be excluded from the
# output. The symbol name can be a fully qualified name, a word, or if the
# wildcard * is used, a substring. Examples: ANamespace, AClass,
# AClass::ANamespace, ANamespace::*Test

EXCLUDE_SYMBOLS        =

# The EXAMPLE_PATH tag can be used to specify one or more files or
# directories that contain example code fragments that are included (see
# the \include command).

EXAMPLE_PATH           =

# If the value of the EXAMPLE_PATH tag contains directories, you can use the
# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
# and *.h) to filter out the source-files in the directories. If left
# blank all files are included.

EXAMPLE_PATTERNS       =

# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be
# searched for input files to be used with the \include or \dontinclude
# commands irrespective of the value of the RECURSIVE tag.
# Possible values are YES and NO. If left blank NO is used.

EXAMPLE_RECURSIVE      = NO

# The IMAGE_PATH tag can be used to specify one or more files or
# directories that contain images that are included in the documentation (see
# the \image command).

IMAGE_PATH             =

# The INPUT_FILTER tag can be used to specify a program that doxygen should
# invoke to filter for each input file. Doxygen will invoke the filter program
# by executing (via popen()) the command <filter> <input-file>, where <filter>
# is the value of the INPUT_FILTER tag, and <input-file> is the name of an
# input file. Doxygen will then use the output that the filter program writes
# to standard output.
# If FILTER_PATTERNS is specified, this tag will be
# ignored.

INPUT_FILTER           =

# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern
# basis.
# Doxygen will compare the file name with each pattern and apply the
# filter if there is a match.
# The filters are a list of the form:
# pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further
# info on how filters are used. If FILTER_PATTERNS is empty or if
# none of the patterns match the file name, INPUT_FILTER is applied.

FILTER_PATTERNS        =

# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using
# INPUT_FILTER) will be used to filter the input files when producing source
# files to browse (i.e. when SOURCE_BROWSER is set to YES).

FILTER_SOURCE_FILES    = NO

# The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file
# pattern. A pattern will override the setting for FILTER_PATTERNS (if any)
# and it is also possible to disable source filtering for a specific pattern
# using *.ext= (so without naming a filter). This option only has effect when
# FILTER_SOURCE_FILES is enabled.

FILTER_SOURCE_PATTERNS =

# If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that
# is part of the input, its contents will be placed on the main page (index.html).
# This can be useful if you have a project on for instance GitHub and want to reuse
# the introduction page also for the doxygen output.

USE_MDFILE_AS_MAINPAGE =

#---------------------------------------------------------------------------
# configuration options related to source browsing
#---------------------------------------------------------------------------

# If the SOURCE_BROWSER tag is set to YES then a list of source files will
# be generated. Documented entities will be cross-referenced with these sources.
# Note: To get rid of all source code in the generated output, make sure also
# VERBATIM_HEADERS is set to NO.

SOURCE_BROWSER         = YES

# Setting the INLINE_SOURCES tag to YES will include the body
# of functions and classes directly in the documentation.

INLINE_SOURCES         = YES

# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct
# doxygen to hide any special comment blocks from generated source code
# fragments. Normal C, C++ and Fortran comments will always remain visible.

STRIP_CODE_COMMENTS    = YES

# If the REFERENCED_BY_RELATION tag is set to YES
# then for each documented function all documented
# functions referencing it will be listed.

REFERENCED_BY_RELATION = NO

# If the REFERENCES_RELATION tag is set to YES
# then for each documented function all documented entities
# called/used by that function will be listed.

REFERENCES_RELATION    = NO

# If the REFERENCES_LINK_SOURCE tag is set to YES (the default)
# and SOURCE_BROWSER tag is set to YES, then the hyperlinks from
# functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will
# link to the source code.
# Otherwise they will link to the documentation.

REFERENCES_LINK_SOURCE = YES

# If the USE_HTAGS tag is set to YES then the references to source code
# will point to the HTML generated by the htags(1) tool instead of doxygen
# built-in source browser. The htags tool is part of GNU's global source
# tagging system (see http://www.gnu.org/software/global/global.html). You
# will need version 4.8.6 or higher.

USE_HTAGS              = NO

# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen
# will generate a verbatim copy of the header file for each class for
# which an include is specified. Set to NO to disable this.

VERBATIM_HEADERS       = YES

#---------------------------------------------------------------------------
# configuration options related to the alphabetical class index
#---------------------------------------------------------------------------

# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index
# of all compounds will be generated. Enable this if the project
# contains a lot of classes, structs, unions or interfaces.

ALPHABETICAL_INDEX     = YES

# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then
# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns
# in which this list will be split (can be a number in the range [1..20])

COLS_IN_ALPHA_INDEX    = 5

# In case all classes in a project start with a common prefix, all
# classes will be put under the same header in the alphabetical index.
# The IGNORE_PREFIX tag can be used to specify one or more prefixes that
# should be ignored while generating the index headers.

IGNORE_PREFIX          =

#---------------------------------------------------------------------------
# configuration options related to the HTML output
#---------------------------------------------------------------------------

# If the GENERATE_HTML tag is set to YES (the default) Doxygen will
# generate HTML output.

GENERATE_HTML          = YES

# The HTML_OUTPUT tag is used to specify where the HTML docs will be put.
# If a relative path is entered the value of OUTPUT_DIRECTORY will be
# put in front of it. If left blank `html' will be used as the default path.

HTML_OUTPUT            = html

# The HTML_FILE_EXTENSION tag can be used to specify the file extension for
# each generated HTML page (for example: .htm,.php,.asp). If it is left blank
# doxygen will generate files with .html extension.

HTML_FILE_EXTENSION    = .html

# The HTML_HEADER tag can be used to specify a personal HTML header for
# each generated HTML page. If it is left blank doxygen will generate a
# standard header. Note that when using a custom header you are responsible
# for the proper inclusion of any scripts and style sheets that doxygen
# needs, which is dependent on the configuration options used.
# It is advised to generate a default header using "doxygen -w html
# header.html footer.html stylesheet.css YourConfigFile" and then modify
# that header. Note that the header is subject to change so you typically
# have to redo this when upgrading to a newer version of doxygen or when
# changing the value of configuration settings such as GENERATE_TREEVIEW!

HTML_HEADER            =

# The HTML_FOOTER tag can be used to specify a personal HTML footer for
# each generated HTML page. If it is left blank doxygen will generate a
# standard footer.

HTML_FOOTER            =

# The HTML_STYLESHEET tag can be used to specify a user-defined cascading
# style sheet that is used by each HTML page. It can be used to
# fine-tune the look of the HTML output. If left blank doxygen will
# generate a default style sheet. Note that it is recommended to use
# HTML_EXTRA_STYLESHEET instead of this one, as it is more robust and this
# tag will in the future become obsolete.

HTML_STYLESHEET        =

# The HTML_EXTRA_STYLESHEET tag can be used to specify an additional
# user-defined cascading style sheet that is included after the standard
# style sheets created by doxygen. Using this option one can overrule
# certain style aspects. This is preferred over using HTML_STYLESHEET
# since it does not replace the standard style sheet and is therefore more
# robust against future updates. Doxygen will copy the style sheet file to
# the output directory.

HTML_EXTRA_STYLESHEET  =

# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or
# other source files which should be copied to the HTML output directory. Note
# that these files will be copied to the base HTML output directory. Use the
# $relpath$ marker in the HTML_HEADER and/or HTML_FOOTER files to load these
# files. In the HTML_STYLESHEET file, use the file name only. Also note that
# the files will be copied as-is; there are no commands or markers available.

HTML_EXTRA_FILES       =

# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output.
# Doxygen will adjust the colors in the style sheet and background images
# according to this color. Hue is specified as an angle on a colorwheel,
# see http://en.wikipedia.org/wiki/Hue for more information.
# For instance the value 0 represents red, 60 is yellow, 120 is green,
# 180 is cyan, 240 is blue, 300 purple, and 360 is red again.
# The allowed range is 0 to 359.

HTML_COLORSTYLE_HUE    = 220

# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of
# the colors in the HTML output. For a value of 0 the output will use
# grayscales only. A value of 255 will produce the most vivid colors.

HTML_COLORSTYLE_SAT    = 100

# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to
# the luminance component of the colors in the HTML output. Values below
# 100 gradually make the output lighter, whereas values above 100 make
# the output darker. The value divided by 100 is the actual gamma applied,
# so 80 represents a gamma of 0.8, the value 220 represents a gamma of 2.2,
# and 100 does not change the gamma.

HTML_COLORSTYLE_GAMMA  = 80

# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML
# page will contain the date and time when the page was generated. Setting
# this to NO can help when comparing the output of multiple runs.

HTML_TIMESTAMP         = YES

# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML
# documentation will contain sections that can be hidden and shown after the
# page has loaded.

HTML_DYNAMIC_SECTIONS  = NO

# With HTML_INDEX_NUM_ENTRIES one can control the preferred number of
# entries shown in the various tree structured indices initially; the user
# can expand and collapse entries dynamically later on. Doxygen will expand
# the tree to such a level that at most the specified number of entries are
# visible (unless a fully collapsed tree already exceeds this amount).
# So setting the number of entries to 1 will produce a fully collapsed tree by
# default. 0 is a special value representing an infinite number of entries
# and will result in a fully expanded tree by default.

HTML_INDEX_NUM_ENTRIES = 100

# If the GENERATE_DOCSET tag is set to YES, additional index files
# will be generated that can be used as input for Apple's Xcode 3
# integrated development environment, introduced with OSX 10.5 (Leopard).
# To create a documentation set, doxygen will generate a Makefile in the
# HTML output directory. Running make will produce the docset in that
# directory and running "make install" will install the docset in
# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find
# it at startup.
# See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html
# for more information.

GENERATE_DOCSET        = NO

# When GENERATE_DOCSET tag is set to YES, this tag determines the name of the
# feed. A documentation feed provides an umbrella under which multiple
# documentation sets from a single provider (such as a company or product suite)
# can be grouped.

DOCSET_FEEDNAME        = "Doxygen generated docs"

# When GENERATE_DOCSET tag is set to YES, this tag specifies a string that
# should uniquely identify the documentation set bundle. This should be a
# reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen
# will append .docset to the name.

DOCSET_BUNDLE_ID       = org.doxygen.Project

# The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely
# identify the documentation publisher. This should be a reverse domain-name
# style string, e.g. com.mycompany.MyDocSet.documentation.

DOCSET_PUBLISHER_ID    = org.doxygen.Publisher

# The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher.

DOCSET_PUBLISHER_NAME  = Publisher

# If the GENERATE_HTMLHELP tag is set to YES, additional index files
# will be generated that can be used as input for tools like the
# Microsoft HTML help workshop to generate a compiled HTML help file (.chm)
# of the generated HTML documentation.

GENERATE_HTMLHELP      = NO

# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can
# be used to specify the file name of the resulting .chm file. You
# can add a path in front of the file if the result should not be
# written to the html output directory.

CHM_FILE               =

# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can
# be used to specify the location (absolute path including file name) of
# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run
# the HTML help compiler on the generated index.hhp.

HHC_LOCATION           =

# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag
# controls if a separate .chi index file is generated (YES) or that
# it should be included in the master .chm file (NO).

GENERATE_CHI           = NO

# If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING
# is used to encode HtmlHelp index (hhk), content (hhc) and project file
# content.

CHM_INDEX_ENCODING     =

# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag
# controls whether a binary table of contents is generated (YES) or a
# normal table of contents (NO) in the .chm file.

BINARY_TOC             = NO

# The TOC_EXPAND flag can be set to YES to add extra items for group members
# to the contents of the HTML help documentation and to the tree view.

TOC_EXPAND             = NO

# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and
# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated
# that can be used as input for Qt's qhelpgenerator to generate a
# Qt Compressed Help (.qch) of the generated HTML documentation.

GENERATE_QHP           = NO

# If the QHG_LOCATION tag is specified, the QCH_FILE tag can
# be used to specify the file name of the resulting .qch file.
# The path specified is relative to the HTML output folder.

QCH_FILE               =

# The QHP_NAMESPACE tag specifies the namespace to use when generating
# Qt Help Project output. For more information please see
# http://doc.trolltech.com/qthelpproject.html#namespace

QHP_NAMESPACE          = org.doxygen.Project

# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating
# Qt Help Project output. For more information please see
# http://doc.trolltech.com/qthelpproject.html#virtual-folders

QHP_VIRTUAL_FOLDER     = doc

# If QHP_CUST_FILTER_NAME is set, it specifies the name of a custom filter to
# add. For more information please see
# http://doc.trolltech.com/qthelpproject.html#custom-filters

QHP_CUST_FILTER_NAME   =

# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the
# custom filter to add. For more information please see
# <a href="http://doc.trolltech.com/qthelpproject.html#custom-filters">
# Qt Help Project / Custom Filters</a>.

QHP_CUST_FILTER_ATTRS  =

# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this
# project's
# filter section matches.
# <a href="http://doc.trolltech.com/qthelpproject.html#filter-attributes">
# Qt Help Project / Filter Attributes</a>.

QHP_SECT_FILTER_ATTRS  =

# If the GENERATE_QHP tag is set to YES, the QHG_LOCATION tag can
# be used to specify the location of Qt's qhelpgenerator.
# If non-empty doxygen will try to run qhelpgenerator on the generated
# .qhp file.

QHG_LOCATION           =

# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files
# will be generated, which together with the HTML files, form an Eclipse help
# plugin. To install this plugin and make it available under the help contents
# menu in Eclipse, the contents of the directory containing the HTML and XML
# files needs to be copied into the plugins directory of eclipse. The name of
# the directory within the plugins directory should be the same as
# the ECLIPSE_DOC_ID value. After copying Eclipse needs to be restarted before
# the help appears.

GENERATE_ECLIPSEHELP   = NO

# A unique identifier for the eclipse help plugin. When installing the plugin
# the directory name containing the HTML and XML files should also have
# this name.

ECLIPSE_DOC_ID         = org.doxygen.Project

# The DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs)
# at top of each HTML page. The value NO (the default) enables the index and
# the value YES disables it. Since the tabs have the same information as the
# navigation tree you can set this option to YES if you already set
# GENERATE_TREEVIEW to YES.

DISABLE_INDEX          = NO

# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index
# structure should be generated to display hierarchical information.
# If the tag value is set to YES, a side panel will be generated
# containing a tree-like index structure (just like the one that
# is generated for HTML Help). For this to work a browser that supports
# JavaScript, DHTML, CSS and frames is required (i.e. any modern browser).
# Windows users are probably better off using the HTML help feature.
# Since the tree basically has the same information as the tab index you
# could consider setting DISABLE_INDEX to YES when enabling this option.

GENERATE_TREEVIEW      = NO

# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values
# (range [0,1..20]) that doxygen will group on one line in the generated HTML
# documentation. Note that a value of 0 will completely suppress the enum
# values from appearing in the overview section.

ENUM_VALUES_PER_LINE   = 4

# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be
# used to set the initial width (in pixels) of the frame in which the tree
# is shown.

TREEVIEW_WIDTH         = 250

# When the EXT_LINKS_IN_WINDOW option is set to YES doxygen will open
# links to external symbols imported via tag files in a separate window.

EXT_LINKS_IN_WINDOW    = NO

# Use this tag to change the font size of Latex formulas included
# as images in the HTML documentation. The default is 10. Note that
# when you change the font size after a successful doxygen run you need
# to manually remove any form_*.png images from the HTML output directory
# to force them to be regenerated.

FORMULA_FONTSIZE       = 14

# Use the FORMULA_TRANSPARENT tag to determine whether or not the images
# generated for formulas are transparent PNGs. Transparent PNGs are
# not supported properly for IE 6.0, but are supported on all modern browsers.
# Note that when changing this option you need to delete any form_*.png files
# in the HTML output before the changes have effect.

FORMULA_TRANSPARENT    = YES

# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax
# (see http://www.mathjax.org) which uses client side Javascript for the
# rendering instead of using prerendered bitmaps. Use this if you do not
# have LaTeX installed or if you want formulas to look prettier in the HTML
# output. When enabled you may also need to install MathJax separately and
# configure the path to it using the MATHJAX_RELPATH option.

USE_MATHJAX            = NO

# When MathJax is enabled you can set the default output format to be used for
# the MathJax output. Supported types are HTML-CSS, NativeMML (i.e. MathML) and
# SVG. The default value is HTML-CSS, which is slower, but has the best
# compatibility.

MATHJAX_FORMAT         = HTML-CSS

# When MathJax is enabled you need to specify the location relative to the
# HTML output directory using the MATHJAX_RELPATH option. The destination
# directory should contain the MathJax.js script. For instance, if the mathjax
# directory is located at the same level as the HTML output directory, then
# MATHJAX_RELPATH should be ../mathjax. The default value points to
# the MathJax Content Delivery Network so you can quickly see the result without
# installing MathJax.
# However, it is strongly recommended to install a local
# copy of MathJax from http://www.mathjax.org before deployment.

MATHJAX_RELPATH        = http://cdn.mathjax.org/mathjax/latest

# The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax
# extension names that should be enabled during MathJax rendering.

MATHJAX_EXTENSIONS     =

# When the SEARCHENGINE tag is enabled doxygen will generate a search box
# for the HTML output. The underlying search engine uses javascript
# and DHTML and should work on any modern browser. Note that when using
# HTML help (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets
# (GENERATE_DOCSET) there is already a search function so this one should
# typically be disabled. For large projects the javascript based search engine
# can be slow, then enabling SERVER_BASED_SEARCH may provide a better solution.

SEARCHENGINE           = YES

# When the SERVER_BASED_SEARCH tag is enabled the search engine will be
# implemented using a web server instead of a web client using Javascript.
# There are two flavours of web server based search depending on the
# EXTERNAL_SEARCH setting. When disabled, doxygen will generate a PHP script for
# searching and an index file used by the script. When EXTERNAL_SEARCH is
# enabled the indexing and searching needs to be provided by external tools.
# See the manual for details.

SERVER_BASED_SEARCH    = NO

# When EXTERNAL_SEARCH is enabled doxygen will no longer generate the PHP
# script for searching. Instead the search results are written to an XML file
# which needs to be processed by an external indexer. Doxygen will invoke an
# external search engine pointed to by the SEARCHENGINE_URL option to obtain
# the search results. Doxygen ships with an example indexer (doxyindexer) and
# search engine (doxysearch.cgi) which are based on the open source search engine
# library Xapian. See the manual for configuration details.

EXTERNAL_SEARCH        = NO

# The SEARCHENGINE_URL should point to a search engine hosted by a web server
# which will return the search results when EXTERNAL_SEARCH is enabled.
# Doxygen ships with an example search engine (doxysearch) which is based on
# the open source search engine library Xapian. See the manual for configuration
# details.

SEARCHENGINE_URL       =

# When SERVER_BASED_SEARCH and EXTERNAL_SEARCH are both enabled the unindexed
# search data is written to a file for indexing by an external tool. With the
# SEARCHDATA_FILE tag the name of this file can be specified.

SEARCHDATA_FILE        = searchdata.xml

# When SERVER_BASED_SEARCH and EXTERNAL_SEARCH are both enabled the
# EXTERNAL_SEARCH_ID tag can be used as an identifier for the project. This is
# useful in combination with EXTRA_SEARCH_MAPPINGS to search through multiple
# projects and redirect the results back to the right project.

EXTERNAL_SEARCH_ID     =

# The EXTRA_SEARCH_MAPPINGS tag can be used to enable searching through doxygen
# projects other than the one defined by this configuration file, but that are
# all added to the same external search index. Each project needs to have a
# unique id set via EXTERNAL_SEARCH_ID. The search mapping then maps the id
# to a relative location where the documentation can be found.
# The format is: EXTRA_SEARCH_MAPPINGS = id1=loc1 id2=loc2 ...

EXTRA_SEARCH_MAPPINGS  =

#---------------------------------------------------------------------------
# configuration options related to the LaTeX output
#---------------------------------------------------------------------------

# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will
# generate Latex output.

GENERATE_LATEX         = YES

# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put.
# If a relative path is entered the value of OUTPUT_DIRECTORY will be
# put in front of it. If left blank `latex' will be used as the default path.

LATEX_OUTPUT           = latex

# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be
# invoked. If left blank `latex' will be used as the default command name.
# Note that when enabling USE_PDFLATEX this option is only used for
# generating bitmaps for formulas in the HTML output, but not in the
# Makefile that is written to the output directory.

LATEX_CMD_NAME         = latex

# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to
# generate index for LaTeX. If left blank `makeindex' will be used as the
# default command name.

MAKEINDEX_CMD_NAME     = makeindex

# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact
# LaTeX documents. This may be useful for small projects and may help to
# save some trees in general.

COMPACT_LATEX          = NO

# The PAPER_TYPE tag can be used to set the paper type that is used
# by the printer. Possible values are: a4, letter, legal and
# executive. If left blank a4wide will be used.

PAPER_TYPE             = a4

# The EXTRA_PACKAGES tag can be used to specify one or more names of LaTeX
# packages that should be included in the LaTeX output.

EXTRA_PACKAGES         =

# The LATEX_HEADER tag can be used to specify a personal LaTeX header for
# the generated latex document. The header should contain everything until
# the first chapter. If it is left blank doxygen will generate a
# standard header. Notice: only use this tag if you know what you are doing!

LATEX_HEADER           =

# The LATEX_FOOTER tag can be used to specify a personal LaTeX footer for
# the generated latex document. The footer should contain everything after
# the last chapter. If it is left blank doxygen will generate a
# standard footer. Notice: only use this tag if you know what you are doing!

LATEX_FOOTER           =

# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated
# is prepared for conversion to pdf (using ps2pdf). The pdf file will
# contain links (just like the HTML output) instead of page references.
# This makes the output suitable for online browsing using a pdf viewer.

PDF_HYPERLINKS         = YES

# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of
# plain latex in the generated Makefile. Set this option to YES to get a
# higher quality PDF documentation.

USE_PDFLATEX           = YES

# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \batchmode
# command to the generated LaTeX files. This will instruct LaTeX to keep
# running if errors occur, instead of asking the user for help.
# This option is also used when generating formulas in HTML.

LATEX_BATCHMODE        = NO

# If LATEX_HIDE_INDICES is set to YES then doxygen will not
# include the index chapters (such as File Index, Compound Index, etc.)
# in the output.

LATEX_HIDE_INDICES     = NO

# If LATEX_SOURCE_CODE is set to YES then doxygen will include
# source code with syntax highlighting in the LaTeX output.
# Note that which sources are shown also depends on other settings
# such as SOURCE_BROWSER.

LATEX_SOURCE_CODE      = NO

# The LATEX_BIB_STYLE tag can be used to specify the style to use for the
# bibliography, e.g. plainnat, or ieeetr. The default style is "plain". See
# http://en.wikipedia.org/wiki/BibTeX for more info.

LATEX_BIB_STYLE        = plain

#---------------------------------------------------------------------------
# configuration options related to the RTF output
#---------------------------------------------------------------------------

# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output.
# The RTF output is optimized for Word 97 and may not look very pretty with
# other RTF readers or editors.

GENERATE_RTF           = NO

# The RTF_OUTPUT tag is used to specify where the RTF docs will be put.
# If a relative path is entered the value of OUTPUT_DIRECTORY will be
# put in front of it. If left blank `rtf' will be used as the default path.

RTF_OUTPUT             = rtf

# If the COMPACT_RTF tag is set to YES Doxygen generates more compact
# RTF documents. This may be useful for small projects and may help to
# save some trees in general.

COMPACT_RTF            = NO

# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated
# will contain hyperlink fields. The RTF file will
# contain links (just like the HTML output) instead of page references.
# This makes the output suitable for online browsing using WORD or other
# programs which support those fields.
# Note: wordpad (write) and others do not support links.

RTF_HYPERLINKS         = NO

# Load style sheet definitions from file. Syntax is similar to doxygen's
# config file, i.e. a series of assignments. You only have to provide
# replacements, missing definitions are set to their default value.

RTF_STYLESHEET_FILE    =

# Set optional variables used in the generation of an rtf document.
# Syntax is similar to doxygen's config file.

RTF_EXTENSIONS_FILE    =

#---------------------------------------------------------------------------
# configuration options related to the man page output
#---------------------------------------------------------------------------

# If the GENERATE_MAN tag is set to YES (the default) Doxygen will
# generate man pages.

GENERATE_MAN           = NO

# The MAN_OUTPUT tag is used to specify where the man pages will be put.
# If a relative path is entered the value of OUTPUT_DIRECTORY will be
# put in front of it. If left blank `man' will be used as the default path.

MAN_OUTPUT             = man

# The MAN_EXTENSION tag determines the extension that is added to
# the generated man pages (default is the subroutine's section .3)

MAN_EXTENSION          = .3

# If the MAN_LINKS tag is set to YES and Doxygen generates man output,
# then it will generate one additional man file for each entity
# documented in the real man page(s). These additional files
# only source the real man page, but without them the man command
# would be unable to find the correct page. The default is NO.

MAN_LINKS              = NO

#---------------------------------------------------------------------------
# configuration options related to the XML output
#---------------------------------------------------------------------------

# If the GENERATE_XML tag is set to YES Doxygen will
# generate an XML file that captures the structure of
# the code including all documentation.

GENERATE_XML           = NO

# The XML_OUTPUT tag is used to specify where the XML pages will be put.
# If a relative path is entered the value of OUTPUT_DIRECTORY will be
# put in front of it. If left blank `xml' will be used as the default path.

XML_OUTPUT             = xml

# The XML_SCHEMA tag can be used to specify an XML schema,
# which can be used by a validating XML parser to check the
# syntax of the XML files.

XML_SCHEMA             =

# The XML_DTD tag can be used to specify an XML DTD,
# which can be used by a validating XML parser to check the
# syntax of the XML files.

XML_DTD                =

# If the XML_PROGRAMLISTING tag is set to YES Doxygen will
# dump the program listings (including syntax highlighting
# and cross-referencing information) to the XML output. Note that
# enabling this will significantly increase the size of the XML output.

XML_PROGRAMLISTING     = YES

#---------------------------------------------------------------------------
# configuration options for the AutoGen Definitions output
#---------------------------------------------------------------------------

# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will
# generate an AutoGen Definitions (see autogen.sf.net) file
# that captures the structure of the code including all
# documentation. Note that this feature is still experimental
# and incomplete at the moment.

GENERATE_AUTOGEN_DEF   = NO

#---------------------------------------------------------------------------
# configuration options related to the Perl module output
#---------------------------------------------------------------------------

# If the GENERATE_PERLMOD tag is set to YES Doxygen will
# generate a Perl module file that captures the structure of
# the code including all documentation. Note that this
# feature is still experimental and incomplete at the
# moment.

GENERATE_PERLMOD       = NO

# If the PERLMOD_LATEX tag is set to YES Doxygen will generate
# the necessary Makefile rules, Perl scripts and LaTeX code to be able
# to generate PDF and DVI output from the Perl module output.

PERLMOD_LATEX          = NO

# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be
# nicely formatted so it can be parsed by a human reader.
# This is useful
# if you want to understand what is going on.
# On the other hand, if this
# tag is set to NO the size of the Perl module output will be much smaller
# and Perl will parse it just the same.

PERLMOD_PRETTY         = YES

# The names of the make variables in the generated doxyrules.make file
# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX.
# This is useful so different doxyrules.make files included by the same
# Makefile don't overwrite each other's variables.

PERLMOD_MAKEVAR_PREFIX =

#---------------------------------------------------------------------------
# Configuration options related to the preprocessor
#---------------------------------------------------------------------------

# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will
# evaluate all C-preprocessor directives found in the sources and include
# files.

ENABLE_PREPROCESSING   = YES

# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro
# names in the source code. If set to NO (the default) only conditional
# compilation will be performed. Macro expansion can be done in a controlled
# way by setting EXPAND_ONLY_PREDEF to YES.

MACRO_EXPANSION        = NO

# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES
# then the macro expansion is limited to the macros specified with the
# PREDEFINED and EXPAND_AS_DEFINED tags.

EXPAND_ONLY_PREDEF     = NO

# If the SEARCH_INCLUDES tag is set to YES (the default) the include files
# pointed to by INCLUDE_PATH will be searched when a #include is found.

SEARCH_INCLUDES        = YES

# The INCLUDE_PATH tag can be used to specify one or more directories that
# contain include files that are not input files but should be processed by
# the preprocessor.

INCLUDE_PATH           =

# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard
# patterns (like *.h and *.hpp) to filter out the header-files in the
# directories. If left blank, the patterns specified with FILE_PATTERNS will
# be used.

INCLUDE_FILE_PATTERNS  =

# The PREDEFINED tag can be used to specify one or more macro names that
# are defined before the preprocessor is started (similar to the -D option of
# gcc). The argument of the tag is a list of macros of the form: name
# or name=definition (no spaces). If the definition and the = are
# omitted =1 is assumed. To prevent a macro definition from being
# undefined via #undef or recursively expanded use the := operator
# instead of the = operator.

PREDEFINED             =

# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
# this tag can be used to specify a list of macro names that should be expanded.
# The macro definition that is found in the sources will be used.
# Use the PREDEFINED tag if you want to use a different macro definition that
# overrules the definition found in the source code.

EXPAND_AS_DEFINED      =

# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then
# doxygen's preprocessor will remove all references to function-like macros
# that are alone on a line, have an all uppercase name, and do not end with a
# semicolon, because these will confuse the parser if not removed.

SKIP_FUNCTION_MACROS   = YES

#---------------------------------------------------------------------------
# Configuration::additions related to external references
#---------------------------------------------------------------------------

# The TAGFILES option can be used to specify one or more tagfiles. For each
# tag file the location of the external documentation should be added. The
# format of a tag file without this location is as follows:
#
# TAGFILES = file1 file2 ...
# Adding location for the tag files is done as follows:
#
# TAGFILES = file1=loc1 "file2 = loc2" ...
# where "loc1" and "loc2" can be relative or absolute paths
# or URLs. Note that each tag file must have a unique name (where the name does
# NOT include the path). If a tag file is not located in the directory in which
# doxygen is run, you must also specify the path to the tagfile here.

TAGFILES               =

# When a file name is specified after GENERATE_TAGFILE, doxygen will create
# a tag file that is based on the input files it reads.

GENERATE_TAGFILE       =

# If the ALLEXTERNALS tag is set to YES all external classes will be listed
# in the class index. If set to NO only the inherited external classes
# will be listed.

ALLEXTERNALS           = NO

# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed
# in the modules index. If set to NO, only the current project's groups will
# be listed.

EXTERNAL_GROUPS        = YES

# The PERL_PATH should be the absolute path and name of the perl script
# interpreter (i.e. the result of `which perl').

PERL_PATH              = /usr/bin/perl

#---------------------------------------------------------------------------
# Configuration options related to the dot tool
#---------------------------------------------------------------------------

# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will
# generate an inheritance diagram (in HTML, RTF and LaTeX) for classes with base
# or super classes. Setting the tag to NO turns the diagrams off. Note that
# this option also works with HAVE_DOT disabled, but it is recommended to
# install and use dot, since it yields more powerful graphs.

CLASS_DIAGRAMS         = YES

# You can define message sequence charts within doxygen comments using the \msc
# command. Doxygen will then run the mscgen tool (see
# http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the
# documentation. The MSCGEN_PATH tag allows you to specify the directory where
# the mscgen tool resides. If left empty the tool is assumed to be found in the
# default search path.

MSCGEN_PATH            =

# If set to YES, the inheritance and collaboration graphs will hide
# inheritance and usage relations if the target is undocumented
# or is not a class.

HIDE_UNDOC_RELATIONS   = YES

# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is
# available from the path. This tool is part of Graphviz, a graph visualization
# toolkit from AT&T and Lucent Bell Labs. The other options in this section
# have no effect if this option is set to NO (the default)

HAVE_DOT               = YES

# The DOT_NUM_THREADS specifies the number of dot invocations doxygen is
# allowed to run in parallel. When set to 0 (the default) doxygen will
# base this on the number of processors available in the system. You can set it
# explicitly to a value larger than 0 to get control over the balance
# between CPU load and processing speed.

DOT_NUM_THREADS        = 0

# By default doxygen will use the Helvetica font for all dot files that
# doxygen generates. When you want a differently looking font you can specify
# the font name using DOT_FONTNAME. You need to make sure dot is able to find
# the font, which can be done by putting it in a standard location or by setting
# the DOTFONTPATH environment variable or by setting DOT_FONTPATH to the
# directory containing the font.

DOT_FONTNAME           = Helvetica

# The DOT_FONTSIZE tag can be used to set the size of the font of dot graphs.
# The default size is 10pt.

DOT_FONTSIZE           = 10

# By default doxygen will tell dot to use the Helvetica font.
# If you specify a different font using DOT_FONTNAME you can use DOT_FONTPATH to
# set the path where dot can find it.

DOT_FONTPATH           =

# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen
# will generate a graph for each documented class showing the direct and
# indirect inheritance relations. Setting this tag to YES will force the
# CLASS_DIAGRAMS tag to NO.

CLASS_GRAPH            = YES

# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen
# will generate a graph for each documented class showing the direct and
# indirect implementation dependencies (inheritance, containment, and
# class references variables) of the class with other documented classes.

COLLABORATION_GRAPH    = YES

# If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen
# will generate a graph for groups, showing the direct group dependencies.

GROUP_GRAPHS           = YES

# If the UML_LOOK tag is set to YES doxygen will generate inheritance and
# collaboration diagrams in a style similar to the OMG's Unified Modeling
# Language.

UML_LOOK               = NO

# If the UML_LOOK tag is enabled, the fields and methods are shown inside
# the class node. If there are many fields or methods and many nodes the
# graph may become too big to be useful. The UML_LIMIT_NUM_FIELDS
# threshold limits the number of items for each type to make the size more
# manageable. Set this to 0 for no limit. Note that the threshold may be
# exceeded by 50% before the limit is enforced.

UML_LIMIT_NUM_FIELDS   = 10

# If set to YES, the inheritance and collaboration graphs will show the
# relations between templates and their instances.

TEMPLATE_RELATIONS     = NO

# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT
# tags are set to YES then doxygen will generate a graph for each documented
# file showing the direct and indirect include dependencies of the file with
# other documented files.

INCLUDE_GRAPH          = YES

# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and
# HAVE_DOT tags are set to YES then doxygen will generate a graph for each
# documented header file showing the documented files that directly or
# indirectly include this file.

INCLUDED_BY_GRAPH      = YES

# If the CALL_GRAPH and HAVE_DOT options are set to YES then
# doxygen will generate a call dependency graph for every global function
# or class method. Note that enabling this option will significantly increase
# the time of a run. So in most cases it will be better to enable call graphs
# for selected functions only using the \callgraph command.

CALL_GRAPH             = YES

# If the CALLER_GRAPH and HAVE_DOT tags are set to YES then
# doxygen will generate a caller dependency graph for every global function
# or class method. Note that enabling this option will significantly increase
# the time of a run. So in most cases it will be better to enable caller
# graphs for selected functions only using the \callergraph command.

CALLER_GRAPH           = YES

# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen
# will generate a graphical hierarchy of all classes instead of a textual one.

GRAPHICAL_HIERARCHY    = YES

# If the DIRECTORY_GRAPH and HAVE_DOT tags are set to YES
# then doxygen will show the dependencies a directory has on other directories
# in a graphical way. The dependency relations are determined by the #include
# relations between the files in the directories.

DIRECTORY_GRAPH        = YES

# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images
# generated by dot. Possible values are svg, png, jpg, or gif.
# If left blank png will be used. If you choose svg you need to set
# HTML_FILE_EXTENSION to xhtml in order to make the SVG files
# visible in IE 9+ (other browsers do not have this requirement).

DOT_IMAGE_FORMAT       = png

# If DOT_IMAGE_FORMAT is set to svg, then this option can be set to YES to
# enable generation of interactive SVG images that allow zooming and panning.
# Note that this requires a modern browser other than Internet Explorer.
# Tested and working are Firefox, Chrome, Safari, and Opera. For IE 9+ you
# need to set HTML_FILE_EXTENSION to xhtml in order to make the SVG files
# visible. Older versions of IE do not have SVG support.

INTERACTIVE_SVG        = NO

# The tag DOT_PATH can be used to specify the path where the dot tool can be
# found. If left blank, it is assumed the dot tool can be found in the path.

DOT_PATH               =

# The DOTFILE_DIRS tag can be used to specify one or more directories that
# contain dot files that are included in the documentation (see the
# \dotfile command).

DOTFILE_DIRS           =

# The MSCFILE_DIRS tag can be used to specify one or more directories that
# contain msc files that are included in the documentation (see the
# \mscfile command).

MSCFILE_DIRS           =

# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of
# nodes that will be shown in the graph. If the number of nodes in a graph
# becomes larger than this value, doxygen will truncate the graph, which is
# visualized by representing a node as a red box. Note that if the
# number of direct children of the root node in a graph is already larger than
# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note
# that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH.

DOT_GRAPH_MAX_NODES    = 50

# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the
# graphs generated by dot. A depth value of 3 means that only nodes reachable
# from the root by following a path via at most 3 edges will be shown. Nodes
# that lay further from the root node will be omitted. Note that setting this
# option to 1 or 2 may greatly reduce the computation time needed for large
# code bases. Also note that the size of a graph can be further restricted by
# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction.

MAX_DOT_GRAPH_DEPTH    = 0

# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent
# background. This is disabled by default, because dot on Windows does not
# seem to support this out of the box. Warning: Depending on the platform used,
# enabling this option may lead to badly anti-aliased labels on the edges of
# a graph (i.e. they become hard to read).

DOT_TRANSPARENT        = NO

# Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output
# files in one run (i.e. multiple -o and -T options on the command line). This
# makes dot run faster, but since only newer versions of dot (>1.8.10)
# support this, this feature is disabled by default.

DOT_MULTI_TARGETS      = NO

# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will
# generate a legend page explaining the meaning of the various boxes and
# arrows in the dot generated graphs.

GENERATE_LEGEND        = YES

# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will
# remove the intermediate dot files that are used to generate
# the various graphs.

DOT_CLEANUP            = YES


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/lib/inc/rwt_init.h
================================================
/*! \file rwt_init.h
    \brief Header for matlab init functions in init.c
*/
#ifndef RWT_INIT_H_
#define RWT_INIT_H_

#include "rwt_platform.h"

#if defined(MATLAB_MEX_FILE) || defined(OCTAVE_MEX_FILE)
  #include "mex.h"
  #ifndef OCTAVE_MEX_FILE
    #include "matrix.h"
  #endif
  /*! Bundle of values describing one transform request.
   *  This is the return type of rwt_matlab_init(), which fills it in from the
   *  MATLAB/Octave call arguments.
   */
  typedef struct {
    size_t nrows;     /*!< The number of rows in the input matrix. Output matrix will match.  */
    size_t ncols;     /*!< The number of columns in the input matrix. Output matrix will match. */
    int levels;       /*!< L, the number of levels for the transform. */
    int ncoeff;       /*!< Length of h / the number of scaling coefficients */
    double *scalings; /*!< Wavelet scaling coefficients */
  } rwt_init_params;
  /*! Identifies which transform rwt_matlab_init() is preparing for (passed as its last argument). */
  typedef enum {NORMAL_DWT, REDUNDANT_DWT, INVERSE_DWT, INVERSE_REDUNDANT_DWT} transform_t;
#endif

#ifdef __cplusplus
extern "C" {
#endif

#if defined(MATLAB_MEX_FILE) || defined(OCTAVE_MEX_FILE)
  /*! MEX builds: parse and sanity check the MATLAB/Octave call arguments (nlhs/plhs/nrhs/prhs)
   *  for the transform selected by dwtType and return the extracted parameters.
   */
  rwt_init_params rwt_matlab_init(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[], transform_t dwtType);
#else
  /*! Non-MEX builds: only the level helpers are exported.
   *  rwt_find_levels  - derive the number of levels L from the input dimensions m x n
   *  rwt_check_levels - sanity check a levels value against the input dimensions
   */
  int rwt_find_levels(size_t m, size_t n);
  int rwt_check_levels(int levels, size_t rows, size_t cols);
#endif

#ifdef __cplusplus
}
#endif

#endif /* RWT_INIT_H_ */


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/lib/inc/rwt_platform.h
================================================
/*! \file rwt_platform.h
    \brief Abstract away environment differences and provide some common macros
*/
#ifndef RWT_PLATFORM_H
#define RWT_PLATFORM_H

#include <stdlib.h>
#include <stdio.h>
#include <stdarg.h>

/*! For MATLAB we address 2d inputs and outputs in column-major order */
/*! For Python we address 2d inputs and outputs in row-major order */
/*! The offset macros are for debugging */
/*! The parameters for the mat() macro are:
 *    a - the base pointer to the matrix of values
 *    i - index of the target row
 *    j - index of the target column
 *    m - the number of rows
 *    n - the number of columns
 *
 *  Every macro argument is wrapped in parentheses so that expressions such as
 *  mat(a, r+1, c, rows, cols/2) expand with the intended precedence.
 *  The function-like macros still evaluate some arguments more than once
 *  (max, min, offset_col), so avoid passing expressions with side effects to those.
 */
#if defined(MATLAB_MEX_FILE) || defined(OCTAVE_MEX_FILE)
  #define COLUMN_MAJOR_ORDER 1
  #include "mex.h"
  #ifndef OCTAVE_MEX_FILE
    #include "matrix.h"
  #endif
  #define mat(a, i, j, m, n) (*((a) + ((m)*(j)+(i))))
  #define mat_offset(a, i, j, m, n) ((m)*(j)+(i))
  #define offset_row(offset, m, n) ((offset) % (m))
  #define offset_col(offset, m, n) (((offset) - ((offset) % (m))) / (m))
  #define rwt_printf(fmt, ...) mexPrintf(fmt, ##__VA_ARGS__)
  #define rwt_errormsg(msg) mexErrMsgTxt(msg)
#else
  #define ROW_MAJOR_ORDER 1
  #define mat(a, i, j, m, n) (*((a) + ((n)*(i)+(j))))
  #define mat_offset(a, i, j, m, n) ((n)*(i)+(j))
  #define offset_row(offset, m, n) (((offset) - ((offset) % (n))) / (n))
  #define offset_col(offset, m, n) ((offset) % (n))
  #define rwt_printf(fmt, ...) printf(fmt, ##__VA_ARGS__)
  /*! No trailing semicolon here: the caller supplies it, which keeps
   *  "if (cond) rwt_errormsg(...); else ..." well-formed in both build flavours.
   */
  #define rwt_errormsg(msg) printf("\033[91m%s\033[0m\n", msg)
#endif

#ifndef max
  #define max(A,B) ((A) > (B) ? (A) : (B))
#endif
#ifndef min
  #define min(A,B) ((A) < (B) ? (A) : (B))
#endif
/*! Evaluates to 1 when x is even and 0 when x is odd */
#define even(x)  (((x) & 1) ? 0 : 1)

#ifdef __cplusplus
extern "C" {
#endif

/*! Memory helpers used by the transform code in place of direct malloc/calloc/free calls.
 *  Only the prototypes live here; the definitions are presumably in platform.c
 *  (the source built as the "platform" library) - confirm.
 */
void *rwt_malloc(size_t size);             /*!< allocate size bytes */
void *rwt_calloc(size_t num, size_t size); /*!< allocate num elements of size bytes each; NOTE(review): zero-fill is assumed from the calloc-style name - confirm */
void rwt_free(void *ptr);                  /*!< release memory obtained from rwt_malloc / rwt_calloc */

#ifdef __cplusplus
}
#endif

#endif


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/lib/inc/rwt_transforms.h
================================================
/*! \file rwt_transforms.h
    \brief Function prototypes for the transform implementations
*/
#ifndef TRANSFORMS_H_
#define TRANSFORMS_H_

#include <math.h>

#ifdef __cplusplus
extern "C" {
#endif

/*! dwt and rdwt take an input x and store the result in y or yl and yh
 *  idwt and irdwt take an input y or yl and yh and store the result in x
 *  In all cases it is expected that the output array has already been
 *  allocated prior to calling the transform function.
 *
 *  Common parameters:
 *    nrows, ncols - dimensions of the signal matrix
 *    h            - the wavelet scaling coefficients
 *    ncoeff       - the number of entries in h
 *    levels       - the number of transform levels to apply
 */
void   dwt(double *x, size_t nrows, size_t ncols, double *h, int ncoeff, int levels, double *y);
void  idwt(double *x, size_t nrows, size_t ncols, double *h, int ncoeff, int levels, double *y);
void  rdwt(double *x, size_t nrows, size_t ncols, double *h, int ncoeff, int levels, double *yl, double *yh);
void irdwt(double *x, size_t nrows, size_t ncols, double *h, int ncoeff, int levels, double *yl, double *yh);

#ifdef __cplusplus
}
#endif

#endif /* TRANSFORMS_H_ */


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/lib/src/CMakeLists.txt
================================================
# Make the shared headers in lib/inc visible to every target declared below.
include_directories ("${PROJECT_SOURCE_DIR}/lib/inc")
# One library target per transform source file. No STATIC/SHARED keyword is given,
# so the library type follows CMake's default (controlled by BUILD_SHARED_LIBS).
add_library(dwt dwt.c)
add_library(idwt idwt.c)
add_library(irdwt irdwt.c)
add_library(rdwt rdwt.c)
# Platform support code; presumably where rwt_malloc/rwt_calloc/rwt_free are defined - confirm.
# Note that init.c is not built by this file.
add_library(platform platform.c)


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/lib/src/dwt.c
================================================
/*! \file dwt.c
    \brief Implementation of the discrete wavelet transform

*/

#include "rwt_platform.h"

/*!
 * Filter-and-decimate step of the dwt
 *
 * @param x_in input signal values; must have room for lx + ncoeff_minus_one entries
 * @param lx the length of the signal held in x_in
 * @param coeff_low the low pass coefficients
 * @param coeff_high the high pass coefficients
 * @param ncoeff_minus_one one less than the number of scaling coefficients
 * @param x_out_low low pass results
 * @param x_out_high high pass results
 *
 * The signal is first extended periodically: the first ncoeff_minus_one samples
 * are copied to the positions just past the end of the signal, which overwrites
 * that part of x_in. The window then advances two samples at a time and both
 * filters are evaluated on the same window, with the coefficient arrays read
 * back to front. One low pass and one high pass value is produced per window.
 */
void dwt_convolution(double *x_in, size_t lx, double *coeff_low, double *coeff_high, int ncoeff_minus_one, double *x_out_low, double *x_out_high) {
  size_t wrap, pos, tap;
  size_t out = 0;
  double low_sum, high_sum, sample;

  /*! Periodic extension: append a copy of the leading samples after the end of the signal */
  for (wrap = lx; wrap < lx+ncoeff_minus_one; wrap++)
    x_in[wrap] = x_in[wrap - lx];

  for (pos = 0; pos < lx; pos += 2) {
    low_sum = 0;
    high_sum = 0;
    for (tap = 0; tap <= ncoeff_minus_one; tap++) {
      sample = x_in[pos + tap];
      low_sum  += sample * coeff_low[ncoeff_minus_one - tap];
      high_sum += sample * coeff_high[ncoeff_minus_one - tap];
    }
    /*! One output pair per two input samples */
    x_out_low[out]  = low_sum;
    x_out_high[out] = high_sum;
    out++;
  }
}


/*!
 * Reserve the scratch buffers used by dwt
 *
 * @param m      the number of rows of the input matrix
 * @param n      the number of columns of the input matrix
 * @param ncoeff the number of scaling coefficients
 * @param x_dummy      receives the buffer for data being passed to the convolution
 * @param y_dummy_low  receives the buffer for low pass convolution results
 * @param y_dummy_high receives the buffer for high pass convolution results
 * @param coeff_low    receives the buffer for the low pass coefficients
 * @param coeff_high   receives the buffer for the high pass coefficients
 *
 * Work is done one row or one column at a time, so the signal buffers are sized
 * by the longer of the two dimensions. The input buffer gets ncoeff-1 extra
 * entries for the extension that the convolution appends. Both coefficient
 * buffers hold ncoeff entries. The results of rwt_calloc are stored unchecked.
 */
void dwt_allocate(size_t m, size_t n, int ncoeff, double **x_dummy, double **y_dummy_low, double **y_dummy_high, double **coeff_low, double **coeff_high) {
  const size_t longest = (m > n) ? m : n;

  *x_dummy      = (double *) rwt_calloc(longest + ncoeff - 1, sizeof(double));
  *y_dummy_low  = (double *) rwt_calloc(longest, sizeof(double));
  *y_dummy_high = (double *) rwt_calloc(longest, sizeof(double));
  *coeff_low    = (double *) rwt_calloc(ncoeff, sizeof(double));
  *coeff_high   = (double *) rwt_calloc(ncoeff, sizeof(double));
}


/*!
 * Release the scratch buffers obtained from dwt_allocate
 *
 * @param x_dummy      buffer for data being passed to the convolution
 * @param y_dummy_low  buffer for low pass convolution results
 * @param y_dummy_high buffer for high pass convolution results
 * @param coeff_low    buffer for the low pass coefficients
 * @param coeff_high   buffer for the high pass coefficients
 *
 * Each buffer is handed to rwt_free once, in the order listed above.
 * The caller's pointers are left unchanged.
 */
void dwt_free(double **x_dummy, double **y_dummy_low, double **y_dummy_high, double **coeff_low, double **coeff_high) {
  double *buffers[5];
  size_t k;

  buffers[0] = *x_dummy;
  buffers[1] = *y_dummy_low;
  buffers[2] = *y_dummy_high;
  buffers[3] = *coeff_low;
  buffers[4] = *coeff_high;

  for (k = 0; k < 5; k++)
    rwt_free(buffers[k]);
}


/*!
 * Derive the two analysis filters from the scaling coefficients
 *
 * @param ncoeff length of h / the number of scaling coefficients
 * @param h  the wavelet scaling coefficients
 * @param coeff_low receives the low pass coefficients: h in reverse order
 * @param coeff_high receives the high pass coefficients: h in forward order with
 *                   the entries at even (0-based) positions negated
 *
 * The coefficients of our Quadrature Mirror Filter are described by
 * \f$ g\left[lh - 1 - n \right] = (-1)^n * h\left[n\right] \f$
 *
 */
void dwt_coefficients(int ncoeff, double *h, double **coeff_low, double **coeff_high) {
  double *low = *coeff_low;
  double *high = *coeff_high;
  int k;

  for (k = 0; k < ncoeff; k++) {
    low[k] = h[ncoeff - 1 - k];
    high[k] = (k % 2 == 0) ? -h[k] : h[k];
  }
}


/*!
 * Perform the discrete wavelet transform
 *
 * @param x      the input signal (only read while processing the first level)
 * @param nrows  number of rows in the input
 * @param ncols  number of columns in the input
 * @param h      wavelet scaling coefficients
 * @param ncoeff length of h / the number of scaling coefficients
 * @param levels the number of levels
 * @param y      the output signal with the wavelet transform applied; must already be allocated
 *
 * Each level filters every row of the current block and, for 2d input, then every column.
 * The low pass results are written to the first half of the row/column in y and the
 * high pass results to the second half. The next level then works on a block with half
 * the rows and half the columns, reading from y instead of x.
 *
 * No size checks are made here; the block sizes are simply halved at each level, so the
 * caller is presumably expected to have verified that the dimensions are divisible by
 * 2^levels (see rwt_check_levels in init.c) - confirm.
 *
 */
void dwt(double *x, size_t nrows, size_t ncols, double *h, int ncoeff, int levels, double *y) {
  double  *coeff_low, *coeff_high, *y_dummy_low, *y_dummy_high, *x_dummy;
  long i;
  int current_level, ncoeff_minus_one;
  size_t current_rows, current_cols, row_cursor, column_cursor, idx_rows, idx_columns;

  if (ncols==1) { /*! Accept either column vectors or row vectors: a column vector is treated as a single row of length nrows */
    ncols = nrows;
    nrows = 1;
  }
  
  dwt_allocate(nrows, ncols, ncoeff, &x_dummy, &y_dummy_low, &y_dummy_high, &coeff_low, &coeff_high);
  dwt_coefficients(ncoeff, h, &coeff_low, &coeff_high); /*! For performance, calculate what we can outside the loops */
  ncoeff_minus_one = ncoeff - 1;
  current_rows = 2*nrows; /*! current_rows and current_cols start at 2x since we divide by 2 at the start of the loop */
  current_cols = 2*ncols;
 
  for (current_level=1; current_level<=levels; current_level++) {
    if (nrows==1)
      current_rows = 1; /*! 1d signal: there is only ever one row; row_cursor is not set and not used in this case */
    else{
      current_rows = current_rows/2;
      row_cursor = current_rows/2;     /*! row at which the high pass half starts */
    }
    current_cols = current_cols/2;
    column_cursor = current_cols/2;    /*! column at which the high pass half starts */

    /*! Row pass: filter each row of the current block */
    for (idx_rows=0; idx_rows<current_rows; idx_rows++) {
      /*! Copy the row into the work buffer: from x on the first level, from y afterwards */
      for (i=0; i<current_cols; i++)
	if (current_level==1)  
	  x_dummy[i] = mat(x, idx_rows, i, nrows, ncols);  
	else 
	  x_dummy[i] = mat(y, idx_rows, i, nrows, ncols);  
      /*! Perform filtering lowpass and highpass*/
      dwt_convolution(x_dummy, current_cols, coeff_low, coeff_high, ncoeff_minus_one, y_dummy_low, y_dummy_high); 
      /*! Restore dummy variables in matrices: low pass into columns [0, column_cursor), high pass after them */
      idx_columns = column_cursor;
      for (i=0; i<column_cursor; i++) {    
	mat(y, idx_rows, i,             nrows, ncols) = y_dummy_low[i];  
	mat(y, idx_rows, idx_columns++, nrows, ncols) = y_dummy_high[i];  
      } 
    }  
    
    /*! For the 2d transform, we go through each of the columns after having gone through the rows */
    if (nrows>1) {
      for (idx_columns=0; idx_columns<current_cols; idx_columns++) { /* loop over columns */
	/*! Store in dummy variables (the column is read from y, which now holds the row pass output) */
	for (i=0; i<current_rows; i++)
	  x_dummy[i] = mat(y, i, idx_columns, nrows, ncols);  
	/*! Perform filtering lowpass and highpass*/
	dwt_convolution(x_dummy, current_rows, coeff_low, coeff_high, ncoeff_minus_one, y_dummy_low, y_dummy_high); 
	/*! Restore dummy variables in matrix: low pass into rows [0, row_cursor), high pass after them */
	idx_rows = row_cursor;
	for (i=0; i<row_cursor; i++) {
	  mat(y, i,          idx_columns, nrows, ncols) = y_dummy_low[i];  
	  mat(y, idx_rows++, idx_columns, nrows, ncols) = y_dummy_high[i];  
	}
      }
    }
  }
  dwt_free(&x_dummy, &y_dummy_low, &y_dummy_high, &coeff_low, &coeff_high);
}


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/lib/src/idwt.c
================================================
/*! \file idwt.c
    \brief Implementation of the inverse discrete wavelet transform

*/

#include "rwt_platform.h"

/*!
 * Synthesis (upsample-and-filter) step of the idwt
 *
 * @param x_out receives 2*lx reconstructed samples
 * @param lx the number of low pass (and of high pass) input samples
 * @param coeff_low the low pass synthesis coefficients
 * @param coeff_high the high pass synthesis coefficients
 * @param ncoeff_minus_one one less than the number of scaling coefficients
 * @param ncoeff_halved_minus_one half the number of scaling coefficients, minus one
 * @param x_in_low low pass input; the data is expected to start at index
 *                 ncoeff_halved_minus_one, the entries before it are filled in here
 * @param x_in_high high pass input, laid out like x_in_low
 *
 * The leading ncoeff_halved_minus_one entries of both inputs are first filled with
 * the values found lx positions further along, working from the highest index down.
 * Each input position then yields two consecutive output samples; both are sums
 * over the same input window, using alternating coefficient entries.
 */
void idwt_convolution(double *x_out, size_t lx, double *coeff_low, double *coeff_high, int ncoeff_minus_one, int ncoeff_halved_minus_one, double *x_in_low, double *x_in_high) {
  int pad;
  size_t pos, tap, shift;
  size_t out = 0;
  double first_sum, second_sum, lo, hi;

  /* fill the leading pad, highest index first */
  pad = ncoeff_halved_minus_one;
  while (--pad >= 0) {
    x_in_low[pad]  = x_in_low[lx+pad];
    x_in_high[pad] = x_in_high[lx+pad];
  }

  for (pos = 0; pos < lx; pos++) {
    first_sum = 0;
    second_sum = 0;
    for (tap = 0, shift = 0; tap <= ncoeff_halved_minus_one; tap++, shift += 2) {
      lo = x_in_low[pos + tap];
      hi = x_in_high[pos + tap];
      first_sum  = first_sum  + (lo * coeff_low[ncoeff_minus_one-1-shift]) + (hi * coeff_high[ncoeff_minus_one-1-shift]);
      second_sum = second_sum + (lo * coeff_low[ncoeff_minus_one-shift])   + (hi * coeff_high[ncoeff_minus_one-shift]);
    }
    x_out[out]     = first_sum;
    x_out[out + 1] = second_sum;
    out += 2;
  }
}


/*!
 * Reserve the scratch buffers used by idwt
 *
 * @param m the number of rows of the input matrix
 * @param n the number of columns of the input matrix
 * @param ncoeff the number of scaling coefficients
 * @param x_dummy      receives the buffer for reconstructed output of one row/column
 * @param y_dummy_low  receives the buffer for the low pass input of one row/column
 * @param y_dummy_high receives the buffer for the high pass input of one row/column
 * @param coeff_low    receives the buffer for the low pass coefficients
 * @param coeff_high   receives the buffer for the high pass coefficients
 *
 * Signal buffers are sized by the longer of the two dimensions; the two input
 * buffers get ncoeff/2-1 additional entries. The results of rwt_calloc are
 * stored unchecked.
 */
void idwt_allocate(size_t m, size_t n, int ncoeff, double **x_dummy, double **y_dummy_low, double **y_dummy_high, double **coeff_low, double **coeff_high) {
  const size_t longest = (m > n) ? m : n;
  const size_t padded = longest + ncoeff/2 - 1;

  *x_dummy      = (double *) rwt_calloc(longest, sizeof(double));
  *y_dummy_low  = (double *) rwt_calloc(padded, sizeof(double));
  *y_dummy_high = (double *) rwt_calloc(padded, sizeof(double));
  *coeff_low    = (double *) rwt_calloc(ncoeff, sizeof(double));
  *coeff_high   = (double *) rwt_calloc(ncoeff, sizeof(double));
}


/*!
 * Release the scratch buffers obtained from idwt_allocate
 *
 * @param x_dummy      buffer for reconstructed output
 * @param y_dummy_low  buffer for low pass input
 * @param y_dummy_high buffer for high pass input
 * @param coeff_low    buffer for the low pass coefficients
 * @param coeff_high   buffer for the high pass coefficients
 *
 * Each buffer is handed to rwt_free once, in the order listed above.
 * The caller's pointers are left unchanged.
 */
void idwt_free(double **x_dummy, double **y_dummy_low, double **y_dummy_high, double **coeff_low, double **coeff_high) {
  double *buffers[5];
  size_t k;

  buffers[0] = *x_dummy;
  buffers[1] = *y_dummy_low;
  buffers[2] = *y_dummy_high;
  buffers[3] = *coeff_low;
  buffers[4] = *coeff_high;

  for (k = 0; k < 5; k++)
    rwt_free(buffers[k]);
}


/*!
 * Put the scaling coefficients into a form ready for use in the convolution function
 *
 * @param ncoeff length of h / the number of scaling coefficients
 * @param h  the wavelet scaling coefficients
 * @param coeff_low receives a copy of h
 * @param coeff_high receives h in reverse order, with the entries at odd
 *                   (0-based) positions sign flipped
 *
 * Both output buffers must hold at least ncoeff entries.
 */
void idwt_coefficients(int ncoeff, double *h, double **coeff_low, double **coeff_high) {
  int i;
  for (i=0; i<ncoeff; i++) {
    (*coeff_low)[i] = h[i];
    (*coeff_high)[i] = h[ncoeff-i-1];
  }
  /* Stop strictly before ncoeff: with an odd ncoeff, "i<=ncoeff" would reach
     i==ncoeff and write one element past the end of the buffer. */
  for (i=1; i<ncoeff; i+=2)
    (*coeff_high)[i] = -((*coeff_high)[i]);
}


/*!
 * Perform the inverse discrete wavelet transform
 *
 * @param x      the output signal with the inverse wavelet transform applied; must already be allocated
 * @param nrows  number of rows in the input
 * @param ncols  number of columns in the input
 * @param h      wavelet scaling coefficients
 * @param ncoeff the number of scaling coefficients
 * @param levels the number of levels
 * @param y      the input signal (the wavelet coefficients); it is only read
 *
 * y is copied into x and the reconstruction is then carried out in place in x.
 * Levels are processed from the last one down to the first. At each level the
 * columns are reconstructed first (2d input only) and then the rows, after which
 * the block being worked on doubles in each dimension.
 *
 */
void idwt(double *x, size_t nrows, size_t ncols, double *h, int ncoeff, int levels, double *y) {
  double  *coeff_low, *coeff_high, *y_dummy_low, *y_dummy_high, *x_dummy;
  long i;
  int current_level, ncoeff_minus_one, ncoeff_halved_minus_one, sample_f;
  size_t current_rows, current_cols, row_cursor, column_cursor, idx_rows, idx_cols;

  /* allocation happens before the vector re-labelling below; the buffers are sized by max(nrows, ncols) so the order does not matter */
  idwt_allocate(nrows, ncols, ncoeff, &x_dummy, &y_dummy_low, &y_dummy_high, &coeff_low, &coeff_high);
  idwt_coefficients(ncoeff, h, &coeff_low, &coeff_high);

  /* a column vector is treated as a single row of length nrows */
  if (ncols==1) {
    ncols = nrows;
    nrows = 1;
  }
  
  ncoeff_minus_one = ncoeff - 1;
  ncoeff_halved_minus_one = ncoeff/2 - 1;
  /* sample_f = 2^(levels-1): the loop below doubles sample_f (levels-1) times */
  sample_f = 1;
  for (i=1; i<levels; i++)
    sample_f = sample_f*2;
  
  /* size of the block reconstructed on the first pass through the main loop */
  if (nrows>1)
    current_rows = nrows/sample_f;
  else 
    current_rows = 1;
  current_cols = ncols/sample_f;

  /* start from a copy of the input; everything below operates on x */
  for (i=0; i<(nrows*ncols); i++)
    x[i] = y[i];
  
  /* main loop */
  for (current_level=levels; current_level >= 1; current_level--) {
    /* the low pass half occupies indices [0, cursor), the high pass half follows it */
    row_cursor = current_rows/2;
    column_cursor = current_cols/2;
    
    /* go by columns in case of a 2D signal*/
    if (nrows>1) {
      for (idx_cols=0; idx_cols<current_cols; idx_cols++) {         /* loop over columns */
	/* store in dummy variables, leaving ncoeff_halved_minus_one leading entries for idwt_convolution to fill */
	idx_rows = row_cursor;
	for (i=0; i<row_cursor; i++){    
	  y_dummy_low[i+ncoeff_halved_minus_one]  = mat(x, i,          idx_cols, nrows, ncols);  
	  y_dummy_high[i+ncoeff_halved_minus_one] = mat(x, idx_rows++, idx_cols, nrows, ncols);  
	}
	/* perform filtering lowpass and highpass*/
	idwt_convolution(x_dummy, row_cursor, coeff_low, coeff_high, ncoeff_minus_one, ncoeff_halved_minus_one, y_dummy_low, y_dummy_high); 
	/* restore dummy variables in matrix */
	for (i=0; i<current_rows; i++)
	  mat(x, i, idx_cols, nrows, ncols) = x_dummy[i];  
      }
    }
    /* go by rows */
    for (idx_rows=0; idx_rows<current_rows; idx_rows++) {           /* loop over rows */
      /* store in dummy variable, leaving ncoeff_halved_minus_one leading entries for idwt_convolution to fill */
      idx_cols = column_cursor;
      for  (i=0; i<column_cursor; i++){    
	y_dummy_low[i+ncoeff_halved_minus_one]  = mat(x, idx_rows, i,          nrows, ncols);  
	y_dummy_high[i+ncoeff_halved_minus_one] = mat(x, idx_rows, idx_cols++, nrows, ncols);  
      } 
      /* perform filtering lowpass and highpass*/
      idwt_convolution(x_dummy, column_cursor, coeff_low, coeff_high, ncoeff_minus_one, ncoeff_halved_minus_one, y_dummy_low, y_dummy_high); 
      /* restore dummy variables in matrices */
      for (i=0; i<current_cols; i++)
        mat(x, idx_rows, i, nrows, ncols) = x_dummy[i];  
    }  
    /* the next (finer) level works on a block twice as large in each dimension */
    if (nrows==1)
      current_rows = 1;
    else
      current_rows = current_rows*2;
    current_cols = current_cols*2;
  }
  idwt_free(&x_dummy, &y_dummy_low, &y_dummy_high, &coeff_low, &coeff_high);
}


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/lib/src/init.c
================================================
/*! \file init.c
    \brief Parse input from MATLAB and do some sanity checking

*/

#include "rwt_init.h"
#include <math.h>

#if defined(MATLAB_MEX_FILE) || defined(OCTAVE_MEX_FILE)
/*!
 * Verify that the number of input arguments suits the requested transform.
 *
 * @param nrhs number of items on right hand side of matlab call
 * @param transform_type which transform is being set up
 * @return 0 when the count is acceptable, 1 otherwise (after reporting through rwt_errormsg)
 *
 * The inverse redundant transform accepts 3 or 4 inputs; every other transform accepts 2 or 3.
 */
int rwt_check_parameter_count(int nrhs, transform_t transform_type) {
  if (transform_type != INVERSE_REDUNDANT_DWT) {
    if (nrhs > 3) {
      rwt_errormsg("There are at most 3 input parameters allowed!");
      return 1;
    }
    if (nrhs < 2) {
      rwt_errormsg("There are at least 2 input parameters required!");
      return 1;
    }
    return 0;
  }

  if (nrhs > 4) {
    rwt_errormsg("There are at most 4 input parameters allowed!");
    return 1;
  }
  if (nrhs < 3) {
    rwt_errormsg("There are at least 3 input parameters required!");
    return 1;
  }
  return 0;
}


/*!
 * For the inverse redundant transform check that the dimensions of the low and high inputs match
 *
 * @param prhs   right hand side arguments of the matlab call; prhs[1] is the high pass input
 * @param nrows  number of rows of the low pass input
 * @param ncols  number of columns of the low pass input
 * @param levels the number of levels
 * @return 1 when the dimensions are consistent, 0 otherwise
 *
 * The high pass input must have nrows rows. Its column count must be
 * 3*ncols*levels when both nrows and ncols exceed 1, and ncols*levels otherwise.
 */
int rwt_check_yl_matches_yh(const mxArray *prhs[], size_t nrows, size_t ncols, int levels) {
  const size_t high_rows = mxGetM(prhs[1]);
  const size_t high_cols = mxGetN(prhs[1]);
  const size_t blocks_per_level = (nrows > 1 && ncols > 1) ? 3 : 1;
  const size_t expected_cols = blocks_per_level * ncols * levels;

  return (nrows == high_rows && expected_cols == high_cols) ? 1 : 0;
}
#endif


/*!
 * Find L, the number of levels
 *
 * @param m the number of rows in the input
 * @param n the number of columns in the input
 * @return L, or -1 (after reporting through rwt_errormsg) when no level is possible
 *
 * L is the exponent of the largest power of 2 that is a factor of all input dimensions.
 * For a vector (one dimension equal to 1) only the longer dimension is considered.
 *
 * A dimension of zero is rejected up front: zero is even and stays zero when
 * halved, so the factor-counting loops below would never terminate for it.
 */
int rwt_find_levels(size_t m, size_t n) {
  size_t rest, twos_in_m, twos_in_n, L;

  if (m == 0 || n == 0) {
    rwt_errormsg("Maximum number of levels is zero; no decomposition can be performed!");
    return -1;
  }

  /* count how many times each dimension can be halved exactly */
  twos_in_n = 0;
  for (rest = n; (rest & 1) == 0; rest >>= 1)
    twos_in_n++;
  twos_in_m = 0;
  for (rest = m; (rest & 1) == 0; rest >>= 1)
    twos_in_m++;

  if (m == 1 || n == 1)
    L = (twos_in_m > twos_in_n) ? twos_in_m : twos_in_n; /* vector: the dimension of size 1 contributes 0 */
  else
    L = (twos_in_m < twos_in_n) ? twos_in_m : twos_in_n;

  if (L == 0) {
    rwt_errormsg("Maximum number of levels is zero; no decomposition can be performed!");
    return -1;
  }
  return (int) L;
}


/*!
 * Check that length is divisible by 2^L
 *
 * @param length the number of rows or number of columns
 * @param L the number of levels
 * @return 0 when length is divisible by 2^L, -1 otherwise
 *
 * The check is done with integer arithmetic, so it is exact for every size_t
 * value. A floating point division loses the low bits of lengths above 2^53
 * and can report them as divisible when they are not.
 * A length of zero, or an L that is zero or negative, yields 0.
 */
int rwt_check_dimensions(size_t length, int L) {
  if (length == 0)
    return 0; /* zero is divisible by everything; also keeps the loop below short */
  while (L-- > 0) {
    if (length & 1)
      return -1; /* an odd remainder cannot be halved again */
    length >>= 1;
  }
  return 0;
}


/*!
 * Validate the levels parameter against the input dimensions
 *
 * @param levels the number of levels specified or calculated for the input
 * @param rows the number of rows of input
 * @param cols the number of columns of input
 * @return 0 when levels is usable, -1 otherwise (after reporting through rwt_errormsg)
 *
 * levels must be at least 1, and every dimension larger than 1 must be
 * divisible by 2^levels.
 */
int rwt_check_levels(int levels, size_t rows, size_t cols) {
  int indivisible = 0;

  if (levels < 1) {
    rwt_errormsg("The number of levels, L, must be a positive integer");
    return -1;
  }

  /*! Dimensions of size 0 or 1 are exempt; the columns are only examined when the rows pass */
  if (rows > 1 && rwt_check_dimensions(rows, levels) != 0)
    indivisible = 1;
  else if (cols > 1 && rwt_check_dimensions(cols, levels) != 0)
    indivisible = 1;

  if (indivisible) {
    rwt_errormsg("All dimensions must be divisible by 2^L");
    return -1;
  }

  return 0;
}


#if defined(MATLAB_MEX_FILE) || defined(OCTAVE_MEX_FILE)
/*!
 * Parse input from MATLAB and do some sanity checking
 *
 * @param nlhs number of items on left hand side of matlab call
 * @param plhs pointer to left hand side data structure
 * @param nrhs number of items on right hand side of matlab call
 * @param prhs pointer to right hand side data structure
 * @param transform_type which transform are we setting up to do
 * @return the dimensions, level count and scaling coefficients read from the call
 *
 * On success plhs[0] is created as a real double matrix with the dimensions of prhs[0].
 * On any failed check the error is reported through rwt_errormsg and the
 * parameters gathered so far are returned; fields not yet filled in are zero.
 */
rwt_init_params rwt_matlab_init(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[], transform_t transform_type) {
  rwt_init_params params = {0}; /* start from a determinate value in case an early return is ever reached */
  int argNumL;

  /*! Check for correct # of input parameters */
  if (rwt_check_parameter_count(nrhs, transform_type) != 0) return params;
  /*! Check that we don't have more than two dimensions in the input since that is currently unsupported. */
  if (mxGetNumberOfDimensions(prhs[0]) > 2) {
    rwt_errormsg("Matrix must have fewer than 3 dimensions!");
    return params;
  }
  /*! Get the number of rows and columns in the input matrix. */
  params.nrows = mxGetM(prhs[0]);
  params.ncols = mxGetN(prhs[0]);

  /*! A matrix is empty as soon as either dimension is zero (0xN and Nx0 included).
   *  Letting such input through would hand a zero dimension to rwt_find_levels.
   */
  if (params.nrows == 0 || params.ncols == 0) {
    rwt_errormsg("The input matrix cannot be empty");
    return params;
  }

  /*! Read the number of levels, L, from the input values if it was given, otherwise calculate L. Sanity check L */
  argNumL = (transform_type == INVERSE_REDUNDANT_DWT) ? 3 : 2;
  if ((argNumL + 1) == nrhs)
    params.levels = (int) *mxGetPr(prhs[argNumL]);
  else
    params.levels = rwt_find_levels(params.nrows, params.ncols);

  if (rwt_check_levels(params.levels, params.nrows, params.ncols)) {
    return params;
  }

  /*! Read the scaling coefficients, h, from the input and find their length, ncoeff. 
   *  In the case of the inverse redundant transform, the scalings are found one further position to the right, 
   *  and also we check for matching dimensions in the low and high inputs
   */
  if (transform_type == INVERSE_REDUNDANT_DWT) {
    params.scalings = mxGetPr(prhs[2]);
    params.ncoeff = max(mxGetM(prhs[2]), mxGetN(prhs[2]));
    if (!rwt_check_yl_matches_yh(prhs, params.nrows, params.ncols, params.levels)) {
      rwt_errormsg("Dimensions of first two input matrices not consistent!");
      return params;
    }
  }
  else {
    params.scalings = mxGetPr(prhs[1]);
    params.ncoeff = max(mxGetM(prhs[1]), mxGetN(prhs[1]));
  }
  /*! Create the first item in the output array as a double matrix with the same dimensions as the input. */
  plhs[0] = mxCreateDoubleMatrix(params.nrows, params.ncols, mxREAL);
  return params;
}
#endif


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/lib/src/irdwt.c
================================================
/*! \file irdwt.c
    \brief Implementation of the inverse redundant discrete wavelet transform

*/

#include "rwt_platform.h"

/*!
 * Synthesis filtering step of the irdwt (no resampling: one output per input position)
 *
 * @param x_out receives lx output samples
 * @param lx the number of samples to produce
 * @param coeff_low the low pass coefficients
 * @param coeff_high the high pass coefficients
 * @param ncoeff the number of coefficients in each filter
 * @param x_in_low low pass input; the leading ncoeff-1 entries are filled in here
 * @param x_in_high high pass input, laid out like x_in_low
 *
 * The leading ncoeff-1 entries of both inputs are first filled with the values
 * found lx positions further along, working from the highest index down. Each
 * output is then the sum of both inputs filtered over a window of ncoeff
 * samples, with the coefficient arrays read back to front.
 */
void irdwt_convolution(double *x_out, size_t lx, double *coeff_low, double *coeff_high, int ncoeff, double *x_in_low, double *x_in_high) {
  const int last = ncoeff - 1;
  int pad = ncoeff - 1;
  size_t pos, tap;
  double acc;

  /* fill the leading pad, highest index first */
  while (--pad >= 0) {
    x_in_low[pad]  = x_in_low[lx+pad];
    x_in_high[pad] = x_in_high[lx+pad];
  }

  for (pos = 0; pos < lx; pos++) {
    acc = 0;
    for (tap = 0; tap < ncoeff; tap++)
      acc = acc + (x_in_low[tap+pos] * coeff_low[last-tap]) + (x_in_high[tap+pos] * coeff_high[last-tap]);
    x_out[pos] = acc;
  }
}


/*!
 * Reserve the scratch buffers used by irdwt
 *
 * @param m the number of rows of the input matrix
 * @param n the number of columns of the input matrix
 * @param ncoeff the number of scaling coefficients
 *
 * The remaining parameters each receive one newly allocated buffer:
 * x_high holds m*n entries, the two x_dummy buffers hold max(m,n) entries,
 * the four y_dummy buffers hold max(m,n)+ncoeff-1 entries and the two
 * coefficient buffers hold ncoeff entries. The results of rwt_calloc are
 * stored unchecked.
 */
void irdwt_allocate(size_t m, size_t n, int ncoeff, double **x_high, double **x_dummy_low, double **x_dummy_high, double **y_dummy_low_low, 
  double **y_dummy_low_high, double **y_dummy_high_low, double **y_dummy_high_high, double **coeff_low, double **coeff_high) {
  const size_t longest = (m > n) ? m : n;
  const size_t padded = longest + ncoeff - 1;

  *x_high            = (double *) rwt_calloc(m*n, sizeof(double));
  *x_dummy_low       = (double *) rwt_calloc(longest, sizeof(double));
  *x_dummy_high      = (double *) rwt_calloc(longest, sizeof(double));
  *y_dummy_low_low   = (double *) rwt_calloc(padded, sizeof(double));
  *y_dummy_low_high  = (double *) rwt_calloc(padded, sizeof(double));
  *y_dummy_high_low  = (double *) rwt_calloc(padded, sizeof(double));
  *y_dummy_high_high = (double *) rwt_calloc(padded, sizeof(double));
  *coeff_low         = (double *) rwt_calloc(ncoeff, sizeof(double));
  *coeff_high        = (double *) rwt_calloc(ncoeff, sizeof(double));
}


/*!
 * Free the work buffers that irdwt_allocate() handed out
 *
 * Each argument is the address of a buffer pointer; the buffers are released
 * in the order in which the parameters are listed. The pointers themselves
 * are left unchanged.
 */
void irdwt_free(double **x_high, double **x_dummy_low, double **x_dummy_high, double **y_dummy_low_low, double **y_dummy_low_high, 
  double **y_dummy_high_low, double **y_dummy_high_high, double **coeff_low, double **coeff_high) {
  double **buffers[] = {
    x_high,
    x_dummy_low,
    x_dummy_high,
    y_dummy_low_low,
    y_dummy_low_high,
    y_dummy_high_low,
    y_dummy_high_high,
    coeff_low,
    coeff_high
  };
  const size_t count = sizeof(buffers) / sizeof(buffers[0]);
  size_t idx;

  for (idx = 0; idx < count; ++idx)
    rwt_free(*buffers[idx]);
}

/*!
 * Put the scaling coefficients into the form used by irdwt_convolution()
 *
 * @param ncoeff length of h / the number of scaling coefficients
 * @param h the wavelet scaling coefficients
 * @param coeff_low receives h scaled by 1/2
 * @param coeff_high receives reversed h scaled by 1/2, with the odd-indexed entries sign flipped
 *
 * Both output buffers must hold at least ncoeff entries.
 * Not the same as idwt_coefficients.
 */
void irdwt_coefficients(int ncoeff, double *h, double **coeff_low, double **coeff_high) {
  int i;
  for (i=0; i<ncoeff; i++) {
    (*coeff_low)[i] = h[i]/2;
    (*coeff_high)[i] = h[ncoeff-i-1]/2;
  }
  /* Stop strictly before ncoeff: with an odd ncoeff an inclusive bound would
     negate the entry one past the end of coeff_high. */
  for (i=1; i<ncoeff; i+=2)
    (*coeff_high)[i] = -((*coeff_high)[i]);
}


/*!
 * Perform the inverse redundant discrete wavelet transform
 *
 * @param x      the output signal; nrows*ncols values are written
 * @param nrows  number of rows in the signal
 * @param ncols  number of columns in the signal
 * @param h      wavelet scaling coefficients
 * @param ncoeff length of h / the number of scaling coefficients
 * @param levels the number of levels
 * @param y_low  the lowpass input; copied into x before reconstruction starts
 * @param y_high the highpass input; per level it is read at one column block
 *               (1D) or three consecutive column blocks of width ncols (2D)
 *
 * A signal with ncols==1 is handled as a single row of length nrows.
 * Element access goes through the mat() macro, which is defined outside this
 * file (presumably column-major indexing - confirm in rwt_platform.h).
 */
void irdwt(double *x, size_t nrows, size_t ncols, double *h, int ncoeff, int levels, double *y_low, double *y_high) {
  double  *coeff_low, *coeff_high, *y_dummy_low_low, *y_dummy_low_high, *y_dummy_high_low;
  double *y_dummy_high_high, *x_dummy_low , *x_dummy_high, *x_high;
  long i;
  int current_level, three_n_L, ncoeff_minus_one, sample_f;
  size_t current_rows, current_cols, column_cursor, column_blocks_per_row;
  size_t idx_rows, idx_cols, n_r, n_c;
  size_t row_blocks_per_column, column_cursor_plus_n, column_cursor_plus_double_n;

  /* buffers are sized from the dimensions as passed in, before the 1D swap below */
  irdwt_allocate(nrows, ncols, ncoeff, &x_high, &x_dummy_low, &x_dummy_high, &y_dummy_low_low, 
    &y_dummy_low_high, &y_dummy_high_low, &y_dummy_high_high, &coeff_low, &coeff_high);
  irdwt_coefficients(ncoeff, h, &coeff_low, &coeff_high);
 
  /* treat a column vector as a single row */
  if (ncols==1) {
    ncols = nrows;
    nrows = 1;
  }
  /* synthesis: recombine lowpass and highpass */
  
  three_n_L = 3*ncols*levels;
  ncoeff_minus_one = ncoeff - 1;
  /*! we calculate sample_f = 2^(levels - 1) with a loop since that is actually the recommended method for whole numbers */
  sample_f = 1;
  for (i=1; i<levels; i++)
    sample_f = sample_f*2;

  /* number of samples per strided block at the coarsest level */
  current_rows = nrows/sample_f;
  current_cols = ncols/sample_f;
  /* restore y_low in x */
  for (i=0; i<nrows*ncols; i++)
    x[i] = y_low[i];
  
  /* main loop: from the coarsest level down to level 1 */
  for (current_level=levels; current_level >= 1; current_level--) {
    /* actual (level dependent) column offset */
    if (nrows==1)
      column_cursor = ncols*(current_level-1);
    else
      column_cursor = 3*ncols*(current_level-1);
    column_cursor_plus_n = column_cursor + ncols;
    column_cursor_plus_double_n = column_cursor_plus_n + ncols;
    
    /* go by columns in case of a 2D signal*/
    if (nrows>1) {
      row_blocks_per_column = nrows/current_rows;   /* # of row blocks per column */
      for (idx_cols=0; idx_cols<ncols; idx_cols++) {          /* loop over column */
	for (n_r=0; n_r<row_blocks_per_column; n_r++) { /* loop within one column */
	  /* store in dummy variables */
	  /* idx_rows is unsigned: it starts one stride "before" n_r (wrapping around)
	     so that the first increment inside the loop lands exactly on n_r */
	  idx_rows = -sample_f + n_r;
	  for (i=0; i<current_rows; i++) {    
	    idx_rows = idx_rows + sample_f;
	    /* samples go in at offset ncoeff-1; the leading slots are filled by irdwt_convolution */
	    y_dummy_low_low[i+ncoeff_minus_one]   = mat(x,      idx_rows, idx_cols,                               nrows, ncols);
	    y_dummy_low_high[i+ncoeff_minus_one]  = mat(y_high, idx_rows, idx_cols + column_cursor,               nrows, three_n_L);
	    y_dummy_high_low[i+ncoeff_minus_one]  = mat(y_high, idx_rows, idx_cols + column_cursor_plus_n,        nrows, three_n_L);
	    y_dummy_high_high[i+ncoeff_minus_one] = mat(y_high, idx_rows, idx_cols + column_cursor_plus_double_n, nrows, three_n_L);
	  }
	  /* perform filtering and adding: first LL/LH, then HL/HH */
	  irdwt_convolution(x_dummy_low,  current_rows, coeff_low, coeff_high, ncoeff, y_dummy_low_low,  y_dummy_low_high); 
	  irdwt_convolution(x_dummy_high, current_rows, coeff_low, coeff_high, ncoeff, y_dummy_high_low, y_dummy_high_high); 
	  /* store dummy variables in matrices */
	  idx_rows = -sample_f + n_r;
	  for (i=0; i<current_rows; i++) {
	    idx_rows = idx_rows + sample_f;
	    mat(x,      idx_rows, idx_cols, nrows, ncols) = x_dummy_low[i];
	    mat(x_high, idx_rows, idx_cols, nrows, ncols) = x_dummy_high[i];
	  }
	}
      }
    }
    
    /* go by rows */
    column_blocks_per_row = ncols/current_cols; /* # of column blocks per row */
    for (idx_rows=0; idx_rows<nrows; idx_rows++) {          /* loop over rows */
      for (n_c=0; n_c<column_blocks_per_row; n_c++) {  /* loop within one row */      
	/* store in dummy variable */
	/* same unsigned wrap-around start as in the column pass */
	idx_cols = -sample_f + n_c;
	for  (i=0; i<current_cols; i++) {    
	  idx_cols = idx_cols + sample_f;
	  y_dummy_low_low[i+ncoeff_minus_one] = mat(x, idx_rows, idx_cols, nrows, ncols);  
	  /* 2D: the highpass part is the x_high result of the column pass; 1D: it comes straight from y_high */
	  if (nrows>1)
	    y_dummy_high_high[i+ncoeff_minus_one] = mat(x_high, idx_rows, idx_cols,                 nrows, ncols);
	  else
            y_dummy_high_high[i+ncoeff_minus_one] = mat(y_high, idx_rows, idx_cols + column_cursor, nrows, three_n_L);
	} 
	/* perform filtering lowpass/highpass */
	irdwt_convolution(x_dummy_low, current_cols, coeff_low, coeff_high, ncoeff, y_dummy_low_low, y_dummy_high_high); 
	/* restore dummy variables in matrices */
	idx_cols = -sample_f + n_c;
	for (i=0; i<current_cols; i++) {    
	  idx_cols = idx_cols + sample_f;
	  mat(x, idx_rows, idx_cols, nrows, ncols) = x_dummy_low[i];  
	}
      }
    }
    /* next finer level: half the stride, twice as many samples per block */
    sample_f = sample_f/2;
    current_rows = current_rows*2;
    current_cols = current_cols*2;
  }
  irdwt_free(&x_high, &x_dummy_low, &x_dummy_high, &y_dummy_low_low, &y_dummy_low_high, &y_dummy_high_low, &y_dummy_high_high, &coeff_low, &coeff_high);
}


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/lib/src/platform.c
================================================
/*! \file platform.c
    \brief Wrap memory allocation routines so that we can use the MATLAB ones when we build for MATLAB.

    rwt_malloc, rwt_calloc, and rwt_free simply wrap the MATLAB or system versions of malloc, calloc, and free.
*/

#include "rwt_platform.h"

/*! Allocate size bytes with mxMalloc in a MATLAB/Octave MEX build, with malloc otherwise. */
void *rwt_malloc(size_t size) {
#if defined(MATLAB_MEX_FILE) || defined(OCTAVE_MEX_FILE)
  return mxMalloc(size);
#else
  return malloc(size);
#endif
}

/*! Allocate num zero-initialised elements of size bytes each with mxCalloc in a MEX build, with calloc otherwise. */
void *rwt_calloc(size_t num, size_t size) {
#if defined(MATLAB_MEX_FILE) || defined(OCTAVE_MEX_FILE)
  return mxCalloc(num, size);
#else
  return calloc(num, size);
#endif
}

/*! Release memory obtained from rwt_malloc or rwt_calloc using the matching deallocator. */
void rwt_free(void *ptr) {
#if defined(MATLAB_MEX_FILE) || defined(OCTAVE_MEX_FILE)
  mxFree(ptr);
#else
  free(ptr);
#endif
}


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/lib/src/rdwt.c
================================================
/*! \file rdwt.c
    \brief Implementation of the redundant discrete wavelet transform

*/

#include "rwt_platform.h"

/*!
 * Perform convolution for rdwt
 *
 * @param x_in input signal values; must have room for lx+ncoeff-1 entries
 * @param lx the length of the signal held in x_in
 * @param coeff_low the low pass coefficients
 * @param coeff_high the high pass coefficients
 * @param ncoeff the number of scaling coefficients
 * @param x_out_low low pass results (lx values)
 * @param x_out_high high pass results (lx values)
 *
 * x_in is extended periodically in place: entries lx .. lx+ncoeff-2 are
 * overwritten with copies of the entries lx positions earlier.
 */
void rdwt_convolution(double *x_in, size_t lx, double *coeff_low, double *coeff_high, int ncoeff, double *x_out_low, double *x_out_high) {
  const size_t extended = lx + ncoeff - 1;
  size_t pos, tap;
  double low_acc, high_acc;

  /* periodic extension past the end of the signal */
  for (pos = lx; pos < extended; ++pos)
    x_in[pos] = x_in[pos - lx];

  for (pos = 0; pos < lx; ++pos) {
    const double *window = x_in + pos;

    low_acc = 0;
    high_acc = 0;
    /* both filters are applied in reversed coefficient order */
    for (tap = 0; tap < (size_t) ncoeff; ++tap) {
      const size_t rev = ncoeff - 1 - tap;
      low_acc  += window[tap] * coeff_low[rev];
      high_acc += window[tap] * coeff_high[rev];
    }
    x_out_low[pos]  = low_acc;
    x_out_high[pos] = high_acc;
  }
}


/*!
 * Allocate the zero-initialised work buffers used by rdwt
 *
 * @param m the number of rows of the input matrix
 * @param n the number of columns of the input matrix
 * @param ncoeff the number of scaling coefficients
 * @param x_dummy_low receives a max(m,n)+ncoeff-1 element buffer
 * @param x_dummy_high receives a max(m,n)+ncoeff-1 element buffer
 * @param y_dummy_low_low receives a max(m,n) element buffer
 * @param y_dummy_low_high receives a max(m,n) element buffer
 * @param y_dummy_high_low receives a max(m,n) element buffer
 * @param y_dummy_high_high receives a max(m,n) element buffer
 * @param coeff_low receives an ncoeff element buffer
 * @param coeff_high receives an ncoeff element buffer
 *
 * Every buffer obtained here is released again by rdwt_free().
 */
void rdwt_allocate(size_t m, size_t n, int ncoeff, double **x_dummy_low, double **x_dummy_high, double **y_dummy_low_low, 
  double **y_dummy_low_high, double **y_dummy_high_low, double **y_dummy_high_high, double **coeff_low, double **coeff_high) {
  const size_t elem    = sizeof(double);
  const size_t longest = max(m,n);                /* longest signal dimension          */
  const size_t padded  = longest + ncoeff - 1;    /* room for the periodic extension   */

  /* filter inputs: one row/column plus ncoeff-1 pad entries */
  *x_dummy_low       = (double *) rwt_calloc(padded, elem);
  *x_dummy_high      = (double *) rwt_calloc(padded, elem);

  /* filter outputs: one row/column each */
  *y_dummy_low_low   = (double *) rwt_calloc(longest, elem);
  *y_dummy_low_high  = (double *) rwt_calloc(longest, elem);
  *y_dummy_high_low  = (double *) rwt_calloc(longest, elem);
  *y_dummy_high_high = (double *) rwt_calloc(longest, elem);

  /* filter coefficients */
  *coeff_low         = (double *) rwt_calloc(ncoeff, elem);
  *coeff_high        = (double *) rwt_calloc(ncoeff, elem);
}


/*!
 * Free memory that we allocated for rdwt
 *
 * @param x_dummy_low
 * @param x_dummy_high
 * @param y_dummy_low_low
 * @param y_dummy_low_high
 * @param y_dummy_high_low
 * @param y_dummy_high_high
 * @param coeff_low
 * @param coeff_high
 *
 * Each argument is the address of a buffer pointer; the buffers are released
 * in the order in which the parameters are listed. The pointers themselves
 * are left unchanged.
 */
void rdwt_free(double **x_dummy_low, double **x_dummy_high, double **y_dummy_low_low, double **y_dummy_low_high, 
  double **y_dummy_high_low, double **y_dummy_high_high, double **coeff_low, double **coeff_high) {
  double **buffers[] = {
    x_dummy_low,
    x_dummy_high,
    y_dummy_low_low,
    y_dummy_low_high,
    y_dummy_high_low,
    y_dummy_high_high,
    coeff_low,
    coeff_high
  };
  const size_t count = sizeof(buffers) / sizeof(buffers[0]);
  size_t idx;

  for (idx = 0; idx < count; ++idx)
    rwt_free(*buffers[idx]);
}


/*!
 * Put the scaling coefficients into a form ready for use in the convolution function
 *
 * @param ncoeff length of h / the number of scaling coefficients
 * @param h  the wavelet scaling coefficients
 * @param coeff_low the low pass coefficients - reversed h
 * @param coeff_high the high pass coefficients - forward h, even-indexed values are sign flipped
 *
 * Both output buffers must hold at least ncoeff entries.
 */
void rdwt_coefficients(int ncoeff, double *h, double **coeff_low, double **coeff_high) {
  double *low  = *coeff_low;
  double *high = *coeff_high;
  int idx;

  /* first pass: plain copies, low pass reversed and high pass in forward order */
  idx = 0;
  while (idx < ncoeff) {
    low[idx]  = h[ncoeff - 1 - idx];
    high[idx] = h[idx];
    ++idx;
  }

  /* second pass: negate every even-indexed high pass coefficient */
  idx = 0;
  while (idx < ncoeff) {
    high[idx] = -high[idx];
    idx += 2;
  }
}


/*!
 * Perform the redundant discrete wavelet transform
 *
 * @param x      the input signal
 * @param nrows  number of rows in the input
 * @param ncols  number of columns in the input
 * @param h      wavelet scaling coefficients
 * @param ncoeff length of h / the number of scaling coefficients
 * @param levels the number of levels
 * @param yl     lowpass output; initialised with a copy of x (nrows*ncols values)
 *               and then filtered in place level by level
 * @param yh     highpass output; per level one column block of width ncols is
 *               written for a 1D signal and three consecutive blocks (in the
 *               order LH, HL, HH) for a 2D signal
 *
 * A signal with ncols==1 is handled as a single row of length nrows.
 * Element access goes through the mat() macro, which is defined outside this
 * file (presumably column-major indexing - confirm in rwt_platform.h).
 */
void rdwt(double *x, size_t nrows, size_t ncols, double *h, int ncoeff, int levels, double *yl, double *yh) {
  double *coeff_low, *coeff_high, *y_dummy_low_low, *y_dummy_low_high, *y_dummy_high_low;
  double *y_dummy_high_high, *x_dummy_low, *x_dummy_high;
  long i;
  int current_level, three_n_L, sample_f;
  size_t current_rows, current_cols, idx_rows, idx_cols, n_c, n_cb, n_r, n_rb;
  size_t column_cursor, column_cursor_plus_n, column_cursor_plus_double_n;

  /* buffers are sized from the dimensions as passed in, before the 1D swap below */
  rdwt_allocate(nrows, ncols, ncoeff, &x_dummy_low, &x_dummy_high, &y_dummy_low_low, &y_dummy_low_high, 
    &y_dummy_high_low, &y_dummy_high_high, &coeff_low, &coeff_high);

  rdwt_coefficients(ncoeff, h, &coeff_low, &coeff_high);

  /* treat a column vector as a single row */
  if (ncols==1) {
    ncols = nrows;
    nrows = 1;
  }  

  /* analysis lowpass and highpass */
 
  three_n_L = 3*ncols*levels;
  /* start at twice the size because the loop halves both before first use */
  current_rows = 2*nrows;
  current_cols = 2*ncols;
  for (i=0; i<nrows*ncols; i++)
    yl[i] = x[i];
  
  /* main loop: from level 1 up to the coarsest level; sample_f is the stride 2^(current_level-1) */
  sample_f = 1;
  for (current_level=1; current_level <= levels; current_level++) {
    current_rows = current_rows/2;
    current_cols = current_cols/2;
    /* actual (level dependent) column offset */
    if (nrows==1)
      column_cursor = ncols*(current_level-1);
    else
      column_cursor = 3*ncols*(current_level-1);
    column_cursor_plus_n = column_cursor + ncols;
    column_cursor_plus_double_n = column_cursor_plus_n + ncols;
    
    /* go by rows */
    n_cb = ncols/current_cols;         /* # of column blocks per row */
    for (idx_rows=0; idx_rows<nrows; idx_rows++) { /* loop over rows */
      for (n_c=0; n_c<n_cb; n_c++) {          /* loop within one row */      
	/* store in dummy variable */
	/* idx_cols is unsigned: it starts one stride "before" n_c (wrapping around)
	   so that the first increment inside the loop lands exactly on n_c */
	idx_cols = -sample_f + n_c;
	for (i=0; i<current_cols; i++) {
	  idx_cols = idx_cols + sample_f;
	  x_dummy_low[i] = mat(yl, idx_rows, idx_cols, nrows, ncols);  
	}
	/* perform filtering lowpass/highpass */
	rdwt_convolution(x_dummy_low, current_cols, coeff_low, coeff_high, ncoeff, y_dummy_low_low, y_dummy_high_high); 
	/* restore dummy variables in matrices */
	idx_cols = -sample_f + n_c;
	for  (i=0; i<current_cols; i++) {
          idx_cols = idx_cols + sample_f;
          mat(yl, idx_rows, idx_cols,                 nrows, ncols)     = y_dummy_low_low[i];
          mat(yh, idx_rows, idx_cols + column_cursor, nrows, three_n_L) = y_dummy_high_high[i];  
	} 
      }
    }
      
    /* go by columns in case of a 2D signal*/
    if (nrows>1) {
      n_rb = nrows/current_rows;           /* # of row blocks per column */
      for (idx_cols=0; idx_cols<ncols; idx_cols++) { /* loop over column */
	for (n_r=0; n_r<n_rb; n_r++) {         /* loop within one column */
	  /* store in dummy variables */
	  /* same unsigned wrap-around start as in the row pass */
	  idx_rows = -sample_f + n_r;
	  for (i=0; i<current_rows; i++) {    
	    idx_rows = idx_rows + sample_f;
	    /* row-lowpass result from yl, row-highpass result from the first yh block of this level */
	    x_dummy_low[i]  = mat(yl, idx_rows, idx_cols,                 nrows, ncols);
	    x_dummy_high[i] = mat(yh, idx_rows, idx_cols + column_cursor, nrows, three_n_L);
	  }
	  /* perform filtering: first LL/LH, then HL/HH */
	  rdwt_convolution(x_dummy_low,  current_rows, coeff_low, coeff_high, ncoeff, y_dummy_low_low,  y_dummy_low_high);
	  rdwt_convolution(x_dummy_high, current_rows, coeff_low, coeff_high, ncoeff, y_dummy_high_low, y_dummy_high_high);
	  /* restore dummy variables in matrices */
	  idx_rows = -sample_f + n_r;
	  for (i=0; i<current_rows; i++) {
	    idx_rows = idx_rows + sample_f;
	    /* the first yh block, which held the row-highpass data, is overwritten with LH here */
	    mat(yl, idx_rows, idx_cols,                               nrows, ncols)     = y_dummy_low_low[i];
	    mat(yh, idx_rows, idx_cols + column_cursor,               nrows, three_n_L) = y_dummy_low_high[i];
	    mat(yh, idx_rows, idx_cols + column_cursor_plus_n,        nrows, three_n_L) = y_dummy_high_low[i];
	    mat(yh, idx_rows, idx_cols + column_cursor_plus_double_n, nrows, three_n_L) = y_dummy_high_high[i];
	  }
	}
      }
    }
    /* next coarser level: double the stride */
    sample_f = sample_f*2;
  }
  rdwt_free(&x_dummy_low, &x_dummy_high, &y_dummy_low_low, &y_dummy_low_high, &y_dummy_high_low, &y_dummy_high_high, &coeff_low, &coeff_high);
}


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/mex/mdwt.c
================================================
/*! \file mdwt.c
    \brief MATLAB gateway for the discrete wavelet transform

    This file is used to produce a MATLAB MEX binary for the discrete wavelet transform

%y = mdwt(x,h,L);
% 
% function computes the discrete wavelet transform y for a 1D or 2D input
% signal x.
%
%    Input:
%	x    : finite length 1D or 2D signal (implicitly periodized)
%       h    : scaling filter
%       L    : number of levels. in case of a 1D signal length(x) must be
%              divisible by 2^L; in case of a 2D signal the row and the
%              column dimension must be divisible by 2^L.
%
% see also: midwt, mrdwt, mirdwt
*/

#include "mex.h"
#include "rwt_init.h"
#include "rwt_transforms.h"

/*!
 * Matlab MEX definition for the discrete wavelet transform.
 *
 * @param nlhs number of items on left hand side of matlab call
 * @param plhs pointer to left hand side data structure
 * @param nrhs number of items on right hand side of matlab call
 * @param prhs pointer to right hand side data structure
 *
 * plhs[0] receives the transform. plhs[1] receives the number of levels, but
 * only when the caller asked for a second output: plhs is only guaranteed to
 * have room for the requested outputs, so writing plhs[1] unconditionally can
 * run past the end of the array.
 */
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) {
  rwt_init_params params = rwt_matlab_init(nlhs, plhs, nrhs, prhs, NORMAL_DWT);     /*! Check input and determine the parameters for dwt() */
  if (nlhs > 1) {
    plhs[1] = mxCreateDoubleMatrix(1, 1, mxREAL);                                   /*! Create the output matrix */
    *mxGetPr(plhs[1]) = params.levels;                                              /*! The second returned item is the number of levels */
  }
  dwt(mxGetPr(prhs[0]), params.nrows, params.ncols, params.scalings, params.ncoeff, params.levels, mxGetPr(plhs[0]));  /*! Perform the DWT */
}


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/mex/midwt.c
================================================
/*! \file midwt.c
    \brief MATLAB gateway for the inverse discrete wavelet transform

    This file is used to produce a MATLAB MEX binary for the inverse discrete wavelet transform

%y = midwt(x,h,L);
% 
% function computes the inverse discrete wavelet transform y for a 1D or 2D
% input signal x.
%
%    Input:
%	x    : finite length 1D or 2D input signal (implicitly periodized)
%       h    : scaling filter
%       L    : number of levels. in case of a 1D signal length(x) must be
%              divisible by 2^L; in case of a 2D signal the row and the
%              column dimension must be divisible by 2^L.
%
% see also: mdwt, mrdwt, mirdwt
*/

#include "mex.h"
#include "rwt_init.h"
#include "rwt_transforms.h"

/*!
 * Matlab MEX definition for the inverse discrete wavelet transform.
 *
 * @param nlhs number of items on left hand side of matlab call
 * @param plhs pointer to left hand side data structure
 * @param nrhs number of items on right hand side of matlab call
 * @param prhs pointer to right hand side data structure
 *
 * plhs[0] receives the reconstructed signal. plhs[1] receives the number of
 * levels, but only when the caller asked for a second output: plhs is only
 * guaranteed to have room for the requested outputs, so writing plhs[1]
 * unconditionally can run past the end of the array.
 */
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) {
  double *x, *y;
  rwt_init_params params = rwt_matlab_init(nlhs, plhs, nrhs, prhs, INVERSE_DWT);
  y = mxGetPr(prhs[0]);   /* transform coefficients (input) */
  x = mxGetPr(plhs[0]);   /* output buffer; plhs[0] is expected to have been created by rwt_matlab_init */
  if (nlhs > 1) {
    plhs[1] = mxCreateDoubleMatrix(1, 1, mxREAL);
    *mxGetPr(plhs[1]) = params.levels;
  }
  idwt(x, params.nrows, params.ncols, params.scalings, params.ncoeff, params.levels, y);
}


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/mex/mirdwt.c
================================================
/*! \file mirdwt.c
    \brief MATLAB gateway for the inverse redundant discrete wavelet transform

    This file is used to produce a MATLAB MEX binary for the inverse redundant discrete wavelet transform

%function x = mirdwt(y_low,y_high,h,L);
% 
% function computes the inverse redundant discrete wavelet transform x for a
% 1D or 2D input signal. redundant means here that the subsampling after
% each stage of the forward transform has been omitted. y_low contains the
% lowpass and y_high the highpass components as computed, e.g., by mrdwt. In
% case of a 2D signal the ordering in y_high is [lh hl hh lh hl ... ] (first
% letter refers to row, second to column filtering).
%
%    Input:
%       y_low   : lowpass component
%       y_high   : highpass components
%       h    : scaling filter
%       L    : number of levels. in case of a 1D signal length(y_low) must be
%              divisible by 2^L; in case of a 2D signal the row and the
%              column dimension must be divisible by 2^L.
%   
%    Output:
%	x    : finite length 1D or 2D signal
%
% see also: mdwt, midwt, mrdwt
*/

#include "mex.h"
#include "rwt_init.h"
#include "rwt_transforms.h"

/*!
 * Matlab MEX definition for the inverse redundant discrete wavelet transform.
 *
 * @param nlhs number of items on left hand side of matlab call
 * @param plhs pointer to left hand side data structure
 * @param nrhs number of items on right hand side of matlab call
 * @param prhs pointer to right hand side data structure
 *
 * plhs[0] receives the reconstructed signal. plhs[1] receives the number of
 * levels, but only when the caller asked for a second output: plhs is only
 * guaranteed to have room for the requested outputs, so writing plhs[1]
 * unconditionally can run past the end of the array.
 */
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) {
  double *x, *yl, *yh;
  rwt_init_params params = rwt_matlab_init(nlhs, plhs, nrhs, prhs, INVERSE_REDUNDANT_DWT);
  yl = mxGetPr(prhs[0]);  /* lowpass component (input) */
  yh = mxGetPr(prhs[1]);  /* highpass components (input) */
  x = mxGetPr(plhs[0]);   /* output buffer; plhs[0] is expected to have been created by rwt_matlab_init */
  if (nlhs > 1) {
    plhs[1] = mxCreateDoubleMatrix(1, 1, mxREAL);
    *mxGetPr(plhs[1]) = params.levels;
  }
  irdwt(x, params.nrows, params.ncols, params.scalings, params.ncoeff, params.levels, yl, yh);
}


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/mex/mrdwt.c
================================================
/*! \file mrdwt.c
    \brief MATLAB gateway for the redundant discrete wavelet transform

    This file is used to produce a MATLAB MEX binary for the redundant discrete wavelet transform

%[yl,yh] = mrdwt(x,h,L);
% 
% function computes the redundant discrete wavelet transform y for a 1D or
% 2D input signal. redundant means here that the subsampling after each
% stage is omitted. yl contains the lowpass and yh the highpass
% components. In case of a 2D signal the ordering in yh is [lh hl hh lh hl
% ... ] (first letter refers to row, second to column filtering).
%
%    Input:
%	x    : finite length 1D or 2D signal (implicitly periodized)
%       h    : scaling filter
%       L    : number of levels. in case of a 1D signal length(x) must be
%              divisible by 2^L; in case of a 2D signal the row and the
%              column dimension must be divisible by 2^L.
%   
%    Output:
%       yl   : lowpass component
%       yh   : highpass components
%
% see also: mdwt, midwt, mirdwt
*/

#include "mex.h"
#include "rwt_init.h"
#include "rwt_transforms.h"

/*!
 * Matlab MEX definition for the redundant discrete wavelet transform.
 *
 * @param nlhs number of items on left hand side of matlab call
 * @param plhs pointer to left hand side data structure
 * @param nrhs number of items on right hand side of matlab call
 * @param prhs pointer to right hand side data structure
 *
 * plhs[0] receives the lowpass component, plhs[1] the highpass components and
 * plhs[2] the number of levels. plhs is only guaranteed to have room for the
 * requested outputs, so plhs[1] and plhs[2] are written only when nlhs says
 * they were asked for. rdwt() always needs a highpass buffer, so that array is
 * created unconditionally and destroyed again if nobody wants it.
 */
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) {
  double *x, *yl, *yh;
  mxArray *yh_array;
  rwt_init_params params = rwt_matlab_init(nlhs, plhs, nrhs, prhs, REDUNDANT_DWT);
  /* 1D signals produce one highpass block per level, 2D signals three */
  if (min(params.nrows, params.ncols) == 1)
    yh_array = mxCreateDoubleMatrix(params.nrows, params.levels*params.ncols, mxREAL);
  else
    yh_array = mxCreateDoubleMatrix(params.nrows, 3*params.levels*params.ncols, mxREAL);
  x = mxGetPr(prhs[0]);
  yl = mxGetPr(plhs[0]);   /* plhs[0] is expected to have been created by rwt_matlab_init */
  yh = mxGetPr(yh_array);
  if (nlhs > 2) {
    plhs[2] = mxCreateDoubleMatrix(1, 1, mxREAL);
    *mxGetPr(plhs[2]) = params.levels;
  }
  rdwt(x, params.nrows, params.ncols, params.scalings, params.ncoeff, params.levels, yl, yh);
  /* hand the highpass result over only if a second output was requested */
  if (nlhs > 1)
    plhs[1] = yh_array;
  else
    mxDestroyArray(yh_array);
}


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/python/CMakeLists.txt
================================================
# Build script for the SWIG-generated Python bindings of the rwt library.
cmake_minimum_required (VERSION 2.6)
FIND_PACKAGE(SWIG REQUIRED)
INCLUDE(${SWIG_USE_FILE})

FIND_PACKAGE(PythonLibs)
INCLUDE_DIRECTORIES(${PYTHON_INCLUDE_PATH})

FIND_PROGRAM(PYTHON_EXECUTABLE NAMES python python3.4 python3.3 python3.2 python3.1 python3.0 python3 python2.7)
message("python: " ${PYTHON_EXECUTABLE})

# Ask the interpreter found above where the NumPy headers live.
# execute_process replaces the deprecated EXEC_PROGRAM command and runs the
# interpreter directly, so no shell quoting of the -c script is needed.
execute_process(COMMAND "${PYTHON_EXECUTABLE}" -c "import numpy; print(numpy.get_include())"
  OUTPUT_VARIABLE NUMPY_INCLUDE_DIR
  RESULT_VARIABLE NUMPY_NOT_FOUND
  OUTPUT_STRIP_TRAILING_WHITESPACE)
if(NUMPY_NOT_FOUND)
  message(WARNING "Could not query the NumPy include directory using ${PYTHON_EXECUTABLE}")
endif()
message("numpy : " ${NUMPY_INCLUDE_DIR})
INCLUDE_DIRECTORIES(${NUMPY_INCLUDE_DIR})

INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../lib/inc)

SET(CMAKE_SWIG_FLAGS "")

SET_SOURCE_FILES_PROPERTIES(rwt.i PROPERTIES CPLUSPLUS ON)
#SET_SOURCE_FILES_PROPERTIES(rwt.i PROPERTIES SWIG_FLAGS "-includeall")
SWIG_ADD_MODULE(rwt python rwt.i ../lib/src/dwt.c ../lib/src/idwt.c ../lib/src/rdwt.c ../lib/src/irdwt.c ../lib/src/platform.c ../lib/src/init.c)
SWIG_LINK_LIBRARIES(rwt ${PYTHON_LIBRARIES})

# Use the same interpreter as above so the module is installed into the
# site-packages directory that matches the Python it was configured against.
execute_process(COMMAND "${PYTHON_EXECUTABLE}" -c "from distutils.sysconfig import get_python_lib; print(get_python_lib())" OUTPUT_VARIABLE PYTHON_SITE_PACKAGES OUTPUT_STRIP_TRAILING_WHITESPACE)
install(TARGETS _rwt DESTINATION ${PYTHON_SITE_PACKAGES})
install(FILES rwt.py DESTINATION ${PYTHON_SITE_PACKAGES})


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/python/LICENSE.numpy
================================================
Copyright (c) 2005-2011, NumPy Developers.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

    * Redistributions of source code must retain the above copyright
       notice, this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above
       copyright notice, this list of conditions and the following
       disclaimer in the documentation and/or other materials provided
       with the distribution.

    * Neither the name of the NumPy Developers nor the names of any
       contributors may be used to endorse or promote products derived
       from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/python/numpy.i
================================================
/* -*- C -*-  (not really, but good for syntax highlighting) */
#ifdef SWIGPYTHON

%{
#ifndef SWIG_FILE_WITH_INIT
#  define NO_IMPORT_ARRAY
#endif
#include "stdio.h"
#include <numpy/arrayobject.h>
%}

/**********************************************************************/

/* The following code originally appeared in
 * enthought/kiva/agg/src/numeric.i written by Eric Jones.  It was
 * translated from C++ to C by John Hunter.  Bill Spotz has modified
 * it to fix some minor bugs, upgrade from Numeric to numpy (all
 * versions), add some comments and functionality, and convert from
 * direct code insertion to SWIG fragments.
 */

%fragment("NumPy_Macros", "header")
{
/* Macros to extract array attributes.
 *
 * Each macro argument is wrapped in parentheses before it is cast, so
 * that an expression argument (not just a plain identifier) is cast as
 * a whole.  Each expansion is itself fully parenthesized.
 *
 * NOTE(review): array_numdims, array_dimensions, array_size and
 * array_data read PyArrayObject members directly; presumably this
 * targets a NumPy release that still exposes those members -- confirm
 * against the NumPy version this is built with.
 */
%#define is_array(a)            ((a) && PyArray_Check((PyArrayObject *)(a)))
%#define array_type(a)          ((int)(PyArray_TYPE((PyArrayObject *)(a))))
%#define array_numdims(a)       (((PyArrayObject *)(a))->nd)
%#define array_dimensions(a)    (((PyArrayObject *)(a))->dimensions)
%#define array_size(a,i)        (((PyArrayObject *)(a))->dimensions[i])
%#define array_data(a)          (((PyArrayObject *)(a))->data)
%#define array_is_contiguous(a) (PyArray_ISCONTIGUOUS(a))
%#define array_is_native(a)     (PyArray_ISNOTSWAPPED(a))
%#define array_is_fortran(a)    (PyArray_ISFORTRAN(a))
}

/**********************************************************************/

%fragment("NumPy_Utilities", "header")
{
  /* Given a PyObject, return a string describing its type.
   *
   * The checks run in the order written and the first match wins, so
   * an object that satisfies several checks is reported under the
   * earliest one (for example, anything callable is "callable").
   * The returned pointer refers to a string literal and must not be
   * freed by the caller.
   */
  const char* pytype_string(PyObject* py_obj) {
    if (py_obj == NULL          ) return "C NULL value";
    if (py_obj == Py_None       ) return "Python None" ;
    if (PyCallable_Check(py_obj)) return "callable"    ;
    if (PyString_Check(  py_obj)) return "string"      ;
    if (PyInt_Check(     py_obj)) return "int"         ;
    if (PyFloat_Check(   py_obj)) return "float"       ;
    if (PyDict_Check(    py_obj)) return "dict"        ;
    if (PyList_Check(    py_obj)) return "list"        ;
    if (PyTuple_Check(   py_obj)) return "tuple"       ;
    if (PyModule_Check(  py_obj)) return "module"      ;
%#if PY_MAJOR_VERSION < 3
    /* These two checks are only compiled for Python 2 */
    if (PyFile_Check(    py_obj)) return "file"        ;
    if (PyInstance_Check(py_obj)) return "instance"    ;
%#endif

    /* Fallback; the spelling was previously "unkown type" */
    return "unknown type";
  }

  /* Given a NumPy typecode, return a string describing the type.
   *
   * Typecodes 0..23 index directly into the table below.  Any other
   * value, including a negative one, yields "unknown"; previously a
   * negative typecode indexed before the start of the table.
   *
   * NOTE(review): the table assumes a particular numbering of NumPy
   * type codes -- confirm it matches the NumPy version in use.
   */
  const char* typecode_string(int typecode) {
    static const char* type_names[25] = {"bool", "byte", "unsigned byte",
                                   "short", "unsigned short", "int",
                                   "unsigned int", "long", "unsigned long",
                                   "long long", "unsigned long long",
                                   "float", "double", "long double",
                                   "complex float", "complex double",
                                   "complex long double", "object",
                                   "string", "unicode", "void", "ntypes",
                                   "notype", "char", "unknown"};
    return (typecode >= 0 && typecode < 24) ? type_names[typecode]
                                            : type_names[24];
  }

  /* Report whether two NumPy typecodes are equivalent, as decided by
   * PyArray_EquivTypenums().  This wrapper is deprecated; call
   * PyArray_EquivTypenums() directly instead.
   */
  int type_match(int actual_type, int desired_type) {
    int equivalent = PyArray_EquivTypenums(actual_type, desired_type);
    return equivalent;
  }
}

/**********************************************************************/

%fragment("NumPy_Object_to_Array", "header",
          fragment="NumPy_Backward_Compatibility",
          fragment="NumPy_Macros",
          fragment="NumPy_Utilities")
{
  /* Given a PyObject pointer, cast it to a PyArrayObject pointer if
   * legal.  The cast is legal when the object is a NumPy array and
   * either typecode is NPY_NOTYPE or the array's type is equivalent to
   * typecode.  No conversion or copy is made and no reference is
   * taken.  On failure a TypeError is set and NULL is returned; the
   * message distinguishes an array of the wrong type from an object
   * that is not an array.
   */
  PyArrayObject* obj_to_array_no_conversion(PyObject* input, int typecode)
  {
    PyArrayObject* ary = NULL;
    /* The condition is explicitly parenthesized rather than relying on
     * the parentheses that the is_array() macro happens to expand to. */
    if (is_array(input))
    {
      if (typecode == NPY_NOTYPE ||
          PyArray_EquivTypenums(array_type(input), typecode))
      {
        ary = (PyArrayObject*) input;
      }
      else
      {
        const char* desired_type = typecode_string(typecode);
        const char* actual_type  = typecode_string(array_type(input));
        PyErr_Format(PyExc_TypeError,
                     "Array of type '%s' required.  Array of type '%s' given",
                     desired_type, actual_type);
      }
    }
    else
    {
      const char * desired_type = typecode_string(typecode);
      const char * actual_type  = pytype_string(input);
      PyErr_Format(PyExc_TypeError,
                   "Array of type '%s' required.  A '%s' was given",
                   desired_type, actual_type);
    }
    return ary;
  }

  /* Obtain a NumPy array of the requested typecode from an arbitrary
   * PyObject.  If the input is already an array and either typecode is
   * NPY_NOTYPE or the types are equivalent, the input itself is
   * returned and *is_new_object is set to 0.  Otherwise the input is
   * passed to PyArray_FROMANY(), *is_new_object is set to 1, and the
   * result of that call is returned (NULL on failure, in which case
   * the conversion routine is expected to have set the Python error).
   */
  PyArrayObject* obj_to_array_allow_conversion(PyObject* input, int typecode,
                                               int* is_new_object)
  {
    int usable_as_is = is_array(input) &&
                       (typecode == NPY_NOTYPE ||
                        PyArray_EquivTypenums(array_type(input),typecode));
    if (usable_as_is)
    {
      *is_new_object = 0;
      return (PyArrayObject*) input;
    }
    /* The flag is raised even when the conversion fails */
    *is_new_object = 1;
    return (PyArrayObject*) PyArray_FROMANY(input, typecode, 0, 0,
                                            NPY_DEFAULT);
  }

  /* Return a contiguous version of the given array.  A contiguous
   * input is handed back unchanged with *is_new_object set to 0.
   * Otherwise PyArray_ContiguousFromObject() is asked for an array of
   * the same type within the given dimension bounds, *is_new_object is
   * set to 1 and that result is returned.
   */
  PyArrayObject* make_contiguous(PyArrayObject* ary, int* is_new_object,
                                 int min_dims, int max_dims)
  {
    if (!array_is_contiguous(ary))
    {
      PyObject* fresh = PyArray_ContiguousFromObject((PyObject*)ary,
                                                     array_type(ary),
                                                     min_dims,
                                                     max_dims);
      *is_new_object = 1;
      return (PyArrayObject*) fresh;
    }
    *is_new_object = 0;
    return ary;
  }

  /* Given a PyArrayObject, check to see if it is Fortran-contiguous.
   * If so, return the input pointer and flag it as not a new object
   * (*is_new_object = 0).  If it is not Fortran-contiguous, request a
   * Fortran-ordered array from PyArray_FromArray(), flag it as a new
   * object (*is_new_object = 1) and return the pointer, which may be
   * NULL if that call failed.
   *
   * min_dims and max_dims are accepted for symmetry with
   * make_contiguous() but are not used here.
   */
  PyArrayObject* make_fortran(PyArrayObject* ary, int* is_new_object,
                              int min_dims, int max_dims)
  {
    PyArrayObject* result;
    if (array_is_fortran(ary))
    {
      result = ary;
      *is_new_object = 0;
    }
    else
    {
      /* Use the accessor rather than reading the struct member.  The
       * descriptor reference is incremented first because
       * PyArray_FromArray() consumes one reference to it. */
      PyArray_Descr* descr = PyArray_DESCR(ary);
      Py_INCREF(descr);
      result = (PyArrayObject*) PyArray_FromArray(ary, descr, NPY_FORTRAN);
      *is_new_object = 1;
    }
    return result;
  }

  /* Produce a contiguous NumPy array of the given typecode from an
   * arbitrary PyObject.  The object is first converted with
   * obj_to_array_allow_conversion() and the result is then passed
   * through make_contiguous().  *is_new_object is set when either step
   * created a new array.  Returns NULL if either step fails.
   */
  PyArrayObject* obj_to_array_contiguous_allow_conversion(PyObject* input,
                                                          int typecode,
                                                          int* is_new_object)
  {
    int made_by_conversion = 0;
    int made_by_layout = 0;
    PyArrayObject* laid_out;
    PyArrayObject* converted = obj_to_array_allow_conversion(input, typecode,
                                                             &made_by_conversion);
    if (!converted)
    {
      *is_new_object = made_by_conversion || made_by_layout;
      return converted;
    }
    laid_out = make_contiguous(converted, &made_by_layout, 0, 0);
    /* The intermediate array is dropped only when both steps made a
     * new object, i.e. it is a temporary superseded by laid_out. */
    if (made_by_conversion && made_by_layout)
    {
      Py_DECREF(converted);
    }
    *is_new_object = made_by_conversion || made_by_layout;
    return laid_out;
  }

  /* Produce a Fortran-ordered NumPy array of the given typecode from
   * an arbitrary PyObject.  The object is first converted with
   * obj_to_array_allow_conversion() and the result is then passed
   * through make_fortran().  *is_new_object is set when either step
   * created a new array.  Returns NULL if either step fails.
   */
  PyArrayObject* obj_to_array_fortran_allow_conversion(PyObject* input,
                                                       int typecode,
                                                       int* is_new_object)
  {
    int made_by_conversion = 0;
    int made_by_layout = 0;
    PyArrayObject* laid_out;
    PyArrayObject* converted = obj_to_array_allow_conversion(input, typecode,
                                                             &made_by_conversion);
    if (!converted)
    {
      *is_new_object = made_by_conversion || made_by_layout;
      return converted;
    }
    laid_out = make_fortran(converted, &made_by_layout, 0, 0);
    /* The intermediate array is dropped only when both steps made a
     * new object, i.e. it is a temporary superseded by laid_out. */
    if (made_by_conversion && made_by_layout)
    {
      Py_DECREF(converted);
    }
    *is_new_object = made_by_conversion || made_by_layout;
    return laid_out;
  }

} /* end fragment */


/**********************************************************************/

%fragment("NumPy_Array_Requirements", "header",
          fragment="NumPy_Backward_Compatibility",
          fragment="NumPy_Macros")
{
  /* Check that the given array is contiguous.  Returns 1 when it is;
   * otherwise sets a Python TypeError and returns 0.
   */
  int require_contiguous(PyArrayObject* ary)
  {
    if (array_is_contiguous(ary))
    {
      return 1;
    }
    PyErr_SetString(PyExc_TypeError,
                    "Array must be contiguous.  A non-contiguous array was given");
    return 0;
  }

  /* Check that the given array is not byte-swapped.  Returns 1 when
   * the array has native byte order; otherwise sets a Python TypeError
   * and returns 0.
   */
  int require_native(PyArrayObject* ary)
  {
    if (array_is_native(ary))
    {
      return 1;
    }
    PyErr_SetString(PyExc_TypeError,
                    "Array must have native byteorder.  "
                    "A byte-swapped array was given");
    return 0;
  }

  /* Check that the given array has exactly exact_dimensions
   * dimensions.  Returns 1 on a match; otherwise sets a Python
   * TypeError naming both the required and the actual dimension count
   * and returns 0.
   */
  int require_dimensions(PyArrayObject* ary, int exact_dimensions)
  {
    if (array_numdims(ary) == exact_dimensions)
    {
      return 1;
    }
    PyErr_Format(PyExc_TypeError,
                 "Array must have %d dimensions.  Given array has %d dimensions",
                 exact_dimensions, array_numdims(ary));
    return 0;
  }

  /* Require the given PyArrayObject to have one of a list of specified
   * number of dimensions.  exact_dimensions points to n acceptable
   * dimension counts.  If the array has one of the specified number
   * of dimensions, return 1.  Otherwise, set the python error string
   * and return 0.
   *
   * The list of acceptable counts is formatted into a fixed-size
   * buffer with bounded writes, so a long list is truncated instead of
   * overflowing the buffer, and n <= 0 no longer reads before the
   * start of exact_dimensions.
   */
  int require_dimensions_n(PyArrayObject* ary, int* exact_dimensions, int n)
  {
    int success = 0;
    int i;
    char dims_str[255] = "";
    size_t used = 0;
    for (i = 0; i < n && !success; i++)
    {
      if (array_numdims(ary) == exact_dimensions[i])
      {
        success = 1;
      }
    }
    if (!success)
    {
      /* Stop early once only the terminating NUL would still fit */
      for (i = 0; i < n && used < sizeof(dims_str) - 1; i++)
      {
        if (i < n-1)
        {
          PyOS_snprintf(dims_str + used, sizeof(dims_str) - used,
                        "%d, ", exact_dimensions[i]);
        }
        else
        {
          PyOS_snprintf(dims_str + used, sizeof(dims_str) - used,
                        " or %d", exact_dimensions[i]);
        }
        /* Measure what was actually written; output may be truncated */
        used += strlen(dims_str + used);
      }
      PyErr_Format(PyExc_TypeError,
                   "Array must have %s dimensions.  Given array has %d dimensions",
                   dims_str, array_numdims(ary));
    }
    return success;
  }

  /* Require the given PyArrayObject to have a specified shape.  size
   * points to n expected extents; an entry of -1 matches any extent.
   * If the array has the specified shape, return 1.  Otherwise, set
   * the python error string (showing the desired shape with "*" for
   * wildcard entries, and the actual shape) and return 0.
   *
   * The caller is expected to have verified that the array has at
   * least n dimensions, since the first n extents are read.
   *
   * Both shape strings are built with bounded writes, so a long shape
   * is truncated instead of overflowing the fixed-size buffers.
   */
  int require_size(PyArrayObject* ary, npy_intp* size, int n)
  {
    int i;
    int success = 1;
    size_t used;
    char desired_dims[255] = "[";
    char actual_dims[255] = "[";
    for(i=0; i < n;i++)
    {
      if (size[i] != -1 &&  size[i] != array_size(ary,i))
      {
        success = 0;
      }
    }
    if (!success)
    {
      /* Desired shape: start after the opening bracket */
      used = 1;
      for (i = 0; i < n && used < sizeof(desired_dims) - 1; i++)
      {
        if (size[i] == -1)
        {
          PyOS_snprintf(desired_dims + used, sizeof(desired_dims) - used,
                        "*,");
        }
        else
        {
          PyOS_snprintf(desired_dims + used, sizeof(desired_dims) - used,
                        "%ld,", (long int)size[i]);
        }
        used += strlen(desired_dims + used);
      }
      /* Overwrite the trailing comma with the closing bracket */
      desired_dims[used-1] = ']';

      /* Actual shape, formatted the same way */
      used = 1;
      for (i = 0; i < n && used < sizeof(actual_dims) - 1; i++)
      {
        PyOS_snprintf(actual_dims + used, sizeof(actual_dims) - used,
                      "%ld,", (long int)array_size(ary,i));
        used += strlen(actual_dims + used);
      }
      actual_dims[used-1] = ']';

      PyErr_Format(PyExc_TypeError,
                   "Array must have shape of %s.  Given array has shape of %s",
                   desired_dims, actual_dims);
    }
    return success;
  }

  /* Require the given PyArrayObject to be FORTRAN ordered.  If the
   * PyArrayObject is already FORTRAN ordered, do nothing.  Else, set
   * the FORTRAN ordering flags and recompute the strides in place.
   * No data is moved: only the array's flags and strides are changed.
   * Always returns 1.
   *
   * Only the layout-related flags are replaced; all other flag bits
   * are preserved.  Previously the whole flags word was overwritten
   * with NPY_FARRAY, which discarded unrelated flags such as data
   * ownership.  An array with no dimensions is left untouched, because
   * it has no strides to recompute.
   */
  int require_fortran(PyArrayObject* ary)
  {
    int success = 1;
    int nd = array_numdims(ary);
    int i;
    if (array_is_fortran(ary)) return success;
    /* Avoid indexing strides[nd-1] when there are no dimensions */
    if (nd < 1) return success;
    /* Swap the C-array flag set for the FORTRAN-array flag set,
     * keeping every other flag bit as it was */
    ary->flags = (ary->flags & ~NPY_CARRAY) | NPY_FARRAY;
    /* Recompute the strides */
    ary->strides[0] = ary->strides[nd-1];
    for (i=1; i < nd; ++i)
      ary->strides[i] = ary->strides[i-1] * array_size(ary,i-1);
    return success;
  }
}

/* Combine all NumPy fragments into one for convenience.
 *
 * The fragment body is empty; it exists only so that a typemap can
 * name "NumPy_Fragments" and thereby pull in every fragment listed
 * below.
 *
 * NOTE(review): "NumPy_Backward_Compatibility" is referenced here but
 * its definition is not visible in this part of the file -- confirm
 * that it is defined elsewhere.
 */
%fragment("NumPy_Fragments", "header",
          fragment="NumPy_Backward_Compatibility",
          fragment="NumPy_Macros",
          fragment="NumPy_Utilities",
          fragment="NumPy_Object_to_Array",
          fragment="NumPy_Array_Requirements") { }

/* End John Hunter translation (with modifications by Bill Spotz)
 */

/* %numpy_typemaps() macro
 *
 * This macro defines a family of 41 typemaps that allow C arguments
 * of the form
 *
 *     (DATA_TYPE IN_ARRAY1[ANY])
 *     (DATA_TYPE* IN_ARRAY1, DIM_TYPE DIM1)
 *     (DIM_TYPE DIM1, DATA_TYPE* IN_ARRAY1)
 *
 *     (DATA_TYPE IN_ARRAY2[ANY][ANY])
 *     (DATA_TYPE* IN_ARRAY2, DIM_TYPE DIM1, DIM_TYPE DIM2)
 *     (DIM_TYPE DIM1, DIM_TYPE DIM2, DATA_TYPE* IN_ARRAY2)
 *     (DATA_TYPE* IN_FARRAY2, DIM_TYPE DIM1, DIM_TYPE DIM2)
 *     (DIM_TYPE DIM1, DIM_TYPE DIM2, DATA_TYPE* IN_FARRAY2)
 *
 *     (DATA_TYPE IN_ARRAY3[ANY][ANY][ANY])
 *     (DATA_TYPE* IN_ARRAY3, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3)
 *     (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DATA_TYPE* IN_ARRAY3)
 *     (DATA_TYPE* IN_FARRAY3, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3)
 *     (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DATA_TYPE* IN_FARRAY3)
 *
 *     (DATA_TYPE INPLACE_ARRAY1[ANY])
 *     (DATA_TYPE* INPLACE_ARRAY1, DIM_TYPE DIM1)
 *     (DIM_TYPE DIM1, DATA_TYPE* INPLACE_ARRAY1)
 *
 *     (DATA_TYPE INPLACE_ARRAY2[ANY][ANY])
 *     (DATA_TYPE* INPLACE_ARRAY2, DIM_TYPE DIM1, DIM_TYPE DIM2)
 *     (DIM_TYPE DIM1, DIM_TYPE DIM2, DATA_TYPE* INPLACE_ARRAY2)
 *     (DATA_TYPE* INPLACE_FARRAY2, DIM_TYPE DIM1, DIM_TYPE DIM2)
 *     (DIM_TYPE DIM1, DIM_TYPE DIM2, DATA_TYPE* INPLACE_FARRAY2)
 *
 *     (DATA_TYPE INPLACE_ARRAY3[ANY][ANY][ANY])
 *     (DATA_TYPE* INPLACE_ARRAY3, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3)
 *     (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DATA_TYPE* INPLACE_ARRAY3)
 *     (DATA_TYPE* INPLACE_FARRAY3, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3)
 *     (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DATA_TYPE* INPLACE_FARRAY3)
 *
 *     (DATA_TYPE ARGOUT_ARRAY1[ANY])
 *     (DATA_TYPE* ARGOUT_ARRAY1, DIM_TYPE DIM1)
 *     (DIM_TYPE DIM1, DATA_TYPE* ARGOUT_ARRAY1)
 *
 *     (DATA_TYPE ARGOUT_ARRAY2[ANY][ANY])
 *
 *     (DATA_TYPE ARGOUT_ARRAY3[ANY][ANY][ANY])
 *
 *     (DATA_TYPE** ARGOUTVIEW_ARRAY1, DIM_TYPE* DIM1)
 *     (DIM_TYPE* DIM1, DATA_TYPE** ARGOUTVIEW_ARRAY1)
 *
 *     (DATA_TYPE** ARGOUTVIEW_ARRAY2, DIM_TYPE* DIM1, DIM_TYPE* DIM2)
 *     (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DATA_TYPE** ARGOUTVIEW_ARRAY2)
 *     (DATA_TYPE** ARGOUTVIEW_FARRAY2, DIM_TYPE* DIM1, DIM_TYPE* DIM2)
 *     (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DATA_TYPE** ARGOUTVIEW_FARRAY2)
 *
 *     (DATA_TYPE** ARGOUTVIEW_ARRAY3, DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3)
 *     (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3, DATA_TYPE** ARGOUTVIEW_ARRAY3)
 *     (DATA_TYPE** ARGOUTVIEW_FARRAY3, DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3)
 *     (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3, DATA_TYPE** ARGOUTVIEW_FARRAY3)
 *
 * where "DATA_TYPE" is any type supported by the NumPy module, and
 * "DIM_TYPE" is any int-like type suitable for specifying dimensions.
 * The difference between "ARRAY" typemaps and "FARRAY" typemaps is
 * that the "FARRAY" typemaps expect FORTRAN ordering of
 * multidimensional arrays.  In python, the dimensions will not need
 * to be specified (except for the "DATA_TYPE* ARGOUT_ARRAY1"
 * typemaps).  The IN_ARRAYs can be a numpy array or any sequence that
 * can be converted to a numpy array of the specified type.  The
 * INPLACE_ARRAYs must be numpy arrays of the appropriate type.  The
 * ARGOUT_ARRAYs will be returned as new numpy arrays of the
 * appropriate type.
 *
 * These typemaps can be applied to existing functions using the
 * %apply directive.  For example:
 *
 *     %apply (double* IN_ARRAY1, int DIM1) {(double* series, int length)};
 *     double prod(double* series, int length);
 *
 *     %apply (int DIM1, int DIM2, double* INPLACE_ARRAY2)
 *           {(int rows, int cols, double* matrix        )};
 *     void floor(int rows, int cols, double* matrix, double f);
 *
 *     %apply (double IN_ARRAY3[ANY][ANY][ANY])
 *           {(double tensor[2][2][2]         )};
 *     %apply (double ARGOUT_ARRAY3[ANY][ANY][ANY])
 *           {(double low[2][2][2]                )};
 *     %apply (double ARGOUT_ARRAY3[ANY][ANY][ANY])
 *           {(double upp[2][2][2]                )};
 *     void luSplit(double tensor[2][2][2],
 *                  double low[2][2][2],
 *                  double upp[2][2][2]    );
 *
 * or directly with
 *
 *     double prod(double* IN_ARRAY1, int DIM1);
 *
 *     void floor(int DIM1, int DIM2, double* INPLACE_ARRAY2, double f);
 *
 *     void luSplit(double IN_ARRAY3[ANY][ANY][ANY],
 *                  double ARGOUT_ARRAY3[ANY][ANY][ANY],
 *                  double ARGOUT_ARRAY3[ANY][ANY][ANY]);
 */

%define %numpy_typemaps(DATA_TYPE, DATA_TYPECODE, DIM_TYPE)

/************************/
/* Input Array Typemaps */
/************************/

/* Typemap suite for (DATA_TYPE IN_ARRAY1[ANY])
 *
 * Accepts an array or any Python sequence, converts it to a contiguous
 * one-dimensional array whose length must equal the declared C array
 * bound, and releases the temporary array (if one was made) afterwards.
 */
%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY,
           fragment="NumPy_Macros")
  (DATA_TYPE IN_ARRAY1[ANY])
{
  $1 = (is_array($input) || PySequence_Check($input)) ? 1 : 0;
}
%typemap(in,
         fragment="NumPy_Fragments")
  (DATA_TYPE IN_ARRAY1[ANY])
  (PyArrayObject* array=NULL, int is_new_object=0)
{
  npy_intp shape[1] = { $1_dim0 };
  array = obj_to_array_contiguous_allow_conversion($input, DATA_TYPECODE,
                                                   &is_new_object);
  if (!array) SWIG_fail;
  if (!require_dimensions(array, 1)) SWIG_fail;
  if (!require_size(array, shape, 1)) SWIG_fail;
  $1 = ($1_ltype) array_data(array);
}
%typemap(freearg)
  (DATA_TYPE IN_ARRAY1[ANY])
{
  if (is_new_object$argnum)
  {
    if (array$argnum) { Py_DECREF(array$argnum); }
  }
}

/* Typemap suite for (DATA_TYPE* IN_ARRAY1, DIM_TYPE DIM1)
 *
 * Accepts an array or any Python sequence, converts it to a contiguous
 * one-dimensional array of any length, and passes the data pointer
 * followed by the length.
 */
%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY,
           fragment="NumPy_Macros")
  (DATA_TYPE* IN_ARRAY1, DIM_TYPE DIM1)
{
  $1 = (is_array($input) || PySequence_Check($input)) ? 1 : 0;
}
%typemap(in,
         fragment="NumPy_Fragments")
  (DATA_TYPE* IN_ARRAY1, DIM_TYPE DIM1)
  (PyArrayObject* array=NULL, int is_new_object=0)
{
  /* -1 means any extent is accepted */
  npy_intp shape[1] = { -1 };
  array = obj_to_array_contiguous_allow_conversion($input, DATA_TYPECODE,
                                                   &is_new_object);
  if (!array) SWIG_fail;
  if (!require_dimensions(array, 1)) SWIG_fail;
  if (!require_size(array, shape, 1)) SWIG_fail;
  $1 = (DATA_TYPE*) array_data(array);
  $2 = (DIM_TYPE) array_size(array,0);
}
%typemap(freearg)
  (DATA_TYPE* IN_ARRAY1, DIM_TYPE DIM1)
{
  if (is_new_object$argnum)
  {
    if (array$argnum) { Py_DECREF(array$argnum); }
  }
}

/* Typemap suite for (DIM_TYPE DIM1, DATA_TYPE* IN_ARRAY1)
 *
 * Same as the data-first one-dimensional input suite, except that the
 * length is passed before the data pointer.
 */
%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY,
           fragment="NumPy_Macros")
  (DIM_TYPE DIM1, DATA_TYPE* IN_ARRAY1)
{
  $1 = (is_array($input) || PySequence_Check($input)) ? 1 : 0;
}
%typemap(in,
         fragment="NumPy_Fragments")
  (DIM_TYPE DIM1, DATA_TYPE* IN_ARRAY1)
  (PyArrayObject* array=NULL, int is_new_object=0)
{
  /* -1 means any extent is accepted */
  npy_intp shape[1] = {-1};
  array = obj_to_array_contiguous_allow_conversion($input, DATA_TYPECODE,
                                                   &is_new_object);
  if (!array) SWIG_fail;
  if (!require_dimensions(array, 1)) SWIG_fail;
  if (!require_size(array, shape, 1)) SWIG_fail;
  $1 = (DIM_TYPE) array_size(array,0);
  $2 = (DATA_TYPE*) array_data(array);
}
%typemap(freearg)
  (DIM_TYPE DIM1, DATA_TYPE* IN_ARRAY1)
{
  if (is_new_object$argnum)
  {
    if (array$argnum) { Py_DECREF(array$argnum); }
  }
}

/* Typemap suite for (DATA_TYPE IN_ARRAY2[ANY][ANY])
 *
 * Accepts an array or any Python sequence, converts it to a contiguous
 * two-dimensional array whose shape must equal the declared C array
 * bounds, and releases the temporary array (if one was made)
 * afterwards.
 */
%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY,
           fragment="NumPy_Macros")
  (DATA_TYPE IN_ARRAY2[ANY][ANY])
{
  $1 = (is_array($input) || PySequence_Check($input)) ? 1 : 0;
}
%typemap(in,
         fragment="NumPy_Fragments")
  (DATA_TYPE IN_ARRAY2[ANY][ANY])
  (PyArrayObject* array=NULL, int is_new_object=0)
{
  npy_intp shape[2] = { $1_dim0, $1_dim1 };
  array = obj_to_array_contiguous_allow_conversion($input, DATA_TYPECODE,
                                                   &is_new_object);
  if (!array) SWIG_fail;
  if (!require_dimensions(array, 2)) SWIG_fail;
  if (!require_size(array, shape, 2)) SWIG_fail;
  $1 = ($1_ltype) array_data(array);
}
%typemap(freearg)
  (DATA_TYPE IN_ARRAY2[ANY][ANY])
{
  if (is_new_object$argnum)
  {
    if (array$argnum) { Py_DECREF(array$argnum); }
  }
}

/* Typemap suite for (DATA_TYPE* IN_ARRAY2, DIM_TYPE DIM1, DIM_TYPE DIM2)
 *
 * Accepts an array or any Python sequence, converts it to a contiguous
 * two-dimensional array of any shape, and passes the data pointer
 * followed by both extents.
 */
%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY,
           fragment="NumPy_Macros")
  (DATA_TYPE* IN_ARRAY2, DIM_TYPE DIM1, DIM_TYPE DIM2)
{
  $1 = (is_array($input) || PySequence_Check($input)) ? 1 : 0;
}
%typemap(in,
         fragment="NumPy_Fragments")
  (DATA_TYPE* IN_ARRAY2, DIM_TYPE DIM1, DIM_TYPE DIM2)
  (PyArrayObject* array=NULL, int is_new_object=0)
{
  /* -1 means any extent is accepted */
  npy_intp shape[2] = { -1, -1 };
  array = obj_to_array_contiguous_allow_conversion($input, DATA_TYPECODE,
                                                   &is_new_object);
  if (!array) SWIG_fail;
  if (!require_dimensions(array, 2)) SWIG_fail;
  if (!require_size(array, shape, 2)) SWIG_fail;
  $1 = (DATA_TYPE*) array_data(array);
  $2 = (DIM_TYPE) array_size(array,0);
  $3 = (DIM_TYPE) array_size(array,1);
}
%typemap(freearg)
  (DATA_TYPE* IN_ARRAY2, DIM_TYPE DIM1, DIM_TYPE DIM2)
{
  if (is_new_object$argnum)
  {
    if (array$argnum) { Py_DECREF(array$argnum); }
  }
}

/* Typemap suite for (DIM_TYPE DIM1, DIM_TYPE DIM2, DATA_TYPE* IN_ARRAY2)
 *
 * Same as the data-first two-dimensional input suite, except that both
 * extents are passed before the data pointer.
 */
%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY,
           fragment="NumPy_Macros")
  (DIM_TYPE DIM1, DIM_TYPE DIM2, DATA_TYPE* IN_ARRAY2)
{
  $1 = (is_array($input) || PySequence_Check($input)) ? 1 : 0;
}
%typemap(in,
         fragment="NumPy_Fragments")
  (DIM_TYPE DIM1, DIM_TYPE DIM2, DATA_TYPE* IN_ARRAY2)
  (PyArrayObject* array=NULL, int is_new_object=0)
{
  /* -1 means any extent is accepted */
  npy_intp shape[2] = { -1, -1 };
  array = obj_to_array_contiguous_allow_conversion($input, DATA_TYPECODE,
                                                   &is_new_object);
  if (!array) SWIG_fail;
  if (!require_dimensions(array, 2)) SWIG_fail;
  if (!require_size(array, shape, 2)) SWIG_fail;
  $1 = (DIM_TYPE) array_size(array,0);
  $2 = (DIM_TYPE) array_size(array,1);
  $3 = (DATA_TYPE*) array_data(array);
}
%typemap(freearg)
  (DIM_TYPE DIM1, DIM_TYPE DIM2, DATA_TYPE* IN_ARRAY2)
{
  if (is_new_object$argnum)
  {
    if (array$argnum) { Py_DECREF(array$argnum); }
  }
}

/* Typemap suite for (DATA_TYPE* IN_FARRAY2, DIM_TYPE DIM1, DIM_TYPE DIM2)
 *
 * Accepts an array or any Python sequence, converts it to a
 * Fortran-ordered two-dimensional array of any shape, and passes the
 * data pointer followed by both extents.
 */
%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY,
           fragment="NumPy_Macros")
  (DATA_TYPE* IN_FARRAY2, DIM_TYPE DIM1, DIM_TYPE DIM2)
{
  $1 = (is_array($input) || PySequence_Check($input)) ? 1 : 0;
}
%typemap(in,
         fragment="NumPy_Fragments")
  (DATA_TYPE* IN_FARRAY2, DIM_TYPE DIM1, DIM_TYPE DIM2)
  (PyArrayObject* array=NULL, int is_new_object=0)
{
  /* -1 means any extent is accepted */
  npy_intp shape[2] = { -1, -1 };
  array = obj_to_array_fortran_allow_conversion($input, DATA_TYPECODE,
                                                &is_new_object);
  if (!array) SWIG_fail;
  if (!require_dimensions(array, 2)) SWIG_fail;
  if (!require_size(array, shape, 2)) SWIG_fail;
  if (!require_fortran(array)) SWIG_fail;
  $1 = (DATA_TYPE*) array_data(array);
  $2 = (DIM_TYPE) array_size(array,0);
  $3 = (DIM_TYPE) array_size(array,1);
}
%typemap(freearg)
  (DATA_TYPE* IN_FARRAY2, DIM_TYPE DIM1, DIM_TYPE DIM2)
{
  if (is_new_object$argnum)
  {
    if (array$argnum) { Py_DECREF(array$argnum); }
  }
}

/* Typemap suite for (DIM_TYPE DIM1, DIM_TYPE DIM2, DATA_TYPE* IN_FARRAY2)
 *
 * Accepts an array or any Python sequence, converts it to a
 * Fortran-ordered two-dimensional array of any shape, and passes both
 * extents followed by the data pointer.
 *
 * The conversion uses obj_to_array_fortran_allow_conversion(), the
 * same as the data-first IN_FARRAY2 suite.  It previously used the
 * contiguous conversion, which left require_fortran() to rewrite the
 * flags and strides of the array in place (possibly the caller's own
 * array) instead of producing a Fortran-ordered array.
 */
%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY,
           fragment="NumPy_Macros")
  (DIM_TYPE DIM1, DIM_TYPE DIM2, DATA_TYPE* IN_FARRAY2)
{
  $1 = is_array($input) || PySequence_Check($input);
}
%typemap(in,
         fragment="NumPy_Fragments")
  (DIM_TYPE DIM1, DIM_TYPE DIM2, DATA_TYPE* IN_FARRAY2)
  (PyArrayObject* array=NULL, int is_new_object=0)
{
  /* -1 means any extent is accepted */
  npy_intp size[2] = { -1, -1 };
  array = obj_to_array_fortran_allow_conversion($input, DATA_TYPECODE,
                                                &is_new_object);
  if (!array || !require_dimensions(array, 2) ||
      !require_size(array, size, 2) || !require_fortran(array)) SWIG_fail;
  $1 = (DIM_TYPE) array_size(array,0);
  $2 = (DIM_TYPE) array_size(array,1);
  $3 = (DATA_TYPE*) array_data(array);
}
%typemap(freearg)
  (DIM_TYPE DIM1, DIM_TYPE DIM2, DATA_TYPE* IN_FARRAY2)
{
  /* Release the temporary array only if the conversion created one */
  if (is_new_object$argnum && array$argnum)
    { Py_DECREF(array$argnum); }
}

/* Typemap suite for (DATA_TYPE IN_ARRAY3[ANY][ANY][ANY])
 *
 * Accepts an array or any Python sequence, converts it to a contiguous
 * three-dimensional array whose shape must equal the declared C array
 * bounds, and releases the temporary array (if one was made)
 * afterwards.
 */
%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY,
           fragment="NumPy_Macros")
  (DATA_TYPE IN_ARRAY3[ANY][ANY][ANY])
{
  $1 = (is_array($input) || PySequence_Check($input)) ? 1 : 0;
}
%typemap(in,
         fragment="NumPy_Fragments")
  (DATA_TYPE IN_ARRAY3[ANY][ANY][ANY])
  (PyArrayObject* array=NULL, int is_new_object=0)
{
  npy_intp shape[3] = { $1_dim0, $1_dim1, $1_dim2 };
  array = obj_to_array_contiguous_allow_conversion($input, DATA_TYPECODE,
                                                   &is_new_object);
  if (!array) SWIG_fail;
  if (!require_dimensions(array, 3)) SWIG_fail;
  if (!require_size(array, shape, 3)) SWIG_fail;
  $1 = ($1_ltype) array_data(array);
}
%typemap(freearg)
  (DATA_TYPE IN_ARRAY3[ANY][ANY][ANY])
{
  if (is_new_object$argnum)
  {
    if (array$argnum) { Py_DECREF(array$argnum); }
  }
}

/* Typemap suite for (DATA_TYPE* IN_ARRAY3, DIM_TYPE DIM1, DIM_TYPE DIM2,
 *                    DIM_TYPE DIM3)
 *
 * Accepts an array or any Python sequence, converts it to a contiguous
 * three-dimensional array of any shape, and passes the data pointer
 * followed by the three extents.
 */
%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY,
           fragment="NumPy_Macros")
  (DATA_TYPE* IN_ARRAY3, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3)
{
  $1 = (is_array($input) || PySequence_Check($input)) ? 1 : 0;
}
%typemap(in,
         fragment="NumPy_Fragments")
  (DATA_TYPE* IN_ARRAY3, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3)
  (PyArrayObject* array=NULL, int is_new_object=0)
{
  /* -1 means any extent is accepted */
  npy_intp shape[3] = { -1, -1, -1 };
  array = obj_to_array_contiguous_allow_conversion($input, DATA_TYPECODE,
                                                   &is_new_object);
  if (!array) SWIG_fail;
  if (!require_dimensions(array, 3)) SWIG_fail;
  if (!require_size(array, shape, 3)) SWIG_fail;
  $1 = (DATA_TYPE*) array_data(array);
  $2 = (DIM_TYPE) array_size(array,0);
  $3 = (DIM_TYPE) array_size(array,1);
  $4 = (DIM_TYPE) array_size(array,2);
}
%typemap(freearg)
  (DATA_TYPE* IN_ARRAY3, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3)
{
  if (is_new_object$argnum)
  {
    if (array$argnum) { Py_DECREF(array$argnum); }
  }
}

/* Typemap suite for (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3,
 *                    DATA_TYPE* IN_ARRAY3)
 *
 * Same as the data-first three-dimensional input suite, except that
 * the three extents are passed before the data pointer.
 */
%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY,
           fragment="NumPy_Macros")
  (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DATA_TYPE* IN_ARRAY3)
{
  $1 = (is_array($input) || PySequence_Check($input)) ? 1 : 0;
}
%typemap(in,
         fragment="NumPy_Fragments")
  (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DATA_TYPE* IN_ARRAY3)
  (PyArrayObject* array=NULL, int is_new_object=0)
{
  /* -1 means any extent is accepted */
  npy_intp shape[3] = { -1, -1, -1 };
  array = obj_to_array_contiguous_allow_conversion($input, DATA_TYPECODE,
                                                   &is_new_object);
  if (!array) SWIG_fail;
  if (!require_dimensions(array, 3)) SWIG_fail;
  if (!require_size(array, shape, 3)) SWIG_fail;
  $1 = (DIM_TYPE) array_size(array,0);
  $2 = (DIM_TYPE) array_size(array,1);
  $3 = (DIM_TYPE) array_size(array,2);
  $4 = (DATA_TYPE*) array_data(array);
}
%typemap(freearg)
  (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DATA_TYPE* IN_ARRAY3)
{
  if (is_new_object$argnum)
  {
    if (array$argnum) { Py_DECREF(array$argnum); }
  }
}

/* Typemap suite for (DATA_TYPE* IN_FARRAY3, DIM_TYPE DIM1, DIM_TYPE DIM2,
 *                    DIM_TYPE DIM3)
 *
 * Accepts an array or any Python sequence, converts it to a
 * Fortran-ordered three-dimensional array of any shape, and passes the
 * data pointer followed by the three extents.
 */
%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY,
           fragment="NumPy_Macros")
  (DATA_TYPE* IN_FARRAY3, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3)
{
  $1 = is_array($input) || PySequence_Check($input);
}
%typemap(in,
         fragment="NumPy_Fragments")
  (DATA_TYPE* IN_FARRAY3, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3)
  (PyArrayObject* array=NULL, int is_new_object=0)
{
  /* -1 means any extent is accepted */
  npy_intp size[3] = { -1, -1, -1 };
  array = obj_to_array_fortran_allow_conversion($input, DATA_TYPECODE,
                                                &is_new_object);
  /* All checks are joined with the short-circuit logical OR.  The last
   * pair was previously joined with a bitwise OR, so require_fortran()
   * still ran after require_size() had already failed. */
  if (!array || !require_dimensions(array, 3) ||
      !require_size(array, size, 3) || !require_fortran(array)) SWIG_fail;
  $1 = (DATA_TYPE*) array_data(array);
  $2 = (DIM_TYPE) array_size(array,0);
  $3 = (DIM_TYPE) array_size(array,1);
  $4 = (DIM_TYPE) array_size(array,2);
}
%typemap(freearg)
  (DATA_TYPE* IN_FARRAY3, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3)
{
  /* Release the temporary array only if the conversion created one */
  if (is_new_object$argnum && array$argnum)
    { Py_DECREF(array$argnum); }
}

/* Typemap suite for (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3,
 *                    DATA_TYPE* IN_FARRAY3)
 *
 * Accepts an array or any Python sequence, converts it to a
 * Fortran-ordered three-dimensional array of any shape, and passes the
 * three extents followed by the data pointer.
 *
 * The conversion uses obj_to_array_fortran_allow_conversion(), the
 * same as the data-first IN_FARRAY3 suite.  It previously used the
 * contiguous conversion, which left require_fortran() to rewrite the
 * flags and strides of the array in place (possibly the caller's own
 * array) instead of producing a Fortran-ordered array.
 */
%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY,
           fragment="NumPy_Macros")
  (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DATA_TYPE* IN_FARRAY3)
{
  $1 = is_array($input) || PySequence_Check($input);
}
%typemap(in,
         fragment="NumPy_Fragments")
  (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DATA_TYPE* IN_FARRAY3)
  (PyArrayObject* array=NULL, int is_new_object=0)
{
  /* -1 means any extent is accepted */
  npy_intp size[3] = { -1, -1, -1 };
  array = obj_to_array_fortran_allow_conversion($input, DATA_TYPECODE,
                                                &is_new_object);
  if (!array || !require_dimensions(array, 3) ||
      !require_size(array, size, 3) || !require_fortran(array)) SWIG_fail;
  $1 = (DIM_TYPE) array_size(array,0);
  $2 = (DIM_TYPE) array_size(array,1);
  $3 = (DIM_TYPE) array_size(array,2);
  $4 = (DATA_TYPE*) array_data(array);
}
%typemap(freearg)
  (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DATA_TYPE* IN_FARRAY3)
{
  /* Release the temporary array only if the conversion created one */
  if (is_new_object$argnum && array$argnum)
    { Py_DECREF(array$argnum); }
}

/***************************/
/* In-Place Array Typemaps */
/***************************/

/* Typemap suite for (DATA_TYPE INPLACE_ARRAY1[ANY])
 *
 * Accepts only an existing array of an equivalent type; no conversion
 * is made.  The array must be one-dimensional, match the declared C
 * array bound, be contiguous and have native byte order.
 */
%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY,
           fragment="NumPy_Macros")
  (DATA_TYPE INPLACE_ARRAY1[ANY])
{
  $1 = (is_array($input) && PyArray_EquivTypenums(array_type($input),
                                                  DATA_TYPECODE)) ? 1 : 0;
}
%typemap(in,
         fragment="NumPy_Fragments")
  (DATA_TYPE INPLACE_ARRAY1[ANY])
  (PyArrayObject* array=NULL)
{
  npy_intp shape[1] = { $1_dim0 };
  array = obj_to_array_no_conversion($input, DATA_TYPECODE);
  if (!array) SWIG_fail;
  if (!require_dimensions(array,1)) SWIG_fail;
  if (!require_size(array, shape, 1)) SWIG_fail;
  if (!require_contiguous(array)) SWIG_fail;
  if (!require_native(array)) SWIG_fail;
  $1 = ($1_ltype) array_data(array);
}

/* Typemap suite for (DATA_TYPE* INPLACE_ARRAY1, DIM_TYPE DIM1)
 *
 * Accepts only an existing array of an equivalent type; no conversion
 * is made.  The array must be one-dimensional, contiguous and have
 * native byte order.  The length passed on is the product of the
 * array's extents.
 */
%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY,
           fragment="NumPy_Macros")
  (DATA_TYPE* INPLACE_ARRAY1, DIM_TYPE DIM1)
{
  $1 = (is_array($input) && PyArray_EquivTypenums(array_type($input),
                                                  DATA_TYPECODE)) ? 1 : 0;
}
%typemap(in,
         fragment="NumPy_Fragments")
  (DATA_TYPE* INPLACE_ARRAY1, DIM_TYPE DIM1)
  (PyArrayObject* array=NULL, int i=1)
{
  array = obj_to_array_no_conversion($input, DATA_TYPECODE);
  if (!array) SWIG_fail;
  if (!require_dimensions(array,1)) SWIG_fail;
  if (!require_contiguous(array)) SWIG_fail;
  if (!require_native(array)) SWIG_fail;
  $1 = (DATA_TYPE*) array_data(array);
  $2 = 1;
  i = 0;
  while (i < array_numdims(array))
  {
    $2 *= array_size(array,i);
    ++i;
  }
}

/* Typemap suite for (DIM_TYPE DIM1, DATA_TYPE* INPLACE_ARRAY1)
 *
 * In-place 1-D array: element count first, data pointer second.
 */
%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY,
           fragment="NumPy_Macros")
  (DIM_TYPE DIM1, DATA_TYPE* INPLACE_ARRAY1)
{
  /* Only a real array with an equivalent element type qualifies. */
  $1 = 0;
  if (is_array($input))
  {
    $1 = PyArray_EquivTypenums(array_type($input), DATA_TYPECODE) ? 1 : 0;
  }
}
%typemap(in,
         fragment="NumPy_Fragments")
  (DIM_TYPE DIM1, DATA_TYPE* INPLACE_ARRAY1)
  (PyArrayObject* array=NULL, int i=0)
{
  array = obj_to_array_no_conversion($input, DATA_TYPECODE);
  if (!array) SWIG_fail;
  if (!require_dimensions(array,1)) SWIG_fail;
  if (!require_contiguous(array)) SWIG_fail;
  if (!require_native(array)) SWIG_fail;
  /* The length argument is the product of every array extent. */
  $1 = 1;
  i = 0;
  while (i < array_numdims(array))
  {
    $1 *= array_size(array,i);
    ++i;
  }
  $2 = (DATA_TYPE*) array_data(array);
}

/* Typemap suite for (DATA_TYPE INPLACE_ARRAY2[ANY][ANY])
 *
 * In-place 2-D array whose extents are fixed by the C declaration.
 */
%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY,
           fragment="NumPy_Macros")
  (DATA_TYPE INPLACE_ARRAY2[ANY][ANY])
{
  /* Only a real array with an equivalent element type qualifies. */
  $1 = 0;
  if (is_array($input))
  {
    $1 = PyArray_EquivTypenums(array_type($input), DATA_TYPECODE) ? 1 : 0;
  }
}
%typemap(in,
         fragment="NumPy_Fragments")
  (DATA_TYPE INPLACE_ARRAY2[ANY][ANY])
  (PyArrayObject* array=NULL)
{
  /* Expected extents are taken from the declared C array bounds. */
  npy_intp size[2] = { $1_dim0, $1_dim1 };
  array = obj_to_array_no_conversion($input, DATA_TYPECODE);
  if (!array) SWIG_fail;
  if (!require_dimensions(array,2)) SWIG_fail;
  if (!require_size(array, size, 2)) SWIG_fail;
  if (!require_contiguous(array)) SWIG_fail;
  if (!require_native(array)) SWIG_fail;
  $1 = ($1_ltype) array_data(array);
}

/* Typemap suite for (DATA_TYPE* INPLACE_ARRAY2, DIM_TYPE DIM1, DIM_TYPE DIM2)
 *
 * In-place 2-D array: data pointer first, then the two extents.
 */
%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY,
           fragment="NumPy_Macros")
  (DATA_TYPE* INPLACE_ARRAY2, DIM_TYPE DIM1, DIM_TYPE DIM2)
{
  /* Only a real array with an equivalent element type qualifies. */
  $1 = 0;
  if (is_array($input))
  {
    $1 = PyArray_EquivTypenums(array_type($input), DATA_TYPECODE) ? 1 : 0;
  }
}
%typemap(in,
         fragment="NumPy_Fragments")
  (DATA_TYPE* INPLACE_ARRAY2, DIM_TYPE DIM1, DIM_TYPE DIM2)
  (PyArrayObject* array=NULL)
{
  array = obj_to_array_no_conversion($input, DATA_TYPECODE);
  if (!array) SWIG_fail;
  if (!require_dimensions(array,2)) SWIG_fail;
  if (!require_contiguous(array)) SWIG_fail;
  if (!require_native(array)) SWIG_fail;
  $1 = (DATA_TYPE*) array_data(array);
  $2 = (DIM_TYPE) array_size(array,0);
  $3 = (DIM_TYPE) array_size(array,1);
}

/* Typemap suite for (DIM_TYPE DIM1, DIM_TYPE DIM2, DATA_TYPE* INPLACE_ARRAY2)
 *
 * In-place 2-D array: the two extents first, then the data pointer.
 */
%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY,
           fragment="NumPy_Macros")
  (DIM_TYPE DIM1, DIM_TYPE DIM2, DATA_TYPE* INPLACE_ARRAY2)
{
  /* Only a real array with an equivalent element type qualifies. */
  $1 = 0;
  if (is_array($input))
  {
    $1 = PyArray_EquivTypenums(array_type($input), DATA_TYPECODE) ? 1 : 0;
  }
}
%typemap(in,
         fragment="NumPy_Fragments")
  (DIM_TYPE DIM1, DIM_TYPE DIM2, DATA_TYPE* INPLACE_ARRAY2)
  (PyArrayObject* array=NULL)
{
  array = obj_to_array_no_conversion($input, DATA_TYPECODE);
  if (!array) SWIG_fail;
  if (!require_dimensions(array,2)) SWIG_fail;
  if (!require_contiguous(array)) SWIG_fail;
  if (!require_native(array)) SWIG_fail;
  $1 = (DIM_TYPE) array_size(array,0);
  $2 = (DIM_TYPE) array_size(array,1);
  $3 = (DATA_TYPE*) array_data(array);
}

/* Typemap suite for (DATA_TYPE* INPLACE_FARRAY2, DIM_TYPE DIM1, DIM_TYPE DIM2)
 *
 * In-place 2-D array that must also pass require_fortran(): data pointer
 * first, then the two extents.
 */
%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY,
           fragment="NumPy_Macros")
  (DATA_TYPE* INPLACE_FARRAY2, DIM_TYPE DIM1, DIM_TYPE DIM2)
{
  /* Only a real array with an equivalent element type qualifies. */
  $1 = 0;
  if (is_array($input))
  {
    $1 = PyArray_EquivTypenums(array_type($input), DATA_TYPECODE) ? 1 : 0;
  }
}
%typemap(in,
         fragment="NumPy_Fragments")
  (DATA_TYPE* INPLACE_FARRAY2, DIM_TYPE DIM1, DIM_TYPE DIM2)
  (PyArrayObject* array=NULL)
{
  array = obj_to_array_no_conversion($input, DATA_TYPECODE);
  if (!array) SWIG_fail;
  if (!require_dimensions(array,2)) SWIG_fail;
  if (!require_contiguous(array)) SWIG_fail;
  if (!require_native(array)) SWIG_fail;
  if (!require_fortran(array)) SWIG_fail;
  $1 = (DATA_TYPE*) array_data(array);
  $2 = (DIM_TYPE) array_size(array,0);
  $3 = (DIM_TYPE) array_size(array,1);
}

/* Typemap suite for (DIM_TYPE DIM1, DIM_TYPE DIM2, DATA_TYPE* INPLACE_FARRAY2)
 *
 * In-place 2-D array that must also pass require_fortran(): the two extents
 * first, then the data pointer.
 */
%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY,
           fragment="NumPy_Macros")
  (DIM_TYPE DIM1, DIM_TYPE DIM2, DATA_TYPE* INPLACE_FARRAY2)
{
  /* Only a real array with an equivalent element type qualifies. */
  $1 = 0;
  if (is_array($input))
  {
    $1 = PyArray_EquivTypenums(array_type($input), DATA_TYPECODE) ? 1 : 0;
  }
}
%typemap(in,
         fragment="NumPy_Fragments")
  (DIM_TYPE DIM1, DIM_TYPE DIM2, DATA_TYPE* INPLACE_FARRAY2)
  (PyArrayObject* array=NULL)
{
  array = obj_to_array_no_conversion($input, DATA_TYPECODE);
  if (!array) SWIG_fail;
  if (!require_dimensions(array,2)) SWIG_fail;
  if (!require_contiguous(array)) SWIG_fail;
  if (!require_native(array)) SWIG_fail;
  if (!require_fortran(array)) SWIG_fail;
  $1 = (DIM_TYPE) array_size(array,0);
  $2 = (DIM_TYPE) array_size(array,1);
  $3 = (DATA_TYPE*) array_data(array);
}

/* Typemap suite for (DATA_TYPE INPLACE_ARRAY3[ANY][ANY][ANY])
 *
 * In-place 3-D array whose extents are fixed by the C declaration.
 */
%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY,
           fragment="NumPy_Macros")
  (DATA_TYPE INPLACE_ARRAY3[ANY][ANY][ANY])
{
  /* Only a real array with an equivalent element type qualifies. */
  $1 = 0;
  if (is_array($input))
  {
    $1 = PyArray_EquivTypenums(array_type($input), DATA_TYPECODE) ? 1 : 0;
  }
}
%typemap(in,
         fragment="NumPy_Fragments")
  (DATA_TYPE INPLACE_ARRAY3[ANY][ANY][ANY])
  (PyArrayObject* array=NULL)
{
  /* Expected extents are taken from the declared C array bounds. */
  npy_intp size[3] = { $1_dim0, $1_dim1, $1_dim2 };
  array = obj_to_array_no_conversion($input, DATA_TYPECODE);
  if (!array) SWIG_fail;
  if (!require_dimensions(array,3)) SWIG_fail;
  if (!require_size(array, size, 3)) SWIG_fail;
  if (!require_contiguous(array)) SWIG_fail;
  if (!require_native(array)) SWIG_fail;
  $1 = ($1_ltype) array_data(array);
}

/* Typemap suite for (DATA_TYPE* INPLACE_ARRAY3, DIM_TYPE DIM1, DIM_TYPE DIM2,
 *                    DIM_TYPE DIM3)
 *
 * In-place 3-D array: data pointer first, then the three extents.
 */
%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY,
           fragment="NumPy_Macros")
  (DATA_TYPE* INPLACE_ARRAY3, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3)
{
  /* Only a real array with an equivalent element type qualifies. */
  $1 = 0;
  if (is_array($input))
  {
    $1 = PyArray_EquivTypenums(array_type($input), DATA_TYPECODE) ? 1 : 0;
  }
}
%typemap(in,
         fragment="NumPy_Fragments")
  (DATA_TYPE* INPLACE_ARRAY3, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3)
  (PyArrayObject* array=NULL)
{
  array = obj_to_array_no_conversion($input, DATA_TYPECODE);
  if (!array) SWIG_fail;
  if (!require_dimensions(array,3)) SWIG_fail;
  if (!require_contiguous(array)) SWIG_fail;
  if (!require_native(array)) SWIG_fail;
  $1 = (DATA_TYPE*) array_data(array);
  $2 = (DIM_TYPE) array_size(array,0);
  $3 = (DIM_TYPE) array_size(array,1);
  $4 = (DIM_TYPE) array_size(array,2);
}

/* Typemap suite for (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3,
 *                    DATA_TYPE* INPLACE_ARRAY3)
 *
 * In-place 3-D array: the three extents first, then the data pointer.
 */
%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY,
           fragment="NumPy_Macros")
  (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DATA_TYPE* INPLACE_ARRAY3)
{
  /* Only a real array with an equivalent element type qualifies. */
  $1 = 0;
  if (is_array($input))
  {
    $1 = PyArray_EquivTypenums(array_type($input), DATA_TYPECODE) ? 1 : 0;
  }
}
%typemap(in,
         fragment="NumPy_Fragments")
  (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DATA_TYPE* INPLACE_ARRAY3)
  (PyArrayObject* array=NULL)
{
  array = obj_to_array_no_conversion($input, DATA_TYPECODE);
  if (!array) SWIG_fail;
  if (!require_dimensions(array,3)) SWIG_fail;
  if (!require_contiguous(array)) SWIG_fail;
  if (!require_native(array)) SWIG_fail;
  $1 = (DIM_TYPE) array_size(array,0);
  $2 = (DIM_TYPE) array_size(array,1);
  $3 = (DIM_TYPE) array_size(array,2);
  $4 = (DATA_TYPE*) array_data(array);
}

/* Typemap suite for (DATA_TYPE* INPLACE_FARRAY3, DIM_TYPE DIM1, DIM_TYPE DIM2,
 *                    DIM_TYPE DIM3)
 *
 * In-place 3-D array that must also pass require_fortran(): data pointer
 * first, then the three extents.
 */
%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY,
           fragment="NumPy_Macros")
  (DATA_TYPE* INPLACE_FARRAY3, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3)
{
  /* Only a real array with an equivalent element type qualifies. */
  $1 = 0;
  if (is_array($input))
  {
    $1 = PyArray_EquivTypenums(array_type($input), DATA_TYPECODE) ? 1 : 0;
  }
}
%typemap(in,
         fragment="NumPy_Fragments")
  (DATA_TYPE* INPLACE_FARRAY3, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3)
  (PyArrayObject* array=NULL)
{
  array = obj_to_array_no_conversion($input, DATA_TYPECODE);
  if (!array) SWIG_fail;
  if (!require_dimensions(array,3)) SWIG_fail;
  if (!require_contiguous(array)) SWIG_fail;
  if (!require_native(array)) SWIG_fail;
  if (!require_fortran(array)) SWIG_fail;
  $1 = (DATA_TYPE*) array_data(array);
  $2 = (DIM_TYPE) array_size(array,0);
  $3 = (DIM_TYPE) array_size(array,1);
  $4 = (DIM_TYPE) array_size(array,2);
}

/* Typemap suite for (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3,
 *                    DATA_TYPE* INPLACE_FARRAY3)
 *
 * In-place 3-D array that must also pass require_fortran(): the three
 * extents first, then the data pointer.
 */
%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY,
           fragment="NumPy_Macros")
  (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DATA_TYPE* INPLACE_FARRAY3)
{
  /* Only a real array with an equivalent element type qualifies. */
  $1 = 0;
  if (is_array($input))
  {
    $1 = PyArray_EquivTypenums(array_type($input), DATA_TYPECODE) ? 1 : 0;
  }
}
%typemap(in,
         fragment="NumPy_Fragments")
  (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DATA_TYPE* INPLACE_FARRAY3)
  (PyArrayObject* array=NULL)
{
  array = obj_to_array_no_conversion($input, DATA_TYPECODE);
  if (!array) SWIG_fail;
  if (!require_dimensions(array,3)) SWIG_fail;
  if (!require_contiguous(array)) SWIG_fail;
  if (!require_native(array)) SWIG_fail;
  if (!require_fortran(array)) SWIG_fail;
  $1 = (DIM_TYPE) array_size(array,0);
  $2 = (DIM_TYPE) array_size(array,1);
  $3 = (DIM_TYPE) array_size(array,2);
  $4 = (DATA_TYPE*) array_data(array);
}

/*************************/
/* Argout Array Typemaps */
/*************************/

/* Typemap suite for (DATA_TYPE ARGOUT_ARRAY1[ANY])
 *
 * Output 1-D array of fixed length: takes no Python argument; a fresh array
 * is allocated here and appended to the Python result afterwards.
 */
%typemap(in,numinputs=0,
         fragment="NumPy_Backward_Compatibility,NumPy_Macros")
  (DATA_TYPE ARGOUT_ARRAY1[ANY])
  (PyObject * array = NULL)
{
  npy_intp dims[1];
  dims[0] = $1_dim0;
  array = PyArray_SimpleNew(1, dims, DATA_TYPECODE);
  if (array == NULL) SWIG_fail;
  $1 = ($1_ltype) array_data(array);
}
%typemap(argout)
  (DATA_TYPE ARGOUT_ARRAY1[ANY])
{
  /* Hand the array allocated by the "in" typemap back to the caller. */
  $result = SWIG_Python_AppendOutput($result,array$argnum);
}

/* Typemap suite for (DATA_TYPE* ARGOUT_ARRAY1, DIM_TYPE DIM1)
 *
 * Output 1-D array whose length is the single Python argument: the integer
 * is read, an array of that length is allocated, and the array is appended
 * to the Python result afterwards.
 */
%typemap(in,numinputs=1,
         fragment="NumPy_Fragments")
  (DATA_TYPE* ARGOUT_ARRAY1, DIM_TYPE DIM1)
  (PyObject * array = NULL)
{
  npy_intp dims[1];
  /* Reject anything that is not a Python integer before reading it. */
  if (!PyInt_Check($input))
  {
    PyErr_Format(PyExc_TypeError,
                 "Int dimension expected.  '%s' given.",
                 pytype_string($input));
    SWIG_fail;
  }
  $2 = (DIM_TYPE) PyInt_AsLong($input);
  dims[0] = (npy_intp) $2;
  array = PyArray_SimpleNew(1, dims, DATA_TYPECODE);
  if (array == NULL) SWIG_fail;
  $1 = (DATA_TYPE*) array_data(array);
}
%typemap(argout)
  (DATA_TYPE* ARGOUT_ARRAY1, DIM_TYPE DIM1)
{
  /* Hand the array allocated by the "in" typemap back to the caller. */
  $result = SWIG_Python_AppendOutput($result,array$argnum);
}

/* Typemap suite for (DIM_TYPE DIM1, DATA_TYPE* ARGOUT_ARRAY1)
 *
 * Output 1-D array whose length is the single Python argument; same as the
 * data-first variant but with the length in the first C argument slot.
 */
%typemap(in,numinputs=1,
         fragment="NumPy_Fragments")
  (DIM_TYPE DIM1, DATA_TYPE* ARGOUT_ARRAY1)
  (PyObject * array = NULL)
{
  npy_intp dims[1];
  /* Reject anything that is not a Python integer before reading it. */
  if (!PyInt_Check($input))
  {
    PyErr_Format(PyExc_TypeError,
                 "Int dimension expected.  '%s' given.",
                 pytype_string($input));
    SWIG_fail;
  }
  $1 = (DIM_TYPE) PyInt_AsLong($input);
  dims[0] = (npy_intp) $1;
  array = PyArray_SimpleNew(1, dims, DATA_TYPECODE);
  if (array == NULL) SWIG_fail;
  $2 = (DATA_TYPE*) array_data(array);
}
%typemap(argout)
  (DIM_TYPE DIM1, DATA_TYPE* ARGOUT_ARRAY1)
{
  /* Hand the array allocated by the "in" typemap back to the caller. */
  $result = SWIG_Python_AppendOutput($result,array$argnum);
}

/* Typemap suite for (DATA_TYPE ARGOUT_ARRAY2[ANY][ANY])
 *
 * Output 2-D array with extents fixed by the C declaration: takes no Python
 * argument; a fresh array is allocated and appended to the result.
 */
%typemap(in,numinputs=0,
         fragment="NumPy_Backward_Compatibility,NumPy_Macros")
  (DATA_TYPE ARGOUT_ARRAY2[ANY][ANY])
  (PyObject * array = NULL)
{
  npy_intp dims[2];
  dims[0] = $1_dim0;
  dims[1] = $1_dim1;
  array = PyArray_SimpleNew(2, dims, DATA_TYPECODE);
  if (array == NULL) SWIG_fail;
  $1 = ($1_ltype) array_data(array);
}
%typemap(argout)
  (DATA_TYPE ARGOUT_ARRAY2[ANY][ANY])
{
  /* Hand the array allocated by the "in" typemap back to the caller. */
  $result = SWIG_Python_AppendOutput($result,array$argnum);
}

/* Typemap suite for (DATA_TYPE ARGOUT_ARRAY3[ANY][ANY][ANY])
 *
 * Output 3-D array with extents fixed by the C declaration: takes no Python
 * argument; a fresh array is allocated and appended to the result.
 */
%typemap(in,numinputs=0,
         fragment="NumPy_Backward_Compatibility,NumPy_Macros")
  (DATA_TYPE ARGOUT_ARRAY3[ANY][ANY][ANY])
  (PyObject * array = NULL)
{
  npy_intp dims[3];
  dims[0] = $1_dim0;
  dims[1] = $1_dim1;
  dims[2] = $1_dim2;
  array = PyArray_SimpleNew(3, dims, DATA_TYPECODE);
  if (array == NULL) SWIG_fail;
  $1 = ($1_ltype) array_data(array);
}
%typemap(argout)
  (DATA_TYPE ARGOUT_ARRAY3[ANY][ANY][ANY])
{
  /* Hand the array allocated by the "in" typemap back to the caller. */
  $result = SWIG_Python_AppendOutput($result,array$argnum);
}

/*****************************/
/* Argoutview Array Typemaps */
/*****************************/

/* Typemap suite for (DATA_TYPE** ARGOUTVIEW_ARRAY1, DIM_TYPE* DIM1)
 *
 * The wrapped function writes a data pointer and a length through the two
 * output arguments; the result is an array built around that pointer.
 */
%typemap(in,numinputs=0)
  (DATA_TYPE** ARGOUTVIEW_ARRAY1, DIM_TYPE* DIM1    )
  (DATA_TYPE*  data_temp        , DIM_TYPE  dim_temp)
{
  /* Point both output arguments at wrapper-local temporaries. */
  $1 = &data_temp;
  $2 = &dim_temp;
}
%typemap(argout,
         fragment="NumPy_Backward_Compatibility")
  (DATA_TYPE** ARGOUTVIEW_ARRAY1, DIM_TYPE* DIM1)
{
  npy_intp dims[1];
  PyObject * view;
  dims[0] = *$2;
  view = PyArray_SimpleNewFromData(1, dims, DATA_TYPECODE, (void*)(*$1));
  if (view == NULL) SWIG_fail;
  $result = SWIG_Python_AppendOutput($result,view);
}

/* Typemap suite for (DIM_TYPE* DIM1, DATA_TYPE** ARGOUTVIEW_ARRAY1)
 *
 * Same as the data-first 1-D view suite, with the length output argument
 * ahead of the data output argument.
 */
%typemap(in,numinputs=0)
  (DIM_TYPE* DIM1    , DATA_TYPE** ARGOUTVIEW_ARRAY1)
  (DIM_TYPE  dim_temp, DATA_TYPE*  data_temp        )
{
  /* Point both output arguments at wrapper-local temporaries. */
  $1 = &dim_temp;
  $2 = &data_temp;
}
%typemap(argout,
         fragment="NumPy_Backward_Compatibility")
  (DIM_TYPE* DIM1, DATA_TYPE** ARGOUTVIEW_ARRAY1)
{
  npy_intp dims[1];
  PyObject * view;
  dims[0] = *$1;
  view = PyArray_SimpleNewFromData(1, dims, DATA_TYPECODE, (void*)(*$2));
  if (view == NULL) SWIG_fail;
  $result = SWIG_Python_AppendOutput($result,view);
}

/* Typemap suite for (DATA_TYPE** ARGOUTVIEW_ARRAY2, DIM_TYPE* DIM1, DIM_TYPE* DIM2)
 *
 * The wrapped function writes a data pointer and two extents through the
 * output arguments; the result is a 2-D array built around that pointer.
 */
%typemap(in,numinputs=0)
  (DATA_TYPE** ARGOUTVIEW_ARRAY2, DIM_TYPE* DIM1     , DIM_TYPE* DIM2     )
  (DATA_TYPE*  data_temp        , DIM_TYPE  dim1_temp, DIM_TYPE  dim2_temp)
{
  /* Point every output argument at a wrapper-local temporary. */
  $1 = &data_temp;
  $2 = &dim1_temp;
  $3 = &dim2_temp;
}
%typemap(argout,
         fragment="NumPy_Backward_Compatibility")
  (DATA_TYPE** ARGOUTVIEW_ARRAY2, DIM_TYPE* DIM1, DIM_TYPE* DIM2)
{
  npy_intp dims[2];
  PyObject * view;
  dims[0] = *$2;
  dims[1] = *$3;
  view = PyArray_SimpleNewFromData(2, dims, DATA_TYPECODE, (void*)(*$1));
  if (view == NULL) SWIG_fail;
  $result = SWIG_Python_AppendOutput($result,view);
}

/* Typemap suite for (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DATA_TYPE** ARGOUTVIEW_ARRAY2)
 *
 * Same as the data-first 2-D view suite, with the two extent output
 * arguments ahead of the data output argument.
 */
%typemap(in,numinputs=0)
  (DIM_TYPE* DIM1     , DIM_TYPE* DIM2     , DATA_TYPE** ARGOUTVIEW_ARRAY2)
  (DIM_TYPE  dim1_temp, DIM_TYPE  dim2_temp, DATA_TYPE*  data_temp        )
{
  /* Point every output argument at a wrapper-local temporary. */
  $1 = &dim1_temp;
  $2 = &dim2_temp;
  $3 = &data_temp;
}
%typemap(argout,
         fragment="NumPy_Backward_Compatibility")
  (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DATA_TYPE** ARGOUTVIEW_ARRAY2)
{
  npy_intp dims[2];
  PyObject * view;
  dims[0] = *$1;
  dims[1] = *$2;
  view = PyArray_SimpleNewFromData(2, dims, DATA_TYPECODE, (void*)(*$3));
  if (view == NULL) SWIG_fail;
  $result = SWIG_Python_AppendOutput($result,view);
}

/* Typemap suite for (DATA_TYPE** ARGOUTVIEW_FARRAY2, DIM_TYPE* DIM1, DIM_TYPE* DIM2)
 *
 * 2-D view suite whose resulting array is additionally passed through
 * require_fortran() before being returned.
 */
%typemap(in,numinputs=0)
  (DATA_TYPE** ARGOUTVIEW_FARRAY2, DIM_TYPE* DIM1     , DIM_TYPE* DIM2     )
  (DATA_TYPE*  data_temp        , DIM_TYPE  dim1_temp, DIM_TYPE  dim2_temp)
{
  /* Point every output argument at a wrapper-local temporary. */
  $1 = &data_temp;
  $2 = &dim1_temp;
  $3 = &dim2_temp;
}
%typemap(argout,
         fragment="NumPy_Backward_Compatibility,NumPy_Array_Requirements")
  (DATA_TYPE** ARGOUTVIEW_FARRAY2, DIM_TYPE* DIM1, DIM_TYPE* DIM2)
{
  npy_intp dims[2];
  PyObject * obj;
  PyArrayObject * array;
  dims[0] = *$2;
  dims[1] = *$3;
  obj = PyArray_SimpleNewFromData(2, dims, DATA_TYPECODE, (void*)(*$1));
  array = (PyArrayObject*) obj;
  if (!array) SWIG_fail;
  if (!require_fortran(array)) SWIG_fail;
  $result = SWIG_Python_AppendOutput($result,obj);
}

/* Typemap suite for (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DATA_TYPE** ARGOUTVIEW_FARRAY2)
 *
 * Same as the data-first FARRAY2 view suite, with the two extent output
 * arguments ahead of the data output argument.
 */
%typemap(in,numinputs=0)
  (DIM_TYPE* DIM1     , DIM_TYPE* DIM2     , DATA_TYPE** ARGOUTVIEW_FARRAY2)
  (DIM_TYPE  dim1_temp, DIM_TYPE  dim2_temp, DATA_TYPE*  data_temp        )
{
  /* Point every output argument at a wrapper-local temporary. */
  $1 = &dim1_temp;
  $2 = &dim2_temp;
  $3 = &data_temp;
}
%typemap(argout,
         fragment="NumPy_Backward_Compatibility,NumPy_Array_Requirements")
  (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DATA_TYPE** ARGOUTVIEW_FARRAY2)
{
  npy_intp dims[2];
  PyObject * obj;
  PyArrayObject * array;
  dims[0] = *$1;
  dims[1] = *$2;
  obj = PyArray_SimpleNewFromData(2, dims, DATA_TYPECODE, (void*)(*$3));
  array = (PyArrayObject*) obj;
  if (!array) SWIG_fail;
  if (!require_fortran(array)) SWIG_fail;
  $result = SWIG_Python_AppendOutput($result,obj);
}

/* Typemap suite for (DATA_TYPE** ARGOUTVIEW_ARRAY3, DIM_TYPE* DIM1, DIM_TYPE* DIM2,
                      DIM_TYPE* DIM3)
 *
 * The wrapped function writes a data pointer and three extents through the
 * output arguments; the result is a 3-D array built around that pointer.
 */
%typemap(in,numinputs=0)
  (DATA_TYPE** ARGOUTVIEW_ARRAY3, DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3)
  (DATA_TYPE* data_temp, DIM_TYPE dim1_temp, DIM_TYPE dim2_temp, DIM_TYPE dim3_temp)
{
  /* Point every output argument at a wrapper-local temporary. */
  $1 = &data_temp;
  $2 = &dim1_temp;
  $3 = &dim2_temp;
  $4 = &dim3_temp;
}
%typemap(argout,
         fragment="NumPy_Backward_Compatibility")
  (DATA_TYPE** ARGOUTVIEW_ARRAY3, DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3)
{
  npy_intp dims[3];
  PyObject * view;
  dims[0] = *$2;
  dims[1] = *$3;
  dims[2] = *$4;
  view = PyArray_SimpleNewFromData(3, dims, DATA_TYPECODE, (void*)(*$1));
  if (view == NULL) SWIG_fail;
  $result = SWIG_Python_AppendOutput($result,view);
}

/* Typemap suite for (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3,
                      DATA_TYPE** ARGOUTVIEW_ARRAY3)
 *
 * The wrapped function writes three extents and a data pointer through the
 * output arguments; the result is a 3-D array built around that pointer.
 * Argument slots: $1..$3 are the extent pointers, $4 is the data pointer.
 */
%typemap(in,numinputs=0)
  (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3, DATA_TYPE** ARGOUTVIEW_ARRAY3)
  (DIM_TYPE dim1_temp, DIM_TYPE dim2_temp, DIM_TYPE dim3_temp, DATA_TYPE* data_temp)
{
  /* Point every output argument at a wrapper-local temporary. */
  $1 = &dim1_temp;
  $2 = &dim2_temp;
  $3 = &dim3_temp;
  $4 = &data_temp;
}
%typemap(argout,
         fragment="NumPy_Backward_Compatibility")
  (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3, DATA_TYPE** ARGOUTVIEW_ARRAY3)
{
  npy_intp dims[3] = { *$1, *$2, *$3 };
  /* The buffer is the FOURTH argument; $3 is the DIM3 pointer, and wrapping
   * it would expose the third extent's storage as array data. */
  PyObject * array = PyArray_SimpleNewFromData(3, dims, DATA_TYPECODE, (void*)(*$4));
  if (!array) SWIG_fail;
  $result = SWIG_Python_AppendOutput($result,array);
}

/* Typemap suite for (DATA_TYPE** ARGOUTVIEW_FARRAY3, DIM_TYPE* DIM1, DIM_TYPE* DIM2,
                      DIM_TYPE* DIM3)
 *
 * 3-D view suite whose resulting array is additionally passed through
 * require_fortran() before being returned.
 * Argument slots: $1 is the data pointer, $2..$4 are the extent pointers.
 */
%typemap(in,numinputs=0)
  (DATA_TYPE** ARGOUTVIEW_FARRAY3, DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3)
  (DATA_TYPE* data_temp, DIM_TYPE dim1_temp, DIM_TYPE dim2_temp, DIM_TYPE dim3_temp)
{
  /* Point every output argument at a wrapper-local temporary. */
  $1 = &data_temp;
  $2 = &dim1_temp;
  $3 = &dim2_temp;
  $4 = &dim3_temp;
}
%typemap(argout,
         fragment="NumPy_Backward_Compatibility,NumPy_Array_Requirements")
  (DATA_TYPE** ARGOUTVIEW_FARRAY3, DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3)
{
  npy_intp dims[3] = { *$2, *$3, *$4 };
  PyObject * obj = PyArray_SimpleNewFromData(3, dims, DATA_TYPECODE, (void*)(*$1));
  PyArrayObject * array = (PyArrayObject*) obj;
  /* require_fortran() is treated as "nonzero on success" by every other
   * typemap in this file, so failure is the negated result (matching the
   * FARRAY2 suites); without the negation this typemap fails on success. */
  if (!array || !require_fortran(array)) SWIG_fail;
  $result = SWIG_Python_AppendOutput($result,obj);
}

/* Typemap suite for (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3,
                      DATA_TYPE** ARGOUTVIEW_FARRAY3)
 *
 * 3-D view suite (extents first) whose resulting array is additionally
 * passed through require_fortran() before being returned.
 * Argument slots: $1..$3 are the extent pointers, $4 is the data pointer.
 */
%typemap(in,numinputs=0)
  (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3, DATA_TYPE** ARGOUTVIEW_FARRAY3)
  (DIM_TYPE dim1_temp, DIM_TYPE dim2_temp, DIM_TYPE dim3_temp, DATA_TYPE* data_temp)
{
  /* Point every output argument at a wrapper-local temporary. */
  $1 = &dim1_temp;
  $2 = &dim2_temp;
  $3 = &dim3_temp;
  $4 = &data_temp;
}
%typemap(argout,
         fragment="NumPy_Backward_Compatibility,NumPy_Array_Requirements")
  (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3, DATA_TYPE** ARGOUTVIEW_FARRAY3)
{
  npy_intp dims[3] = { *$1, *$2, *$3 };
  /* The buffer is the FOURTH argument; $3 is the DIM3 pointer. */
  PyObject * obj = PyArray_SimpleNewFromData(3, dims, DATA_TYPECODE, (void*)(*$4));
  PyArrayObject * array = (PyArrayObject*) obj;
  /* require_fortran() is treated as "nonzero on success" by every other
   * typemap in this file, so failure is the negated result (matching the
   * FARRAY2 suites); without the negation this typemap fails on success. */
  if (!array || !require_fortran(array)) SWIG_fail;
  $result = SWIG_Python_AppendOutput($result,obj);
}

%enddef    /* %numpy_typemaps() macro */
/* *************************************************************** */

/* Concrete instances of the %numpy_typemaps() macro: Each invocation
 * below applies all of the typemaps above to the specified data type.
 *
 * The three arguments of each invocation are the C element type, the
 * matching NumPy type code, and the C type used for dimension arguments;
 * the typemap bodies refer to them as DATA_TYPE, DATA_TYPECODE and
 * DIM_TYPE.  Every instance here uses int for the dimensions.
 */
%numpy_typemaps(signed char       , NPY_BYTE     , int)
%numpy_typemaps(unsigned char     , NPY_UBYTE    , int)
%numpy_typemaps(short             , NPY_SHORT    , int)
%numpy_typemaps(unsigned short    , NPY_USHORT   , int)
%numpy_typemaps(int               , NPY_INT      , int)
%numpy_typemaps(unsigned int      , NPY_UINT     , int)
%numpy_typemaps(long              , NPY_LONG     , int)
%numpy_typemaps(unsigned long     , NPY_ULONG    , int)
%numpy_typemaps(long long         , NPY_LONGLONG , int)
%numpy_typemaps(unsigned long long, NPY_ULONGLONG, int)
%numpy_typemaps(float             , NPY_FLOAT    , int)
%numpy_typemaps(double            , NPY_DOUBLE   , int)

/* ***************************************************************
 * The following macro expansion does not work, because C++ bool is 4
 * bytes and NPY_BOOL is 1 byte
 *
 *    %numpy_typemaps(bool, NPY_BOOL, int)
 */

/* ***************************************************************
 * On my Mac, I get the following warning for this macro expansion:
 * 'swig/python detected a memory leak of type 'long double *', no destructor found.'
 *
 *    %numpy_typemaps(long double, NPY_LONGDOUBLE, int)
 */

/* ***************************************************************
 * Swig complains about a syntax error for the following macro
 * expansions:
 *
 *    %numpy_typemaps(complex float,  NPY_CFLOAT , int)
 *
 *    %numpy_typemaps(complex double, NPY_CDOUBLE, int)
 *
 *    %numpy_typemaps(complex long double, NPY_CLONGDOUBLE, int)
 */

#endif /* SWIGPYTHON */


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/python/rwt.i
================================================
/* Emitted at the very top of the generated Python module. */
%pythonbegin %{
from __future__ import division
%}

/* Text used as the docstring of the generated Python module. */
%define MODDOCSTRING
"The Rice Wavelet Toolbox (RWT) is a collection of functions for 1D and 2D
wavelet and filter bank design, analysis, and processing."
%enddef

%module(docstring=MODDOCSTRING) rwt

/* The C functions for the transforms are not suitable for direct use from python so let's rename them. */

/* The public Python names dwt/idwt/rdwt/irdwt are defined further down in
 * the %pythoncode section of this file. */
%rename(_c_dwt)     dwt;
%rename(_c_idwt)   idwt;
%rename(_c_rdwt)   rdwt;
%rename(_c_irdwt) irdwt;

/* Level helpers are exposed under private names; the Python helper
 * _levels() calls them as _find_levels and _check_levels. */
%rename(_find_levels) rwt_find_levels;
%rename(_check_levels) rwt_check_levels;

/* Code copied verbatim into the generated C wrapper. */
%{
  #define SWIG_FILE_WITH_INIT
  #include "../lib/inc/rwt_transforms.h"
  #include "../lib/inc/rwt_init.h"
%}

/* Only rwt_init.h is parsed by SWIG for wrapping; numpy.i supplies the
 * array typemaps used by the declarations below. */
%include "../lib/inc/rwt_init.h"
%include "numpy.i"

/* import_array() sets up the NumPy C API when the extension is loaded. */
%init %{
  import_array();
%}

/* Building on the numpy SWIG macros we make wrapper functions for 1D and 2D for each transform */

/* The parameter names below are significant: each (double* INPLACE_ARRAYn,
 * int DIM1[, int DIM2]) group matches a multi-argument numpy.i typemap and
 * therefore becomes a single NumPy array argument on the Python side.  The
 * groups are, in order: signal, scaling filter, (plain int levels), then one
 * output array for dwt/idwt or two (lowpass, highpass) for rdwt/irdwt.
 * From Python the calls look like _c_dwt_1(x, h, L, y) and
 * _c_rdwt_1(x, h, L, yl, yh). */
void _c_dwt_1(  double* INPLACE_ARRAY1, int DIM1,           double* INPLACE_ARRAY1, int DIM1, int levels, double* INPLACE_ARRAY1, int DIM1);
void _c_dwt_2(  double* INPLACE_ARRAY2, int DIM1, int DIM2, double* INPLACE_ARRAY1, int DIM1, int levels, double* INPLACE_ARRAY2, int DIM1, int DIM2);
void _c_idwt_1( double* INPLACE_ARRAY1, int DIM1,           double* INPLACE_ARRAY1, int DIM1, int levels, double* INPLACE_ARRAY1, int DIM1);
void _c_idwt_2( double* INPLACE_ARRAY2, int DIM1, int DIM2, double* INPLACE_ARRAY1, int DIM1, int levels, double* INPLACE_ARRAY2, int DIM1, int DIM2);
void _c_rdwt_1( double* INPLACE_ARRAY1, int DIM1,           double* INPLACE_ARRAY1, int DIM1, int levels, double* INPLACE_ARRAY1, int DIM1,           double* INPLACE_ARRAY1, int DIM1);
void _c_rdwt_2( double* INPLACE_ARRAY2, int DIM1, int DIM2, double* INPLACE_ARRAY1, int DIM1, int levels, double* INPLACE_ARRAY2, int DIM1, int DIM2, double* INPLACE_ARRAY2, int DIM1, int DIM2);
void _c_irdwt_1(double* INPLACE_ARRAY1, int DIM1,           double* INPLACE_ARRAY1, int DIM1, int levels, double* INPLACE_ARRAY1, int DIM1,           double* INPLACE_ARRAY1, int DIM1);
void _c_irdwt_2(double* INPLACE_ARRAY2, int DIM1, int DIM2, double* INPLACE_ARRAY1, int DIM1, int levels, double* INPLACE_ARRAY2, int DIM1, int DIM2, double* INPLACE_ARRAY2, int DIM1, int DIM2);

%inline %{

/*
 * Thin forwarders from the typemap-friendly signatures declared above to the
 * C transform routines.  The "tossN" parameters receive the dimensions of
 * the output arrays and are ignored: only the dimensions of x are passed on
 * to the library, so the output buffers are NOT checked against them here.
 * The 1-D variants pass 1 as the column count.
 */

/* 1-D forward DWT. */
void _c_dwt_1(double *x, int nrows, double *h, int ncoeff, int levels, double *y, int toss1) {
  dwt(x, nrows, 1, h, ncoeff, levels, y);
}

/* 1-D inverse DWT. */
void _c_idwt_1(double *x, int nrows, double *h, int ncoeff, int levels, double *y, int toss1) {
  idwt(x, nrows, 1, h, ncoeff, levels, y);
}

/* 1-D redundant DWT; yl and yh are the lowpass and highpass buffers. */
void _c_rdwt_1(double *x, int nrows, double *h, int ncoeff, int levels, double *yl, int toss1, double *yh, int toss2) {
  rdwt(x, nrows, 1, h, ncoeff, levels, yl, yh);
}

/* 1-D inverse redundant DWT. */
void _c_irdwt_1(double *x, int nrows, double *h, int ncoeff, int levels, double *yl, int toss1, double *yh, int toss2) {
  irdwt(x, nrows, 1, h, ncoeff, levels, yl, yh);
}

/* 2-D forward DWT. */
void _c_dwt_2(double *x, int nrows, int ncols, double *h, int ncoeff, int levels, double *y, int toss1, int toss2) {
  dwt(x, nrows, ncols, h, ncoeff, levels, y);
}

/* 2-D inverse DWT. */
void _c_idwt_2(double *x, int nrows, int ncols, double *h, int ncoeff, int levels, double *y, int toss1, int toss2) {
  idwt(x, nrows, ncols, h, ncoeff, levels, y);
}

/* 2-D redundant DWT. */
void _c_rdwt_2(double *x, int nrows, int ncols, double *h, int ncoeff, int levels, double *yl, int toss1, int toss2, double *yh, int toss3, int toss4) {
  rdwt(x, nrows, ncols, h, ncoeff, levels, yl, yh);
}

/* 2-D inverse redundant DWT. */
void _c_irdwt_2(double *x, int nrows, int ncols, double *h, int ncoeff, int levels, double *yl, int toss1, int toss2, double *yh, int toss3, int toss4) {
  irdwt(x, nrows, ncols, h, ncoeff, levels, yl, yh);
}

%}

%pythoncode %{

import numpy as np

def _levels(x, L):
  """
Resolve and validate the number of decomposition levels for signal x.

A 2D input contributes both of its extents; any other input is treated as
having a single column. L == 0 means "not specified", in which case the
value returned by _find_levels is used. The chosen value is always passed
through _check_levels before being returned.
  """
  shape = x.shape
  rows = shape[0]
  cols = shape[1] if (len(shape) == 2) else 1
  if (L == 0):
    L = _find_levels(rows, cols)
  _check_levels(L, rows, cols)
  return L

def dwt(x, h, L = 0):
  """
Function computes the discrete wavelet transform y for a 1D or 2D input
signal x using the scaling filter h.

Input:
   x : finite length 1D or 2D signal (implicitly periodized). Real input
       of any dtype is converted to float64 before the transform.
   h : scaling filter
   L : number of levels. In the case of a 1D signal, length(x) must be
       divisible by 2^L; in the case of a 2D signal, the row and the
       column dimension must be divisible by 2^L. If no argument is
       specified, a full DWT is returned for maximal possible L.

Output:
   y : the wavelet transform of the signal 
       (see example to understand the coefficients)
   L : number of decomposition levels

Raises:
   ValueError : if x is neither 1D nor 2D

1D Example:
   x = makesig('LinChirp', 8)
   h = daubcqf(4, 'min')[0]
   L = 2
   y,L = dwt(x,h,L)

1D Example's output and explanation:

   y = array([1.1097,0.8767,0.8204,-0.5201,-0.0339,0.1001,0.2201,-0.1401])
   L = 2

The coefficients in output y are arranged as follows

   y(0) and y(1) : Scaling coefficients (lowest frequency)
   y(2) and y(3) : Band pass wavelet coefficients
   y(4) to y(7)  : Finest scale wavelet coefficients (highest frequency)

2D Example:

   load test_image        
   h = daubcqf(4,'min')[0]
   L = 1
   y,L = dwt(test_image,h,L)

2D Example's output and explanation:

   The coefficients in y are arranged as follows.

          .------------------.
          |         |        |
          |    4    |   2    |
          |         |        |
          |   L,L   |   H,L  |
          |         |        |
          --------------------
          |         |        |
          |    3    |   1    |
          |         |        |
          |   L,H   |  H,H   |
          |         |        |
          `------------------'
   
   where 
        1 : High pass vertically and high pass horizontally
        2 : Low pass vertically and high pass horizontally
        3 : High pass vertically and low  pass horizontally
        4 : Low pass vertically and Low pass horizontally 
            (scaling coefficients)
  """
  dim = len(x.shape)
  # Neither wrapper would run for other ranks and an all-zero array would
  # be returned silently, so reject them explicitly.
  if (dim != 1 and dim != 2):
    raise ValueError("dwt requires a 1D or 2D input signal")
  # "x * 1.0" keeps float32 as float32, which the double-only wrapper then
  # rejects. Convert explicitly; complex input is left untouched so that it
  # is still rejected by the wrapper rather than silently truncated.
  if (x.dtype != np.float64 and not np.iscomplexobj(x)):
    x = x.astype(np.float64)
  L = _levels(x, L)
  x = np.ascontiguousarray(x)
  y = np.zeros(x.shape)
  if (dim == 1):
    _rwt._c_dwt_1(x, h, L, y)
  else:
    _rwt._c_dwt_2(x, h, L, y)
  return y, L

def idwt(y, h, L = 0):
  """
Function computes the inverse discrete wavelet transform x for a 1D or
2D input signal y using the scaling filter h.

Input:
   y : finite length 1D or 2D input signal (implicitly periodized)
       (see function dwt to find the structure of y). Real input of any
       dtype is converted to float64 before the transform.
   h : scaling filter
   L : number of levels. In the case of a 1D signal, length(x) must be
       divisible by 2^L; in the case of a 2D signal, the row and the
       column dimension must be divisible by 2^L.  If no argument is
       specified, a full inverse DWT is returned for maximal possible
       L.

Output:
   x : periodic reconstructed signal
   L : number of decomposition levels

Raises:
   ValueError : if y is neither 1D nor 2D

1D Example:
   xin = makesig('LinChirp', 8)
   h = daubcqf(4, 'min')[0]
   L = 1
   y, L = dwt(xin, h, L)
   x, L = idwt(y, h, L)

1D Example's output:

   x = array([0.0491,0.1951,0.4276,0.7071,0.9415,0.9808,0.6716,0.0000])
   L = 1
  """
  dim = len(y.shape)
  # Neither wrapper would run for other ranks and an all-zero array would
  # be returned silently, so reject them explicitly.
  if (dim != 1 and dim != 2):
    raise ValueError("idwt requires a 1D or 2D input signal")
  # "y * 1.0" keeps float32 as float32, which the double-only wrapper then
  # rejects. Convert explicitly; complex input is left untouched so that it
  # is still rejected by the wrapper rather than silently truncated.
  if (y.dtype != np.float64 and not np.iscomplexobj(y)):
    y = y.astype(np.float64)
  L = _levels(y, L)
  y = np.ascontiguousarray(y)
  x = np.zeros(y.shape)
  if (dim == 1):
    _rwt._c_idwt_1(x, h, L, y)
  else:
    _rwt._c_idwt_2(x, h, L, y)
  return x, L

def rdwt(x, h, L = 0):
  """
Function computes the redundant discrete wavelet transform y
for a 1D  or 2D input signal. (Redundant means here that the
sub-sampling after each stage is omitted.) yl contains the
lowpass and yh the highpass components. In the case of a 2D
signal, the ordering in yh is 
[lh hl hh lh hl ... ] (first letter refers to row, second to
column filtering). 

Input:
   x : finite length 1D or 2D signal (implicitly periodized). Real
       input of any dtype is converted to float64 before the transform.
   h : scaling filter
   L : number of levels. In the case of a 1D signal,
       length(x) must be  divisible by 2^L;
       in the case of a 2D signal, the row and the
       column dimension must be divisible by 2^L.
       If no argument is
       specified, a full DWT is returned for maximal possible L.

Output:
   yl : lowpass component (same shape as x)
   yh : highpass components; length(x)*L values for a 1D signal, and
        rows x (3*L*columns) for a 2D signal
   L  : number of levels

Raises:
   ValueError : if x is neither 1D nor 2D

Example:
  x = makesig('Leopold', 8)
  h = daubcqf(4, 'min')[0]
  L = 1
  yl, yh, L = rdwt(x,h,L)

Example's output:
  yl =  0.8365  0.4830 0 0 0 0 -0.1294 0.2241
  yh = -0.2241 -0.1294 0 0 0 0 -0.4830 0.8365
  L = 1
  """
  dim = len(x.shape)
  # Other ranks used to fall through both branches and fail at "return"
  # with yh unbound; report the real problem instead.
  if (dim != 1 and dim != 2):
    raise ValueError("rdwt requires a 1D or 2D input signal")
  # "x * 1.0" keeps float32 as float32, which the double-only wrapper then
  # rejects. Convert explicitly; complex input is left untouched so that it
  # is still rejected by the wrapper rather than silently truncated.
  if (x.dtype != np.float64 and not np.iscomplexobj(x)):
    x = x.astype(np.float64)
  L = _levels(x, L)
  x = np.ascontiguousarray(x)
  yl = np.zeros(x.shape)
  if (dim == 1):
    yh = np.zeros(x.shape[0] * L)
    _rwt._c_rdwt_1(x, h, L, yl, yh)
  else:
    # Three highpass bands (lh, hl, hh) per level, laid out side by side.
    yh = np.zeros((x.shape[0], x.shape[1] * L * 3))
    _rwt._c_rdwt_2(x, h, L, yl, yh)
  return yl, yh, L

def irdwt(yl, yh, h, L = 0):
  """
Function computes the inverse redundant discrete wavelet
transform x  for a 1D or 2D input signal. (Redundant means here
that the sub-sampling after each stage of the forward transform
has been omitted.) yl contains the lowpass and yh the highpass
components as computed, e.g., by rdwt. In the case of a 2D
signal, the ordering in
yh is [lh hl hh lh hl ... ] (first letter refers to row, second
to column filtering).  

Input:
   yl : lowpass component
   yh : highpass components; must hold length(yl)*L values for a 1D
        signal and 3*L times as many values as yl for a 2D signal
   h  : scaling filter
   L  : number of levels. In the case of a 1D signal, 
        length(yl) must  be divisible by 2^L;
        in the case of a 2D signal, the row and
        the column dimension must be divisible by 2^L.

Output:
        x : finite length 1D or 2D signal
        L : number of levels

Raises:
   ValueError : if yl is neither 1D nor 2D, or if the size of yh does
                not match yl and L

Example:
  xin = makesig('Leopold', 8)
  h = daubcqf(4, 'min')[0]
  L = 1
  yl, yh, L = rdwt(xin, h, L)
  x, L = irdwt(yl, yh, h, L)

Example Output:
  x = array([0.0000,1.0000,0.0000,-0.0000,0,0,0,-0.0000])
  L = 1
  """
  dim = len(yl.shape)
  # Neither wrapper would run for other ranks and an all-zero array would
  # be returned silently, so reject them explicitly.
  if (dim != 1 and dim != 2):
    raise ValueError("irdwt requires a 1D or 2D lowpass component")
  # "a * 1.0" keeps float32 as float32, which the double-only wrapper then
  # rejects. Convert explicitly; complex input is left untouched so that it
  # is still rejected by the wrapper rather than silently truncated.
  if (yl.dtype != np.float64 and not np.iscomplexobj(yl)):
    yl = yl.astype(np.float64)
  if (yh.dtype != np.float64 and not np.iscomplexobj(yh)):
    yh = yh.astype(np.float64)
  L = _levels(yl, L)
  # The C routine is only told the dimensions of x, so a highpass buffer of
  # the wrong size would be read out of bounds. rdwt produces L bands per
  # sample in 1D and 3*L bands per sample in 2D.
  bands = L if (dim == 1) else 3 * L
  if (yh.size != yl.size * bands):
    raise ValueError("size of yh is inconsistent with yl and L")
  yl = np.ascontiguousarray(yl)
  yh = np.ascontiguousarray(yh)
  x = np.zeros(yl.shape)
  if (dim == 1):
    _rwt._c_irdwt_1(x, h, L, yl, yh)
  else:
    _rwt._c_irdwt_2(x, h, L, yl, yh)
  return x, L

def daubcqf(n, dtype = 'min'):
  """
Function computes the Daubechies' scaling and wavelet filters
(normalized to sqrt(2)).

Input: 
   n     : Length of filter (must be even)
   dtype : Optional parameter that distinguishes the minimum phase,
           maximum phase and mid-phase solutions ('min', 'max', or
           'mid'). If no argument is specified, the minimum phase
           solution is used.

Output: 
   h_0 : Minimal phase Daubechies' scaling filter 
   h_1 : Minimal phase Daubechies' wavelet filter 

Example:
   n = 4
   dtype = 'min'
   h_0, h_1 = daubcqf(n, dtype)

Example Result:
   h_0 = array([0.4830, 0.8365, 0.2241, -0.1294])
   h_1 = array([0.1294, 0.2241, -0.8365, 0.4830])

Reference: \"Orthonormal Bases of Compactly Supported Wavelets\",
            CPAM, Oct.89 
  """
  if (n % 2 != 0):
    raise Exception("No Daubechies filter exists for ODD length")
  k = n // 2
  a = p = q = 1
  h_0 = np.array([1, 1])
  for j in range(1, k):
    a = -a * 0.25 * (j + k - 1) / j
    h_0 = np.hstack((0, h_0)) + np.hstack((h_0, 0))
    p = np.hstack((0, -p)) + np.hstack((p, 0))
    p = np.hstack((0, -p)) + np.hstack((p, 0))
    q = np.hstack((0, q, 0)) + a*p
  q = np.sort(np.roots(q))
  qt = q[0:k-1]
  if (dtype == 'mid'):
    if (k % 2 == 1):
      qt = np.hstack((q[0:n-2:4], q[1:n-2:4]))
    else:
      qt = np.hstack((q[0], q[3:k-1:4], q[4:k-1:4], q[n-4:k:-4],
                      q[n-5:k:-4]))
  h_0 = np.convolve(h_0, np.real(np.poly(qt)))
  h_0 = np.sqrt(2)*h_0 / sum(h_0)
  if (dtype == 'max'):
    h_0 = np.flipud(h_0)
  if (np.abs(sum(np.power(h_0, 2))) -1 > 1e-4):
    raise Exception("Numerically unstable for this value of n")
  h_1 = np.copy(np.flipud(h_0))
  h_1[0:n-1:2] = -h_1[0:n-1:2]
  return h_0, h_1

def hard_th(y, thld):
  """
Hard thresholds the input signal y with the threshold value thld:
entries whose magnitude is strictly greater than thld are kept,
all others are multiplied by zero.

Input:  
   y    : 1D or 2D signal to be thresholded
   thld : threshold value

Output: 
   x : Hard thresholded output (x = (abs(y)>thld) * y)

Example:
   y = makesig('WernerSorrows', 8)
   thld = 1
   x = hard_th(y, thld)

Example Output:
  x = array([1.5545, 5.3175, 0, 1.6956, -1.2678, 0, 1.7332, 0])
  """
  # Boolean mask of the coefficients that survive the threshold.
  survives = np.abs(y) > thld
  return survives * y

def soft_th(y, thld):
  """
Soft thresholds the input signal y with the threshold value thld:
magnitudes below thld are zeroed and the remaining magnitudes are
reduced by thld, keeping the sign of y.

Input:  
   y    : 1D or 2D signal to be thresholded
   thld : Threshold value

Output: 
   x : Soft thresholded output
       (sign(y) * (abs(y) >= thld) * (abs(y) - thld))

Example:
   y = makesig('Doppler', 8)
   thld = 0.2
   x = soft_th(y, thld)

Example Output:
   x = array([0, 0, 0, -0.0703, 0, 0.2001, 0.0483, 0])

Reference: 
   \"De-noising via Soft-Thresholding\" Tech. Rept. Statistics,
   Stanford, 1992. D.L. Donoho.
  """
  magnitude = np.abs(y)
  # Mask of entries at or above the threshold, and the shrunken magnitudes.
  keep = magnitude >= thld
  shrunk = magnitude - thld
  return np.sign(y) * keep * shrunk

def makesig(signame, n = 512):
  """
Creates artificial test signal identical to the standard test 
signals proposed and used by D. Donoho and I. Johnstone in
WaveLab (- a matlab toolbox developed by Donoho et al. the statistics
department at Stanford University).

Input:  signame - Name of the desired signal
                    'HeaviSine'
                    'Bumps'
                    'Blocks'
                    'Doppler'
                    'Ramp'
                    'Cusp'
                    'Sing'
                    'HiSine'
                    'LoSine'
                    'LinChirp'
                    'TwoChirp'
                    'QuadChirp'
                    'MishMash'
                    'WernerSorrows' (Heisenberg)
                    'Leopold' (Kronecker)
        n       - Length in samples of the desired signal (Default 512)

Output: x   - resulting test signal

Raises: ValueError if signame is not one of the names listed above

References:
        WaveLab can be accessed at
        www_url: http://playfair.stanford.edu/~wavelab/
        Also see various articles by D.L. Donoho et al. at
        web_url: http://playfair.stanford.edu/
  """
  # Sample positions 1/n, 2/n, ..., 1.
  t = np.arange(1, n + 1) / float(n)
  # Feature locations shared by 'Bumps', 'Blocks' and 'WernerSorrows'.
  pos = np.array([.1, .13, .15, .23, .25, .40, .44, .65, .76, .78, .81])

  def add_bumps(y):
    # Adds the eleven 'Bumps' peaks to y, one at a time.
    hgt = np.array([4, 5, 3, 4, 5, 4.2, 2.1, 4.3, 3.1, 5.1, 4.2])
    wth = np.array(
      [.005, .005, .006, .01, .01, .03, .01, .01, .005, .008, .005])
    for j in range(0, pos.size):
      y = y + hgt[j] / pow((1 + np.abs((t - pos[j]) / wth[j])), 4)
    return y

  if (signame == 'HeaviSine'):
    y = 4 * np.sin(4 * np.pi * t)
    return y - np.sign(t - .3) - np.sign(.72 - t)
  if (signame == 'Bumps'):
    return add_bumps(np.zeros(n))
  if (signame == 'Blocks'):
    hgt = np.array([4, (-5), 3, (-4), 5, (-4.2), 2.1, 4.3, (-3.1),
                    2.1, (-4.2)])
    y = np.zeros(n)
    for j in range(0, pos.size):
      y = y + (1 + np.sign(t - pos[j])) * (hgt[j]/2)
    return y
  if (signame == 'Doppler'):
    return np.sqrt(t * (1-t)) * np.sin((2 * np.pi * 1.05) / (t+.05))
  if (signame == 'Ramp'):
    return t - (t >= .37)
  if (signame == 'Cusp'):
    return np.sqrt(np.abs(t - .37))
  if (signame == 'Sing'):
    k = np.floor(n * .37)
    return 1 / np.abs(t - (k + .5)/n)
  if (signame == 'HiSine'):
    return np.sin(np.pi * (n * .6902) * t)
  if (signame == 'LoSine'):
    return np.sin(np.pi * (n * .3333) * t)
  if (signame == 'LinChirp'):
    return np.sin(np.pi * t * ((n * .125) * t))
  if (signame == 'TwoChirp'):
    return np.sin(np.pi * t * (n * t)) + np.sin((np.pi / 3) * t * (n * t))
  if (signame == 'QuadChirp'):
    return np.sin((np.pi/3) * t * (n * pow(t,2)))
  if (signame == 'MishMash'):
    y = np.sin((np.pi/3) * t * (n * pow(t,2)))
    y = y + np.sin(np.pi * (n * .6902) * t)
    return y + np.sin(np.pi * t * (n * .125 * t))
  if (signame == 'WernerSorrows'):
    # n / 2.0 keeps this a true division regardless of integer-division rules.
    y = np.sin(np.pi * t * (n / 2.0 * pow(t, 2)))
    y = y + np.sin(np.pi * (n * .6902) * t)
    y = y + np.sin(np.pi * t * (n * t))
    return add_bumps(y)
  if (signame == 'Leopold'):
    return (t == np.floor(.37 * n)/n) * 1.0
  # An unrecognised name used to fall through and return None.
  raise ValueError("Unknown signal name: %r" % (signame,))

def denoise(x, h, denoise_type = 0, option = None):
  """
DENOISE is a generic routine for wavelet based denoising.
The routine will denoise the signal x using the 2-band wavelet
system described by the filter h using either the traditional 
discrete wavelet transform (DWT) or the linear shift invariant 
discrete wavelet transform (also known as the undecimated DWT
(UDWT)). 

Input:  
   x            : 1D or 2D signal to be denoised
   h            : Scaling filter to be applied
   denoise_type : Type of transform (Default: type = 0)
                  0 --> Discrete wavelet transform (DWT)
                  1 --> Undecimated DWT (UDWT)
   option       : Six-element sequence; it is copied, never modified.
                  Default settings is marked with '*':
                  *type = 0 --> option = [0 3.0 0 2 0 0]
                  type = 1 --> option = [0 3.6 0 1 0 0]
   option(1)    : Whether to threshold low-pass part
                  0 --> Don't threshold low pass component 
                  1 --> Threshold low pass component
   option(2)    : Threshold multiplier, c. The threshold is
                  computed as: 
                    thld = c*MAD(noise_estimate)). 
                  The default values are:
                    c = 3.0 for the DWT based denoising
                    c = 3.6 for the UDWT based denoising
   option(3)    : Type of variance estimator
                  0 --> MAD (median absolute deviation)
                  1 --> STD (classical numerical std estimate)
   option(4)    : Type of thresholding
                  2 --> Soft thresholding
                  1 --> Hard thresholding
                  (any other value leaves the coefficients
                  unthresholded)
   option(5)    : Number of levels, L, in wavelet decomposition. By
                  setting this to the default value '0' a maximal
                  decomposition is used.
   option(6)    : Actual threshold to use (setting this to
                  anything but 0 will mean that option(3)
                  is ignored)

Output: 
   xd           : Estimate of noise free signal 
   xn           : The estimated noise signal (x-xd)
   option       : A new list of the actual parameters used by the
                  routine. The list is configured the same way as
                  the input option vector, with option(6) replaced
                  by the threshold that was applied and one added
                  element option(7) = type.

Raises:
   ValueError   : if denoise_type is not 0 or 1, or if a threshold
                  has to be estimated and option(3) is not 0 or 1

Example 1: 
   from numpy.random import randn
   N = 16
   h = daubcqf(6)[0]
   s = makesig('Doppler', N)
   n = randn(N)
   x = s + n/10 # (approximately 10dB SNR)
   # Denoise x with the default method based on the DWT
   xd, xn, opt1 = denoise(x,h)
   # Denoise x using the undecimated (LSI) wavelet transform
   yd, yn, opt2 = denoise(x,h,1)

Example 2: (on an image)  
   from scipy.io import loadmat
   from numpy.random import random_sample
   lena = loadmat('../tests/lena512.mat')['lena512']
   h = daubcqf(6)[0]
   noisyLena = lena + 25 * random_sample(lena.shape)
   denoisedLena, xn, opt1 = denoise(noisyLena, h)
  """
  if (denoise_type not in (0, 1)):
    raise ValueError("denoise_type must be 0 (DWT) or 1 (UDWT)")
  if (option is None):
    if (denoise_type == 0):
      option = [0, 3.0, 0, 2, 0, 0]
    else:
      option = [0, 3.6, 0, 1, 0, 0]
  else:
    # Always work on a private copy: the list is updated below and the
    # caller's sequence must not change as a side effect.
    option = list(option)

  def estimate_threshold(band):
    # Threshold = multiplier * noise scale estimated from `band`.
    if (option[2] == 0):
      return option[1] * np.median(np.abs(band)) / .67
    if (option[2] == 1):
      return option[1] * np.std(band, ddof=1)
    raise ValueError("option[2] (variance estimator) must be 0 or 1")

  mx = x.shape[0]
  nx = 1
  if (len(x.shape) > 1):
    nx = x.shape[1]
  dim = min(mx, nx)
  n = dim
  if (dim == 1):
    n = max(mx, nx)
  if (option[4] == 0):
    # Maximal decomposition depth. The builtin int is used because the
    # np.int alias no longer exists in current NumPy releases.
    L = int(np.floor(np.log2(n)))
  else:
    L = option[4]
  if (denoise_type == 0):
    xd = dwt(x, h, L)[0]
    if (option[5] == 0):
      # Estimate the noise level from the second half of the coefficients
      # along each axis.
      if (nx > 1):
        tmp = xd[mx // 2:mx, nx // 2:nx]
      else:
        tmp = xd[mx // 2:mx]
      thld = estimate_threshold(tmp)
    else:
      thld = option[5]
    # Leading block of coefficients that is restored when the low-pass part
    # is not to be thresholded. Slices select the whole block; indexing with
    # two index arrays would pick only its diagonal.
    if (dim == 1):
      low = (slice(0, n // (2 ** L)),)
    else:
      low = (slice(0, mx // (2 ** L)), slice(0, nx // (2 ** L)))
    ykeep = xd[low].copy()
    if (option[3] == 2):
      xd = soft_th(xd, thld)
    elif (option[3] == 1):
      xd = hard_th(xd, thld)
    if (option[0] == 0):
      xd[low] = ykeep
    xd = idwt(xd, h, L)[0]
  else:
    (xl, xh, L) = rdwt(x, h, L)
    if (dim == 1):
      c_offset = 0
    else:
      c_offset = 2 * nx
    if (option[5] == 0):
      if (nx > 1):
        # Each 2D band is nx columns wide, so the slice width is nx.
        tmp = xh[:,c_offset:c_offset+nx]
      else:
        tmp = xh[c_offset:c_offset+mx:1]
      thld = estimate_threshold(tmp)
    else:
      thld = option[5]
    if (option[3] == 2):
      xh = soft_th(xh, thld)
      if (option[0] == 1):
        xl = soft_th(xl, thld)
    elif (option[3] == 1):
      xh = hard_th(xh, thld)
      if (option[0] == 1):
        xl = hard_th(xl, thld)
    xd = irdwt(xl, xh, h, L)[0]
  option[5] = (thld)
  option.append(denoise_type)
  xn = x - xd
  return xd, xn, option

%}


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/python/test_rwt.py
================================================
#!/usr/bin/python

import unittest
from numpy import *
from scipy.io import loadmat
from rwt import *

class TestRWT(unittest.TestCase):

  def setUp(self):
    """Nothing to prepare; every test constructs its own inputs."""
    return None

  def test_dwt(self):
    """Two-level dwt of the 8-sample 'LinChirp' signal matches the stored coefficients."""
    signal = makesig('LinChirp', 8)
    scaling = daubcqf(4, 'min')[0]
    levels = 2
    coeffs, levels = dwt(signal, scaling, levels)
    expected = array([1.109692262737501,0.876661822959323,0.820391852106669,-0.520074093642583,-0.033927668247206,0.100110695461285,0.220088240246095,-0.140081604397608])
    self.assertTrue(allclose(coeffs, expected, rtol=0.0001))

  def test_dwt_2d(self):
    """Two-level dwt of a 4x4 ramp matrix matches the stored coefficients."""
    image = array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12], [13, 14, 15, 16.0]])
    scaling = daubcqf(4)[0]
    levels = 2
    coeffs, levels = dwt(image, scaling, levels)
    expected = array([[34.0000, -3.4641, 0.0000, -2.0000], [-13.8564, 0.0000, 0.0000, -2.0000], [-0.0000, 0.0000, -0.0000, -0.0000], [-8.0000, -8.0000, 0.0000, -0.0000]])
    self.assertTrue(allclose(coeffs, expected, rtol=0.0000005))

  def test_idwt(self):
    """idwt applied to the dwt of a 1D signal returns the original signal."""
    signal = makesig('LinChirp', 8)
    scaling = daubcqf(4, 'min')[0]
    levels = 2
    coeffs, levels = dwt(signal, scaling, levels)
    restored, levels = idwt(coeffs, scaling, levels)
    self.assertTrue(allclose(signal, restored, rtol=0.0005))

  def test_idwt_2d(self):
    """idwt applied to the dwt of the image loaded from lena512.mat returns the image."""
    image = loadmat('../tests/lena512.mat')['lena512'] * 1.0
    scaling = daubcqf(6)[0]
    levels = 9
    coeffs, levels = dwt(image, scaling, levels)
    restored, levels = idwt(coeffs, scaling, levels)
    self.assertTrue(allclose(image, restored, rtol=0.0005))

  def test_rdwt(self):
    """One-level rdwt of the 8-sample 'Leopold' signal matches the stored bands and level."""
    impulse = makesig('Leopold', 8)
    scaling = daubcqf(4, 'min')[0]
    levels = 1
    lowpass, highpass, levels = rdwt(impulse, scaling, levels)
    expected_low = [0.8365,  0.4830, 0, 0, 0, 0, -0.1294, 0.2241]
    expected_high = [-0.2241, -0.1294, 0, 0, 0, 0, -0.4830, 0.8365]
    expected_levels = 1
    self.assertTrue(allclose(lowpass, expected_low, rtol=0.0005))
    self.assertTrue(allclose(highpass, expected_high, rtol=0.0005))
    self.assertTrue(allclose(levels, expected_levels, rtol=0.0005))

  def test_rdwt_2(self):
    """One-level rdwt of a 4x4 matrix matches the stored lowpass and highpass arrays."""
    image = array([[1.0,3,5,2],[3,4,8,1],[3,9,2,0],[1,2,3,0]])
    scaling = daubcqf(4, 'min')[0]
    lowpass, highpass, levels = rdwt(image, scaling, 1)
    expected_low = array([
      [9.0111, 10.7799, 5.8795, 4.1107],
      [11.1393, 8.7766, 2.5502, 4.9130],
      [6.9465, 5.7578, 1.6630, 2.8517],
      [4.8182, 7.7611, 4.9922, 2.0494]])
    expected_high = array([
      [4.5724, 0.4285, -1.8828, 2.2611, 4.8714, -3.1026, -1.7978, 0.0290, -2.9620, -1.1818, -1.1295, 5.2733],
      [-2.4441, -2.4318, -1.4465, -1.4587, 1.8861, -4.2488, -1.9776, 4.3403, -0.0233, 0.0356, 0.9498, -0.9620],
      [-1.7488, -0.5870, 0.5592, -0.6026, 1.1663, -2.3550, -1.7398, 2.9285, -0.6965, 1.8583, -0.7120, -0.4498],
      [-0.3795, 2.5903, 2.7700, -0.1998, 4.1516, -1.2087, -1.5601, -1.3828, 3.6818, -0.7120, 0.8917, -3.8615]])
    self.assertTrue(allclose(lowpass, expected_low, rtol=0.001))
    self.assertTrue(allclose(highpass, expected_high, rtol=0.001))

  def test_rdwt_2L2(self):
    """Two-level rdwt of a 4x4 matrix matches the stored lowpass and highpass arrays."""
    image = array([[1.0,3,5,2],[3,4,8,1],[3,9,2,0],[1,2,3,0]])
    scaling = daubcqf(4, 'min')[0]
    lowpass, highpass, levels = rdwt(image, scaling, 2)
    expected_low = array([
      [11.7500,  11.7500,  11.7500,  11.7500],
      [11.7500,  11.7500,  11.7500,  11.7500],
      [11.7500,  11.7500,  11.7500,  11.7500],
      [11.7500,  11.7500,  11.7500,  11.7500]])
    expected_high = array([
       [4.5724,   0.4285,  -1.8828,   2.2611,   4.8714,  -3.1026,  -1.7978,   0.0290,  -2.9620,  -1.1818,  -1.1295,   5.2733,
       3.1405,   3.1405,   3.1405,   3.1405,   4.2075,   4.7877,  -4.2075,  -4.7877,  -1.0760,   1.8816,   1.0760,  -1.8816],
       [-2.4441,  -2.4318,  -1.4465,  -1.4587,   1.8861,  -4.2488,  -1.9776,   4.3403,  -0.0233,   0.0356,   0.9498,  -0.9620,
       1.9396,   1.9396,   1.9396,   1.9396,   4.2075,   4.7877,  -4.2075,  -4.7877,   4.3816,  -0.9240,  -4.3816,   0.9240],
       [-1.7488,  -0.5870,   0.5592,  -0.6026,   1.1663,  -2.3550,  -1.7398,   2.9285,  -0.6965,   1.8583,  -0.7120,  -0.4498,
       -3.1405,  -3.1405,  -3.1405,  -3.1405,   4.2075,   4.7877,  -4.2075,  -4.7877,   1.0760,  -1.8816,  -1.0760,   1.8816],
       [-0.3795,   2.5903,   2.7700,  -0.1998,   4.1516,  -1.2087,  -1.5601,  -1.3828,   3.6818,  -0.7120,   0.8917,  -3.8615,
       -1.9396,  -1.9396,  -1.9396,  -1.9396,   4.2075,   4.7877,  -4.2075,  -4.7877,  -4.3816,   0.9240,   4.3816,  -0.9240]])
    self.assertTrue(allclose(lowpass, expected_low, rtol=0.001))
    self.assertTrue(allclose(highpass, expected_high, rtol=0.001))

  def test_irdwt(self):
    """irdwt applied to the rdwt of a 1D signal returns the original signal."""
    original = makesig('Leopold',8)
    scaling = daubcqf(4, 'min')[0]
    requested_levels = 1
    lowpass, highpass, levels = rdwt(original, scaling, requested_levels)
    restored, levels = irdwt(lowpass, highpass, scaling, levels)
    self.assertTrue(allclose(restored, original, rtol=0.0005))
       
  def test_irdwt_2d(self):
    """irdwt applied to the rdwt of the image loaded from lena512.mat returns the image."""
    image = loadmat('../tests/lena512.mat')['lena512'] * 1.0
    scaling = daubcqf(6)[0]
    levels = 9
    lowpass, highpass, levels = rdwt(image, scaling, levels)
    restored, levels = irdwt(lowpass, highpass, scaling, levels)
    self.assertTrue(allclose(image, restored, rtol=0.0005))

  def test_makesig_heavisine(self):
    """makesig('HeaviSine', 8) matches the stored samples."""
    produced = makesig('HeaviSine', 8)
    expected = array([4.0000, 0.0000, -6.0000, -2.0000, 2.0000, 0.0000, -4.0000, -0.0000])
    self.assertTrue(allclose(produced, expected, rtol=0.0005))
  
  def test_makesig_bumps(self):
    """makesig('Bumps', 8), rounded to four decimals, matches the stored samples."""
    produced = around(makesig('Bumps', 8), 4)
    expected = array([0.3206, 5.0527, 0.3727, 0.0129, 0.0295, 0.0489, 0.0004, 0.0000])
    self.assertTrue(allclose(produced, expected, rtol=0.0005))
  
  def test_makesig_blocks(self):
    """makesig('Blocks', 8) matches the stored samples."""
    produced = makesig('Blocks', 8)
    expected = array([4.0000, 0.5000, 3.0000, 0.9000, 0.9000, 5.2000, -0.0000, -0.0000])
    self.assertTrue(allclose(produced, expected, rtol=0.0005))
  
  def test_makesig_doppler(self):
    """makesig('Doppler', 12) matches the stored samples."""
    produced = makesig('Doppler', 12)
    expected = array([-0.1954, -0.3067, 0.0000, -0.4703, 0.4930, -0.2703, -0.4127, 0.1025, 0.4001, 0.3454, 0.1425, 0])
    self.assertTrue(allclose(produced, expected, rtol=0.0005))
  
  def test_makesig_ramp(self):
    """makesig('Ramp', 8) matches the stored samples."""
    produced = makesig('Ramp', 8)
    expected = array([0.1250, 0.2500, -0.6250, -0.5000, -0.3750, -0.2500, -0.1250, 0])
    self.assertTrue(allclose(produced, expected, rtol=0.0005))
  
  def test_makesig_cusp(self):
    """makesig('Cusp', 8) matches the stored samples."""
    produced = makesig('Cusp', 8)
    expected = array([0.4950, 0.3464, 0.0707, 0.3606, 0.5050, 0.6164, 0.7106, 0.7937])
    self.assertTrue(allclose(produced, expected, rtol=0.0005))
  
  def test_makesig_sing(self):
    """makesig('Sing', 8) matches the stored samples."""
    produced = makesig('Sing', 8)
    expected = array([5.3333, 16.0000, 16.0000, 5.3333, 3.2000, 2.2857, 1.7778, 1.4545])
    self.assertTrue(allclose(produced, expected, rtol=0.0005))
  
  def test_makesig_hisine(self):
    """makesig('HiSine', 8) matches the stored samples."""
    produced = makesig('HiSine', 8)
    expected = array([0.8267, -0.9302, 0.2200, 0.6827, -0.9882, 0.4292, 0.5053, -0.9977])
    self.assertTrue(allclose(produced, expected, rtol=0.0005))
  
  def test_makesig_losine(self):
    """makesig('LoSine', 8) matches the stored samples."""
    produced = makesig('LoSine', 8)
    expected = array([0.865973039158459,0.866130104544730,0.000314159260191,-0.865815888304075,-0.866287084447387,-0.000628318489377,0.865658651997088,0.866443978850937])
    self.assertTrue(allclose(produced, expected, rtol=0.0005))
  
  def test_makesig_linchirp(self):
    """makesig('LinChirp', 8) matches the stored samples."""
    produced = makesig('LinChirp', 8)
    expected = array([0.049067674327418,0.195090322016128,0.427555093430282,0.707106781186547,0.941544065183021,0.980785280403230,0.671558954847019,0.000000000000000])
    self.assertTrue(allclose(produced, expected, rtol=0.0001))
  
  def test_makesig_twochirp(self):
    """makesig('TwoChirp', 8) matches the stored samples."""
    produced = makesig('TwoChirp', 8)
    expected = array([0.5132, 1.5000, 0.5412, 0.8660, -0.5132, 0, 0.5132, 0.8660])
    self.assertTrue(allclose(produced, expected, rtol=0.0005))
  
  def test_makesig_quadchirp(self):
    """makesig('QuadChirp', 8) matches the stored samples."""
    produced = makesig('QuadChirp', 8)
    expected = array([0.016361731626487,0.130526192220052,0.427555093430282,0.866025403784439,0.889516075421856,-0.382683432365090,-0.621660573370077,0.866025403784439])
    self.assertTrue(allclose(produced, expected, rtol=0.0001))
  
  def test_makesig_mishmash(self):
    """makesig('MishMash', 8) matches the stored samples."""
    produced = makesig('MishMash', 8)
    expected = array([0.8922, -0.6046, 1.0751, 2.2558, 0.8429, 1.0273, 0.5551, -0.1317])
    self.assertTrue(allclose(produced, expected, rtol=0.0005))
  
  def test_makesig_wernersorrows(self):
    """makesig('WernerSorrows', 8) matches the stored samples."""
    produced = makesig('WernerSorrows', 8)
    expected = array([1.5545, 5.3175, 0.8252, 1.6956, -1.2678, 0.6466, 1.7332, -0.9977])
    self.assertTrue(allclose(produced, expected, rtol=0.0005))
  
  def test_makesig_leopold(self):
    """makesig('Leopold', 8) is a unit impulse at the second sample."""
    produced = makesig('Leopold', 8)
    expected = array([0, 1, 0, 0, 0, 0, 0, 0])
    self.assertTrue(allclose(produced, expected, rtol=0.0005))

  def test_denoise_default(self):
    """denoise with default arguments on a noisy 32-sample 'Doppler' signal matches the stored output."""
    clean = makesig('Doppler', 32)
    noise = array([1.54421189550395,0.0859311331754255,-1.49159031063761,-0.742301837259857,-1.06158173331999,2.35045722400204,-0.615601881466894,0.748076783703985,-0.192418510588264,0.888610425420721,-0.764849236567874,-1.40226896933876,-1.42237592509150,0.488193909859941,-0.177375156618825,-0.196053487807333,1.41931015064255,0.291584373984183,0.197811053464361,1.58769908997406,-0.804465956349547,0.696624415849607,0.835088165072682,-0.243715140377952,0.215670086403744,-1.16584393148205,-1.14795277889859,0.104874716016494,0.722254032225002,2.58549125261624,-0.666890670701386,0.187331024578940])
    noisy = clean + noise / 10
    scaling = daubcqf(6)[0]
    denoised, removed_noise, used_options = denoise(noisy, scaling)
    expected = array([0.0741827688375062,0.0791701902526268,0.0760842615272340,0.0750476831774179,0.111279774779568,0.163475053283544,-0.0498263815350539,0.0946073088237311,0.135126562486911,-0.0186090620958193,-0.0748812479991294,-0.103470206059426,0.0234254843251780,0.239772540836257,0.0920583398962312,-0.152180640366891,-0.116682073306156,-0.0459389850762785,-0.00245240039778375,0.0755739164104836,0.102548333512214,0.121099911744184,0.177390507921620,0.240386041553093,0.231105933317157,0.198210924493273,0.175672812990725,0.138822049613034,0.127491615387826,0.121409597186325,0.0994935320130783,0.0760019340865427])
    self.assertTrue(allclose(denoised, expected, rtol=0.01))

  def test_denoise_2d(self):
    """denoise with default arguments on a 4x4 matrix matches the stored output."""
    image = array([[1,2,3,4],[5,6,7,8],[9,10.09,11,12],[13,13.91,15,16]])
    scaling = daubcqf(4)[0]
    denoised, removed_noise, used_options = denoise(image, scaling)
    expected = array([[1.093495801587334,2.052784169768518,3.036985129109070,4.014510779767102],[5.037416383975946,6.006178652683398,6.994963120759174,7.978382656683513],[9.047593546684929,10.003998510025589,10.977825887256145,11.94698494275469],[13.009489364401729,13.937038667522501,14.939852728547271,15.9224996584731398]])
    self.assertTrue(allclose(denoised, expected, rtol=0.01))

  def test_denoise_udwt(self):
    """denoise with denoise_type=1 on a noisy 32-sample 'Doppler' signal matches the stored output."""
    clean = makesig('Doppler', 32)
    noise = array([1.54421189550395,0.0859311331754255,-1.49159031063761,-0.742301837259857,-1.06158173331999,2.35045722400204,-0.615601881466894,0.748076783703985,-0.192418510588264,0.888610425420721,-0.764849236567874,-1.40226896933876,-1.42237592509150,0.488193909859941,-0.177375156618825,-0.196053487807333,1.41931015064255,0.291584373984183,0.197811053464361,1.58769908997406,-0.804465956349547,0.696624415849607,0.835088165072682,-0.243715140377952,0.215670086403744,-1.16584393148205,-1.14795277889859,0.104874716016494,0.722254032225002,2.58549125261624,-0.666890670701386,0.187331024578940])
    noisy = clean + noise / 10
    scaling = daubcqf(6)[0]
    denoised, removed_noise, used_options = denoise(noisy, scaling, 1)
    expected = array([0.126244615385152,0.0952319712425300,0.0671343607152503,0.0513902979722585,0.0430402732682634,0.0586932575131794,0.0861069751902698,0.0989949047763016,0.0908418658128637,-0.0141454670119059,-0.144791527437026,-0.0185533166035902,0.278351613782131,0.279033706376659,-0.0205012032054263,-0.212367658407976,-0.241484343697995,-0.248582298831059,-0.213374214781743,-0.101963712141109,0.0454248851310567,0.181104333949749,0.275294407293259,0.309076259882059,0.298600450385073,0.259080737796607,0.211123535801718,0.183021783525739,0.171966340866576,0.171616812586097,0.168720006300193,0.151066428184072])
    self.assertTrue(allclose(denoised, expected, rtol=0.01))

  def test_denoise_udwt_2d(self):
    """denoise with denoise_type=1 on a 4x4 matrix matches the stored output."""
    image = array([[1,2,3,4],[5,6,7,8],[9,10.09,11,12],[13,13.91,15,16]])
    scaling = daubcqf(4)[0]
    denoised, removed_noise, used_options = denoise(image, scaling, 1)
    expected = array([[1.007040488866197,1.993405274521765,3.006268404030089,3.996424654030090],[4.995935171857875,6.002401216530091,7.001252328142127,8.005847881693983],[9.009508189685661,10.059981743374523,11.001190131625481,11.999030274521770],[12.987516149590270,13.944211765573623,14.991289136202310,15.998697189754166]])
    self.assertTrue(allclose(denoised, expected, rtol=0.01))
  
  def test_denoise_threshold_low(self):
    """denoise with option [1,3.0,0,2,0,0] (first option entry set to 1) matches the stored output."""
    clean = makesig('Doppler', 32)
    noise = array([1.54421189550395,0.0859311331754255,-1.49159031063761,-0.742301837259857,-1.06158173331999,2.35045722400204,-0.615601881466894,0.748076783703985,-0.192418510588264,0.888610425420721,-0.764849236567874,-1.40226896933876,-1.42237592509150,0.488193909859941,-0.177375156618825,-0.196053487807333,1.41931015064255,0.291584373984183,0.197811053464361,1.58769908997406,-0.804465956349547,0.696624415849607,0.835088165072682,-0.243715140377952,0.215670086403744,-1.16584393148205,-1.14795277889859,0.104874716016494,0.722254032225002,2.58549125261624,-0.666890670701386,0.187331024578940])
    noisy = clean + noise / 10
    scaling = daubcqf(6)[0]
    denoised, removed_noise, used_options = denoise(noisy, scaling, 0, [1,3.0,0,2,0,0])
    expected = array([0.0187742354278351,0.0237616568429558,0.0206757281175629,0.0196391497677469,0.0558712413698966,0.108066519873873,-0.105234914944725,0.0391987754140600,0.0797180290772401,-0.0740175955054904,-0.130289781408801,-0.158878739469097,-0.0319830490844931,0.184364007426586,0.0366498064865601,-0.207589173776562,-0.172090606715827,-0.101347518485950,-0.0578609338074549,0.0201653830008125,0.0471398001025425,0.0656913783345127,0.121981974511949,0.184977508143422,0.175697399907486,0.142802391083602,0.120264279581054,0.0834135162033633,0.0720830819781554,0.0660010637766539,0.0440849986034073,0.0205934006768717])
    self.assertTrue(allclose(denoised, expected, rtol=0.01))
  
  def test_denoise_thresh_multiplier(self):
    """denoise with option [1,3.5,0,2,0,0] (second option entry set to 3.5) matches the stored output."""
    clean = makesig('Doppler', 32)
    noise = array([1.54421189550395,0.0859311331754255,-1.49159031063761,-0.742301837259857,-1.06158173331999,2.35045722400204,-0.615601881466894,0.748076783703985,-0.192418510588264,0.888610425420721,-0.764849236567874,-1.40226896933876,-1.42237592509150,0.488193909859941,-0.177375156618825,-0.196053487807333,1.41931015064255,0.291584373984183,0.197811053464361,1.58769908997406,-0.804465956349547,0.696624415849607,0.835088165072682,-0.243715140377952,0.215670086403744,-1.16584393148205,-1.14795277889859,0.104874716016494,0.722254032225002,2.58549125261624,-0.666890670701386,0.187331024578940])
    noisy = clean + noise / 10
    scaling = daubcqf(6)[0]
    denoised, removed_noise, used_options = denoise(noisy, scaling, 0, [1,3.5,0,2,0,0])
    expected = array([0.00563527074803461,0.0110853052404048,0.0101590193471916,0.0116789518546074,0.0354625658443208,0.0691904606426981,-0.0647010252187970,0.0393485097012034,0.0302297746478269,-0.0658230296401878,-0.0947938063374137,-0.147943151851009,-0.0355607514547514,0.143027827800490,0.0126752977970079,-0.200577663821584,-0.149059259007655,-0.0564432101940217,-0.0281365070661950,0.0201021371871464,0.0438412772787373,0.0596866399869512,0.0967101937989458,0.136451641917565,0.130716307107088,0.109146914388131,0.0925200849653435,0.0657607417363412,0.0550584910898860,0.0469636231448182,0.0277268486177313,0.00667135407398081])
    self.assertTrue(allclose(denoised, expected, rtol=0.01))
  
  def test_denoise_std(self):
    """denoise with option [0,3.0,1,2,0,0] (third option entry set to 1) matches the stored output."""
    clean = makesig('Doppler', 32)
    noise = array([1.54421189550395,0.0859311331754255,-1.49159031063761,-0.742301837259857,-1.06158173331999,2.35045722400204,-0.615601881466894,0.748076783703985,-0.192418510588264,0.888610425420721,-0.764849236567874,-1.40226896933876,-1.42237592509150,0.488193909859941,-0.177375156618825,-0.196053487807333,1.41931015064255,0.291584373984183,0.197811053464361,1.58769908997406,-0.804465956349547,0.696624415849607,0.835088165072682,-0.243715140377952,0.215670086403744,-1.16584393148205,-1.14795277889859,0.104874716016494,0.722254032225002,2.58549125261624,-0.666890670701386,0.187331024578940])
    noisy = clean + noise / 10
    scaling = daubcqf(6)[0]
    denoised, removed_noise, used_options = denoise(noisy, scaling, 0, [0,3.0,1,2,0,0])
    expected = array([0.0686926069658060,0.0706216045196474,0.0719769032529757,0.0743568305131058,0.0754251996534692,0.0763549103855611,0.0783972750744446,0.0807092136475563,0.0763109954998047,0.0693017683604205,0.0628697537191382,0.0547492531677562,0.0755519478401559,0.107931256046656,0.0859959791464885,0.0494376118339224,0.0602059364595448,0.0785077229738383,0.0791999606842265,0.0809410605777517,0.0844652184548917,0.0873749084881920,0.0911535278085727,0.0952027332951270,0.0936316016468421,0.0898878427420561,0.0866734185917041,0.0820709685744921,0.0793481432323076,0.0768306965269240,0.0727995727792393,0.0684196591566048])
    self.assertTrue(allclose(denoised, expected, rtol=0.01))
  
  def test_denoise_hard(self):
    """denoise with option [0,3.0,0,1,0,0] (fourth option entry set to 1) matches the stored output."""
    clean = makesig('Doppler', 32)
    noise = array([1.54421189550395,0.0859311331754255,-1.49159031063761,-0.742301837259857,-1.06158173331999,2.35045722400204,-0.615601881466894,0.748076783703985,-0.192418510588264,0.888610425420721,-0.764849236567874,-1.40226896933876,-1.42237592509150,0.488193909859941,-0.177375156618825,-0.196053487807333,1.41931015064255,0.291584373984183,0.197811053464361,1.58769908997406,-0.804465956349547,0.696624415849607,0.835088165072682,-0.243715140377952,0.215670086403744,-1.16584393148205,-1.14795277889859,0.104874716016494,0.722254032225002,2.58549125261624,-0.666890670701386,0.187331024578940])
    noisy = clean + noise / 10
    scaling = daubcqf(6)[0]
    denoised, removed_noise, used_options = denoise(noisy, scaling, 0, [0,3.0,0,1,0,0])
    expected = array([0.0977394160103721,0.0994161560983385,0.0832447407807381,0.0666983311697188,0.177420971595413,0.340230583897110,-0.354597069671295,0.0250017872275015,0.394418485343238,-0.0595745304374512,-0.452401570793399,-0.175707560852101,-0.00622320325130765,0.437867065411816,0.187485346584306,-0.241060664687049,-0.306285896120773,-0.373946536466370,-0.246165924475657,0.00210496326791051,0.0528629966064817,0.0967383656953347,0.275410693617439,0.487298926169970,0.454985253718689,0.348603331393631,0.288205743942248,0.186806596496260,0.172147260405660,0.180050851714681,0.142136445826288,0.104484725401481])
    self.assertTrue(allclose(denoised, expected, rtol=0.01))
  
  def test_denoise_levels(self):
    """denoise with option [0,3.0,0,2,4,0] (fifth option entry set to 4) matches the stored output."""
    clean = makesig('Doppler', 32)
    noise = array([1.54421189550395,0.0859311331754255,-1.49159031063761,-0.742301837259857,-1.06158173331999,2.35045722400204,-0.615601881466894,0.748076783703985,-0.192418510588264,0.888610425420721,-0.764849236567874,-1.40226896933876,-1.42237592509150,0.488193909859941,-0.177375156618825,-0.196053487807333,1.41931015064255,0.291584373984183,0.197811053464361,1.58769908997406,-0.804465956349547,0.696624415849607,0.835088165072682,-0.243715140377952,0.215670086403744,-1.16584393148205,-1.14795277889859,0.104874716016494,0.722254032225002,2.58549125261624,-0.666890670701386,0.187331024578940])
    noisy = clean + noise / 10
    scaling = daubcqf(6)[0]
    denoised, removed_noise, used_options = denoise(noisy, scaling, 0, [0,3.0,0,2,4,0])
    expected = array([0.164259992817262,0.156379071218712,0.142212685671703,0.125038963573761,0.150297815252073,0.191536767978636,-0.0381639580765735,0.0881092032192094,0.119629284458486,-0.0406090725365491,-0.105645426731493,-0.141820831994602,-0.0280318977202704,0.173171960129832,0.0117537437282443,-0.247115729957293,-0.206759297285911,-0.123147866042363,-0.0685808245422524,0.0255826360141400,0.0635302930397082,0.0930381970490923,0.165728084463140,0.246884147157615,0.246603211345582,0.220210934934003,0.206436991723089,0.177172675548210,0.178948997433275,0.188010177892750,0.179798128181065,0.170937023676945])
    self.assertTrue(allclose(denoised, expected, rtol=0.01))
  
  def test_denoise_actual_thresh(self):
    """denoise with option [0,3.0,0,2,0,0.5] (sixth option entry set to 0.5) matches the stored output."""
    clean = makesig('Doppler', 32)
    noise = array([1.54421189550395,0.0859311331754255,-1.49159031063761,-0.742301837259857,-1.06158173331999,2.35045722400204,-0.615601881466894,0.748076783703985,-0.192418510588264,0.888610425420721,-0.764849236567874,-1.40226896933876,-1.42237592509150,0.488193909859941,-0.177375156618825,-0.196053487807333,1.41931015064255,0.291584373984183,0.197811053464361,1.58769908997406,-0.804465956349547,0.696624415849607,0.835088165072682,-0.243715140377952,0.215670086403744,-1.16584393148205,-1.14795277889859,0.104874716016494,0.722254032225002,2.58549125261624,-0.666890670701386,0.187331024578940])
    noisy = clean + noise / 10
    scaling = daubcqf(6)[0]
    denoised, removed_noise, used_options = denoise(noisy, scaling, 0, [0,3.0,0,2,0,0.5])
    expected = array([0.0607099183942295,0.0654351521193524,0.0684154759800610,0.0742018934148454,0.0758845005390013,0.0769511530643110,0.0810856606730252,0.0858023375316036,0.0704706443350518,0.0472060906047587,0.0254329679518446,-0.00154590940405266,0.0598455182579352,0.156556707841878,0.0864272987162393,-0.0287835335280487,0.00606017120154721,0.0659592575432934,0.0713958080495586,0.0812891735076492,0.0953701981347179,0.107554576791239,0.123739146895592,0.141180422640726,0.137085044622601,0.124838366760086,0.114852957437233,0.0997294000571788,0.0922174665178409,0.0857758976557685,0.0737052631031342,0.0605470542090229])
    self.assertTrue(allclose(denoised, expected, rtol=0.01))

  def test_denoise_udwt_threshold_low(self):
    # Regression check for denoise() called with third argument 1 (the value
    # every 'udwt' test in this file passes) and options [1,3,0,1,0,0].
    # NOTE(review): the leading 1 in the option list presumably enables
    # thresholding of the low-pass part, per the test name -- confirm.
    perturbation = array([1.54421189550395,0.0859311331754255,-1.49159031063761,-0.742301837259857,-1.06158173331999,2.35045722400204,-0.615601881466894,0.748076783703985,-0.192418510588264,0.888610425420721,-0.764849236567874,-1.40226896933876,-1.42237592509150,0.488193909859941,-0.177375156618825,-0.196053487807333,1.41931015064255,0.291584373984183,0.197811053464361,1.58769908997406,-0.804465956349547,0.696624415849607,0.835088165072682,-0.243715140377952,0.215670086403744,-1.16584393148205,-1.14795277889859,0.104874716016494,0.722254032225002,2.58549125261624,-0.666890670701386,0.187331024578940])
    # The fixed perturbation is scaled down by 10 before being added.
    noisy = makesig('Doppler', 32) + perturbation / 10
    filt = daubcqf(6)[0]
    option_list = [1,3,0,1,0,0]
    # Only the first of the three returned values is checked.
    denoised, _removed, _used_options = denoise(noisy, filt, 1, option_list)
    expected = array([0.135039400483741,0.117805175604609,0.0967709584177031,0.0142060292567307,-0.0239840294603812,0.323425861331697,-0.212285200125643,0.166066657685731,0.136653739821785,-0.0361708285655289,-0.244622217319313,-0.0751486112344819,0.279128997196628,0.299915294672821,0.00822389077239383,-0.232180770499244,-0.330137263335199,-0.293955318206172,-0.175538926380835,-0.0733568677543535,0.049241196655251,0.200165899490694,0.304615650610263,0.337325376378116,0.325593984310807,0.282048956150932,0.228861081870546,0.196656880842149,0.180959366486141,0.175210410022406,0.169828050229736,0.155033256209497])
    tolerance = 0.01  # handed to allclose as its third positional argument
    self.assertTrue(allclose(denoised, expected, tolerance))
  
  def test_denoise_udwt_thresh_multiplier(self):
    # Regression check for denoise() called with third argument 1 and
    # options [1,3.5,0,1,0,0].
    # NOTE(review): the 3.5 entry is presumably the threshold multiplier the
    # test name refers to -- confirm against denoise().
    perturbation = array([1.54421189550395,0.0859311331754255,-1.49159031063761,-0.742301837259857,-1.06158173331999,2.35045722400204,-0.615601881466894,0.748076783703985,-0.192418510588264,0.888610425420721,-0.764849236567874,-1.40226896933876,-1.42237592509150,0.488193909859941,-0.177375156618825,-0.196053487807333,1.41931015064255,0.291584373984183,0.197811053464361,1.58769908997406,-0.804465956349547,0.696624415849607,0.835088165072682,-0.243715140377952,0.215670086403744,-1.16584393148205,-1.14795277889859,0.104874716016494,0.722254032225002,2.58549125261624,-0.666890670701386,0.187331024578940])
    # The fixed perturbation is scaled down by 10 before being added.
    noisy = makesig('Doppler', 32) + perturbation / 10
    filt = daubcqf(6)[0]
    option_list = [1,3.5,0,1,0,0]
    # Only the first of the three returned values is checked.
    denoised, _removed, _used_options = denoise(noisy, filt, 1, option_list)
    expected = array([0.0479478506866607,0.0160653046305043,-0.012660890293452,-0.0292521383561941,-0.0383355043751224,-0.0239494802109215,0.00200042536526626,0.0135636610003902,0.00399637041195728,-0.100521378500944,-0.229923524965501,-0.102614225576592,0.195850596270724,0.197593413336102,-0.100882406775293,-0.291163630119251,-0.318524834100706,-0.324752887320235,-0.288916218874243,-0.176658530913858,-0.028536592326759,0.108409816572649,0.204063702017061,0.239170248556769,0.230108690684778,0.190119394184444,0.14091827822899,0.11174543739754,0.0991301032767805,0.0977198505254529,0.0937639547688583,0.0745251447941448])
    tolerance = 0.01  # handed to allclose as its third positional argument
    self.assertTrue(allclose(denoised, expected, tolerance))
  
  def test_denoise_udwt_std(self):
    # Regression check for denoise() called with third argument 1 and
    # options [0,3.0,1,1,0,0].
    # NOTE(review): the third option entry (1 here, 0 in the sibling tests) is
    # presumably the variance-estimation switch hinted at by "std" -- confirm.
    perturbation = array([1.54421189550395,0.0859311331754255,-1.49159031063761,-0.742301837259857,-1.06158173331999,2.35045722400204,-0.615601881466894,0.748076783703985,-0.192418510588264,0.888610425420721,-0.764849236567874,-1.40226896933876,-1.42237592509150,0.488193909859941,-0.177375156618825,-0.196053487807333,1.41931015064255,0.291584373984183,0.197811053464361,1.58769908997406,-0.804465956349547,0.696624415849607,0.835088165072682,-0.243715140377952,0.215670086403744,-1.16584393148205,-1.14795277889859,0.104874716016494,0.722254032225002,2.58549125261624,-0.666890670701386,0.187331024578940])
    # The fixed perturbation is scaled down by 10 before being added.
    noisy = makesig('Doppler', 32) + perturbation / 10
    filt = daubcqf(6)[0]
    option_list = [0,3.0,1,1,0,0]
    # Only the first of the three returned values is checked.
    denoised, _removed, _used_options = denoise(noisy, filt, 1, option_list)
    expected = array([0.0847626939447046,0.0648669375488877,0.0505127048998841,0.0431477690668965,0.0443458995091662,0.0638361516754724,0.0926698200065443,0.122716357496751,0.135591683864019,0.0377466753027189,-0.0889166586897228,-0.0310700016943258,0.16530654803759,0.237349858169585,0.0577692051497442,-0.137751577705709,-0.18354744395111,-0.188205427540335,-0.157902857480421,-0.055391323576937,0.0791892398460303,0.198068185997372,0.271471422836112,0.282275886815228,0.246689293630916,0.205546705496588,0.16546007731141,0.145130898382968,0.1471329636038,0.142472749823065,0.132163448290946,0.111958195551385])
    tolerance = 0.01  # handed to allclose as its third positional argument
    self.assertTrue(allclose(denoised, expected, tolerance))
  
  def test_denoise_udwt_soft(self):
    # Regression check for denoise() called with third argument 1 and
    # options [0,3.0,0,2,0,0].
    # NOTE(review): the fourth option entry (2 here, 1 in most sibling udwt
    # tests) presumably selects soft thresholding, per the test name -- confirm.
    perturbation = array([1.54421189550395,0.0859311331754255,-1.49159031063761,-0.742301837259857,-1.06158173331999,2.35045722400204,-0.615601881466894,0.748076783703985,-0.192418510588264,0.888610425420721,-0.764849236567874,-1.40226896933876,-1.42237592509150,0.488193909859941,-0.177375156618825,-0.196053487807333,1.41931015064255,0.291584373984183,0.197811053464361,1.58769908997406,-0.804465956349547,0.696624415849607,0.835088165072682,-0.243715140377952,0.215670086403744,-1.16584393148205,-1.14795277889859,0.104874716016494,0.722254032225002,2.58549125261624,-0.666890670701386,0.187331024578940])
    # The fixed perturbation is scaled down by 10 before being added.
    noisy = makesig('Doppler', 32) + perturbation / 10
    filt = daubcqf(6)[0]
    option_list = [0,3.0,0,2,0,0]
    # Only the first of the three returned values is checked.
    denoised, _removed, _used_options = denoise(noisy, filt, 1, option_list)
    expected = array([0.086668016749428,0.078090652632278,0.070455842749544,0.062824684205684,0.064249795534642,0.086899924318641,0.053549539548214,0.100644175366308,0.100726560037458,0.051479406046214,-0.011299945211104,0.036115394710961,0.147624998547612,0.159516308766960,0.059119062682569,-0.020817294484415,-0.042170912413038,-0.046825168298822,-0.027179285827824,0.017379645805457,0.071225126011476,0.123532780238470,0.153926034241219,0.160138755049699,0.153562168658336,0.138748019440599,0.123707805352361,0.115223425612607,0.110890877355381,0.107909648973443,0.103630954238181,0.095849084980685])
    tolerance = 0.01  # handed to allclose as its third positional argument
    self.assertTrue(allclose(denoised, expected, tolerance))
  
  def test_denoise_udwt_levels(self):
    # Regression check for denoise() called with third argument 1 and
    # options [0,3.0,0,1,4,0].
    # NOTE(review): the fifth option entry (4 here, 0 in most sibling tests)
    # presumably sets the number of decomposition levels -- confirm.
    perturbation = array([1.54421189550395,0.0859311331754255,-1.49159031063761,-0.742301837259857,-1.06158173331999,2.35045722400204,-0.615601881466894,0.748076783703985,-0.192418510588264,0.888610425420721,-0.764849236567874,-1.40226896933876,-1.42237592509150,0.488193909859941,-0.177375156618825,-0.196053487807333,1.41931015064255,0.291584373984183,0.197811053464361,1.58769908997406,-0.804465956349547,0.696624415849607,0.835088165072682,-0.243715140377952,0.215670086403744,-1.16584393148205,-1.14795277889859,0.104874716016494,0.722254032225002,2.58549125261624,-0.666890670701386,0.187331024578940])
    # The fixed perturbation is scaled down by 10 before being added.
    noisy = makesig('Doppler', 32) + perturbation / 10
    filt = daubcqf(6)[0]
    option_list = [0,3.0,0,1,4,0]
    # Only the first of the three returned values is checked.
    denoised, _removed, _used_options = denoise(noisy, filt, 1, option_list)
    expected = array([0.137633389000662,0.120676804147327,0.0997827582151432,0.0156985740202669,-0.0251180988153785,0.319788331991522,-0.217919217670089,0.160238201773756,0.131270340429534,-0.0414158027972923,-0.249853610380694,-0.0801267408837784,0.275034335985338,0.296982831400265,0.00620014657281041,-0.234309647934845,-0.33273125185212,-0.296826946748889,-0.178550726178275,-0.0748494125178897,0.0503752660102483,0.203803428830869,0.310249668154709,0.343153832290091,0.330977383703058,0.287293930382695,0.234092474931927,0.201635010491445,0.185054027697432,0.178142873294961,0.171851794429319,0.157162133645098])
    tolerance = 0.01  # handed to allclose as its third positional argument
    self.assertTrue(allclose(denoised, expected, tolerance))
  
  def test_denoise_udwt_actual_thresh(self):
    # Regression check for denoise() called with third argument 1 and
    # options [0,3.0,0,1,0,0.5].
    # NOTE(review): the trailing 0.5 is presumably an explicit threshold
    # value, per the test name -- confirm against denoise().
    perturbation = array([1.54421189550395,0.0859311331754255,-1.49159031063761,-0.742301837259857,-1.06158173331999,2.35045722400204,-0.615601881466894,0.748076783703985,-0.192418510588264,0.888610425420721,-0.764849236567874,-1.40226896933876,-1.42237592509150,0.488193909859941,-0.177375156618825,-0.196053487807333,1.41931015064255,0.291584373984183,0.197811053464361,1.58769908997406,-0.804465956349547,0.696624415849607,0.835088165072682,-0.243715140377952,0.215670086403744,-1.16584393148205,-1.14795277889859,0.104874716016494,0.722254032225002,2.58549125261624,-0.666890670701386,0.187331024578940])
    # The fixed perturbation is scaled down by 10 before being added.
    noisy = makesig('Doppler', 32) + perturbation / 10
    filt = daubcqf(6)[0]
    option_list = [0,3.0,0,1,0,0.5]
    # Only the first of the three returned values is checked.
    denoised, _removed, _used_options = denoise(noisy, filt, 1, option_list)
    expected = array([0.126244615385152,0.09523197124253,0.0671343607152503,0.0513902979722585,0.0430402732682634,0.0586932575131794,0.0861069751902698,0.0989949047763016,0.0908418658128637,-0.0141454670119059,-0.144791527437026,-0.0185533166035902,0.278351613782131,0.279033706376659,-0.0205012032054263,-0.212367658407976,-0.241484343697995,-0.248582298831059,-0.213374214781743,-0.101963712141109,0.0454248851310567,0.181104333949749,0.275294407293258,0.309076259882059,0.298600450385073,0.259080737796607,0.211123535801717,0.183021783525739,0.171966340866576,0.171616812586097,0.168720006300193,0.151066428184072])
    tolerance = 0.01  # handed to allclose as its third positional argument
    self.assertTrue(allclose(denoised, expected, tolerance))

  def test_daubcqf_min(self):
    # daubcqf(4), called without a second argument, must return a pair of
    # sequences matching the stored 4-element references below.
    first, second = daubcqf(4)
    expected_first = [0.482962913144534,0.836516303737808,0.224143868042013,-0.129409522551260]
    expected_second = [0.129409522551260,0.224143868042013,-0.836516303737808,0.482962913144534]
    tolerance = 0.000001  # handed to allclose as its third positional argument
    self.assertTrue(allclose(first, expected_first, tolerance))
    self.assertTrue(allclose(second, expected_second, tolerance))
    
  def test_daubcqf_max(self):
    # daubcqf(4, 'max') must return a pair of sequences matching the stored
    # 4-element references below.
    first, second = daubcqf(4, 'max')
    expected_first = [-0.129409522551260,0.224143868042013,0.836516303737808,0.482962913144534]
    expected_second = [-0.482962913144534,0.836516303737808,-0.224143868042013,-0.129409522551260]
    tolerance = 0.000001  # handed to allclose as its third positional argument
    self.assertTrue(allclose(first, expected_first, tolerance))
    self.assertTrue(allclose(second, expected_second, tolerance))
    
  def test_daubcqf_mid_even_k(self):
    # daubcqf(4, 'mid') must return a pair of sequences matching the stored
    # references; these are the same values test_daubcqf_min expects.
    first, second = daubcqf(4, 'mid')
    expected_first = [0.482962913144534,0.836516303737808,0.224143868042013,-0.129409522551260]
    expected_second = [0.129409522551260,0.224143868042013,-0.836516303737808,0.482962913144534]
    tolerance = 0.000001  # handed to allclose as its third positional argument
    self.assertTrue(allclose(first, expected_first, tolerance))
    self.assertTrue(allclose(second, expected_second, tolerance))
    
  def test_daubcqf_mid_odd_k(self):
    # daubcqf(6, 'mid') must return a pair of sequences matching the stored
    # 6-element references below.
    first, second = daubcqf(6, 'mid')
    expected_first = [0.332670552950083,0.806891509311093,0.459877502118491,-0.135011020010255,-0.085441273882027,0.035226291885710]
    expected_second = [-0.035226291885710,-0.085441273882027,0.135011020010255,0.459877502118491,-0.806891509311093,0.332670552950083]
    tolerance = 0.000001  # handed to allclose as its third positional argument
    self.assertTrue(allclose(first, expected_first, tolerance))
    self.assertTrue(allclose(second, expected_second, tolerance))
  
# Run the test suite through unittest's command-line entry point when this
# file is executed directly; nothing happens when it is merely imported.
if __name__ == '__main__':
    unittest.main()


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/readme
================================================
test


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/Readme.html
================================================
<html>
    <head>
        <meta http-equiv="REFRESH" content="0;url=doc/xunit_product_page.html">
    </head>
    <body>
    </body>
</html>


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/architecture/html/matlab_xunit_architecture.html
================================================

<!DOCTYPE html
  PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
   <head>
      <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
   
      <!--
This HTML is auto-generated from an M-file.
To make changes, update the M-file and republish this document.
      -->
      <title>MATLAB xUnit Test Framework: Architectural Notes</title>
      <meta name="generator" content="MATLAB 7.8">
      <meta name="date" content="2009-06-05">
      <meta name="m-file" content="mtest_architecture"><style type="text/css">

body {
  background-color: white;
  margin:10px;
}

h1 {
  color: #990000; 
  font-size: x-large;
}

h2 {
  color: #990000;
  font-size: medium;
}

/* Make the text shrink to fit narrow windows, but not stretch too far in 
wide windows. */ 
p,h1,h2,div.content div {
  max-width: 600px;
  /* Hack for IE6 */
  width: auto !important; width: 600px;
}

pre.codeinput {
  background: #EEEEEE;
  padding: 10px;
}
@media print {
  pre.codeinput {word-wrap:break-word; width:100%;}
} 

span.keyword {color: #0000FF}
span.comment {color: #228B22}
span.string {color: #A020F0}
span.untermstring {color: #B20000}
span.syscmd {color: #B28C00}

pre.codeoutput {
  color: #666666;
  padding: 10px;
}

pre.error {
  color: red;
}

p.footer {
  text-align: right;
  font-size: xx-small;
  font-weight: lighter;
  font-style: italic;
  color: gray;
}

  </style></head>
   <body>
      <div class="content">
         <h1>MATLAB xUnit Test Framework: Architectural Notes</h1>
         <!--introduction-->
         <p>This document summarizes the key classes and design choices for MATLAB xUnit, a MATLAB unit testing framework based on xUnit
            patterns.
         </p>
         <p>Note: Testing pattern and smell terminology in this document is drawn from <i>xUnit Test Patterns: Refactoring Test Code</i>, by Gerard Meszaros, Addison-Wesley, 2007.
         </p>
         <!--/introduction-->
         <h2>Contents</h2>
         <div>
            <ul>
               <li><a href="#1">TestComponent, TestCase, and TestSuite</a></li>
               <li><a href="#2">TestCase: The Four-Phase Test</a></li>
               <li><a href="#3">Test Case Discovery</a></li>
               <li><a href="#4">FunctionHandleTestCase: For the Procedural World</a></li>
               <li><a href="#5">Writing Procedural Test Cases</a></li>
               <li><a href="#6">TestRunMonitor</a></li>
               <li><a href="#7">File System Test Runner</a></li>
               <li><a href="#8">Test Selection</a></li>
               <li><a href="#9">Assertion Methods</a></li>
               <li><a href="#10">Stack Traces and "Assertion Roulette"</a></li>
               <li><a href="#11">Extending the Framework</a></li>
            </ul>
         </div>
         <h2>TestComponent, TestCase, and TestSuite<a name="1"></a></h2>
         <p><img vspace="5" hspace="5" src="class_diagram_a.gif" alt=""> </p>
         <p>The abstract <tt>TestComponent</tt> class defines an object that has a description (a name and a location) and that can be run.
         </p>
         <p>A <tt>TestCase</tt> object is a test component that defines an individual test case that can be run with a pass or fail result.
         </p>
         <p>A <tt>TestSuite</tt> object is a test component that contains a collection of other test components.  Note the hierarchical nature of test suites;
            they can contain both individual test case objects as well as other test suites. Running a test suite means invoking the <tt>run</tt> method on each test component in its collection.
         </p>
         <h2>TestCase: The Four-Phase Test<a name="2"></a></h2>
         <p>The TestCase class provides the standard xUnit <i>Four-Phase Test</i>, using a <i>Fresh Fixture</i>, <i>Implicit Setup</i>, and <i>Implicit Teardown</i>. These elements can all be seen in the <tt>run</tt> method of TestCase:
         </p><pre>       function did_pass = run(self, monitor)
           %run Execute the test case
           %    test_case.run(monitor) calls the TestCase object's setUp()
           %    method, then the test method, then the tearDown() method.
           %    observer is a TestRunObserver object.  The testStarted(),
           %    testFailure(), testError(), and testFinished() methods of
           %    observer are called at the appropriate times.  monitor is a
           %    TestRunMonitor object.  Typically it is either a TestRunLogger
           %    subclass or a CommandWindowTestRunDisplay subclass.
           %
           %    test_case.run() automatically uses a
           %    CommandWindowTestRunDisplay object in order to print test
           %    suite execution information to the Command Window.</pre><pre>           if nargin &lt; 2
               monitor = CommandWindowTestRunDisplay();
           end</pre><pre>           did_pass = true;
           monitor.testComponentStarted(self);</pre><pre>           try
               self.setUp();
               f = str2func(self.MethodName);</pre><pre>               try
                   % Call the test method.
                   f(self);
               catch failureException
                   monitor.testCaseFailure(self, failureException);
                   did_pass = false;
               end</pre><pre>               self.tearDown();</pre><pre>           catch errorException
               monitor.testCaseError(self, errorException);
               did_pass = false;
           end</pre><pre>           monitor.testComponentFinished(self, did_pass);
       end</pre><p>Phase 1 sets up the test fixture via the <i>Implicit Setup</i> call, <tt>self.setUp()</tt>. The base class <tt>setUp()</tt> method does nothing.
         </p>
         <p>Phases 2 and 3 (exercising the system under test and verifying the expected outcome) are handled by the test method, which
            is invoked by <tt>f(self)</tt>.
         </p>
         <p>Phase 4 tears down the test fixture via the <i>Implicit Teardown</i> call, <tt>self.tearDown()</tt>.  The base class <tt>tearDown()</tt> method does nothing.
         </p>
         <p>Test failure and test error exceptions are caught and handled by the <tt>run()</tt> method, so test methods do not need to use try-catch.  This facilitates simple, straight-line test-method code.
         </p>
         <p><i>Note: The <tt>monitor</tt> object will be discussed later.</i></p>
         <h2>Test Case Discovery<a name="3"></a></h2>
         <p>The static method <tt>TestSuite.fromName</tt> constructs a test suite based on the name of an M-file.  If the M-file defines a <tt>TestCase</tt> subclass, then <tt>fromName</tt> inspects the methods of the class and constructs a <tt>TestCase</tt> object for each method whose name begins with "[tT]est".  If the M-file does not define a <tt>TestCase</tt> subclass, then <tt>fromName</tt> attempts to construct either a simple procedural test case or a set of subfunction-based test cases.  (See the next section).
         </p>
         <p>The static method <tt>TestSuite.fromPwd</tt> constructs a test suite by discovering all the test cases in the present working directory.  It discovers all <tt>TestCase</tt> subclasses in the directory. In addition, it constructs test suites from all the procedural M-files in the directory beginning
            with "[tT]est".
         </p>
         <p>The <i>File System Test Runner</i>, <tt>runtests</tt>, provides convenient syntaxes for performing test case discovery automatically.
         </p>
         <h2>FunctionHandleTestCase: For the Procedural World<a name="4"></a></h2>
         <p>Most MATLAB users are much more comfortable with procedural programming.  An important design goal for MATLAB xUnit is to
            make it as easy as possible for MATLAB users with little object-oriented programming experience to create and run their own
            tests.  The FunctionHandleTestCase supplies the plumbing necessary to support procedural test functions:
         </p>
         <p><img vspace="5" hspace="5" src="class_diagram_b.gif" alt=""> </p>
         <p>Private properties <tt>SetupFcn</tt>, <tt>TestFcn</tt>, and <tt>TeardownFcn</tt> are procedural <i>function handles</i> (similar to function pointers or function references in other languages).
         </p>
         <p><tt>runTestCase()</tt> is the test method used for constructing a TestCase object.
         </p>
         <p>Managing test fixtures requires special consideration, because procedural function handles don't have access to object instance
            data in order to access a test fixture.
         </p>
         <p>The overridden <tt>setUp()</tt> method looks at the number of outputs of the function handle <tt>SetupFcn</tt>.  If it has an output argument, then the argument is saved in the private <tt>TestData</tt> property, and <tt>TestData</tt> is then passed to both <tt>TestFcn</tt> and <tt>TeardownFcn</tt> for their use.
         </p>
         <h2>Writing Procedural Test Cases<a name="5"></a></h2>
         <p>Procedural test cases can be written in two ways:</p>
         <div>
            <ul>
               <li>A simple M-file function that is treated as a single test case</li>
               <li>An M-file containing multiple subfunctions that are each treated as a test case.</li>
            </ul>
         </div>
         <p>In either case, the test case is considered to pass if it executes without error.</p>
         <p>Writing one test case per file is not ideal; it would lead to either zillions of tiny little test files, or long test methods
            exhibiting various bad test smells (<i>Multiple Test Conditions</i>, <i>Flexible Test</i>, <i>Conditional Test Logic</i>, <i>Eager Test</i>, <i>Obscure Test</i>, etc.)  So we need a way to write multiple test cases in a single procedural M-file.  The natural MATLAB way would be to
            use subfunctions.
         </p>
         <p>However, subfunction-based test cases require special consideration.  Consider the following M-file structure:</p><pre>  === File A.m ===
  function A
     ...</pre><pre>  function B
     ...</pre><pre>  function C
     ...</pre><pre>  function D
     ...</pre><p>The first function in the file, <tt>A</tt>, has the same name as the file.  When other code outside this function calls <tt>A</tt>, it is this first function that gets called.  Functions <tt>B</tt>, <tt>C</tt>, and <tt>D</tt> are called <i>subfunctions</i>. Normally, these subfunctions are only visible to and can only be called by <tt>A</tt>.  The only way that code elsewhere might be able to call <tt>B</tt>, <tt>C</tt>, or <tt>D</tt> is if function <tt>A</tt> forms handles to them and passes those handles out of its scope.  Normally this would be done by returning the function handles
            as output arguments.
         </p>
         <p>Note that no code executing outside the scope of a function in A.m can form function handles to <tt>B</tt>, <tt>C</tt>, or <tt>D</tt>, or can even determine that these functions exist.
         </p>
         <p>This obviously poses a problem for test discovery!</p>
         <p>The MATLAB xUnit solution is to establish the following convention for subfunction-based tests.  The first function in a test
            M-file containing subfunction tests has to begin with these lines:
         </p><pre>  === File A.m ===
  function test_suite = A
  initTestSuite;
  ...</pre><p><tt>initTestSuite</tt> is a <i>script</i> that runs in the scope of the function <tt>A</tt>. <tt>initTestSuite</tt> determines which subfunctions are test functions, as well as setup or teardown functions.  It forms handles to these functions
            and constructs a set of FunctionHandleTestCase objects, which function <tt>A</tt> returns as the output argument <tt>test_suite</tt>.
         </p>
         <h2>TestRunMonitor<a name="6"></a></h2>
         <p>The abstract <tt>TestRunMonitor</tt> class defines the interface for an object that "observes" the in-progress execution of a test suite.  MATLAB xUnit provides
            two subclasses of <tt>TestRunMonitor</tt>:
         </p>
         <div>
            <ul>
               <li><tt>TestRunLogger</tt> silently logs test suite events and captures the details of any test failures or test errors.
               </li>
               <li><tt>CommandWindowTestRunDisplay</tt> prints the progress of an executing test suite to the Command Window.
               </li>
            </ul>
         </div>
         <p><img vspace="5" hspace="5" src="class_diagram_c.gif" alt=""> </p>
         <p>A TestRunMonitor is passed to the <tt>run()</tt> method of a TestComponent object. The <tt>run()</tt> method calls the appropriate notification methods of the monitor.
         </p>
         <p>Here is the output when using the CommandWindowTestRunDisplay object on MATLAB xUnit's own test suite:</p><pre>  runtests
  Starting test run with 92 test cases.
  ....................
  ....................
  ....................
  ....................
  ............
  PASSED in 7.040 seconds.</pre><h2>File System Test Runner<a name="7"></a></h2>
         <p>MATLAB xUnit provides a command-line <i>File System Test Runner</i> called <tt>runtests</tt>.  When called with no input arguments, <tt>runtests</tt> gathers all the test cases from the current directory and runs them, summarizing the results to the Command Window.  <tt>runtests</tt> can also take a string argument specifying which test file, and optionally which specific test case, to run.
         </p>
         <h2>Test Selection<a name="8"></a></h2>
         <p>Test selection is supported in <tt>runtests</tt> by passing in a string of the form:
         </p><pre>   'Location:Name'</pre><p>or just:</p><pre>   'Location'</pre><p>Both of these forms are handled by <tt>runtests</tt> and by <tt>TestSuite.fromName</tt>.
         </p>
         <p>'Location' is the name of the M-file containing test cases.  'Name' is the name of a specific test case.  Normally, the name
            of the test case is the name of the corresponding TestCase method.  For FunctionHandleTestCase objects, though, 'Name' is
            the subfunction name.
         </p>
         <h2>Assertion Methods<a name="9"></a></h2>
         <p>MATLAB xUnit provides the following assertion methods:</p>
         <div>
            <ul>
               <li><i>Stated Outcome Assertion</i> (<tt>assertTrue</tt>, <tt>assertFalse</tt>)
               </li>
               <li><i>Equality Assertion</i> (<tt>assertEqual</tt>)
               </li>
               <li><i>Fuzzy Equality Assertion</i> (<tt>assertElementsAlmostEqual</tt>, <tt>assertVectorsAlmostEqual</tt>)
               </li>
               <li><i>Expected Exception Assertion</i> (<tt>assertExceptionRaised</tt>)
               </li>
            </ul>
         </div>
         <p>Assertion functions are provided via globally accessible names (e.g., <tt>assertEqual</tt>).  The assertion functions could be moved to the <tt>xunit</tt> package, but MATLAB users are not accustomed yet to packages and package name-scoping syntax.
         </p>
         <p>'message' is the last input to the assertion functions and is optional.  (See below for discussion of <i>Assertion Roulette</i>.)
         </p>
         <p>The <i>Expected Exception Assertion</i>, <tt>assertExceptionRaised</tt> is used by forming an anonymous function handle from an expression that is expected to error, and then passing that function
            handle to <tt>assertExceptionRaised</tt> along with the expected exception identifier. For example:
         </p><pre>  f = @() sin(1,2,3);
  assertExceptionRaised(f, 'MATLAB:maxrhs')</pre><p>By using this mechanism, test writers can verify exceptions without using try-catch logic in their test code.</p>
         <h2>Stack Traces and "Assertion Roulette"<a name="10"></a></h2>
         <p><i>xUnit Test Patterns</i> explains the smell <i>Assertion Roulette</i> this way: "It is hard to tell which of several assertions within the same test method caused a test failure."
         </p>
         <p>MATLAB xUnit mitigates against <i>Assertion Roulette</i> by capturing the entire stack trace, including line numbers, for every test failure and test error.  (The MATLAB MException
            object, which you obtain via the <tt>catch</tt> clause, contains the stack trace.)  The stack trace is displayed to the Command Window, with clickable links that load the
            corresponding M-file into the editor at the appropriate line number.
         </p>
         <p>Stack traces can be pretty long, though.  Also, test framework plumbing tends to occupy the trace in between the assertion
            and the user's test code, thus making the trace hard to interpret for less-experienced users.  MATLAB xUnit, therefore, uses
            a stack filtering heuristic for displaying test fault traces: Starting at the deepest call level, once the trace leaves MATLAB
            xUnit framework functions, all further framework functions are filtered out of the stack trace.
         </p>
         <p>Here's an example of stack trace display in the output of <tt>runtests</tt>:
         </p>
         <p><tt> &gt;&gt; runtests testSample<br /> Starting test run with 1 test case.<br /> F<br /> FAILED in 0.081 seconds.<br />
            <br /> ===== Test Case Failure =====<br /> Location: c:\work\matlab_xunit\architecture\testSample.m<br /> Name:     testMyCode<br
            /> <br /> c:\work\matlab_xunit\architecture\testSample.m at <span style="color:blue; text-decoration:underline">line 6</span><br
            /> <br /> Input elements are not all equal within relative tolerance: 1.49012e-008<br /> <br /> First input:<br />      1<br
            /> <br /> Second input:<br />     1.1000<br /> </tt>
         </p>
         <p>Clicking on the blue, underlined link above loads the corresponding file into the editor, positioned at the appropriate line.</p>
         <h2>Extending the Framework<a name="11"></a></h2>
         <p>The MATLAB xUnit framework can be extended primarily by subclassing <tt>TestCase</tt>, <tt>TestSuite</tt>, and <tt>TestRunMonitor</tt>.
         </p>
         <p><tt>TestCase</tt> can be subclassed to enable a new set of test cases that all share some particular behavior.  The MATLAB xUnit Test Framework
            contains three examples of extending <tt>TestCase</tt> behavior in this way:
         </p>
         <div>
            <ul>
               <li><tt>FunctionHandleTestCase</tt> provides the ability to define test cases based on procedural function handles.
               </li>
               <li><tt>TestCaseInDir</tt> defines a test case that must be run inside a particular directory. The <tt>setUp</tt> and <tt>tearDown</tt> functions are overridden to change the MATLAB working directory before running the test case, and then to restore the original
                  working directory when the test case finishes.  The class is used by the framework's own test suite.
               </li>
               <li><tt>TestCaseInPath</tt> defines a test case that must be run with a particular directory temporarily added to the MATLAB path.  Its implementation
                  is similar to <tt>TestCaseInDir</tt>, and it is also used by the framework's own test suite.
               </li>
            </ul>
         </div>
         <p><tt>TestSuite</tt> could be similarly extended by subclassing. This might provide a way in the future to define a test suite containing collections
            of test components in separate directories, which is not currently supported.
         </p>
         <p>Finally <tt>TestRunMonitor</tt> could be subclassed to support a variety of test monitoring mechanisms, such as what might be required by a <i>Graphical Test Runner</i>.
         </p>
         <p class="footer"><br>
            Published with MATLAB&reg; 7.8<br></p>
      </div>
      <!--
##### SOURCE BEGIN #####
%% MATLAB xUnit Test Framework: Architectural Notes
% This document summarizes the key classes and design choices for MATLAB xUnit,
% a MATLAB unit testing framework based on xUnit patterns.
%
% Note: Testing pattern and smell terminology in this document is drawn from
% _xUnit Test Patterns: Refactoring Test Code_, by Gerard Meszaros,
% Addison-Wesley, 2007.

%% TestComponent, TestCase, and TestSuite
%
% <<class_diagram_a.gif>>
%
% The abstract |TestComponent| class defines an object that has a description (a
% name and a location) and that can be run.
%
% A |TestCase| object is a test component that defines an individual test case
% that can be run with a pass or fail result.
%
% A |TestSuite| object is a test component that contains a collection of other
% test components.  Note the hierarchical nature of test suites; they can
% contain both individual test case objects as well as other test suites.
% Running a test suite means invoking the |run| method on each test component in
% its collection.

%% TestCase: The Four-Phase Test
%
% The TestCase class provides the standard xUnit _Four-Phase Test_, using
% a _Fresh Fixture_, _Implicit Setup_, and _Implicit Teardown_. These
% elements can all be seen in the |run| method of TestCase:
%
%         function did_pass = run(self, monitor)
%             %run Execute the test case
%             %    test_case.run(monitor) calls the TestCase object's setUp()
%             %    method, then the test method, then the tearDown() method.
%             %    observer is a TestRunObserver object.  The testStarted(),
%             %    testFailure(), testError(), and testFinished() methods of
%             %    observer are called at the appropriate times.  monitor is a
%             %    TestRunMonitor object.  Typically it is either a TestRunLogger
%             %    subclass or a CommandWindowTestRunDisplay subclass.
%             %
%             %    test_case.run() automatically uses a
%             %    CommandWindowTestRunDisplay object in order to print test
%             %    suite execution information to the Command Window.
%             
%             if nargin < 2
%                 monitor = CommandWindowTestRunDisplay();
%             end
%             
%             did_pass = true;
%             monitor.testComponentStarted(self);
%             
%             try
%                 self.setUp();
%                 f = str2func(self.MethodName);
%                 
%                 try
%                     % Call the test method.
%                     f(self);
%                 catch failureException
%                     monitor.testCaseFailure(self, failureException);
%                     did_pass = false;
%                 end
%                 
%                 self.tearDown();
%                 
%             catch errorException
%                 monitor.testCaseError(self, errorException);
%                 did_pass = false;
%             end
%             
%             monitor.testComponentFinished(self, did_pass);
%         end
%
% Phase 1 sets up the test fixture via the _Implicit Setup_ call, |self.setUp()|.
% The base class |setUp()| method does nothing.
%
% Phases 2 and 3 (exercising the system under test and verifying the expected
% outcome) are handled by the test method, which is invoked by |f(self)|.
%
% Phase 4 tears down the test fixture via the _Implicit Teardown_ call,
% |self.tearDown()|.  The base class |tearDown()| method does nothing.
%
% Test failure and test error exceptions are caught and handled by the |run()|
% method, so test methods do not need to use try-catch.  This facilitates
% simple, straight-line test-method code.
%
% _Note: The |monitor| object will be discussed later._

%% Test Case Discovery
% The static method |TestSuite.fromName| constructs a test suite based on the
% name of an M-file.  If the M-file defines a |TestCase| subclass, then |fromName|
% inspects the methods of the class and constructs a |TestCase| object for each
% method whose name begins with "[tT]est".  If the M-file does not define a
% |TestCase| subclass, then |fromName| attempts to construct either a simple
% procedural test case or a set of subfunction-based test cases.  (See the next
% section).
%
% The static method |TestSuite.fromPwd| constructs a test suite by discovering
% all the test cases in the present working directory.  It discovers all
% |TestCase| subclasses in the directory. In addition, it constructs test suites
% from all the procedural M-files in the directory beginning with "[tT]est".
%
% The _File System Test Runner_, |runtests|, provides convenient syntaxes for
% performing test case discovery automatically.

%% FunctionHandleTestCase: For the Procedural World
% Most MATLAB users are much more comfortable with procedural programming.  An
% important design goal for MATLAB xUnit is to make it as easy as possible for MATLAB
% users with little object-oriented programming experience to create and run
% their own tests.  The FunctionHandleTestCase supplies the plumbing necessary
% to support procedural test functions:
%
% <<class_diagram_b.gif>>
%
% Private properties |SetupFcn|, |TestFcn|, and |TeardownFcn| are procedural 
% _function handles_ (similar to function pointers or function references in
% other languages).
%
% |runTestCase()| is the test method used for constructing a TestCase object.
%
% Managing test fixtures requires special consideration, because procedural
% function handles don't have access to object instance data in order to access
% a test fixture.
%
% The overridden |setUp()| method looks at the number of outputs of the function
% handle |SetupFcn|.  If it has an output argument, then the argument is saved
% in the private |TestData| property, and |TestData| is then passed to both
% |TestFcn| and |TeardownFcn| for their use.

%% Writing Procedural Test Cases
% Procedural test cases can be written in two ways: 
%
% * A simple M-file function that is treated as a single test case
% * An M-file containing multiple subfunctions that are each treated as a test case. 
%
% In either case, the test
% case is considered to pass if it executes without error.
%
% Writing one test case per file is not ideal; it would lead to either zillions
% of tiny little test files, or long test methods exhibiting various bad test
% smells (_Multiple Test Conditions_, _Flexible Test_, _Conditional Test Logic_,
% _Eager Test_, _Obscure Test_, etc.)  So we need a way to write multiple test
% cases in a single procedural M-file.  The natural MATLAB way would be to use
% subfunctions.
%
% However, subfunction-based test cases require special consideration.  Consider
% the following M-file structure:
%
%    === File A.m ===
%    function A
%       ...
% 
%    function B
%       ...
% 
%    function C
%       ...
% 
%    function D
%       ...
%
% The first function in the file, |A|, has the same name as the file.  When
% other code outside this function calls |A|, it is this first function that
% gets called.  Functions |B|, |C|, and |D| are called _subfunctions_.
% Normally, these subfunctions are only visible to and can only be called by
% |A|.  The only way that code elsewhere might be able to call |B|, |C|, or |D|
% is if function |A| forms handles to them and passes those handles out of its
% scope.  Normally this would be done by returning the function handles as
% output arguments.
%
% Note that no code executing outside the scope of a function in A.m can form
% function handles to |B|, |C|, or |D|, or can even determine that these
% functions exist.
%
% This obviously poses a problem for test discovery!
%
% The MATLAB xUnit solution is to establish the following convention for
% subfunction-based tests.  The first function in a test M-file containing
% subfunction tests has to begin with these lines:
%
%    === File A.m ===
%    function test_suite = A
%    initTestSuite;
%    ...
%
% |initTestSuite| is a _script_ that runs in the scope of the function |A|.
% |initTestSuite| determines which subfunctions are test functions, as well as setup
% or teardown functions.  It forms handles to these functions and constructs a
% set of FunctionHandleTestCase objects, which function |A| returns as the
% output argument |test_suite|.

%% TestRunMonitor
% The abstract |TestRunMonitor| class defines the interface for an object that
% "observes" the in-progress execution of a test suite.  MATLAB xUnit provides two
% subclasses of |TestRunMonitor|:
%
% * |TestRunLogger| silently logs test suite events and captures the details of
% any test failures or test errors.
% * |CommandWindowTestRunDisplay| prints the progress of an executing test suite
% to the Command Window.
%
% <<class_diagram_c.gif>>
%
% A TestRunMonitor is passed to the |run()| method of a TestComponent object.
% The |run()| method calls the appropriate notification methods of the
% monitor.
%
% Here is the output when using the CommandWindowTestRunDisplay object on the
% MATLAB xUnit's own test suite:
%
%    runtests 
%    Starting test run with 92 test cases.
%    ....................
%    ....................
%    ....................
%    ....................
%    ............
%    PASSED in 7.040 seconds.

%% File System Test Runner
% MATLAB xUnit provides a command-line _File System Test Runner_ called
% |runtests|.  When called with no input arguments, |runtests| gathers all the 
% test cases from the current directory and runs them, summarizing the results
% to the Command Window.  |runtests| can also take a string argument specifying
% which test file, and optionally which specific test case, to run.

%% Test Selection
% Test selection is supported in |runtests| by passing in a string of the form:
%
%     'Location:Name'
%
% or just:
%
%     'Location'
%
% Both of these forms are handled by |runtests| and by |TestSuite.fromName|.
%
% 'Location' is the name of the M-file containing test cases.  'Name' is the
% name of a specific test case.  Normally, the name of the test case is the name
% of the corresponding TestCase method.  For FunctionHandleTestCase objects,
% though, 'Name' is the subfunction name.

%% Assertion Methods
% MATLAB xUnit provides the following assertion methods:
%
% * _Stated Outcome Assertion_ (|assertTrue|, |assertFalse|)
% * _Equality Assertion_ (|assertEqual|)
% * _Fuzzy Equality Assertion_ (|assertElementsAlmostEqual|, |assertVectorsAlmostEqual|)
% * _Expected Exception Assertion_ (|assertExceptionRaised|)
%
% Assertion functions are provided via globally accessible names (e.g.,
% |assertEqual|).  The assertion functions could be moved to the |xunit|
% package, but MATLAB users are not accustomed yet to packages and package
% name-scoping syntax.
%
% 'message' is the last input to the assertion functions and is optional.  (See
% below for discussion of _Assertion Roulette_.)
%
% The _Expected Exception Assertion_, |assertExceptionRaised| is used by forming
% an anonymous function handle from an expression that is expected to error, and
% then passing that function handle to |assertExceptionRaised| along with the
% expected exception identifier. For example:
%
%    f = @() sin(1,2,3);
%    assertExceptionRaised(f, 'MATLAB:maxrhs')
%
% By using this mechanism, test writers can verify exceptions without using
% try-catch logic in their test code.

%% Stack Traces and "Assertion Roulette"
% _xUnit Test Patterns_ explains the smell _Assertion Roulette_ this way: "It is
% hard to tell which of several assertions within the same test method caused a
% test failure."
%
% MATLAB xUnit mitigates _Assertion Roulette_ by capturing the entire stack
% trace, including line numbers, for every test failure and test error.  (The
% MATLAB MException object, which you obtain via the |catch| clause, contains
% the stack trace.)  The stack trace is displayed to the Command Window, with
% clickable links that load the corresponding M-file into the editor at the
% appropriate line number.
%
% Stack traces can be pretty long, though.  Also, test framework plumbing tends
% to occupy the trace in between the assertion and the user's test code, thus
% making the trace hard to interpret for less-experienced users.  MATLAB xUnit,
% therefore, uses a stack filtering heuristic for displaying test fault traces:
% Starting at the deepest call level, once the trace leaves MATLAB xUnit framework
% functions, all further framework functions are filtered out of the stack
% trace.
%
% Here's an example of stack trace display in the output of |runtests|:
%
% <html>
% <tt>
% >> runtests testSample<br />
% Starting test run with 1 test case.<br />
% F<br />
% FAILED in 0.081 seconds.<br />
% <br />
% ===== Test Case Failure =====<br />
% Location: c:\work\matlab_xunit\architecture\testSample.m<br />
% Name:     testMyCode<br />
% <br />
% c:\work\matlab_xunit\architecture\testSample.m at <span style="color:blue; 
% text-decoration:underline">line 6</span><br />
% <br />
% Input elements are not all equal within relative tolerance: 1.49012e-008<br />
% <br />
% First input:<br />
%      1<br />
% <br />
% Second input:<br />
%     1.1000<br />
% </tt>
% </html>
%
% Clicking on the blue, underlined link above loads the corresponding file into
% the editor, positioned at the appropriate line.

%% Extending the Framework
% The MATLAB xUnit framework can be extended primarily by subclassing |TestCase|,
% |TestSuite|, and |TestRunMonitor|.
%
% |TestCase| can be subclassed to enable a new set of test cases that all share
% some particular behavior.  The MATLAB xUnit Test Framework contains three
% examples of extending |TestCase| behavior in this way:
%
% * |FunctionHandleTestCase| provides the ability to define test cases based on
% procedural function handles.
% * |TestCaseInDir| defines a test case that must be run inside a particular
% directory. The |setUp| and |tearDown| functions are overridden to change the
% MATLAB working directory before running the test case, and then to restore the
% original working directory when the test case finished.  The class is used by
% the framework's own test suite.
% * |TestCaseInPath| defines a test case that must be run with a particular
% directory temporarily added to the MATLAB path.  Its implementation is similar
% to |TestCaseInDir|, and it is also used by the framework's own test suite.
%
% |TestSuite| could be similarly extended by subclassing. This might provide a
% way in the future to define a test suite containing collections of test
% components in separate directories, which is not currently supported.
%
% Finally |TestRunMonitor| could be subclassed to support a variety of test
% monitoring mechanisms, such as what might be required by a _Graphical Test
% Runner_.
##### SOURCE END #####
-->
   </body>
</html>

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/architecture/matlab_xunit_architecture.m
================================================
%% MATLAB xUnit Test Framework: Architectural Notes
% This document summarizes the key classes and design choices for MATLAB xUnit,
% a MATLAB unit testing framework based on xUnit patterns.
%
% Note: Testing pattern and smell terminology in this document is drawn from
% _xUnit Test Patterns: Refactoring Test Code_, by Gerard Meszaros,
% Addison-Wesley, 2007.

%% TestComponent, TestCase, and TestSuite
%
% <<class_diagram_a.gif>>
%
% The abstract |TestComponent| class defines an object that has a description (a
% name and a location) and that can be run.
%
% A |TestCase| object is a test component that defines an individual test case
% that can be run with a pass or fail result.
%
% A |TestSuite| object is a test component that contains a collection of other
% test components.  Note the hierarchical nature of test suites; they can
% contain both individual test case objects as well as other test suites.
% Running a test suite means invoking the |run| method on each test component in
% its collection.

%% TestCase: The Four-Phase Test
%
% The TestCase class provides the standard xUnit _Four-Phase Test_, using
% a _Fresh Fixture_, _Implicit Setup_, and _Implicit Teardown_. These
% elements can all be seen in the |run| method of TestCase:
%
%         function did_pass = run(self, monitor)
%             %run Execute the test case
%             %    test_case.run(monitor) calls the TestCase object's setUp()
%             %    method, then the test method, then the tearDown() method.
%             %    observer is a TestRunObserver object.  The testStarted(),
%             %    testFailure(), testError(), and testFinished() methods of
%             %    observer are called at the appropriate times.  monitor is a
%             %    TestRunMonitor object.  Typically it is either a TestRunLogger
%             %    subclass or a CommandWindowTestRunDisplay subclass.
%             %
%             %    test_case.run() automatically uses a
%             %    CommandWindowTestRunDisplay object in order to print test
%             %    suite execution information to the Command Window.
%             
%             if nargin < 2
%                 monitor = CommandWindowTestRunDisplay();
%             end
%             
%             did_pass = true;
%             monitor.testComponentStarted(self);
%             
%             try
%                 self.setUp();
%                 f = str2func(self.MethodName);
%                 
%                 try
%                     % Call the test method.
%                     f(self);
%                 catch failureException
%                     monitor.testCaseFailure(self, failureException);
%                     did_pass = false;
%                 end
%                 
%                 self.tearDown();
%                 
%             catch errorException
%                 monitor.testCaseError(self, errorException);
%                 did_pass = false;
%             end
%             
%             monitor.testComponentFinished(self, did_pass);
%         end
%
% Phase 1 sets up the test fixture via the _Implicit Setup_ call, |self.setUp()|.
% The base class |setUp()| method does nothing.
%
% Phases 2 and 3 (exercising the system under test and verifying the expected
% outcome) are handled by the test method, which is invoked by |f(self)|.
%
% Phase 4 tears down the test fixture via the _Implicit Teardown_ call,
% |self.tearDown()|.  The base class |tearDown()| method does nothing.
%
% Test failure and test error exceptions are caught and handled by the |run()|
% method, so test methods do not need to use try-catch.  This facilitates
% simple, straight-line test-method code.
%
% _Note: The |monitor| object will be discussed later._

%% Test Case Discovery
% The static method |TestSuite.fromName| constructs a test suite based on the
% name of an M-file.  If the M-file defines a |TestCase| subclass, then |fromName|
% inspects the methods of the class and constructs a |TestCase| object for each
% method whose name begins with "[tT]est".  If the M-file does not define a
% |TestCase| subclass, then |fromName| attempts to construct either a simple
% procedural test case or a set of subfunction-based test cases.  (See the next
% section).
%
% The static method |TestSuite.fromPwd| constructs a test suite by discovering
% all the test cases in the present working directory.  It discovers all
% |TestCase| subclasses in the directory. In addition, it constructs test suites
% from all the procedural M-files in the directory beginning with "[tT]est".
%
% The _File System Test Runner_, |runtests|, provides convenient syntaxes for
% performing test case discovery automatically.

%% FunctionHandleTestCase: For the Procedural World
% Most MATLAB users are much more comfortable with procedural programming.  An
% important design goal for MATLAB xUnit is to make it as easy as possible for MATLAB
% users with little object-oriented programming experience to create and run
% their own tests.  The FunctionHandleTestCase supplies the plumbing necessary
% to support procedural test functions:
%
% <<class_diagram_b.gif>>
%
% Private properties |SetupFcn|, |TestFcn|, and |TeardownFcn| are procedural 
% _function handles_ (similar to function pointers or function references in
% other languages).
%
% |runTestCase()| is the test method used for constructing a TestCase object.
%
% Managing test fixtures requires special consideration, because procedural
% function handles don't have access to object instance data in order to access
% a test fixture.
%
% The overridden |setUp()| method looks at the number of outputs of the function
% handle |SetupFcn|.  If it has an output argument, then the argument is saved
% in the private |TestData| property, and |TestData| is then passed to both
% |TestFcn| and |TeardownFcn| for their use.

%% Writing Procedural Test Cases
% Procedural test cases can be written in two ways: 
%
% * A simple M-file function that is treated as a single test case
% * An M-file containing multiple subfunctions that are each treated as a test case. 
%
% In either case, the test
% case is considered to pass if it executes without error.
%
% Writing one test case per file is not ideal; it would lead to either zillions
% of tiny little test files, or long test methods exhibiting various bad test
% smells (_Multiple Test Conditions_, _Flexible Test_, _Conditional Test Logic_,
% _Eager Test_, _Obscure Test_, etc.)  So we need a way to write multiple test
% cases in a single procedural M-file.  The natural MATLAB way would be to use
% subfunctions.
%
% However, subfunction-based test cases require special consideration.  Consider
% the following M-file structure:
%
%    === File A.m ===
%    function A
%       ...
% 
%    function B
%       ...
% 
%    function C
%       ...
% 
%    function D
%       ...
%
% The first function in the file, |A|, has the same name as the file.  When
% other code outside this function calls |A|, it is this first function that
% gets called.  Functions |B|, |C|, and |D| are called _subfunctions_.
% Normally, these subfunctions are only visible to and can only be called by
% |A|.  The only way that code elsewhere might be able to call |B|, |C|, or |D|
% is if function |A| forms handles to them and passes those handles out of its
% scope.  Normally this would be done by returning the function handles as
% output arguments.
%
% Note that no code executing outside the scope of a function in A.m can form
% function handles to |B|, |C|, or |D|, or can even determine that these
% functions exist.
%
% This obviously poses a problem for test discovery!
%
% The MATLAB xUnit solution is to establish the following convention for
% subfunction-based tests.  The first function in a test M-file containing
% subfunction tests has to begin with these lines:
%
%    === File A.m ===
%    function test_suite = A
%    initTestSuite;
%    ...
%
% |initTestSuite| is a _script_ that runs in the scope of the function |A|.
% |initTestSuite| determines which subfunctions are test functions, as well as setup
% or teardown functions.  It forms handles to these functions and constructs a
% set of FunctionHandleTestCase objects, which function |A| returns as the
% output argument |test_suite|.

%% TestRunMonitor
% The abstract |TestRunMonitor| class defines the interface for an object that
% "observes" the in-progress execution of a test suite.  MATLAB xUnit provides two
% subclasses of |TestRunMonitor|:
%
% * |TestRunLogger| silently logs test suite events and captures the details of
% any test failures or test errors.
% * |CommandWindowTestRunDisplay| prints the progress of an executing test suite
% to the Command Window.
%
% <<class_diagram_c.gif>>
%
% A TestRunMonitor is passed to the |run()| method of a TestComponent object.
% The |run()| method calls the appropriate notification methods of the
% monitor.
%
% Here is the output when using the CommandWindowTestRunDisplay object on
% MATLAB xUnit's own test suite:
%
%    runtests 
%    Starting test run with 92 test cases.
%    ....................
%    ....................
%    ....................
%    ....................
%    ............
%    PASSED in 7.040 seconds.

%% File System Test Runner
% MATLAB xUnit provides a command-line _File System Test Runner_ called
% |runtests|.  When called with no input arguments, |runtests| gathers all the 
% test cases from the current directory and runs them, summarizing the results
% to the Command Window.  |runtests| can also take a string argument specifying
% which test file, and optionally which specific test case, to run.

%% Test Selection
% Test selection is supported in |runtests| by passing in a string of the form:
%
%     'Location:Name'
%
% or just:
%
%     'Location'
%
% Both of these forms are handled by |runtests| and by |TestSuite.fromName|.
%
% 'Location' is the name of the M-file containing test cases.  'Name' is the
% name of a specific test case.  Normally, the name of the test case is the name
% of the corresponding TestCase method.  For FunctionHandleTestCase objects,
% though, 'Name' is the subfunction name.

%% Assertion Methods
% MATLAB xUnit provides the following assertion methods:
%
% * _Stated Outcome Assertion_ (|assertTrue|, |assertFalse|)
% * _Equality Assertion_ (|assertEqual|)
% * _Fuzzy Equality Assertion_ (|assertElementsAlmostEqual|, |assertVectorsAlmostEqual|)
% * _Expected Exception Assertion_ (|assertExceptionThrown|)
%
% Assertion functions are provided via globally accessible names (e.g.,
% |assertEqual|).  The assertion functions could be moved to the |xunit|
% package, but MATLAB users are not accustomed yet to packages and package
% name-scoping syntax.
%
% 'message' is the last input to the assertion functions and is optional.  (See
% below for discussion of _Assertion Roulette_.)
%
% The _Expected Exception Assertion_, |assertExceptionThrown|, is used by forming
% an anonymous function handle from an expression that is expected to error, and
% then passing that function handle to |assertExceptionThrown| along with the
% expected exception identifier. For example:
%
%    f = @() sin(1,2,3);
%    assertExceptionThrown(f, 'MATLAB:maxrhs')
%
% By using this mechanism, test writers can verify exceptions without using
% try-catch logic in their test code.

%% Stack Traces and "Assertion Roulette"
% _xUnit Test Patterns_ explains the smell _Assertion Roulette_ this way: "It is
% hard to tell which of several assertions within the same test method caused a
% test failure."
%
% MATLAB xUnit mitigates _Assertion Roulette_ by capturing the entire stack
% trace, including line numbers, for every test failure and test error.  (The
% MATLAB MException object, which you obtain via the |catch| clause, contains
% the stack trace.)  The stack trace is displayed to the Command Window, with
% clickable links that load the corresponding M-file into the editor at the
% appropriate line number.
%
% Stack traces can be pretty long, though.  Also, test framework plumbing tends
% to occupy the trace in between the assertion and the user's test code, thus
% making the trace hard to interpret for less-experienced users.  MATLAB xUnit,
% therefore, uses a stack filtering heuristic for displaying test fault traces:
% Starting at the deepest call level, once the trace leaves MATLAB xUnit framework
% functions, all further framework functions are filtered out of the stack
% trace.
%
% Here's an example of stack trace display in the output of |runtests|:
%
% <html>
% <tt>
% >> runtests testSample<br />
% Starting test run with 1 test case.<br />
% F<br />
% FAILED in 0.081 seconds.<br />
% <br />
% ===== Test Case Failure =====<br />
% Location: c:\work\matlab_xunit\architecture\testSample.m<br />
% Name:     testMyCode<br />
% <br />
% c:\work\matlab_xunit\architecture\testSample.m at <span style="color:blue; 
% text-decoration:underline">line 6</span><br />
% <br />
% Input elements are not all equal within relative tolerance: 1.49012e-008<br />
% <br />
% First input:<br />
%      1<br />
% <br />
% Second input:<br />
%     1.1000<br />
% </tt>
% </html>
%
% Clicking on the blue, underlined link above loads the corresponding file into
% the editor, positioned at the appropriate line.

%% Extending the Framework
% The MATLAB xUnit framework can be extended primarily by subclassing |TestCase|,
% |TestSuite|, and |TestRunMonitor|.
%
% |TestCase| can be subclassed to enable a new set of test cases that all share
% some particular behavior.  The MATLAB xUnit Test Framework contains three
% examples of extending |TestCase| behavior in this way:
%
% * |FunctionHandleTestCase| provides the ability to define test cases based on
% procedural function handles.
% * |TestCaseInDir| defines a test case that must be run inside a particular
% directory. The |setUp| and |tearDown| functions are overridden to change the
% MATLAB working directory before running the test case, and then to restore the
% original working directory when the test case finished.  The class is used by
% the framework's own test suite.
% * |TestCaseInPath| defines a test case that must be run with a particular
% directory temporarily added to the MATLAB path.  Its implementation is similar
% to |TestCaseInDir|, and it is also used by the framework's own test suite.
%
% |TestSuite| could be similarly extended by subclassing. This might provide a
% way in the future to define a test suite containing collections of test
% components in separate directories, which is not currently supported.
%
% Finally |TestRunMonitor| could be subclassed to support a variety of test
% monitoring mechanisms, such as what might be required by a _Graphical Test
% Runner_.

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/architecture/testSample.m
================================================
function test_suite = testSample  % Entry point of a subfunction-based test file; the test suite is returned in test_suite.
initTestSuite;  % Script that runs in this function's scope; per the architecture notes it builds the test cases from the subfunctions below and sets test_suite.

function testMyCode  % Sample test case whose second assertion fails; used to show a failure report in the architecture notes.
assertEqual(1, 1);  % Passes: both inputs are equal.
assertElementsAlmostEqual(1, 1.1);  % Fails: 1 vs 1.1 is outside the relative tolerance. Keep this on line 6 of the file; the documented stack trace points at "line 6".
assertTrue(10 == 10);  % Condition is true; presumably never reached because the assertion above throws first.

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/+abc/+tests/test_that.m
================================================
% Do-nothing test used in the examples for organizing tests inside packages.
%
% test_that takes no inputs, returns no outputs, and has an empty body, so
% calling it performs no work.  This copy is stored under the +abc/+tests
% package folders.
%
% Steven L. Eddins
% Copyright 2010 The MathWorks, Inc.

function test_that


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/+abc/+tests/test_this.m
================================================
% Do-nothing test used in the examples for organizing tests inside packages.
%
% test_this takes no inputs, returns no outputs, and has an empty body, so
% calling it performs no work.  This copy is stored under the +abc/+tests
% package folders.
%
% Steven L. Eddins
% Copyright 2010 The MathWorks, Inc.

function test_this


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/+abc_tests/test_that.m
================================================
% Do-nothing test used in the examples for organizing tests inside packages.
%
% test_that takes no inputs, returns no outputs, and has an empty body, so
% calling it performs no work.  This copy is stored under the +abc_tests
% package folder.
%
% Steven L. Eddins
% Copyright 2010 The MathWorks, Inc.

function test_that


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/+abc_tests/test_this.m
================================================
% Do-nothing test used in the examples for organizing tests inside packages.
%
% Steven L. Eddins
% Copyright 2010 The MathWorks, Inc.

function test_this
% Intentionally empty: a test case passes when it runs without raising an error.


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/exException.m
================================================
%% <../index.html MATLAB xUnit Test Framework>: How to Test an Error Message
% It's surprising to most people (but not quality engineers) how
% often programmers make errors in error-handling code.  Because of
% this unfortunate truth, it is useful to write unit tests that
% verify that your MATLAB code throws the proper error, at the
% proper time.
%
% The assertion function that makes this task easy is
% |assertExceptionThrown|.  This example shows how to write a unit
% test that verifies the "Too many input arguments" error for the
% |cos| function.
%
% Your first step is to determine the _error identifier_ associated
% with the error message.  You can find out the error identifier by
% using the |lasterror| function.
%
% If you call |cos| with two input arguments, like this:
%
%   cos(1, 2)
%
% you get this error message:
%
%   Error using ==> cos
%   Too many input arguments. 
%
% Then if you call |lasterror|, you get this output:
%
%   ans = 
%   
%          message: [1x45 char]
%       identifier: 'MATLAB:maxrhs'
%            stack: [0x1 struct]
%
% So the _identifier_ associated with this error message is
% |'MATLAB:maxrhs'|.
%
% When you write your test function, you'll form an anonymous
% function handle that calls |cos| with the erroneous additional
% input argument.

f = @() cos(1, 2)

%%
% You then pass this function to |assertExceptionThrown|, along with
% the expected error identifier.

assertExceptionThrown(f, 'MATLAB:maxrhs');

%%
% |assertExceptionThrown| verifies that when |f()| is called, an
% error results with the specified error identifier.
%
% Here's our error condition test for the |cos| function.

cd examples_general
type testCos

%%
% Run the test using |runtests|.

runtests testCos

%%
% <../index.html Back to MATLAB xUnit Test Framework>

%%
% Copyright 2008-2010 The MathWorks, Inc.

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/exQuickStart.m
================================================
%% <../index.html MATLAB xUnit Test Framework>: How to Write and Run Tests 
% This example shows how to write and run a couple of test cases for the MATLAB
% |fliplr| function.

%% Make a folder for your tests
% To get started, create a folder (directory) that will contain your tests, and
% then make that your working folder.  The test directory in this example is
% example_quick_start.

cd example_quick_start

%% Write each test case as a simple M-file
% Write each test case as an M-file function that returns no output arguments.
% The function name should start or end with "test" or "Test".  The test case
% passes if the function runs with no error.
%
% Here's a test-case M-file that verifies the correct output for a vector input.

type testFliplrVector

%%
% The function |testFliplrVector| calls the function being tested and checks the
% output against the expected output.  If the output is different than expected,
% the function calls |error|.
%
% Here's another test-case M-file that verifies the correct |fliplr| output for
% a matrix input.

type testFliplrMatrix

%%
% This function is simpler than |testFliplrVector| because it uses the utility
% testing function |assertEqual|.  |assertEqual| checks to see whether its two
% inputs are equal. If they are equal, |assertEqual| simply returns silently.
% If they are not equal, |assertEqual| calls |error|.

%% Run all the tests using |runtests|
% To run all your test cases, simply call |runtests|.  |runtests| automatically finds
% all the test cases in the current directory, runs them, and reports the
% results to the Command Window.

runtests

%%
% <../index.html Back to MATLAB xUnit Test Framework>

%%
% Copyright 2008-2010 The MathWorks, Inc.

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/exRunSpecificTest.m
================================================
%% <../index.html MATLAB xUnit Test Framework>: How to Run a Specific Test
% To run all the test cases in just one M-file, ignoring other test
% cases that might be in other files in the same directory, give
% the name of the file (without the ".m" extension) as an argument
% to |runtests|.
%
% For example

cd example_subfunction_tests

runtests testFliplr

%%
% To run a single test case, add the name of the test case using a
% colon (":"), like this:

runtests testFliplr:testFliplrVector

%%
% <../index.html Back to MATLAB xUnit Test Framework>

%%
% Copyright 2008-2010 The MathWorks, Inc.


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/exRunTestsInADirectory.m
================================================
%% <../index.html MATLAB xUnit Test Framework>: How to Run Tests in Specific Directories
% To run all the test cases in a specific directory, give the name of the
% directory as an argument to |runtests|.
%
% For example

runtests example_subfunction_tests

%%
% To run tests in multiple directories, give each directory name as a separate
% argument to |runtests|.

%%
% <../index.html Back to MATLAB xUnit Test Framework>

%%
% Copyright 2008-2010 The MathWorks, Inc.


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/exRunTestsInPackage.m
================================================
%% <../index.html MATLAB xUnit Test Framework>: How to Run Tests in a Package
% To run all the test cases in a package, give the name of the
% package as an argument to |runtests|. *Note:* Running tests in a package
% requires MATLAB R2009a or later.
%
% For example, suppose you are distributing a set of MATLAB files called the
% "ABC Toolbox." Then you could put your tests inside a package called abc_tests
% and run them like this:

runtests abc_tests

%%
% (Note that the initial "+" character in the name of the package folder on disk
% is not part of the package name.)
%
% Or you could put your tests inside a subpackage called abc.tests and run them
% like this:

runtests abc.tests

%%
% You should not use a generic top-level package name such as "tests" because then
% your package might be unintentionally combined with packages with the same
% name created by other people.  

%%
% <../index.html Back to MATLAB xUnit Test Framework>

%%
% Copyright 2010 The MathWorks, Inc.


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/exSilentRunning.m
================================================
%% <../index.html MATLAB xUnit Test Framework>: How to Run Tests Silently and Query the Results
% When you run a test suite using |runtests|, the results are
% summarized in the Command Window.  This example shows you how to
% run a test suite so that nothing prints to the Command Window, and
% it shows you how to write a program to automatically determine the
% results of running the test suite.
%
% There are four steps to follow.
%
% 1. Construct a |TestSuite| object.  In this example we'll use the |fromPwd|
% method of the |TestSuite| class to construct a test suite using all the test
% cases found in the |examples_general| directory.

cd examples_general
suite = TestSuite.fromPwd();

%%
% You can look up information about the individual test cases.

suite.TestComponents{1}

%%
% You can see above that the first test component in the test suite is itself
% another test suite, which contains the test cases defined by the M-file named
% TestUsingTestCase. Here's what one of these individual test cases looks like:

suite.TestComponents{1}.TestComponents{1}

%%
% 2. Construct a |TestRunLogger| object.  This object can receive
% notifications about what happens when a test suite is executed.

logger = TestRunLogger;

%%
% 3. Call the |run| method of the |TestSuite| object, passing it the
% logger.

suite.run(logger);

%%
% 4. The |TestRunLogger| object can now be queried to determine what
% happened during the test.

logger

%%
% There were eight test cases run (logger.NumTestCases), resulting in
% one test failure and one test error.  Detailed information about
% what went wrong can be found in |logger.Faults|.

logger.Faults(1)

%%

logger.Faults(2)

%%
% You can drill further to determine the names of the failing tests,
% as well as the complete stack trace associated with each failure.

logger.Faults(1).TestCase

%%

logger.Faults(1).Exception.stack(1)

%%

logger.Faults(1).Exception.stack(2)

%%
% <../index.html Back to MATLAB xUnit Test Framework>

%%
% Copyright 2008-2010 The MathWorks, Inc.


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/exSubfunctionTests.m
================================================
%% <../index.html MATLAB xUnit Test Framework>: How to Put Multiple Test Cases in One M-file
% The Quick Start example showed how you can write a simple M-file
% to be a single test case.  This example shows you how to put multiple
% test cases in one M-file.
%
% Name your M-file beginning or ending with "test", like
% "testMyFunc".  Start by putting the following two lines at the
% beginning of the file.  It's important that the output variable
% name on line 1 be |test_suite|.
%
%    function test_suite = testMyFunc
%    initTestSuite;
%
% Next, add subfunctions to the file.  Each subfunction beginning
% or ending with "test" becomes an individual test case.
%
% The directory example_subfunction_tests contains a test M-file
% containing subfunction test cases for the |fliplr| function.

cd example_subfunction_tests

type testFliplr

%%
% As usual, run the test cases using |runtests|:

runtests

%%
% <../index.html Back to MATLAB xUnit Test Framework>

%%
% Copyright 2008-2010 The MathWorks, Inc.

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/exTestCase.m
================================================
%% <../index.html MATLAB xUnit Test Framework>: How to Write xUnit-Style Tests by Subclassing TestCase
% The MATLAB xUnit architecture is based closely on the xUnit style, in
% which each test case is an instance of a subclass of the base
% TestCase class.  Programmers who are familiar with this style may
% want to write their own TestCase subclasses instead of using
% <./exSubfunctionTests.html subfunction-based tests>.
%
% This example shows a TestCase subclass containing test case
% methods and test fixture methods.  If you are not familiar with
% defining your own classes in MATLAB, you might want to review the
% MATLAB documentation on 
% <http://www.mathworks.com/access/helpdesk/help/techdoc/matlab_oop/ug_intropage.html 
% classes and object-oriented programming>,
% or you can simply stick to using subfunction-based tests.
%
% The sample M-file begins with the |classdef| statement, which sets
% the name of the class and indicates that it is a subclass of
% |TestCase|.

cd examples_general
dbtype TestUsingTestCase 1

%%
% The properties block contains a field that is initialized by the
% setup method and is used by the two test methods.

dbtype TestUsingTestCase 3:5

%%
% The first method in the methods block is the constructor.  It
% takes the desired test method name as its input argument, and it
% passes that input along to the base class constructor.

dbtype TestUsingTestCase 7:10

%%
% The |setUp| method creates a figure window and stores its handle in
% the field |fh|.

dbtype TestUsingTestCase 12:14

%%
% Test methods are those beginning with "test".

dbtype TestUsingTestCase 20:26

%%
% The |tearDown| method cleans up by deleting the figure window.

dbtype TestUsingTestCase 16:18

%%
% Run the test cases in the class by calling |runtests| with the name
% of the class.

runtests TestUsingTestCase

%%
% <../index.html Back to MATLAB xUnit Test Framework>

%%
% Copyright 2008-2010 The MathWorks, Inc.

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/exTestCaseSearching.m
================================================
%% <../index.html MATLAB xUnit Test Framework>: How RUNTESTS Searches for Test Cases
% When you call |runtests| with no input arguments:
%
%   >> runtests
%
% it automatically searches for all the test cases in the current directory.  It
% looks for test cases in three types of M-files:
%
% 1. An M-file function whose name begins or ends with "test" or "Test" and that does
% not return an output argument.  Such a function is considered to be a single
% test case. 
%
% 2. An M-file function whose name begins or ends with "test" or "Test" and that returns
% an output argument that is a test suite.  Such a function is considered to contain
% subfunction-style test cases.  Each subfunction whose name begins or ends with "test"
% or "Test" is a test case. 
%
% 3. An M-file that defines a subclass of TestCase.  Each method beginning or ending with
% "test" or "Test" is a test case.
%
% |runtests| uses the |TestSuite| static methods |fromName| and |fromPwd| to
% automatically construct the test suites.
%
% Here are a couple of examples.
%
% |TestSuite.fromName| takes an M-file name, determines what
% kind of test file it is, and returns a cell array of test case objects.

cd examples_general
test_suite_1 = TestSuite.fromName('testSetupExample')

%%
% |TestSuite.fromPwd| returns a test suite based on all the test files in the
% current directory.

test_suite_2 = TestSuite.fromPwd()

%%
% <../index.html Back to MATLAB xUnit Test Framework>

%%
% Copyright 2008-2010 The MathWorks, Inc.

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/exTestFixtures.m
================================================
%% <../index.html MATLAB xUnit Test Framework>: How to Write Tests That Share Common Set-Up Code
% Sometimes you want to write a set of test cases in which the same
% set of initialization steps is performed before each test case, or
% in which the same set of cleanup steps is performed after each
% test case.  This set of common _setup_ and _teardown_ code is
% called a _test fixture_.
%
% In subfunction-based test files, you can add subfunctions whose
% names begin with "setup" and "teardown".  These functions will be
% called before and after every test-case subfunction is called.  If
% the setup function returns an output argument, that value is saved
% and passed to every test-case subfunction and also to the teardown
% function.
%
% This example shows a setup function that creates a figure and 
% returns its handle.  The figure handle is passed to each test-case
% subfunction.  The figure handle is also passed to the teardown
% function, which cleans up after each test case by deleting the
% figure.

cd examples_general
type testSetupExample

%%
% Run the tests using |runtests|.

runtests testSetupExample

%%
% You might also want to see the 
% <./exTestCase.html example on writing test cases by
% subclassing TestCase>.

%%
% <../index.html Back to MATLAB xUnit Test Framework>

%%
% Copyright 2008-2010 The MathWorks, Inc.

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/exTolerance.m
================================================
%% <../index.html MATLAB xUnit Test Framework>: How to Test Using a Floating-Point Tolerance
% MATLAB performs arithmetic operations using the floating-point
% hardware instructions on your processor. Because
% almost all floating-point operations are subject to round-off
% error, arithmetic operations can sometimes produce surprising
% results. Here's an example.

a = 1 + 0.1 + 0.1 + 0.1

%%
a == 1.3

%%
% So why doesn't |a| equal 1.3? Because 0.1, 1.3, and most other
% decimal fractions do not have exact representations in the binary
% floating-point number representation your computer uses.  The
% first line above is doing an approximate addition of 1 plus an
% approximation of 0.1, plus an approximation of 0.1, plus an
% approximation of 0.1.  The second line compares the result of all
% that with an approximation of 1.3.
%
% If you subtract 1.3 from |a|, you can see that the computed result
% for |a| is _extremely close_ to the floating-point approximation
% of 1.3, but it is not exactly the same.

a - 1.3

%%
% As a general rule, when comparing the results of floating-point
% calculations for equality, it is necessary to use a tolerance
% value.  Two types of tolerance comparisons are commonly used: absolute
% tolerance and relative tolerance.  An absolute tolerance comparison of _a_ and _b_ 
% looks like:
%
% $$|a-b| \leq T$$
%
% A relative tolerance comparison looks like:
%
% $$|a-b| \leq T\max(|a|,|b|) + T_f$$
%
% where _Tf_ is called the _floor tolerance_. It acts as an absolute tolerance
% when _a_ and _b_ are very close to 0.
%
% For example, suppose that _a_ is 100, _b_ is 101, and T is 0.1.  Then _a_ and
% _b_ would not be considered equal using an absolute tolerance, because 1 >
% 0.1.  However, _a_ and _b_ would be considered equal using a relative
% tolerance, because they differ by only 1 part in 100.
%
% MATLAB xUnit provides the utility assertion functions called
% |assertElementsAlmostEqual| and |assertVectorsAlmostEqual|. These functions
% make it easy to write tests involving floating-point tolerances.
%
% |assertElementsAlmostEqual(A,B)| applies the tolerance test independently to
% every element of |A| and |B|.  The function uses a relative tolerance test by
% default, but you can make it use an absolute tolerance test, or change the
% tolerance values used, by passing additional arguments to it.
%
% |assertVectorsAlmostEqual(A,B)| applies the tolerance test to the vectors |A|
% and |B| in the L2-norm sense.  For example, suppose |A| is |[1 1e10]|, |B|
% is |[2 1e10]|, and the tolerance is 1e-8.  Then |A| and |B| would fail an
% elementwise relative tolerance comparison, because the relative difference
% between the first elements is 0.5.  However, they would pass a vector relative
% tolerance comparison, because the relative vector difference between |A| and
% |B| is only about 1 part in 1e10.
%
% The |examples_general| directory contains a portion of a unit test for the
% |sin| function.  The output of |sin| can sometimes be a bit surprising because
% of floating-point issues.  For example:

sin(pi)

%%
% That's very close but not exactly equal to 0.  Here's how the
% |sin| unit test uses |assertElementsAlmostEqual| to write the |sin(pi)|
% test with a minimum of fuss.

cd examples_general
type testSin

%%
% Run the test using |runtests|.

runtests testSin

%%
% <../index.html Back to MATLAB xUnit Test Framework>

%%
% Copyright 2008-2010 The MathWorks, Inc.

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/example_quick_start/testFliplrMatrix.m
================================================
function testFliplrMatrix
%testFliplrMatrix Check that fliplr reverses the column order of a matrix

original = magic(3);
expected = original(:, 3:-1:1);
assertEqual(fliplr(original), expected);


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/example_quick_start/testFliplrVector.m
================================================
function testFliplrVector
%testFliplrVector Check that fliplr reverses the elements of a row vector

expected_out = [10 4 1];
actual_out = fliplr([1 4 10]);

% Guard clause: nothing to report when the flipped vector matches.
if isequal(actual_out, expected_out)
    return
end
error('testFliplrVector:notEqual', 'Incorrect output for vector.');


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/example_subfunction_tests/testFliplr.m
================================================
function test_suite = testFliplr
%testFliplr Subfunction-style test suite for fliplr.
%   The output variable must be named test_suite; each subfunction whose
%   name begins or ends with "test" is an individual test case.
initTestSuite;

function testFliplrMatrix
% Flipping a 3-by-3 matrix left-to-right should reverse its column order.
in = magic(3);
assertEqual(fliplr(in), in(:, [3 2 1]));

function testFliplrVector
% Flipping a row vector should reverse its elements.
assertEqual(fliplr([1 4 10]), [10 4 1]);


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/examples_general/TestUsingTestCase.m
================================================
classdef TestUsingTestCase < TestCase
%TestUsingTestCase Example TestCase subclass whose fixture is a figure window.
    properties
        fh  % figure handle: assigned in setUp, read by the tests, deleted in tearDown
    end

    methods
        function self = TestUsingTestCase(name)  % name is passed along to the TestCase constructor
            self = self@TestCase(name);
        end

        function setUp(self)  % create the figure used by the test methods
            self.fh = figure;
        end

        function tearDown(self)  % delete the figure created by setUp
            delete(self.fh);
        end

        function testColormapColumns(self)  % the figure's Colormap should have 3 columns
            assertEqual(size(get(self.fh, 'Colormap'), 2), 3);
        end

        function testPointer(self)  % the figure's Pointer property should be 'arrow'
            assertEqual(get(self.fh, 'Pointer'), 'arrow');
        end
    end
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/examples_general/testBadSinTest.m
================================================
function test_suite = testBadSinTest
initTestSuite;

function testSinPi
% Example of a failing test case.  The test writer should have used
% assertElementsAlmostEqual here.
assertEqual(sin(pi), 0);


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/examples_general/testCos.m
================================================
function test_suite = testCos
%testCos Error-condition test for the cos function.
initTestSuite;

function testTooManyInputs
% Calling cos with two inputs must raise the 'MATLAB:maxrhs' error.
assertExceptionThrown(@() cos(1, 2), 'MATLAB:maxrhs');

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/examples_general/testSetupExample.m
================================================
function test_suite = testSetupExample
%testSetupExample Subfunction tests that share a figure-window test fixture.
initTestSuite;

function fh = setup
% Called before every test case; the returned figure handle is passed to
% each test-case subfunction and to teardown.
fh = figure;

function teardown(fh)
% Called after every test case; deletes the figure created by setup.
delete(fh);

function testColormapColumns(fh)
% The figure's Colormap should have three columns.
assertEqual(size(get(fh, 'Colormap'), 2), 3);

function testPointer(fh)
% The figure's Pointer property should be 'arrow'.
assertEqual(get(fh, 'Pointer'), 'arrow');


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/examples_general/testSin.m
================================================
function testSin
%testSin Check that sin(pi) is zero to within floating-point tolerance

computed = sin(pi);
assertElementsAlmostEqual(computed, 0);

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/examples_general/testWithSetupError.m
================================================
function test_suite = testWithSetupError
%Example of a test with an error.  The setup function calls cos with
%too many input arguments.

initTestSuite;

function testData = setup
% Deliberately invalid call: cos is given two inputs, so setup errors.
testData = cos(1, 2);

function testMyFeature(testData)
% Trivially true assertion; testData is not used.
assertEqual(1, 1);

function teardown(testData)
% Nothing to clean up.


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/file_exchange_description.txt
================================================
MATLAB xUnit Test Framework is a unit test framework for MATLAB code.

MATLAB xUnit is designed to be easy to use for MATLAB users with a wide range of experience. Users can write tests using ordinary M-files that are very simple in structure.

MATLAB xUnit comes with extensive documentation that ranges in scope from a "Getting Started" section to advanced techniques and architectural notes. You can view this documentation online without downloading the package. For example, scroll down to the "Published M Files" section on this page and click on "MATLAB xUnit Quick Start - How to write and run tests."  To see all the MATLAB xUnit documentation online, scroll down to the "HTML Files" section on this page and click on "Readme.html."

Only the "xunit" directory is needed to use the framework.  The "tests" directory contains the framework's own test suite.  The "architecture" directory contains architectural notes on the framework's design and how it might be extended.

MATLAB xUnit can be used with MATLAB releases R2008a and later. MATLAB xUnit relies heavily on object-oriented language features introduced in R2008a and will not work with earlier releases.


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/html/exException.html
================================================

<!DOCTYPE html
  PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html><head>
      <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
   <!--
This HTML is auto-generated from an M-file.
To make changes, update the M-file and republish this document.
      --><title>MATLAB xUnit Test Framework: How to Test an Error Message</title><meta name="generator" content="MATLAB 7.10"><meta name="date" content="2010-07-29"><meta name="m-file" content="exException"><style type="text/css">

body {
  background-color: white;
  margin:10px;
}

h1 {
  color: #990000; 
  font-size: x-large;
}

h2 {
  color: #990000;
  font-size: medium;
}

/* Make the text shrink to fit narrow windows, but not stretch too far in 
wide windows. */ 
p,h1,h2,div.content div {
  max-width: 600px;
  /* Hack for IE6 */
  width: auto !important; width: 600px;
}

pre.codeinput {
  background: #EEEEEE;
  padding: 10px;
}
@media print {
  pre.codeinput {word-wrap:break-word; width:100%;}
} 

span.keyword {color: #0000FF}
span.comment {color: #228B22}
span.string {color: #A020F0}
span.untermstring {color: #B20000}
span.syscmd {color: #B28C00}

pre.codeoutput {
  color: #666666;
  padding: 10px;
}

pre.error {
  color: red;
}

p.footer {
  text-align: right;
  font-size: xx-small;
  font-weight: lighter;
  font-style: italic;
  color: gray;
}

  </style></head><body><div class="content"><h1><a href="../index.html">MATLAB xUnit Test Framework</a>: How to Test an Error Message</h1><p>It's surprising to most people (but not quality engineers) how often programmers make errors in error-handling code.  Because of this unfortunate truth, it is useful to write unit tests that verify that your MATLAB code throws the proper error, at the proper time.</p><p>The assertion function that makes this task easy is <tt>assertExceptionThrown</tt>.  This example shows how to write a unit test that verifies the "Too many input arguments" error for the <tt>cos</tt> function.</p><p>Your first step is to determine the <i>error identifier</i> associated with the error message.  You can find out the error identifier by using the <tt>lasterror</tt> function.</p><p>If you call <tt>cos</tt> with two input arguments, like this:</p><pre> cos(1, 2)</pre><p>you get this error message:</p><pre> Error using ==&gt; cos
 Too many input arguments.</pre><p>Then if you call <tt>lasterror</tt>, you get this output:</p><pre> ans =</pre><pre>        message: [1x45 char]
     identifier: 'MATLAB:maxrhs'
          stack: [0x1 struct]</pre><p>So the <i>identifier</i> associated with this error message is <tt>'MATLAB:maxrhs'</tt>.</p><p>When you write your test function, you'll form an anonymous function handle that calls <tt>cos</tt> with the erroneous additional input argument.</p><pre class="codeinput">f = @() cos(1, 2)
</pre><pre class="codeoutput">
f = 

    @()cos(1,2)

</pre><p>You then pass this function to <tt>assertExceptionThrown</tt>, along with the expected error identifier.</p><pre class="codeinput">assertExceptionThrown(f, <span class="string">'MATLAB:maxrhs'</span>);
</pre><p><tt>assertExceptionThrown</tt> verifies that when <tt>f()</tt> is called, an error results with the specified error identifier.</p><p>Here's our error condition test for the <tt>cos</tt> function.</p><pre class="codeinput">cd <span class="string">examples_general</span>
type <span class="string">testCos</span>
</pre><pre class="codeoutput">
function test_suite = testCos
initTestSuite;

function testTooManyInputs
assertExceptionThrown(@() cos(1, 2), 'MATLAB:maxrhs');
</pre><p>Run the test using <tt>runtests</tt>.</p><pre class="codeinput">runtests <span class="string">testCos</span>
</pre><pre class="codeoutput">Starting test run with 1 test case.
.
PASSED in 0.018 seconds.
</pre><p><a href="../index.html">Back to MATLAB xUnit Test Framework</a></p><p class="footer">Copyright 2008-2010 The MathWorks, Inc.<br>
      Published with MATLAB&reg; 7.10<br></p></div><!--
##### SOURCE BEGIN #####
%% <../index.html MATLAB xUnit Test Framework>: How to Test an Error Message
% It's surprising to most people (but not quality engineers) how
% often programmers make errors in error-handling code.  Because of
% this unfortunate truth, it is useful to write unit tests that
% verify that your MATLAB code throws the proper error, at the
% proper time.
%
% The assertion function that makes this task easy is
% |assertExceptionThrown|.  This example shows how to write a unit
% test that verifies the "Too many input arguments" error for the
% |cos| function.
%
% Your first step is to determine the _error identifier_ associated
% with the error message.  You can find out the error identifier by
% using the |lasterror| function.
%
% If you call |cos| with two input arguments, like this:
%
%   cos(1, 2)
%
% you get this error message:
%
%   Error using ==> cos
%   Too many input arguments. 
%
% Then if you call |lasterror|, you get this output:
%
%   ans = 
%   
%          message: [1x45 char]
%       identifier: 'MATLAB:maxrhs'
%            stack: [0x1 struct]
%
% So the _identifier_ associated with this error message is
% |'MATLAB:maxrhs'|.
%
% When you write your test function, you'll form an anonymous
% function handle that calls |cos| with the erroneous additional
% input argument.

f = @() cos(1, 2)

%%
% You then pass this function to |assertExceptionThrown|, along with
% the expected error identifier.

assertExceptionThrown(f, 'MATLAB:maxrhs');

%%
% |assertExceptionThrown| verifies that when |f()| is called, an
% error results with the specified error identifier.
%
% Here's our error condition test for the |cos| function.

cd examples_general
type testCos

%%
% Run the test using |runtests|.

runtests testCos

%%
% <../index.html Back to MATLAB xUnit Test Framework>

%%
% Copyright 2008-2010 The MathWorks, Inc.
##### SOURCE END #####
--></body></html>

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/html/exQuickStart.html
================================================

<!DOCTYPE html
  PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html><head>
      <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
   <!--
This HTML is auto-generated from an M-file.
To make changes, update the M-file and republish this document.
      --><title>MATLAB xUnit Test Framework: How to Write and Run Tests</title><meta name="generator" content="MATLAB 7.10"><meta name="date" content="2010-07-29"><meta name="m-file" content="exQuickStart"><style type="text/css">

body {
  background-color: white;
  margin:10px;
}

h1 {
  color: #990000; 
  font-size: x-large;
}

h2 {
  color: #990000;
  font-size: medium;
}

/* Make the text shrink to fit narrow windows, but not stretch too far in 
wide windows. */ 
p,h1,h2,div.content div {
  max-width: 600px;
  /* Hack for IE6 */
  width: auto !important; width: 600px;
}

pre.codeinput {
  background: #EEEEEE;
  padding: 10px;
}
@media print {
  pre.codeinput {word-wrap:break-word; width:100%;}
} 

span.keyword {color: #0000FF}
span.comment {color: #228B22}
span.string {color: #A020F0}
span.untermstring {color: #B20000}
span.syscmd {color: #B28C00}

pre.codeoutput {
  color: #666666;
  padding: 10px;
}

pre.error {
  color: red;
}

p.footer {
  text-align: right;
  font-size: xx-small;
  font-weight: lighter;
  font-style: italic;
  color: gray;
}

  </style></head><body><div class="content"><h1><a href="../index.html">MATLAB xUnit Test Framework</a>: How to Write and Run Tests</h1><!--introduction--><p>This example shows how to write and run a couple of test cases for the MATLAB <tt>fliplr</tt> function.</p><!--/introduction--><h2>Contents</h2><div><ul><li><a href="#1">Make a folder for your tests</a></li><li><a href="#2">Write each test case as a simple M-file</a></li><li><a href="#5">Run all the tests using <tt>runtests</tt></a></li></ul></div><h2>Make a folder for your tests<a name="1"></a></h2><p>To get started, create a folder (directory) that will contain your tests, and then make that your working folder.  The test directory in this example is example_quick_start.</p><pre class="codeinput">cd <span class="string">example_quick_start</span>
</pre><h2>Write each test case as a simple M-file<a name="2"></a></h2><p>Write each test case as an M-file function that returns no output arguments. The function name should start or end with "test" or "Test".  The test case passes if the function runs with no error.</p><p>Here's a test-case M-file that verifies the correct output for a vector input.</p><pre class="codeinput">type <span class="string">testFliplrVector</span>
</pre><pre class="codeoutput">
function testFliplrVector
%testFliplrVector Unit test for fliplr with vector input

in = [1 4 10];
out = fliplr(in);
expected_out = [10 4 1];

if ~isequal(out, expected_out)
    error('testFliplrVector:notEqual', 'Incorrect output for vector.');
end

</pre><p>The function <tt>testFliplrVector</tt> calls the function being tested and checks the output against the expected output.  If the output is different than expected, the function calls <tt>error</tt>.</p><p>Here's another test-case M-file that verifies the correct <tt>fliplr</tt> output for a matrix input.</p><pre class="codeinput">type <span class="string">testFliplrMatrix</span>
</pre><pre class="codeoutput">
function testFliplrMatrix
%testFliplrMatrix Unit test for fliplr with matrix input

in = magic(3);
assertEqual(fliplr(in), in(:, [3 2 1]));

</pre><p>This function is simpler than <tt>testFliplrVector</tt> because it uses the utility testing function <tt>assertEqual</tt>.  <tt>assertEqual</tt> checks to see whether its two inputs are equal. If they are equal, <tt>assertEqual</tt> simply returns silently. If they are not equal, <tt>assertEqual</tt> calls <tt>error</tt>.</p><h2>Run all the tests using <tt>runtests</tt><a name="5"></a></h2><p>To run all your test cases, simply call <tt>runtests</tt>.  <tt>runtests</tt> automatically finds all the test cases in the current directory, runs them, and reports the results to the Command Window.</p><pre class="codeinput">runtests
</pre><pre class="codeoutput">Starting test run with 2 test cases.
..
PASSED in 0.002 seconds.
</pre><p><a href="../index.html">Back to MATLAB xUnit Test Framework</a></p><p class="footer">Copyright 2008-2010 The MathWorks, Inc.<br>
      Published with MATLAB&reg; 7.10<br></p></div><!--
##### SOURCE BEGIN #####
%% <../index.html MATLAB xUnit Test Framework>: How to Write and Run Tests 
% This example shows how to write and run a couple of test cases for the MATLAB
% |fliplr| function.

%% Make a folder for your tests
% To get started, create a folder (directory) that will contain your tests, and
% then make that your working folder.  The test directory in this example is
% example_quick_start.

cd example_quick_start

%% Write each test case as a simple M-file
% Write each test case as an M-file function that returns no output arguments.
% The function name should start or end with "test" or "Test".  The test case
% passes if the function runs with no error.
%
% Here's a test-case M-file that verifies the correct output for a vector input.

type testFliplrVector

%%
% The function |testFliplrVector| calls the function being tested and checks the
% output against the expected output.  If the output is different than expected,
% the function calls |error|.
%
% Here's another test-case M-file that verifies the correct |fliplr| output for
% a matrix input.

type testFliplrMatrix

%%
% This function is simpler than |testFliplrVector| because it uses the utility
% testing function |assertEqual|.  |assertEqual| checks to see whether its two
% inputs are equal. If they are equal, |assertEqual| simply returns silently.
% If they are not equal, |assertEqual| calls |error|.

%% Run all the tests using |runtests|
% To run all your test cases, simply call |runtests|.  |runtests| automatically finds
% all the test cases in the current directory, runs them, and reports the
% results to the Command Window.

runtests

%%
% <../index.html Back to MATLAB xUnit Test Framework>

%%
% Copyright 2008-2010 The MathWorks, Inc.
##### SOURCE END #####
--></body></html>

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/html/exRunSpecificTest.html
================================================

<!DOCTYPE html
  PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html><head>
      <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
   <!--
This HTML is auto-generated from an M-file.
To make changes, update the M-file and republish this document.
      --><title>MATLAB xUnit Test Framework: How to Run a Specific Test</title><meta name="generator" content="MATLAB 7.10"><meta name="date" content="2010-07-29"><meta name="m-file" content="exRunSpecificTest"><style type="text/css">

body {
  background-color: white;
  margin:10px;
}

h1 {
  color: #990000; 
  font-size: x-large;
}

h2 {
  color: #990000;
  font-size: medium;
}

/* Make the text shrink to fit narrow windows, but not stretch too far in 
wide windows. */ 
p,h1,h2,div.content div {
  max-width: 600px;
  /* Hack for IE6 */
  width: auto !important; width: 600px;
}

pre.codeinput {
  background: #EEEEEE;
  padding: 10px;
}
@media print {
  pre.codeinput {word-wrap:break-word; width:100%;}
} 

span.keyword {color: #0000FF}
span.comment {color: #228B22}
span.string {color: #A020F0}
span.untermstring {color: #B20000}
span.syscmd {color: #B28C00}

pre.codeoutput {
  color: #666666;
  padding: 10px;
}

pre.error {
  color: red;
}

p.footer {
  text-align: right;
  font-size: xx-small;
  font-weight: lighter;
  font-style: italic;
  color: gray;
}

  </style></head><body><div class="content"><h1><a href="../index.html">MATLAB xUnit Test Framework</a>: How to Run a Specific Test</h1><p>To run all the test cases in just one M-file, ignoring other test cases that might be in other files in the same directory, give the name of the file (without the ".m" extension) as an argument to <tt>runtests</tt>.</p><p>For example</p><pre class="codeinput">cd <span class="string">example_subfunction_tests</span>

runtests <span class="string">testFliplr</span>
</pre><pre class="codeoutput">Starting test run with 2 test cases.
..
PASSED in 0.023 seconds.
</pre><p>To run a single test case, add the name of the test case using a colon (":"), like this:</p><pre class="codeinput">runtests <span class="string">testFliplr:testFliplrVector</span>
</pre><pre class="codeoutput">Starting test run with 1 test case.
.
PASSED in 0.001 seconds.
</pre><p><a href="../index.html">Back to MATLAB xUnit Test Framework</a></p><p class="footer">Copyright 2008-2010 The MathWorks, Inc.<br>
      Published with MATLAB&reg; 7.10<br></p></div><!--
##### SOURCE BEGIN #####
%% <../index.html MATLAB xUnit Test Framework>: How to Run a Specific Test
% To run all the test cases in just one M-file, ignoring other test
% cases that might be in other files in the same directory, give
% the name of the file (without the ".m" extension) as an argument
% to |runtests|.
%
% For example

cd example_subfunction_tests

runtests testFliplr

%%
% To run a single test case, add the name of the test case using a
% colon (":"), like this:

runtests testFliplr:testFliplrVector

%%
% <../index.html Back to MATLAB xUnit Test Framework>

%%
% Copyright 2008-2010 The MathWorks, Inc.

##### SOURCE END #####
--></body></html>

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/html/exRunTestsInADirectory.html
================================================

<!DOCTYPE html
  PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html><head>
      <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
   <!--
This HTML is auto-generated from an M-file.
To make changes, update the M-file and republish this document.
      --><title>MATLAB xUnit Test Framework: How to Run Tests in Specific Directories</title><meta name="generator" content="MATLAB 7.10"><meta name="date" content="2010-07-29"><meta name="m-file" content="exRunTestsInADirectory"><style type="text/css">

body {
  background-color: white;
  margin:10px;
}

h1 {
  color: #990000; 
  font-size: x-large;
}

h2 {
  color: #990000;
  font-size: medium;
}

/* Make the text shrink to fit narrow windows, but not stretch too far in 
wide windows. */ 
p,h1,h2,div.content div {
  max-width: 600px;
  /* Hack for IE6 */
  width: auto !important; width: 600px;
}

pre.codeinput {
  background: #EEEEEE;
  padding: 10px;
}
@media print {
  pre.codeinput {word-wrap:break-word; width:100%;}
} 

span.keyword {color: #0000FF}
span.comment {color: #228B22}
span.string {color: #A020F0}
span.untermstring {color: #B20000}
span.syscmd {color: #B28C00}

pre.codeoutput {
  color: #666666;
  padding: 10px;
}

pre.error {
  color: red;
}

p.footer {
  text-align: right;
  font-size: xx-small;
  font-weight: lighter;
  font-style: italic;
  color: gray;
}

  </style></head><body><div class="content"><h1><a href="../index.html">MATLAB xUnit Test Framework</a>: How to Run Tests in Specific Directories</h1><p>To run all the test cases in a specific directory, give the name of the directory as an argument to <tt>runtests</tt>.</p><p>For example</p><pre class="codeinput">runtests <span class="string">example_subfunction_tests</span>
</pre><pre class="codeoutput">Starting test run with 2 test cases.
..
PASSED in 0.062 seconds.
</pre><p>To run tests in multiple directories, give each directory name as a separate argument to <tt>runtests</tt>.</p><p><a href="../index.html">Back to MATLAB xUnit Test Framework</a></p><p class="footer">Copyright 2008-2010 The MathWorks, Inc.<br>
      Published with MATLAB&reg; 7.10<br></p></div><!--
##### SOURCE BEGIN #####
%% <../index.html MATLAB xUnit Test Framework>: How to Run Tests in Specific Directories
% To run all the test cases in a specific directory, give the name of the
% directory as an argument to |runtests|.
%
% For example

runtests example_subfunction_tests

%%
% To run tests in multiple directories, give each directory name as a separate
% argument to |runtests|.

%%
% <../index.html Back to MATLAB xUnit Test Framework>

%%
% Copyright 2008-2010 The MathWorks, Inc.

##### SOURCE END #####
--></body></html>

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/html/exRunTestsInPackage.html
================================================

<!DOCTYPE html
  PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html><head>
      <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
   <!--
This HTML was auto-generated from MATLAB code.
To make changes, update the MATLAB code and republish this document.
      --><title>MATLAB xUnit Test Framework: How to Run Tests in a Package</title><meta name="generator" content="MATLAB 7.11"><link rel="schema.DC" href="http://purl.org/dc/elements/1.1/"><meta name="DC.date" content="2010-11-19"><meta name="DC.source" content="exRunTestsInPackage.m"><style type="text/css">

body {
  background-color: white;
  margin:10px;
}

h1 {
  color: #990000; 
  font-size: x-large;
}

h2 {
  color: #990000;
  font-size: medium;
}

/* Make the text shrink to fit narrow windows, but not stretch too far in 
wide windows. */ 
p,h1,h2,div.content div {
  max-width: 600px;
  /* Hack for IE6 */
  width: auto !important; width: 600px;
}

pre.codeinput {
  background: #EEEEEE;
  padding: 10px;
}
@media print {
  pre.codeinput {word-wrap:break-word; width:100%;}
} 

span.keyword {color: #0000FF}
span.comment {color: #228B22}
span.string {color: #A020F0}
span.untermstring {color: #B20000}
span.syscmd {color: #B28C00}

pre.codeoutput {
  color: #666666;
  padding: 10px;
}

pre.error {
  color: red;
}

p.footer {
  text-align: right;
  font-size: xx-small;
  font-weight: lighter;
  font-style: italic;
  color: gray;
}

  </style></head><body><div class="content"><h1><a href="../index.html">MATLAB xUnit Test Framework</a>: How to Run Tests in a Package</h1><p>To run all the test cases in a package, give the name of the package as an argument to <tt>runtests</tt>. <b>Note:</b> Running tests in a package requires MATLAB R2009a or later.</p><p>For example, suppose you are distributing a set of MATLAB files called the "ABC Toolbox." Then you could put your tests inside a package called abc_tests and run them like this:</p><pre class="codeinput">runtests <span class="string">abc_tests</span>
</pre><pre class="codeoutput">Test suite: abc_tests
Test suite location: Package
19-Nov-2010 14:14:36

Starting test run with 2 test cases.
..
PASSED in 0.028 seconds.
</pre><p>(Note that the initial "+" character in the name of the package folder on disk is not part of the package name.)</p><p>Or you could put your tests inside a subpackage called abc.tests and run them like this:</p><pre class="codeinput">runtests <span class="string">abc.tests</span>
</pre><pre class="codeoutput">Test suite: abc.tests
Test suite location: Package
19-Nov-2010 14:14:36

Starting test run with 2 test cases.
..
PASSED in 0.001 seconds.
</pre><p>You should not use a generic top-level package name such as "tests" because then your package might be unintentionally combined with packages with the same name created by other people.</p><p><a href="../index.html">Back to MATLAB xUnit Test Framework</a></p><p class="footer">Copyright 2010 The MathWorks, Inc.<br>
      Published with MATLAB&reg; 7.11<br></p></div><!--
##### SOURCE BEGIN #####
%% <../index.html MATLAB xUnit Test Framework>: How to Run Tests in a Package
% To run all the test cases in a package, give the name of the
% package as an argument to |runtests|. *Note:* Running tests in a package
% requires MATLAB R2009a or later.
%
% For example, suppose you are distributing a set of MATLAB files called the
% "ABC Toolbox." Then you could put your tests inside a package called abc_tests
% and run them like this:

runtests abc_tests

%%
% (Note that the initial "+" character in the name of the package folder on disk
% is not part of the package name.)
%
% Or you could put your tests inside a subpackage called abc.tests and run them
% like this:

runtests abc.tests

%%
% You should not use a generic top-level package name such as "tests" because
% then your package might be unintentionally combined with packages with the
% same name created by other people.

%%
% <../index.html Back to MATLAB xUnit Test Framework>

%%
% Copyright 2010 The MathWorks, Inc.

##### SOURCE END #####
--></body></html>

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/html/exSilentRunning.html
================================================

<!DOCTYPE html
  PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html><head>
      <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
   <!--
This HTML is auto-generated from an M-file.
To make changes, update the M-file and republish this document.
      --><title>MATLAB xUnit Test Framework: How to Run Tests Silently and Query the Results</title><meta name="generator" content="MATLAB 7.10"><meta name="date" content="2010-07-29"><meta name="m-file" content="exSilentRunning"><style type="text/css">

body {
  background-color: white;
  margin:10px;
}

h1 {
  color: #990000; 
  font-size: x-large;
}

h2 {
  color: #990000;
  font-size: medium;
}

/* Make the text shrink to fit narrow windows, but not stretch too far in 
wide windows. */ 
p,h1,h2,div.content div {
  max-width: 600px;
  /* Hack for IE6 */
  width: auto !important; width: 600px;
}

pre.codeinput {
  background: #EEEEEE;
  padding: 10px;
}
@media print {
  pre.codeinput {word-wrap:break-word; width:100%;}
} 

span.keyword {color: #0000FF}
span.comment {color: #228B22}
span.string {color: #A020F0}
span.untermstring {color: #B20000}
span.syscmd {color: #B28C00}

pre.codeoutput {
  color: #666666;
  padding: 10px;
}

pre.error {
  color: red;
}

p.footer {
  text-align: right;
  font-size: xx-small;
  font-weight: lighter;
  font-style: italic;
  color: gray;
}

  </style></head><body><div class="content"><h1><a href="../index.html">MATLAB xUnit Test Framework</a>: How to Run Tests Silently and Query the Results</h1><p>When you run a test suite using <tt>runtests</tt>, the results are summarized in the Command Window.  This example shows you how to run a test suite so that nothing prints to the Command Window, and it shows you how to write a program to automatically determine the results of running the test suite.</p><p>There are four steps to follow.</p><p>1. Construct a <tt>TestSuite</tt> object.  In this example we'll use the <tt>fromPwd</tt> method of the <tt>TestSuite</tt> class to construct a test suite using all the test cases found in the <tt>examples_general</tt> directory.</p><pre class="codeinput">cd <span class="string">examples_general</span>
suite = TestSuite.fromPwd();
</pre><p>You can look up information about the individual test cases.</p><pre class="codeinput">suite.TestComponents{1}
</pre><pre class="codeoutput">
ans = 

  TestSuite handle

  Properties:
    TestComponents: {[1x1 TestUsingTestCase]  [1x1 TestUsingTestCase]}
              Name: 'TestUsingTestCase'
          Location: [1x80 char]


</pre><p>You can see above that the first test component in the test suite is itself another test suite, which contains the test cases defined by the M-file named TestUsingTestCase. Here's what one of these individual test cases looks like:</p><pre class="codeinput">suite.TestComponents{1}.TestComponents{1}
</pre><pre class="codeoutput">
ans = 

  TestUsingTestCase handle

  Properties:
            fh: []
    MethodName: 'testPointer'
          Name: 'testPointer'
      Location: 'C:\Users\eddins\local-work\matlab_xunit\doc\examples_general\TestUsingTestCase.m'


</pre><p>2. Construct a <tt>TestRunLogger</tt> object.  This object can receive notifications about what happens when a test suite is executed.</p><pre class="codeinput">logger = TestRunLogger;
</pre><p>3. Call the <tt>run</tt> method of the <tt>TestSuite</tt> object, passing it the logger.</p><pre class="codeinput">suite.run(logger);
</pre><p>4. The <tt>TestRunLogger</tt> object can now be queried to determine what happened during the test.</p><pre class="codeinput">logger
</pre><pre class="codeoutput">
logger = 

  TestRunLogger handle

  Properties:
             Log: {1x34 cell}
     NumFailures: 1
       NumErrors: 1
    NumTestCases: 8
          Faults: [1x2 struct]


</pre><p>There were eight test cases run (logger.NumTestCases), resulting in one test failure and one test error.  Detailed information about what went wrong can be found in <tt>logger.Faults</tt>.</p><pre class="codeinput">logger.Faults(1)
</pre><pre class="codeoutput">
ans = 

         Type: 'failure'
     TestCase: [1x1 FunctionHandleTestCase]
    Exception: [1x1 MException]

</pre><pre class="codeinput">logger.Faults(2)
</pre><pre class="codeoutput">
ans = 

         Type: 'error'
     TestCase: [1x1 FunctionHandleTestCase]
    Exception: [1x1 MException]

</pre><p>You can drill further to determine the names of the failing tests, as well as the complete stack trace associated with each failure.</p><pre class="codeinput">logger.Faults(1).TestCase
</pre><pre class="codeoutput">
ans = 

  FunctionHandleTestCase handle

  Properties:
    MethodName: 'runTestCase'
          Name: 'testSinPi'
      Location: 'C:\Users\eddins\local-work\matlab_xunit\doc\examples_general\testBadSinTest.m'


</pre><pre class="codeinput">logger.Faults(1).Exception.stack(1)
</pre><pre class="codeoutput">
ans = 

    file: 'C:\Users\eddins\local-work\matlab_xunit\doc\examples_general\testBadSinTest.m'
    name: 'testSinPi'
    line: 7

</pre><pre class="codeinput">logger.Faults(1).Exception.stack(2)
</pre><pre class="codeoutput">
ans = 

    file: 'C:\Users\eddins\local-work\matlab_xunit\xunit\FunctionHandleTestCase.m'
    name: 'FunctionHandleTestCase.runTestCase'
    line: 112

</pre><p><a href="../index.html">Back to MATLAB xUnit Test Framework</a></p><p class="footer">Copyright 2008-2010 The MathWorks, Inc.<br>
      Published with MATLAB&reg; 7.10<br></p></div><!--
##### SOURCE BEGIN #####
%% <../index.html MATLAB xUnit Test Framework>: How to Run Tests Silently and Query the Results
% When you run a test suite using |runtests|, the results are
% summarized in the Command Window.  This example shows you how to
% run a test suite so that nothing prints to the Command Window, and
% it shows you how to write a program to automatically determine the
% results of running the test suite.
%
% There are four steps to follow.
%
% 1. Construct a |TestSuite| object.  In this example we'll use the |fromPwd|
% method of the |TestSuite| class to construct a test suite using all the test
% cases found in the |examples_general| directory.

cd examples_general
suite = TestSuite.fromPwd();

%%
% You can look up information about the individual test cases.

suite.TestComponents{1}

%%
% You can see above that the first test component in the test suite is itself
% another test suite, which contains the test cases defined by the M-file named
% TestUsingTestCase. Here's what one of these individual test cases looks like:

suite.TestComponents{1}.TestComponents{1}

%%
% 2. Construct a |TestRunLogger| object.  This object can receive
% notifications about what happens when a test suite is executed.

logger = TestRunLogger;

%%
% 3. Call the |run| method of the |TestSuite| object, passing it the
% logger.

suite.run(logger);

%%
% 4. The |TestRunLogger| object can now be queried to determine what
% happened during the test.

logger

%%
% There were eight test cases run (logger.NumTestCases), resulting in
% one test failure and one test error.  Detailed information about
% what went wrong can be found in |logger.Faults|.

logger.Faults(1)

%%

logger.Faults(2)

%%
% You can drill further to determine the names of the failing tests,
% as well as the complete stack trace associated with each failure.

logger.Faults(1).TestCase

%%

logger.Faults(1).Exception.stack(1)

%%

logger.Faults(1).Exception.stack(2)

%%
% <../index.html Back to MATLAB xUnit Test Framework>

%%
% Copyright 2008-2010 The MathWorks, Inc.

##### SOURCE END #####
--></body></html>

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/html/exSubfunctionTests.html
================================================

<!DOCTYPE html
  PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html><head>
      <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
   <!--
This HTML is auto-generated from an M-file.
To make changes, update the M-file and republish this document.
      --><title>MATLAB xUnit Test Framework: How to Put Multiple Test Cases in One M-file</title><meta name="generator" content="MATLAB 7.10"><meta name="date" content="2010-07-29"><meta name="m-file" content="exSubfunctionTests"><style type="text/css">

body {
  background-color: white;
  margin:10px;
}

h1 {
  color: #990000; 
  font-size: x-large;
}

h2 {
  color: #990000;
  font-size: medium;
}

/* Make the text shrink to fit narrow windows, but not stretch too far in 
wide windows. */ 
p,h1,h2,div.content div {
  max-width: 600px;
  /* Hack for IE6 */
  width: auto !important; width: 600px;
}

pre.codeinput {
  background: #EEEEEE;
  padding: 10px;
}
@media print {
  pre.codeinput {word-wrap:break-word; width:100%;}
} 

span.keyword {color: #0000FF}
span.comment {color: #228B22}
span.string {color: #A020F0}
span.untermstring {color: #B20000}
span.syscmd {color: #B28C00}

pre.codeoutput {
  color: #666666;
  padding: 10px;
}

pre.error {
  color: red;
}

p.footer {
  text-align: right;
  font-size: xx-small;
  font-weight: lighter;
  font-style: italic;
  color: gray;
}

  </style></head><body><div class="content"><h1><a href="../index.html">MATLAB xUnit Test Framework</a>: How to Put Multiple Test Cases in One M-file</h1><p>The Quick Start example showed how you can write a simple M-file to be a single test case.  This example shows you how to put multiple test cases in one M-file.</p><p>Name your M-file beginning or ending with "test", like "testMyFunc".  Start by putting the following two lines at the beginning of the file.  It's important that the output variable name on line 1 be <tt>test_suite</tt>.</p><pre>  function test_suite = testMyFunc
  initTestSuite;</pre><p>Next, add subfunctions to the file.  Each subfunction beginning or ending with "test" becomes an individual test case.</p><p>The directory example_subfunction_tests contains a test M-file containing subfunction test cases for the <tt>fliplr</tt> function.</p><pre class="codeinput">cd <span class="string">example_subfunction_tests</span>

type <span class="string">testFliplr</span>
</pre><pre class="codeoutput">
function test_suite = testFliplr
initTestSuite;

function testFliplrMatrix
in = magic(3);
assertEqual(fliplr(in), in(:, [3 2 1]));

function testFliplrVector
assertEqual(fliplr([1 4 10]), [10 4 1]);


</pre><p>As usual, run the test cases using <tt>runtests</tt>:</p><pre class="codeinput">runtests
</pre><pre class="codeoutput">Starting test run with 2 test cases.
..
PASSED in 0.027 seconds.
</pre><p><a href="../index.html">Back to MATLAB xUnit Test Framework</a></p><p class="footer">Copyright 2008-2010 The MathWorks, Inc.<br>
      Published with MATLAB&reg; 7.10<br></p></div><!--
##### SOURCE BEGIN #####
%% <../index.html MATLAB xUnit Test Framework>: How to Put Multiple Test Cases in One M-file
% The Quick Start example showed how you can write a simple M-file
% to be a single test case.  This example shows you how to put multiple
% test cases in one M-file.
%
% Name your M-file beginning or ending with "test", like
% "testMyFunc".  Start by putting the following two lines at the
% beginning of the file.  It's important that the output variable
% name on line 1 be |test_suite|.
%
%    function test_suite = testMyFunc
%    initTestSuite;
%
% Next, add subfunctions to the file.  Each subfunction beginning
% or ending with "test" becomes an individual test case.
%
% The directory example_subfunction_tests contains a test M-file
% containing subfunction test cases for the |fliplr| function.

cd example_subfunction_tests

type testFliplr

%%
% As usual, run the test cases using |runtests|:

runtests

%%
% <../index.html Back to MATLAB xUnit Test Framework>

%%
% Copyright 2008-2010 The MathWorks, Inc.
##### SOURCE END #####
--></body></html>

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/html/exTestCase.html
================================================

<!DOCTYPE html
  PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html><head>
      <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
   <!--
This HTML is auto-generated from an M-file.
To make changes, update the M-file and republish this document.
      --><title>MATLAB xUnit Test Framework: How to Write xUnit-Style Tests by Subclassing TestCase</title><meta name="generator" content="MATLAB 7.10"><meta name="date" content="2010-07-29"><meta name="m-file" content="exTestCase"><style type="text/css">

body {
  background-color: white;
  margin:10px;
}

h1 {
  color: #990000; 
  font-size: x-large;
}

h2 {
  color: #990000;
  font-size: medium;
}

/* Make the text shrink to fit narrow windows, but not stretch too far in 
wide windows. */ 
p,h1,h2,div.content div {
  max-width: 600px;
  /* Hack for IE6 */
  width: auto !important; width: 600px;
}

pre.codeinput {
  background: #EEEEEE;
  padding: 10px;
}
@media print {
  pre.codeinput {word-wrap:break-word; width:100%;}
} 

span.keyword {color: #0000FF}
span.comment {color: #228B22}
span.string {color: #A020F0}
span.untermstring {color: #B20000}
span.syscmd {color: #B28C00}

pre.codeoutput {
  color: #666666;
  padding: 10px;
}

pre.error {
  color: red;
}

p.footer {
  text-align: right;
  font-size: xx-small;
  font-weight: lighter;
  font-style: italic;
  color: gray;
}

  </style></head><body><div class="content"><h1><a href="../index.html">MATLAB xUnit Test Framework</a>: How to Write xUnit-Style Tests by Subclassing TestCase</h1><p>The MATLAB xUnit architecture is based closely on the xUnit style, in which each test case is an instance of a subclass of the base TestCase class.  Programmers who are familiar with this style may want to write their own TestCase subclasses instead of using <a href="./exSubfunctionTests.html">subfunction-based tests</a>.</p><p>This example shows a TestCase subclass containing test case methods and test fixture methods.  If you are not familiar with defining your own classes in MATLAB, you might want to review the MATLAB documentation on <a href="http://www.mathworks.com/access/helpdesk/help/techdoc/matlab_oop/ug_intropage.html">classes and object-oriented programming</a>, or you can simply stick to using subfunction-based tests.</p><p>The sample M-file begins with the <tt>classdef</tt> statement, which sets the name of the class and indicates that it is a subclass of <tt>TestCase</tt>.</p><pre class="codeinput">cd <span class="string">examples_general</span>
dbtype <span class="string">TestUsingTestCase</span> <span class="string">1</span>
</pre><pre class="codeoutput">
1     classdef TestUsingTestCase &lt; TestCase

</pre><p>The properties block contains a field that is initialized by the setup method and is used by the two test methods.</p><pre class="codeinput">dbtype <span class="string">TestUsingTestCase</span> <span class="string">3:5</span>
</pre><pre class="codeoutput">
3         properties
4             fh
5         end

</pre><p>The first method in the methods block is the constructor.  It takes the desired test method name as its input argument, and it passes that input along to the base class constructor.</p><pre class="codeinput">dbtype <span class="string">TestUsingTestCase</span> <span class="string">7:10</span>
</pre><pre class="codeoutput">
7         methods
8             function self = TestUsingTestCase(name)
9                 self = self@TestCase(name);
10            end

</pre><p>The <tt>setUp</tt> method creates a figure window and stores its handle in the field <tt>fh</tt>.</p><pre class="codeinput">dbtype <span class="string">TestUsingTestCase</span> <span class="string">12:14</span>
</pre><pre class="codeoutput">
12            function setUp(self)
13                self.fh = figure;
14            end

</pre><p>Test methods are those beginning with "test".</p><pre class="codeinput">dbtype <span class="string">TestUsingTestCase</span> <span class="string">20:26</span>
</pre><pre class="codeoutput">
20            function testColormapColumns(self)
21                assertEqual(size(get(self.fh, 'Colormap'), 2), 3);
22            end
23    
24            function testPointer(self)
25                assertEqual(get(self.fh, 'Pointer'), 'arrow');
26            end

</pre><p>The <tt>tearDown</tt> method cleans up by deleting the figure window.</p><pre class="codeinput">dbtype <span class="string">TestUsingTestCase</span> <span class="string">16:18</span>
</pre><pre class="codeoutput">
16            function tearDown(self)
17                delete(self.fh);
18            end

</pre><p>Run the test cases in the class by calling <tt>runtests</tt> with the name of the class.</p><pre class="codeinput">runtests <span class="string">TestUsingTestCase</span>
</pre><pre class="codeoutput">Starting test run with 2 test cases.
..
PASSED in 0.095 seconds.
</pre><p><a href="../index.html">Back to MATLAB xUnit Test Framework</a></p><p class="footer">Copyright 2008-2010 The MathWorks, Inc.<br>
      Published with MATLAB&reg; 7.10<br></p></div><!--
##### SOURCE BEGIN #####
%% <../index.html MATLAB xUnit Test Framework>: How to Write xUnit-Style Tests by Subclassing TestCase
% The MATLAB xUnit architecture is based closely on the xUnit style, in
% which each test case is an instance of a subclass of the base
% TestCase class.  Programmers who are familiar with this style may
% want to write their own TestCase subclasses instead of using
% <./exSubfunctionTests.html subfunction-based tests>.
%
% This example shows a TestCase subclass containing test case
% methods and test fixture methods.  If you are not familiar with
% defining your own classes in MATLAB, you might want to review the
% MATLAB documentation on 
% <http://www.mathworks.com/access/helpdesk/help/techdoc/matlab_oop/ug_intropage.html 
% classes and object-oriented programming>,
% or you can simply stick to using subfunction-based tests.
%
% The sample M-file begins with the |classdef| statement, which sets
% the name of the class and indicates that it is a subclass of
% |TestCase|.

cd examples_general
dbtype TestUsingTestCase 1

%%
% The properties block contains a field that is initialized by the
% setup method and is used by the two test methods.

dbtype TestUsingTestCase 3:5

%%
% The first method in the methods block is the constructor.  It
% takes the desired test method name as its input argument, and it
% passes that input along to the base class constructor.

dbtype TestUsingTestCase 7:10

%%
% The |setUp| method creates a figure window and stores its handle in
% the field |fh|.

dbtype TestUsingTestCase 12:14

%%
% Test methods are those beginning with "test".

dbtype TestUsingTestCase 20:26

%%
% The |tearDown| method cleans up by deleting the figure window.

dbtype TestUsingTestCase 16:18

%%
% Run the test cases in the class by calling |runtests| with the name
% of the class.

runtests TestUsingTestCase

%%
% <../index.html Back to MATLAB xUnit Test Framework>

%%
% Copyright 2008-2010 The MathWorks, Inc.
##### SOURCE END #####
--></body></html>

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/html/exTestCaseSearching.html
================================================

<!DOCTYPE html
  PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html><head>
      <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
   <!--
This HTML was auto-generated from MATLAB code.
To make changes, update the MATLAB code and republish this document.
      --><title>MATLAB xUnit Test Framework: How RUNTESTS Searches for Test Cases</title><meta name="generator" content="MATLAB 7.11"><link rel="schema.DC" href="http://purl.org/dc/elements/1.1/"><meta name="DC.date" content="2010-11-19"><meta name="DC.source" content="exTestCaseSearching.m"><style type="text/css">

body {
  background-color: white;
  margin:10px;
}

h1 {
  color: #990000; 
  font-size: x-large;
}

h2 {
  color: #990000;
  font-size: medium;
}

/* Make the text shrink to fit narrow windows, but not stretch too far in 
wide windows. */ 
p,h1,h2,div.content div {
  max-width: 600px;
  /* Hack for IE6 */
  width: auto !important; width: 600px;
}

pre.codeinput {
  background: #EEEEEE;
  padding: 10px;
}
@media print {
  pre.codeinput {word-wrap:break-word; width:100%;}
} 

span.keyword {color: #0000FF}
span.comment {color: #228B22}
span.string {color: #A020F0}
span.untermstring {color: #B20000}
span.syscmd {color: #B28C00}

pre.codeoutput {
  color: #666666;
  padding: 10px;
}

pre.error {
  color: red;
}

p.footer {
  text-align: right;
  font-size: xx-small;
  font-weight: lighter;
  font-style: italic;
  color: gray;
}

  </style></head><body><div class="content"><h1><a href="../index.html">MATLAB xUnit Test Framework</a>: How RUNTESTS Searches for Test Cases</h1><p>When you call <tt>runtests</tt> with no input arguments:</p><pre> &gt;&gt; runtests</pre><p>it automatically searches for all the test cases in the current directory.  It looks for test cases in three types of M-files:</p><p>1. An M-file function whose name begins or ends with "test" or "Test" and that does not return an output argument.  Such a function is considered to be a single test case.</p><p>2. An M-file function whose name begins or ends with "test" or "Test" and that returns an output argument that is a test suite.  Such a function is considered to contain subfunction-style test cases.  Each subfunction whose name begins or ends with "test" or "Test" is a test case.</p><p>3. An M-file that defines a subclass of TestCase.  Each method beginning or ending with "test" or "Test" is a test case.</p><p><tt>runtests</tt> uses the <tt>TestSuite</tt> static methods <tt>fromName</tt> and <tt>fromPwd</tt> to automatically construct the test suites.</p><p>Here are a couple of examples.</p><p><tt>TestSuite.fromName</tt> takes an M-file name, determines what kind of test file it is, and returns a test suite containing the corresponding test case objects.</p><pre class="codeinput">cd <span class="string">examples_general</span>
test_suite_1 = TestSuite.fromName(<span class="string">'testSetupExample'</span>)
</pre><pre class="codeoutput">
test_suite_1 = 

  TestSuite handle

  Properties:
    TestComponents: {[1x1 FunctionHandleTestCase]  [1x1 FunctionHandleTestCase]}
              Name: 'testSetupExample'
          Location: [1x79 char]


</pre><p><tt>TestSuite.fromPwd</tt> returns a test suite based on all the test files in the current directory.</p><pre class="codeinput">test_suite_2 = TestSuite.fromPwd()
</pre><pre class="codeoutput">
test_suite_2 = 

  TestSuite handle

  Properties:
    TestComponents: {1x6 cell}
              Name: 'C:\Users\eddins\local-work\matlab_xunit\doc\examples_general'
          Location: 'C:\Users\eddins\local-work\matlab_xunit\doc\examples_general'


</pre><p><a href="../index.html">Back to MATLAB xUnit Test Framework</a></p><p class="footer">Copyright 2008-2010 The MathWorks, Inc.<br>
      Published with MATLAB&reg; 7.11<br></p></div><!--
##### SOURCE BEGIN #####
%% <../index.html MATLAB xUnit Test Framework>: How RUNTESTS Searches for Test Cases
% When you call |runtests| with no input arguments:
%
%   >> runtests
%
% it automatically searches for all the test cases in the current directory.  It
% looks for test cases in three types of M-files:
%
% 1. An M-file function whose name begins or ends with "test" or "Test" and that does
% not return an output argument.  Such a function is considered to be a single
% test case. 
%
% 2. An M-file function whose name begins or ends with "test" or "Test" and that returns
% an output argument that is a test suite.  Such a function is considered to contain
% subfunction-style test cases.  Each subfunction whose name begins or ends with "test"
% or "Test" is a test case. 
%
% 3. An M-file that defines a subclass of TestCase.  Each method beginning or ending with
% "test" or "Test" is a test case.
%
% |runtests| uses the |TestSuite| static methods |fromName| and |fromPwd| to
% automatically construct the test suites.
%
% Here are a couple of examples.
%
% |TestSuite.fromName| takes an M-file name, determines what
% kind of test file it is, and returns a test suite containing the
% corresponding test case objects.

cd examples_general
test_suite_1 = TestSuite.fromName('testSetupExample')

%%
% |TestSuite.fromPwd| returns a test suite based on all the test files in the
% current directory.

test_suite_2 = TestSuite.fromPwd()

%%
% <../index.html Back to MATLAB xUnit Test Framework>

%%
% Copyright 2008-2010 The MathWorks, Inc.
##### SOURCE END #####
--></body></html>

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/html/exTestFixtures.html
================================================

<!DOCTYPE html
  PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html><head>
      <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
   <!--
This HTML is auto-generated from an M-file.
To make changes, update the M-file and republish this document.
      --><title>MATLAB xUnit Test Framework: How to Write Tests That Share Common Set-Up Code</title><meta name="generator" content="MATLAB 7.10"><meta name="date" content="2010-07-29"><meta name="m-file" content="exTestFixtures"><style type="text/css">

body {
  background-color: white;
  margin:10px;
}

h1 {
  color: #990000; 
  font-size: x-large;
}

h2 {
  color: #990000;
  font-size: medium;
}

/* Make the text shrink to fit narrow windows, but not stretch too far in 
wide windows. */ 
p,h1,h2,div.content div {
  max-width: 600px;
  /* Hack for IE6 */
  width: auto !important; width: 600px;
}

pre.codeinput {
  background: #EEEEEE;
  padding: 10px;
}
@media print {
  pre.codeinput {word-wrap:break-word; width:100%;}
} 

span.keyword {color: #0000FF}
span.comment {color: #228B22}
span.string {color: #A020F0}
span.untermstring {color: #B20000}
span.syscmd {color: #B28C00}

pre.codeoutput {
  color: #666666;
  padding: 10px;
}

pre.error {
  color: red;
}

p.footer {
  text-align: right;
  font-size: xx-small;
  font-weight: lighter;
  font-style: italic;
  color: gray;
}

  </style></head><body><div class="content"><h1><a href="../index.html">MATLAB xUnit Test Framework</a>: How to Write Tests That Share Common Set-Up Code</h1><p>Sometimes you want to write a set of test cases in which the same set of initialization steps is performed before each test case, or in which the same set of cleanup steps is performed after each test case.  This set of common <i>setup</i> and <i>teardown</i> code is called a <i>test fixture</i>.</p><p>In subfunction-based test files, you can add subfunctions whose names begin with "setup" and "teardown".  These functions will be called before and after every test-case subfunction is called.  If the setup function returns an output argument, that value is saved and passed to every test-case subfunction and also to the teardown function.</p><p>This example shows a setup function that creates a figure and returns its handle.  The figure handle is passed to each test-case subfunction.  The figure handle is also passed to the teardown function, which cleans up after each test case by deleting the figure.</p><pre class="codeinput">cd <span class="string">examples_general</span>
type <span class="string">testSetupExample</span>
</pre><pre class="codeoutput">
function test_suite = testSetupExample
initTestSuite;

function fh = setup
fh = figure;

function teardown(fh)
delete(fh);

function testColormapColumns(fh)
assertEqual(size(get(fh, 'Colormap'), 2), 3);

function testPointer(fh)
assertEqual(get(fh, 'Pointer'), 'arrow');

</pre><p>Run the tests using <tt>runtests</tt>.</p><pre class="codeinput">runtests <span class="string">testSetupExample</span>
</pre><pre class="codeoutput">Starting test run with 2 test cases.
..
PASSED in 0.095 seconds.
</pre><p>You might also want to see the <a href="./exTestCase.html">example on writing test cases by subclassing TestCase</a>.</p><p><a href="../index.html">Back to MATLAB xUnit Test Framework</a></p><p class="footer">Copyright 2008-2010 The MathWorks, Inc.<br>
      Published with MATLAB&reg; 7.10<br></p></div><!--
##### SOURCE BEGIN #####
%% <../index.html MATLAB xUnit Test Framework>: How to Write Tests That Share Common Set-Up Code
% Sometimes you want to write a set of test cases in which the same
% set of initialization steps is performed before each test case, or
% in which the same set of cleanup steps is performed after each
% test case.  This set of common _setup_ and _teardown_ code is
% called a _test fixture_.
%
% In subfunction-based test files, you can add subfunctions whose
% names begin with "setup" and "teardown".  These functions will be
% called before and after every test-case subfunction is called.  If
% the setup function returns an output argument, that value is saved
% and passed to every test-case subfunction and also to the teardown
% function.
%
% This example shows a setup function that creates a figure and 
% returns its handle.  The figure handle is passed to each test-case
% subfunction.  The figure handle is also passed to the teardown
% function, which cleans up after each test case by deleting the
% figure.

cd examples_general
type testSetupExample

%%
% Run the tests using |runtests|.

runtests testSetupExample

%%
% You might also want to see the 
% <./exTestCase.html example on writing test cases by
% subclassing TestCase>.

%%
% <../index.html Back to MATLAB xUnit Test Framework>

%%
% Copyright 2008-2010 The MathWorks, Inc.
##### SOURCE END #####
--></body></html>

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/html/exTolerance.html
================================================

<!DOCTYPE html
  PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html><head>
      <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
   <!--
This HTML is auto-generated from an M-file.
To make changes, update the M-file and republish this document.
      --><title>MATLAB xUnit Test Framework: How to Test Using a Floating-Point Tolerance</title><meta name="generator" content="MATLAB 7.10"><meta name="date" content="2010-07-29"><meta name="m-file" content="exTolerance"><style type="text/css">

body {
  background-color: white;
  margin:10px;
}

h1 {
  color: #990000; 
  font-size: x-large;
}

h2 {
  color: #990000;
  font-size: medium;
}

/* Make the text shrink to fit narrow windows, but not stretch too far in 
wide windows. */ 
p,h1,h2,div.content div {
  max-width: 600px;
  /* Hack for IE6 */
  width: auto !important; width: 600px;
}

pre.codeinput {
  background: #EEEEEE;
  padding: 10px;
}
@media print {
  pre.codeinput {word-wrap:break-word; width:100%;}
} 

span.keyword {color: #0000FF}
span.comment {color: #228B22}
span.string {color: #A020F0}
span.untermstring {color: #B20000}
span.syscmd {color: #B28C00}

pre.codeoutput {
  color: #666666;
  padding: 10px;
}

pre.error {
  color: red;
}

p.footer {
  text-align: right;
  font-size: xx-small;
  font-weight: lighter;
  font-style: italic;
  color: gray;
}

  </style></head><body><div class="content"><h1><a href="../index.html">MATLAB xUnit Test Framework</a>: How to Test Using a Floating-Point Tolerance</h1><p>MATLAB performs arithmetic operations using the floating-point hardware instructions on your processor. Because almost all floating-point operations are subject to round-off error, arithmetic operations can sometimes produce surprising results. Here's an example.</p><pre class="codeinput">a = 1 + 0.1 + 0.1 + 0.1
</pre><pre class="codeoutput">
a =

    1.3000

</pre><pre class="codeinput">a == 1.3
</pre><pre class="codeoutput">
ans =

     0

</pre><p>So why doesn't <tt>a</tt> equal 1.3? Because 0.1, 1.3, and most other decimal fractions do not have exact representations in the binary floating-point number representation your computer uses.  The first line above is doing an approximate addition of 1 plus an approximation of 0.1, plus an approximation of 0.1, plus an approximation of 0.1.  The second line compares the result of all that with an approximation of 1.3.</p><p>If you subtract 1.3 from <tt>a</tt>, you can see that the computed result for <tt>a</tt> is <i>extremely close</i> to the floating-point approximation of 1.3, but it is not exactly the same.</p><pre class="codeinput">a - 1.3
</pre><pre class="codeoutput">
ans =

  2.2204e-016

</pre><p>As a general rule, when comparing the results of floating-point calculations for equality, it is necessary to use a tolerance value.  Two types of tolerance comparisons are commonly used: absolute tolerance and relative tolerance.  An absolute tolerance comparison of <i>a</i> and <i>b</i> looks like:</p><p><img src="exTolerance_eq55725.png" alt="$$|a-b| \leq T$$"></p><p>A relative tolerance comparison looks like:</p><p><img src="exTolerance_eq28823.png" alt="$$|a-b| \leq T\max(|a|,|b|) + T_f$$"></p><p>where <i>Tf</i> is called the <i>floor tolerance</i>. It acts as an absolute tolerance when <i>a</i> and <i>b</i> are very close to 0.</p><p>For example, suppose that <i>a</i> is 100, <i>b</i> is 101, and T is 0.1.  Then <i>a</i> and <i>b</i> would not be considered equal using an absolute tolerance, because 1 &gt; 0.1.  However, <i>a</i> and <i>b</i> would be considered equal using a relative tolerance, because they differ by only 1 part in 100.</p><p>MATLAB xUnit provides the utility assertion functions called <tt>assertElementsAlmostEqual</tt> and <tt>assertVectorsAlmostEqual</tt>. These functions make it easy to write tests involving floating-point tolerances.</p><p><tt>assertElementsAlmostEqual(A,B)</tt> applies the tolerance test independently to every element of <tt>A</tt> and <tt>B</tt>.  The function uses a relative tolerance test by default, but you can make it use an absolute tolerance test, or change the tolerance values used, by passing additional arguments to it.</p><p><tt>assertVectorsAlmostEqual(A,B)</tt> applies the tolerance test to the vectors <tt>A</tt> and <tt>B</tt> in the L2-norm sense.  For example, suppose <tt>A</tt> is <tt>[1 1e10]</tt>, <tt>B</tt> is <tt>[2 1e10]</tt>, and the tolerance is 1e-8.  Then <tt>A</tt> and <tt>B</tt> would fail an elementwise relative tolerance comparison, because the relative difference between the first elements is 0.5.  
However, they would pass a vector relative tolerance comparison, because the relative vector difference between <tt>A</tt> and <tt>B</tt> is only about 1 part in 1e10.</p><p>The <tt>examples_general</tt> directory contains a portion of a unit test for the <tt>sin</tt> function.  The output of <tt>sin</tt> can sometimes be a bit surprising because of floating-point issues.  For example:</p><pre class="codeinput">sin(pi)
</pre><pre class="codeoutput">
ans =

  1.2246e-016

</pre><p>That's very close but not exactly equal to 0.  Here's how the <tt>sin</tt> unit test uses <tt>assertElementsAlmostEqual</tt> to write the <tt>sin(pi)</tt> test with a minimum of fuss.</p><pre class="codeinput">cd <span class="string">examples_general</span>
type <span class="string">testSin</span>
</pre><pre class="codeoutput">
function testSin

assertElementsAlmostEqual(sin(pi), 0);
</pre><p>Run the test using <tt>runtests</tt>.</p><pre class="codeinput">runtests <span class="string">testSin</span>
</pre><pre class="codeoutput">Starting test run with 1 test case.
.
PASSED in 0.023 seconds.
</pre><p><a href="../index.html">Back to MATLAB xUnit Test Framework</a></p><p class="footer">Copyright 2008-2010 The MathWorks, Inc.<br>
      Published with MATLAB&reg; 7.10<br></p></div><!--
##### SOURCE BEGIN #####
%% <../index.html MATLAB xUnit Test Framework>: How to Test Using a Floating-Point Tolerance
% MATLAB performs arithmetic operations using the floating-point
% hardware instructions on your processor. Because
% almost all floating-point operations are subject to round-off
% error, arithmetic operations can sometimes produce surprising
% results. Here's an example.

a = 1 + 0.1 + 0.1 + 0.1

%%
a == 1.3

%%
% So why doesn't |a| equal 1.3? Because 0.1, 1.3, and most other
% decimal fractions do not have exact representations in the binary
% floating-point number representation your computer uses.  The
% first line above is doing an approximate addition of 1 plus an
% approximation of 0.1, plus an approximation of 0.1, plus an
% approximation of 0.1.  The second line compares the result of all
% that with an approximation of 1.3.
%
% If you subtract 1.3 from |a|, you can see that the computed result
% for |a| is _extremely close_ to the floating-point approximation
% of 1.3, but it is not exactly the same.

a - 1.3

%%
% As a general rule, when comparing the results of floating-point
% calculations for equality, it is necessary to use a tolerance
% value.  Two types of tolerance comparisons are commonly used: absolute
% tolerance and relative tolerance.  An absolute tolerance comparison of _a_ and _b_ 
% looks like:
%
% $$|a-b| \leq T$$
%
% A relative tolerance comparison looks like:
%
% $$|a-b| \leq T\max(|a|,|b|) + T_f$$
%
% where _Tf_ is called the _floor tolerance_. It acts as an absolute tolerance
% when _a_ and _b_ are very close to 0.
%
% For example, suppose that _a_ is 100, _b_ is 101, and T is 0.1.  Then _a_ and
% _b_ would not be considered equal using an absolute tolerance, because 1 >
% 0.1.  However, _a_ and _b_ would be considered equal using a relative
% tolerance, because they differ by only 1 part in 100.
%
% MATLAB xUnit provides the utility assertion functions called
% |assertElementsAlmostEqual| and |assertVectorsAlmostEqual|. These functions
% make it easy to write tests involving floating-point tolerances.
%
% |assertElementsAlmostEqual(A,B)| applies the tolerance test independently to
% every element of |A| and |B|.  The function uses a relative tolerance test by
% default, but you can make it use an absolute tolerance test, or change the
% tolerance values used, by passing additional arguments to it.
%
% |assertVectorsAlmostEqual(A,B)| applies the tolerance test to the vectors |A|
% and |B| in the L2-norm sense.  For example, suppose |A| is |[1 1e10]|, |B|
% is |[2 1e10]|, and the tolerance is 1e-8.  Then |A| and |B| would fail an
% elementwise relative tolerance comparison, because the relative difference
% between the first elements is 0.5.  However, they would pass a vector relative
% tolerance comparison, because the relative vector difference between |A| and
% |B| is only about 1 part in 1e10.
%
% The |examples_general| directory contains a portion of a unit test for the
% |sin| function.  The output of |sin| can sometimes be a bit surprising because
% of floating-point issues.  For example:

sin(pi)

%%
% That's very close but not exactly equal to 0.  Here's how the
% |sin| unit test uses |assertElementsAlmostEqual| to write the |sin(pi)|
% test with a minimum of fuss.

cd examples_general
type testSin

%%
% Run the test using |runtests|.

runtests testSin

%%
% <../index.html Back to MATLAB xUnit Test Framework>

%%
% Copyright 2008-2010 The MathWorks, Inc.
##### SOURCE END #####
--></body></html>

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/index.html
================================================
<html>
    <head>
        <meta http-equiv="REFRESH" content="0;url=xunit_product_page.html">
    </head>
    <body>
    </body>
</html>


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/release-history.html
================================================
<html>
    <head>
        <title>MATLAB xUnit Release History</title>
    </head>
    
    <body>
        <h1>MATLAB xUnit Release History</h1>
        
        <h3>3.1 19-Nov-2010</h3>
        <ul>
            <li>
                Add -logfile option to runtests.
            </li>
            <li>
                Allow test names to be passed to runtests as a cell array of strings.
            </li>
            <li>
                Add test suite name and execution date to output of runtests.
            </li>
            <li>
                Added warning message if function-handle-based test file has a setup function
                that returns more than one output argument.
            </li>
            <li>
                Fix bug related to handling subfunction test files in packages.
            </li>
            <li>
                Fix TestSuite.fromPackageName to find tests inside subpackages.
            </li>
            <li>
                Correct text in exTestCaseSearching.m to show that test files and functions
                can begin or end with "test" or "Test".
            </li>
        </ul>
        
        <h3>3.0.2 30-Jul-2010</h3>
        <ul>
            <li>
                Fixed bug that caused TestCase subclasses in a test package to
                be executed twice in some versions of MATLAB.
            </li>
            <li>
                Documented the <tt>out = runsuite(...)</tt> syntax.
            </li>
            <li>
                Added home doc links to the various doc pages.
            </li>
        </ul>
        
        <h3>3.0.1 16-Jun-2010</h3>
        <p>
            Fixed handling of TestCase subclasses in a test package.
        </p>
        
        <h3>3.0 12-Jun-2010</h3>
        <ul>
            <li> Added capability to run tests stored inside packages. </li>
            <li> <tt>runtests</tt> errors if no test cases are found instead
            of silently passing. </li>
            <li> Accept function names that end in "test" or "Test" as test functions. </li>
            <li> In <tt>assertElementsAlmostEqual</tt> and <tt>assertVectorsAlmostEqual</tt>, change the 
                default <tt>floor_tol</tt> value to <tt>sqrt(eps)</tt> instead of <tt>eps</tt>. This makes the assertion
            a bit more forgiving when comparing numbers very close to 0.</li>
            <li> Added -verbose option to <tt>runtests</tt>. </li>
            <li> Fixed handling of message strings containing sprintf-style control characters
            in the assert*.m functions. </li>
        </ul>
                
        <h3>2.0.1 04-Aug-2009</h3>
        <p>
            Corrected errors in assertElementsAlmostEqual and assertVectorsAlmostEqual
            related to NaN and Inf inputs.  assertElementsAlmostEqual now properly
            asserts when one input is finite and the other is infinite.
            assertVectorsAlmostEqual now asserts whenever any input element is NaN
            or infinite.  The behavior of xunit.utils.compareFloats has been changed
            to match.
        </p>
        
        <h3>2.0 05-June-2009</h3>
        <ul>
            <li>
                The name of the package has been changed to "MATLAB xUnit Test Framework."
                The command-line test runner is now called <tt>runtests</tt>.  The utilities
                package is now called <tt>xunit.utils</tt>.  If you want to continue using
                the old command-line test runner (<tt>mtest</tt>) and utilities package
                (<tt>mtest.utils</tt>), then put the <tt>obsolete</tt> directory on the
                MATLAB path.
            </li>
            <li>
                The assertion functions <tt>assertEqual</tt>, <tt>assertElementsAlmostEqual</tt>,
                and <tt>assertVectorsAlmostEqual</tt> now print more information about the input
                values in the case of an assertion failure.
            </li>
            <li>
                A new assertion function, <tt>assertFilesEqual</tt>, has been added.
            </li>
            <li>
                The command-line test runner, <tt>runtests</tt>, now supports multiple directory
                names.
            </li>
            <li>
                The assertion function <tt>assertAlmostEqual</tt> has been deprecated. Use
                <tt>assertElementsAlmostEqual</tt> and <tt>assertVectorsAlmostEqual</tt>
                instead. If you want to continue using <tt>assertAlmostEqual</tt>, then
                put the <tt>obsolete</tt> directory on the MATLAB path.
            </li>
        </ul>
        
        <h3>1.1.3 20-May-2009</h3>
        <p>
            Remove the LICENSE.txt file because the open source BSD license is
            now supplied automatically by the MATLAB Central File Exchange.
            There are no functional changes in this version.
        </p>
        
        <h3>1.1.2 02-Apr-2009</h3>
        <p>
            This release fixes a bug with <tt>assertVectorsAlmostEqual</tt> when
            the caller provided a custom message. When the function errored out
            because the tolerance was exceeded, the function would issue a
            different error message than expected.
        </p>
        
        <h3>1.1.1 16-Mar-2009</h3>
        <p>
            This release fixes a problem when calling mtest with no input
            arguments. Previously, it was not limiting its test-case discovery
            to TestCase subclasses and ordinary M-files beginning with "test" 
            or "Test" as documented.
        </p>        
        
        <p>
            This release also integrates the MTEST documentation with the MATLAB
            Help Browser.
        </p>

        <h3>1.1 11-Mar-2009</h3>
        <p>
            This release adds new tools for performing floating-point
            comparisons.  Using the new assertion functions
            <tt>assertElementsAlmostEqual</tt> and 
            <tt>assertVectorsAlmostEqual</tt>, you can perform both
            absolute and relative tolerance comparisons, either elementwise
            or in a vector L2-norm fashion.
        </p>

        <p>
            The previous floating-point comparison function,
            <tt>assertAlmostEqual</tt>, is still available, but its
            use is discouraged.
        </p>
         
        <h3>1.0 30-Jan-2009</h3>
        <p>
            This release, the first to be posted on the MATLAB Central File
            Exchange, includes a refactoring of TestCase and TestSuite to use
            the composite design pattern.  Both classes now subclass the 
            abstract class TestComponent, and the individual items contained
            in a TestSuite object are TestComponent objects.  That means
            a TestSuite object can contain both TestSuite objects and 
            TestCase objects. 
        </p>
        
        <p>
            TestSuites are now built up hierarchically.  All the test cases 
            in a subfunction-based test M-file become a test suite, which in
            turn can be part of a test suite for an entire test directory.
        </p>
        
        <p>
            The mtest driver function can now take the name of a directory,
            in which case it will automatically discover and run all the test
            cases in that directory.
        </p>
        
        <p>
            The old TestRunObserver class has become the abstract TestRunMonitor
            class, with subclasses TestRunLogger and CommandWindowTestRunDisplay.
        </p>
        
        <p>
            TestCaseInDir has been modified to do a directory change before
            executing the test case. The new class TestCaseWithAddPath makes
            a temporary path addition before executing the test case.
        </p>
        
        <p>
            Subfunction-based test M-files written for one of the alpha versions
            of MTEST need to be revised so that the output variable name is 
            "test_suite", and so that the first line of code calls the script
            "initTestSuite".
        </p>
        
        <h3>0.9 12-Sep-2008</h3>
        <p>
            This release is an extensive update that provides simpler ways of 
            writing and running test cases.
        </p>
        <ul>
            <li>
                The new function mtest automatically finds and runs all test
                cases in the current directory.
            </li>
            <li>
                Test cases can be written as simple M-file functions.
            </li>
            <li>
                Multiple test cases can be defined in a single M-file by using
                subfunctions.
            </li>
            <li>
                Many new documentation examples have been provided, including a
                "Quick Start" example intended to enable users to write and
                run their first tests in just a few minutes.
            </li>
        </ul>
        
        
        <h3>0.8.1 17-Mar-2008</h3>
        <ul>
            <li>
                Some of the sample TestCase classes were missing classdef lines.  
                FIXED
            </li>
            <li>
                Now using dot method invocation syntax in examples and doc.
            </li>
            <li>
                Minor edits to HTML doc (munit_doc.html).
            </li>
            <li>
                Edited munit_doc.html by hand to clean up command-window links.
            </li>
        </ul>
        
        <h3>0.8 15-Mar-2008</h3>
        <ul>
            <li>
                Limited initial distribution for review and comment.
            </li>
        </ul>
    </body>
</html>


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/doc/xunit_product_page.html
================================================
<html>
    <head>
        <title>MATLAB&reg; xUnit Test Framework</title>
    </head>
    <body>
        <h1>MATLAB<sup>&reg;</sup> xUnit Test Framework</h1>

        <p>
            MATLAB xUnit is a unit test framework for MATLAB code.  
            MATLAB xUnit is designed to be easy to use for MATLAB users with a 
            wide range of experience. Users can write tests using 
            ordinary M-files that are very simple in structure.  
        </p>

        <p>
            <strong>Important Note:</strong> MATLAB xUnit 
            relies heavily on object-oriented language features
            introduced in MATLAB 7.6 (R2008a), which was released in March 2008.   
            MATLAB xUnit functions and classes will not work in earlier MATLAB releases.
            In addition, writing and running tests inside packages requires MATLAB
            7.7 (R2008b) or later.
        </p>
        

        <h2>Installation</h2>
        
        <p>
        To use MATLAB xUnit in MATLAB, add the "xunit" folder (directory) to the MATLAB path.  See the <a href="http://www.mathworks.com/access/helpdesk/help/techdoc/matlab_env/f10-26235.html">
            MATLAB documentation for setting
        the search path</a>. (The "tests" directory contains the 
        framework's own self-tests, and the "architecture" directory contains
        information about the framework architecture; these directories are not
        needed for using MATLAB xUnit.)
        </p>
        
        <p>
        <strong>Note for users of earlier versions of MATLAB xUnit:</strong> If you have already
        written unit tests based on MTEST, an earlier version of MATLAB xUnit, you may also want 
        to add the "obsolete" folder to the MATLAB path. This folder contains the old command-line
        test runner, <tt>mtest</tt>, as well as the deprecated function <tt>assertAlmostEqual</tt>.
        </p>
        
        <h2>Getting Started</h2>
        
        <p>
            <a href="html/exQuickStart.html">Quick Start: How to Write and
            Run Tests</a>
        </p>
        
        <p>
            <a href="html/exSubfunctionTests.html">How to Put Multiple Test
            Cases in One M-file</a>
        </p>
        
        <p>
            <a href="html/exRunSpecificTest.html">How to Run a Specific Test</a>
        </p>
        
        <p>
            <a href="html/exRunTestsInADirectory.html">How to Run Tests in Specific Directories</a>
        </p>
        
        <p>
            <a href="html/exRunTestsInPackage.html">How to Run Tests in a Package</a>
        </p>
        
        <h2>Advanced Usage</h2>
        
        <p>
            <a href="html/exTolerance.html">How to Test Using a Floating-Point Tolerance</a>
        </p>
        
        <p>
            <a href="html/exException.html">How to Test an Error Message</a>
        </p>
        
        <p>
            <a href="html/exSilentRunning.html">How to Run Tests Silently and Query the Results</a>
        </p>
        
        <p>
            <a href="html/exTestFixtures.html">How to Write Tests That Share Common
            Set-Up Code</a>
        </p>
        
        <p>
            <a href="html/exTestCase.html">How to Write xUnit-Style Tests by 
            Subclassing TestCase</a>
        </p>
        
        <p>
            <a href="html/exTestCaseSearching.html">How MATLAB xUnit Searches for Test 
            Cases</a>
        </p>
        
        <h2>Key Functions and Classes</h2>
        
        <p>
            Main test driver function:
            <ul>
                <li>
                    <a href="matlab:doc runtests"><code>runtests</code></a>
                </li>
            </ul>
        </p>
        
        <p>
            Assertion functions you can use in your tests:
            <ul>
                <li>
                    <a href="matlab:doc assertTrue"><code>assertTrue</code></a>
                </li>
                <li>
                    <a href="matlab:doc assertFalse"><code>assertFalse</code></a>
                </li>
                <li>
                    <a href="matlab:doc assertEqual"><code>assertEqual</code></a>
                </li>
                <li>
                    <a href="matlab:doc assertFilesEqual"><code>assertFilesEqual</code></a>
                </li>
                <li>
                    <a href="matlab:doc assertElementsAlmostEqual"><code>assertElementsAlmostEqual</code></a>
                </li>
                <li>
                    <a href="matlab:doc assertVectorsAlmostEqual"><code>assertVectorsAlmostEqual</code></a>
                </li>
                <li>
                    <a href="matlab:doc assertExceptionThrown"><code>assertExceptionThrown</code></a>
                </li>
            </ul>
        </p>
        
        <p>
            The key xUnit-style classes that make everything work:
            <ul>
                <li>
                    <a href="matlab:doc TestComponent"><code>TestComponent</code></a>
                </li>
                <li>
                    <a href="matlab:doc TestSuite"><code>TestSuite</code></a>
                </li>                
                <li>
                    <a href="matlab:doc TestCase"><code>TestCase</code></a>
                </li>
                <li>
                    <a href="matlab:doc FunctionHandleTestCase"><code>FunctionHandleTestCase</code></a>
                </li>
                <li>
                    <a href="matlab:doc TestRunMonitor"><code>TestRunMonitor</code></a>
                </li>                
                <li>
                    <a href="matlab:doc TestRunLogger"><code>TestRunLogger</code></a>
                </li>
                <li>
                    <a href="matlab:doc CommandWindowTestRunDisplay"><code>CommandWindowTestRunDisplay</code></a>
                </li>
            </ul>
        </p>

        <p>
            <a href="release-history.html">Release History</a>
        </p>
        
        <p>
            <em>Copyright 2008-2010 The MathWorks, Inc.</em>
        </p>
        
    </body>
</html>

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/license.txt
================================================
Copyright (c) 2010, The MathWorks, Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without 
modification, are permitted provided that the following conditions are 
met:

    * Redistributions of source code must retain the above copyright 
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright 
      notice, this list of conditions and the following disclaimer in 
      the documentation and/or other materials provided with the distribution
    * Neither the name of the The MathWorks, Inc. nor the names 
      of its contributors may be used to endorse or promote products derived 
      from this software without specific prior written permission.
      
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
POSSIBILITY OF SUCH DAMAGE.


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/obsolete/+mtest/+utils/Contents.m
================================================
% UTILS Utility package for MTEST unit testing framework
%
% Array Comparison
%   compareFloats            - Compare floating-point arrays using tolerance
%
% Test Case Discovery Functions
%   isTestCaseSubclass       - True for name of TestCase subclass
%
% String Functions
%   containsRegexp           - True if string contains regular expression
%   isSetUpString            - True for string that looks like a setup function
%   isTearDownString         - True for string that looks like teardown function
%   isTestString             - True for string that looks like a test function
%
% Miscellaneous Functions
%   generateDoc              - Publish test scripts in mtest/doc
%   parseFloatAssertInputs   - Common input-parsing logic for several functions

% Undocumented Functions
%   isAlmostEqual        - Floating-point equality test using relative tolerance

%   Steven L. Eddins
%   Copyright 2008-2009 The MathWorks, Inc.


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/obsolete/+mtest/+utils/compareFloats.m
================================================
function result = compareFloats(varargin)
%compareFloats Compare floating-point arrays using tolerance.
%   result = compareFloats(A, B, compare_type, tol_type, tol, floor_tol)
%   compares the floating-point arrays A and B using a tolerance.  compare_type
%   is either 'elementwise' or 'vector'.  tol_type is either 'relative' or
%   'absolute'.  tol and floor_tol are the scalar tolerance values.
%
%   There are four different tolerance tests used, depending on the comparison
%   type and the tolerance type:
%
%   1. Comparison type: 'elementwise'     Tolerance type: 'relative'
%
%       all( abs(A(:) - B(:)) <= tol * max(abs(A(:)), abs(B(:))) + floor_tol )
%
%   2. Comparison type: 'elementwise'     Tolerance type: 'absolute'
%
%       all( abs(A(:) - B(:)) <= tol )
%
%   3. Comparison type: 'vector'          Tolerance type: 'relative'
%
%       norm(A(:) - B(:)) <= tol * max(norm(A(:)), norm(B(:))) + floor_tol
%
%   4. Comparison type: 'vector'          Tolerance type: 'absolute'
%
%       norm(A(:) - B(:)) <= tol
%
%   Note that floor_tol is not used when the tolerance type is 'absolute'.
%
%   compare_type, tol_type, tol, and floor_tol are all optional inputs.  The
%   default value for compare_type is 'elementwise'.  The default value for
%   tol_type is 'relative'.  If both A and B are double, then the default value
%   for tol is sqrt(eps), and the default value for floor_tol is eps.  If either
%   A or B is single, then the default value for tol is sqrt(eps('single')), and
%   the default value for floor_tol is eps('single').
%
%   If A or B is complex, then the tolerance test is applied independently to
%   the real and imaginary parts.
%
%   NaN and Inf handling: positions where A and B are both NaN, both +Inf, or
%   both -Inf are treated as equal.  Any other NaN or Inf (for example Inf
%   compared with a finite value, or +Inf compared with -Inf) makes the result
%   false.
%
%   An unrecognized compare_type raises
%   MTEST:compareFloats:unrecognizedCompareType; an unrecognized tol_type raises
%   MTEST:compareFloats:unrecognizedToleranceType.

%   Steven L. Eddins
%   Copyright 2008-2009 The MathWorks, Inc.

if nargin >= 3
    % compare_type specified.  Grab it and then use parseFloatAssertInputs to
    % process the remaining input arguments.
    compare_type = varargin{3};
    varargin(3) = [];
    % strcmp returns a 1-by-2 logical array here (all false for a non-string
    % input), so the test must be ~any(...); isempty(...) can never be true.
    if ~any(strcmp(compare_type, {'elementwise', 'vector'}))
        error('MTEST:compareFloats:unrecognizedCompareType', ...
            'COMPARE_TYPE must be ''elementwise'' or ''vector''.');
    end
else
    compare_type = 'elementwise';
end

params = mtest.utils.parseFloatAssertInputs(varargin{:});

A = params.A(:);
B = params.B(:);

% Matching NaN/Inf pairs are replaced by zeros so they compare as equal.
[A, B] = preprocessNanInf(A, B);

% Whatever non-finite value is left has no matching partner.  Without this
% guard an unmatched Inf would pass the relative test, because
% Inf <= tol * Inf + floor_tol evaluates to true.
if ~all(isfinite(A)) || ~all(isfinite(B))
    result = false;
    return
end

% compare_type has been validated above, so only two values are possible.
if strcmp(compare_type, 'elementwise')
    magFcn = @abs;
else
    magFcn = @norm;
end

switch params.ToleranceType
    case 'relative'
        compareFcn = @(A, B) magFcn(A - B) <= ...
            params.Tolerance * max(magFcn(A), magFcn(B)) + ...
            params.FloorTolerance;
        
    case 'absolute'
        compareFcn = @(A, B) magFcn(A - B) <= params.Tolerance;
        
    otherwise
        error('MTEST:compareFloats:unrecognizedToleranceType', ...
            'TOL_TYPE must be ''relative'' or ''absolute''.');
end

if isreal(A) && isreal(B)
    result = compareFcn(A, B);
else
    % Complex inputs: real and imaginary parts must each pass the test.
    result = compareFcn(real(A), real(B)) & compareFcn(imag(A), imag(B));
end

result = all(result);

%===============================================================================
function [A, B] = preprocessNanInf(A, B)
%preprocessNanInf Zero out positions where A and B hold the same NaN/Inf.
%   Positions where both inputs are NaN, both are +Inf, or both are -Inf are
%   set to 0 in both outputs.  All other elements are returned unchanged.

both_nan     = isnan(A) & isnan(B);
both_pos_inf = (A == Inf) & (B == Inf);
both_neg_inf = (A == -Inf) & (B == -Inf);

matched = both_nan | both_pos_inf | both_neg_inf;

A(matched) = 0;
B(matched) = 0;


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/obsolete/+mtest/+utils/containsRegexp.m
================================================
function tf = containsRegexp(str, exp)
%containsRegexp True if string contains regular expression
%   TF = containsRegexp(str, exp) reports whether the regular expression exp
%   matches somewhere in the string str.  When str is a cell array of strings,
%   every element is tested and TF is a logical array of the same size as str.

%   Steven L. Eddins
%   Copyright 2008-2009 The MathWorks, Inc.

% Wrap a plain string in a cell so regexp always returns a cell array.
if iscell(str)
    candidates = str;
else
    candidates = {str};
end

start_indices = regexp(candidates, exp);
no_match = cellfun('isempty', start_indices);
tf = ~no_match;


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/obsolete/+mtest/+utils/generateDoc.m
================================================
function generateDoc
%generateDoc Publish the example scripts in the doc directory
%   generateDoc locates the "doc" directory that sits next to the directory
%   containing mtest, adds it to the MATLAB path, changes into it, and calls
%   PUBLISH on every *.m file found there.

%   Steven L. Eddins
%   Copyright 2008-2009 The MathWorks, Inc.

mtest_dir = fileparts(which('mtest'));
doc_dir = fullfile(mtest_dir, '..', 'doc');

addpath(doc_dir);
cd(doc_dir)

listing = dir('*.m');
script_names = {listing.name};
for idx = 1:numel(script_names)
    publish(script_names{idx});
    % Return to the doc directory after each script is published.
    cd(doc_dir)
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/obsolete/+mtest/+utils/isAlmostEqual.m
================================================
function same = isAlmostEqual(A, B, reltol)
%isAlmostEqual Equality test using relative tolerance
%   same = isAlmostEqual(A, B, reltol), for two floating-point arrays A and B
%   of the same size, returns true when every pair of elements satisfies
%
%       abs(A - B) ./ max(max(abs(A), abs(B)), 1) <= reltol
%
%   The denominator is never smaller than 1, so for values of magnitude below
%   1 the test acts as an absolute tolerance.  Pairs that are both 0, both
%   NaN, both Inf, or both -Inf are always treated as equal.
%
%   same = isAlmostEqual(A, B) uses the following value for the relative
%   tolerance:
%
%       100 * max(eps(class(A)), eps(class(B)))
%
%   If A and B are floating-point arrays of different sizes, the result is
%   false.  If either A or B is not a floating-point array, then isAlmostEqual
%   returns the result of isequal(A, B).

%   Steven L. Eddins
%   Copyright 2008-2009 The MathWorks, Inc.

if ~(isfloat(A) && isfloat(B))
    same = isequal(A, B);
    return
end

if nargin < 3
    reltol = 100 * max(eps(class(A)), eps(class(B)));
end

if ~isequal(size(A), size(B))
    same = false;
    return
end

a = A(:);
b = B(:);

scale = max(max(abs(a), abs(b)), 1);
rel_err = abs(a - b) ./ scale;

% Pairs that would otherwise yield NaN (or are defined as equal) count as a
% perfect match.
always_equal = ((a == 0) & (b == 0)) | ...
    (isnan(a) & isnan(b)) | ...
    ((a == Inf) & (b == Inf)) | ...
    ((a == -Inf) & (b == -Inf));
rel_err(always_equal) = 0;

same = all(rel_err <= reltol);


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/obsolete/+mtest/+utils/isSetUpString.m
================================================
function tf = isSetUpString(str)
%isSetUpString True if string looks like the name of a setup function
%   tf = isSetUpString(str) is true when str begins with "setup", where the
%   "s" and the "u" may each be upper or lower case.  For a cell array of
%   strings, each element is tested and tf is a logical array with the same
%   size as str.

%   Steven L. Eddins
%   Copyright 2008-2009 The MathWorks, Inc.

tf = mtest.utils.containsRegexp(str, '^[sS]et[uU]p');


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/obsolete/+mtest/+utils/isTearDownString.m
================================================
function tf = isTearDownString(str)
%isTearDownString True if string looks like the name of a teardown function
%   tf = isTearDownString(str) is true when str begins with "teardown", where
%   the "t" and the "d" may each be upper or lower case.  For a cell array of
%   strings, each element is tested and tf is a logical array with the same
%   size as str.

%   Steven L. Eddins
%   Copyright 2008-2009 The MathWorks, Inc.

teardown_exp = '^[tT]ear[dD]own';
tf = mtest.utils.containsRegexp(str, teardown_exp);


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/obsolete/+mtest/+utils/isTestCaseSubclass.m
================================================
function tf = isTestCaseSubclass(name)
%isTestCaseSubclass True for name of a TestCase subclass
%   tf = isTestCaseSubclass(name) looks up the class called name with
%   meta.class.fromName.  It returns false when no such class is found, and
%   true when the class is TestCase itself or has TestCase somewhere among
%   its superclasses.

%   Steven L. Eddins
%   Copyright 2008-2009 The MathWorks, Inc.

class_meta = meta.class.fromName(name);

if isempty(class_meta)
    % Not the name of a class
    tf = false;
    return;
end

tf = strcmp(class_meta.Name, 'TestCase') || ...
    isMetaTestCaseSubclass(class_meta);

function tf = isMetaTestCaseSubclass(class_meta)
%isMetaTestCaseSubclass True if a meta.class is TestCase or derives from it
%   Checks the class name first, then searches the superclasses recursively
%   and stops at the first one that is (or derives from) TestCase.

tf = strcmp(class_meta.Name, 'TestCase');
if tf
    return
end

parents = class_meta.SuperClasses;
k = 1;
while ~tf && k <= numel(parents)
    tf = isMetaTestCaseSubclass(parents{k});
    k = k + 1;
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/obsolete/+mtest/+utils/isTestString.m
================================================
function tf = isTestString(str)
%isTestString True if string looks like the name of a test
%   tf = isTestString(str) is true when str begins with "test" or "Test".
%   For a cell array of strings, each element is tested and tf is a logical
%   array with the same size as str.

%   Steven L. Eddins
%   Copyright 2008-2009 The MathWorks, Inc.

tf = mtest.utils.containsRegexp(str, '^[tT]est');


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/obsolete/+mtest/+utils/parseFloatAssertInputs.m
================================================
function params = parseFloatAssertInputs(varargin)
%parseFloatAssertInputs Parse inputs for floating-point assertion functions.
%   params = parseFloatAssertInputs(A, B, tol_type, tol, floor_tol, message)
%   parses the argument list shared by the floating-point comparison
%   functions (it is called by mtest.utils.compareFloats).  Everything after
%   B is optional.  It returns a parameter struct containing the fields:
%
%       A    B    ToleranceType    Tolerance    FloorTolerance    Message
%
%   A trailing string argument that is neither 'relative' nor 'absolute' is
%   taken to be the message.  Defaults: ToleranceType is 'relative',
%   FloorTolerance is the larger of eps(class(A)) and eps(class(B)),
%   Tolerance is the square root of that value, and Message is ''.
%
%   A and B must be floating-point arrays of the same size; otherwise an
%   error is raised.

%   Steven L. Eddins
%   Copyright 2008-2009 The MathWorks, Inc.

error(nargchk(2, 6, nargin, 'struct'));

params = struct('A', {[]}, 'B', {[]}, 'ToleranceType', {[]}, ...
    'Tolerance', {[]}, 'FloorTolerance', {[]}, 'Message', {''});

% The first two input arguments are always A and B.
params.A = varargin{1};
params.B = varargin{2};
extras = varargin(3:end);

% A trailing string that is not a tolerance-type keyword is the message.
if ~isempty(extras)
    last_arg = extras{end};
    if ischar(last_arg) && ~any(strcmp(last_arg, {'relative', 'absolute'}))
        params.Message = last_arg;
        extras(end) = [];
    end
end

checkAB(params.A, params.B);

epsilon = max(eps(class(params.A)), eps(class(params.B)));

% Start from the defaults, then override with whatever the caller supplied.
params.ToleranceType = 'relative';
params.Tolerance = sqrt(epsilon);
params.FloorTolerance = epsilon;

num_extras = numel(extras);
if num_extras >= 1
    params.ToleranceType = extras{1};
end
if num_extras >= 2
    params.Tolerance = extras{2};
end
if num_extras >= 3
    params.FloorTolerance = extras{3};
end

%===============================================================================
function checkAB(A, B)
%checkAB Error unless A and B are floating-point arrays of the same size.

both_float = isfloat(A) && isfloat(B);
if ~both_float
    error('MTEST:parseFloatAssertInputs:inputsNotFloat', ...
        'A and B must be floating-point arrays.');
end

same_size = isequal(size(A), size(B));
if ~same_size
    error('MTEST:parseFloatAssertInputs:sizeMismatch', ...
        'A and B must have the same size.');
end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/obsolete/assertAlmostEqual.m
================================================
function assertAlmostEqual(A, B, reltol, message)
%assertAlmostEqual Assert that inputs are equal within relative tolerance
%   assertAlmostEqual(A, B, RELTOL) throws an exception if any of the values
%   in A and B are not equal within the specified tolerance.  NaN values are
%   considered to be equal.  A and B have to have the same class and sparsity
%   to be considered equal.
%
%   assertAlmostEqual(A, B) uses the following relative tolerance value:
%
%       100 * eps(class(A))
%
%   assertAlmostEqual(A, B, RELTOL, MESSAGE) uses the specified message string
%   when throwing the exception.  With this syntax, use RELTOL = [] to specify
%   the default relative tolerance.
%
%   Note that if either A or B are not floating-point arrays, then A and B are
%   compared using ISEQUALWITHEQUALNANS and the relative tolerance value is not
%   used.
%
%   The exception identifiers are:
%
%       assertAlmostEqual:sparsityNotEqual   one input is sparse, the other not
%       assertAlmostEqual:classNotEqual      class(A) differs from class(B)
%       assertAlmostEqual:tolExceeded        values differ by more than RELTOL
%
%   Examples
%   --------
%   % This call returns silently.
%   assertAlmostEqual(1.0, 1.0 + eps);
%
%   % This call throws an error.
%   assertAlmostEqual(1.0, 1.1);
%
%   See also assertEqual, mtest.utils.isAlmostEqual

%   Steven L. Eddins
%   Copyright 2008-2009 The MathWorks, Inc.

% MESSAGE is optional.  Remember whether the caller supplied it so that every
% throw below has a defined message even for the two- and three-input syntaxes.
message_given = (nargin >= 4);

if issparse(A) ~= issparse(B)
    if ~message_given
        message = 'Inputs do not have the same sparsity.';
    end
    throw(MException('assertAlmostEqual:sparsityNotEqual', message));
end

if ~strcmp(class(A), class(B))
    if ~message_given
        message = sprintf('Inputs are not the same class: %s and %s.', ...
            class(A), class(B));
    end
    throw(MException('assertAlmostEqual:classNotEqual', message));
end

if nargin < 3 || isempty(reltol)
    if isfloat(A)
        reltol = 100 * eps(class(A));
    else
        % eps(classname) is only defined for 'single' and 'double'.  Per the
        % help text above the tolerance is not used for non-float inputs, so
        % any placeholder value will do.
        reltol = 0;
    end
end

if ~message_given
    message = sprintf('Inputs are not equal within relative tolerance: %g', ...
        reltol);
end

if ~mtest.utils.isAlmostEqual(A, B, reltol)
   throw(MException('assertAlmostEqual:tolExceeded', message));
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/obsolete/mtest.m
================================================
function out = mtest(name)
%mtest Run unit tests
%   mtest collects every test case it can find in the current directory, runs
%   them, and reports the results in the Command Window.
%
%   The current directory is searched for:
%
%       * M-file functions whose name starts with "test" or "Test" and that
%       return no output arguments.
%
%       * M-file functions whose name starts with "test" or "Test" that
%       contain subfunction tests and use the initTestSuite script to
%       return a TestSuite object.
%
%       * M-files defining a subclass of TestCase.
%
%   mtest(mfilename) runs the test cases found in the named function or class.
%   The function or class must be in the current directory or on the MATLAB
%   path.
%
%   mtest('mfilename:testname') runs only the test case named 'testname'
%   found in the function or class 'mfilename'.
%
%   mtest(dirname) runs all the test cases that can be found in the specified
%   directory.
%
%   out = mtest(...) additionally returns the pass/fail result reported by the
%   test suite's run method.
%
%   Examples
%   --------
%   Find and run all the test cases in the current directory.
%
%       mtest
%
%   Find and run all the test cases contained in the M-file myfunc.
%
%       mtest myfunc
%
%   Find and run all the test cases contained in the TestCase subclass
%   MyTestCase.
%
%       mtest MyTestCase
%
%   Run the test case named 'testFeature' contained in the M-file myfunc.
%
%       mtest myfunc:testFeature
%
%   Run all the tests in a specific directory.
%
%       mtest c:\Work\MyProject\tests

%   Steven L. Eddins
%   Copyright 2008-2009 The MathWorks, Inc.

if nargin >= 1
    suite = TestSuite.fromName(name);

    % A name that produced no test components but does exist as a folder is
    % treated as a directory of tests.
    found_nothing = isempty(suite.TestComponents);
    if found_nothing && (exist(name, 'file') == 7)
        % The cleanup object puts the caller back in the starting directory
        % when this function exits, even if running the tests errors out.
        starting_dir = pwd;
        restore_dir = onCleanup(@() cd(starting_dir));

        cd(name);
        suite = TestSuite.fromPwd();
    end
else
    suite = TestSuite.fromPwd();
end

display_monitor = CommandWindowTestRunDisplay();
did_pass = suite.run(display_monitor);

% Assign the output only on request so that a bare "mtest" call does not
% echo a result.
if nargout > 0
    out = did_pass;
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/obsolete/tests/MtestTest.m
================================================
%MtestTest Unit tests for mtest command-line test runner.
%
%   The cwd_test directory next to this file is handed to the TestCaseInDir
%   constructor; the tests below run mtest against the test files stored there.

classdef MtestTest < TestCaseInDir

    methods

        function self = MtestTest(name)
            this_dir = fileparts(which(mfilename));
            self = self@TestCaseInDir(name, fullfile(this_dir, 'cwd_test'));
        end

        function test_noInputArgs(self)
            % The cwd_test directory contains some test cases that fail,
            % so output of mtest should be false.
            command = 'mtest';
            [console_text, passed] = evalc(command);
            assertFalse(passed);
        end

        function test_oneInputArg(self)
            % cwd_test/testFoobar.m is supposed to pass.
            command = 'mtest(''testFoobar'')';
            [console_text, passed] = evalc(command);
            assertTrue(passed);
        end

        function test_oneInputArgWithFilter_passing(self)
            % Select the single passing method of TestCaseSubclass.
            command = 'mtest(''TestCaseSubclass:testA'')';
            [console_text, passed] = evalc(command);
            assertTrue(passed);
        end

        function test_oneInputArgWithFilter_failing(self)
            % Select the single failing method of TestCaseSubclass.
            command = 'mtest(''TestCaseSubclass:testB'')';
            [console_text, passed] = evalc(command);
            assertFalse(passed);
        end

    end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/obsolete/tests/cwd_test/TestCaseSubclass.m
================================================
%TestCaseSubclass TestCase subclass containing one passing and one failing test
%
%   testA has an empty body and therefore raises nothing; testB fails on
%   purpose.  MtestTest selects them individually with the
%   'TestCaseSubclass:testA' and 'TestCaseSubclass:testB' filter strings.

%   Steven L. Eddins
%   Copyright 2008 The MathWorks, Inc.

classdef TestCaseSubclass < TestCase
   methods
       % Constructor: forwards the test method name to TestCase.
       function self = TestCaseSubclass(name)
           self = self@TestCase(name);
       end
       
       % Empty test body; nothing is asserted.
       function testA(self)
       end
       
       function testB(self)
           % Intentionally fail this test case.
           assertFalse(true);
       end
   end
end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/obsolete/tests/cwd_test/testFoobar.m
================================================
function testFoobar
%testFoobar Passing M-file test
%   The function has no body, so calling it completes without raising an
%   error.  MtestTest.test_oneInputArg expects mtest('testFoobar') to pass.

%   Steven L. Eddins
%   Copyright 2008 The MathWorks, Inc.

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/obsolete/tests/cwd_test/testSubfunctions.m
================================================
function test_cases = testSubfunctions
%testSubfunctions Contains two passing subfunction tests
%   The two subfunctions below have empty bodies and therefore raise nothing.

%   Steven L. Eddins
%   Copyright 2008 The MathWorks, Inc.

% NOTE(review): findSubfunctionTests is defined elsewhere; presumably it is a
% script that assigns test_cases from the subfunctions of this file - confirm.
findSubfunctionTests;

% Empty test body; nothing is asserted.
function testSub1

% Empty test body; nothing is asserted.
function testSub2


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/obsolete/tests/testAssertAlmostEqual.m
================================================
function test_suite = testAssertAlmostEqual
%testAssertAlmostEqual Unit tests for assertAlmostEqual
%   Each subfunction below is one test case.  Tests that expect a failure
%   check for the 'assertAlmostEqual:tolExceeded' identifier.

%   Steven L. Eddins
%   Copyright 2008 The MathWorks, Inc.

% NOTE(review): initTestSuite is defined elsewhere; presumably it is the
% script that assigns test_suite from the subfunctions in this file - confirm.
initTestSuite;

% Two-input syntax with identical values must not throw.
function testEqual
assertAlmostEqual(1, 1);

% Three-input syntax: an explicit tolerance of 0.2 accepts 1 versus 1.1.
function testEqualWithThreeInputs
assertAlmostEqual(1, 1.1, 0.2);

% Four-input syntax: a custom message is supplied and the tolerance of 0.1
% is exceeded, so tolExceeded is expected.
function testEqualWithFourInputs
assertExceptionThrown(@() assertAlmostEqual(1, 2, 0.1, 'checkmate'), ...
    'assertAlmostEqual:tolExceeded');

% An empty tolerance together with a message must not throw for a
% difference of 10*eps.
function testEmptyRelTol
assertAlmostEqual(1, 1+10*eps, [], 'checkmate');

% With no tolerance given, a difference of 1000*eps is expected to throw.
function testNotEqual
assertExceptionThrown(@() assertAlmostEqual(1, 1+1000*eps), ...
    'assertAlmostEqual:tolExceeded');

% Single-precision counterpart of the 10*eps case.
function testSingleEqual
assertAlmostEqual(single(1), single(1 + 10*eps('single')));

% Single-precision counterpart of the 1000*eps case.
function testSingleNotEqual
assertExceptionThrown(@() assertAlmostEqual(single(1), ...
    single(1 + 1000*eps('single'))), 'assertAlmostEqual:tolExceeded');

% Comparing zero with zero must not throw.
function testZeros
assertAlmostEqual(0, 0);

% Single-precision zeros must not throw either.
function testSingleZeros
assertAlmostEqual(single(0), single(0));

% Two sparse inputs differing by 10*eps must not throw.
function testSparse
assertAlmostEqual(sparse(1), sparse(1 + 10*eps));


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/obsolete/tests/testIsAlmostEqual.m
================================================
function test_suite = testIsAlmostEqual
%testIsAlmostEqual Unit tests for isAlmostEqual
%   Each subfunction below is one test case exercising
%   mtest.utils.isAlmostEqual with two or three input arguments.

%   Steven L. Eddins
%   Copyright 2008 The MathWorks, Inc.

% NOTE(review): initTestSuite is defined elsewhere; presumably it is the
% script that assigns test_suite from the subfunctions in this file - confirm.
initTestSuite;

% Identical matrices are expected to compare as almost equal.
function testExactlyEqual
A = [1 2; 3 4];
B = [1 2; 3 4];
assertTrue(mtest.utils.isAlmostEqual(A, B));

% With no tolerance argument, 10*eps is expected to be accepted and
% 1000*eps rejected.
function testDefaultTolerance
assertTrue(mtest.utils.isAlmostEqual(1, 1+10*eps));
assertFalse(mtest.utils.isAlmostEqual(1, 1+1000*eps));

% Same expectations as above, using single-precision eps.
function testDefaultToleranceSingle
assertTrue(mtest.utils.isAlmostEqual(single(1), 1 + 10*eps('single')));
assertFalse(mtest.utils.isAlmostEqual(single(1), 1 + 1000*eps('single')));

% An explicit tolerance of 0.1 is expected to accept 1.09 and reject 1.2.
function testSpecifiedTolerance
assertTrue(mtest.utils.isAlmostEqual(1, 1.09, 0.1));
assertFalse(mtest.utils.isAlmostEqual(1, 1.2, 0.1));

% Inf, -Inf and NaN in matching positions are expected to compare as equal;
% NaN against 0 is expected to compare as different.
function testSpecialValues
A = [Inf, -Inf, NaN, 2.0];
B = [Inf, -Inf, NaN, 2.0+10*eps];
assertTrue(mtest.utils.isAlmostEqual(A, B));

C = [Inf, -Inf, NaN, 2.0];
D = [Inf, -Inf, 0, 2.0+10*eps];
assertFalse(mtest.utils.isAlmostEqual(C, D));

% Integer inputs: equal values pass, different values fail.
function testUint8
assertTrue(mtest.utils.isAlmostEqual(uint8(1), uint8(1)));
assertFalse(mtest.utils.isAlmostEqual(uint8(1), uint8(2)));

% Character inputs: equal strings pass, different strings fail.
function testChar
assertTrue(mtest.utils.isAlmostEqual('foobar', 'foobar'));
assertFalse(mtest.utils.isAlmostEqual('foo', 'bar'));

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/readme
================================================
test


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/+xunit/+mocktests/+subpkg/test_a_bit.m
================================================
% test_a_bit is a subfunction test file with two empty test subfunctions.
% NOTE(review): initTestSuite is defined elsewhere; presumably it is the
% script that assigns test_suite from the subfunctions below - confirm.
function test_suite = test_a_bit
initTestSuite

% Empty test body; nothing is asserted.
function test_now

% Empty test body; nothing is asserted.
function test_later


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/+xunit/+mocktests/A.m
================================================
% Class A is a TestCase subclass containing two test cases (test_a and test_b).
% Both test methods have empty bodies, so neither asserts anything.
classdef A < TestCase
    
    methods
        % Constructor: forwards the test method name to TestCase.
        function self = A(name)
            self = self@TestCase(name);
        end
        
        % Empty test body.
        function test_a(self)
        end
        
        % Empty test body.
        function test_b(self)
        end
    end
    
end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/+xunit/+mocktests/B.m
================================================
% Class B is not a TestCase subclass.
% It declares no superclass, properties or methods.

classdef B
end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/+xunit/+mocktests/FooTest.m
================================================
classdef FooTest < TestCase
    % FooTest is a TestCase subclass with a single sanity-check test method.

    methods

        function self = FooTest(name)
            % Forward the test method name to the TestCase constructor.
            self = self@TestCase(name);
        end

        function test_sanity(self)
            % Compare zero with zero.
            expected = 0;
            actual = 0;
            assertEqual(expected, actual)
        end

    end
end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/+xunit/+mocktests/helper_that.m
================================================
% helper_that is not a test file.
% It hands its input back unchanged.

function result = helper_that(x)
result = x;


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/+xunit/+mocktests/test_that.m
================================================
% test_that.m is a subfunction test file.
% NOTE(review): the main function below is declared as test_this although the
% file is test_that.m - confirm whether this mismatch is intentional.
% NOTE(review): initTestSuite is defined elsewhere; presumably it is the
% script that assigns test_suite from the subfunctions below - confirm.
function test_suite = test_this
initTestSuite

% Builds a 3-by-3 magic square; the value is not checked.
function test_the_other
a = magic(3);

% Builds a 5-by-5 magic square; the value is not checked.
function test_nifty
b = magic(5);


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/+xunit/+mocktests/test_this.m
================================================
% test_this.m is a function-file test case.
% The function takes no inputs, returns no outputs and has an empty body.
function test_this

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/Readme.m
================================================
%   This directory contains the test suite for the mUnit test framework.  Before
%   running the test suite, do the following:
%
%   1. Make sure the mUnit test framework directory is on your path
%   2. Make sure the helper_classes subdirectory of the test directory is on
%      your path.
%   3. Make the test directory your current directory.
%
%   To run the test suite:
%
%       run(TestSuite())

%   Steven L. Eddins
%   Copyright 2008 The MathWorks

% Running this file as a script displays the help text at the top of the file.
help Readme

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/RuntestsTest.m
================================================
%RuntestsTest Unit tests for runtests command-line test runner.
%
%   The cwd_test directory next to this file is handed to the TestCaseInDir
%   constructor.  The relative names used below ('.', '../dir1', '../dir2')
%   are written relative to that directory.
%   NOTE(review): this presumes TestCaseInDir makes cwd_test the current
%   directory while each test runs - confirm.

classdef RuntestsTest < TestCaseInDir

   methods
       
       function self = RuntestsTest(name)
           self = self@TestCaseInDir(name, ...
               fullfile(fileparts(which(mfilename)), 'cwd_test'));
       end
      
      function test_noInputArgs(self)
          [T, did_pass] = evalc('runtests');
          % The cwd_test directory contains some test cases that fail,
          % so output of runtests should be false.
          assertFalse(did_pass);
      end
      
      function test_Verbose(self)
          % The -verbose option alone must not change the pass/fail result.
          [T, did_pass] = evalc('runtests(''-verbose'')');
          assertFalse(did_pass);
      end
      
      function test_oneInputArg(self)
          [T, did_pass] = evalc('runtests(''testFoobar'')');
          % cwd_test/testFoobar.m is supposed to pass.
          assertTrue(did_pass);
      end
      
      function test_verboseThenTestName(self)
          % Option first, then the name to run.
          [T, did_pass] = evalc('runtests(''-verbose'', ''.'')');
          assertFalse(did_pass);
      end
      
      function test_testNameThenVerbose(self)
          % Name first, then the option.
          [T, did_pass] = evalc('runtests(''.'', ''-verbose'')');
          assertFalse(did_pass);
      end
      
      function test_oneInputArgWithFilter_passing(self)
          % Select the single passing method of TestCaseSubclass.
          [T, did_pass] = evalc('runtests(''TestCaseSubclass:testA'')');
          assertTrue(did_pass);
      end
      
      function test_oneInputArgWithFilter_failing(self)
          % Select the single failing method of TestCaseSubclass.
          [T, did_pass] = evalc('runtests(''TestCaseSubclass:testB'')');
          assertFalse(did_pass);
      end
      
      function test_oneDirname(self)
          % dir1 is expected to pass as a whole, dir2 to fail.
          [T, did_pass] = evalc('runtests(''../dir1'')');
          assertTrue(did_pass);
          
          [T, did_pass] = evalc('runtests(''../dir2'')');
          assertFalse(did_pass);
      end
      
      function test_twoDirnames(self)
          % A failure in any of the named directories fails the whole run.
          [T, did_pass] = evalc('runtests(''../dir1'', ''../dir2'')');
          assertFalse(did_pass);
      end
      
      function test_packageName(self)
          % A package name is accepted in place of a file or directory name.
          [T, did_pass] = evalc('runtests(''xunit.mocktests'')');
          assertTrue(did_pass);
      end
      
      function test_noTestCasesFound(self)
          % A name that matches nothing must raise noTestCasesFound.
          assertExceptionThrown(@() runtests('no_such_test'), ...
              'xunit:runtests:noTestCasesFound');
      end
      
      function test_optionStringsIgnored(self)
          % Option string at beginning.
          [T, did_pass] = evalc('runtests(''-bogus'', ''../dir1'')');
          assertTrue(did_pass);
          
          % Option string at end.
          [T, did_pass] = evalc('runtests(''../dir2'', ''-bogus'')');
          assertFalse(did_pass);
      end
      
      function test_logfile(self)
          % Run with -logfile pointing at a fresh temporary name and check
          % that the file was created.
          name = tempname;
          command = sprintf('runtests(''../dir1'', ''-logfile'', ''%s'')', name);
          [T, did_pass] = evalc(command);
          
          % Record the outcome and remove the log file BEFORE asserting, so
          % that a failing assertion does not leave the temporary file behind.
          logfile_created = (exist(name, 'file') ~= 0);
          if logfile_created
              delete(name);
          end
          
          assertTrue(did_pass);
          assertTrue(logfile_created);
      end
      
      function test_logfileWithNoFile(self)
          % -logfile without a following file name must be rejected.
          assertExceptionThrown(@() runtests('../dir1', '-logfile'), ...
              'xunit:runtests:MissingLogfile');
      end
      
      function test_logfileWithNoWritePermission(self)
          % Ask for a log file inside a directory that does not exist.  The
          % directory name comes from tempname so the path is unopenable on
          % every platform; a literal 'C:\...' path is an ordinary, creatable
          % file name on non-Windows systems.
          missing_dir = tempname;
          logfile = fullfile(missing_dir, 'foobar.txt');
          assertExceptionThrown(@() runtests('../dir1', '-logfile', ...
              logfile), ...
              'xunit:runtests:FileOpenFailed');
      end
      
      function test_namesInCellArray(self)
          % Names may also be passed together in a single cell array.
          [T, did_pass] = evalc('runtests({''TestCaseSubclass:testA''})');
          assertTrue(did_pass);
          
          [T, did_pass] = evalc('runtests({''TestCaseSubclass:testA'', ''TestCaseSubclass:testB''})');
          assertFalse(did_pass);
      end
      
   end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/TestCaseTest.m
================================================
%TestCaseTest Unit tests for the TestCase class
%
%   The helper_classes directory next to this file is handed to the
%   TestCaseInDir constructor; the helper classes used below are expected to
%   be found there.

%   Steven L. Eddins
%   Copyright The MathWorks 2008

classdef TestCaseTest < TestCaseInDir

    methods
        function self = TestCaseTest(name)
            helper_dir = fullfile(fileparts(which(mfilename)), 'helper_classes');
            self = self@TestCaseInDir(name, helper_dir);
        end

        function testConstructor(self)
            % Construct a test case and check its Name and Location
            % properties.
            method_name = 'testMethod1';
            tc = TwoPassingTests(method_name);
            assertEqual(tc.Name, 'testMethod1');
            assertEqual(tc.Location, which('TwoPassingTests'));
        end

        function testPassingTests(self)
            % Run a two-test suite and check the sequence of observer
            % notifications that the logger recorded.
            expected_log = ...
                {'TestRunStarted', 'TestComponentStarted', ...
                'TestComponentStarted', 'TestComponentFinished', ...
                'TestComponentStarted', 'TestComponentFinished', ...
                'TestComponentFinished', 'TestRunFinished'};
            logger = TestRunLogger();
            suite = TestSuite('TwoPassingTests');
            suite.run(logger);
            assertTrue(isequal(logger.Log, expected_log));
        end

        function testFixtureCalls(self)
            % The test case's own log must show setUp, the test method and
            % tearDown, in that order.
            expected_calls = {'setUp', 'testMethod', 'tearDown'};
            tc = LoggingTestCase('testMethod');
            tc.run(TestRunLogger());
            assertTrue(isequal(tc.log, expected_calls));
        end

        function testTestFailure(self)
            % A failing test case must be counted as one failure.
            logger = TestRunLogger();
            suite = TestSuite('FailingTestCase');
            suite.run(logger);
            assertTrue(isequal(logger.NumFailures, 1));
        end

        function testTestError(self)
            % A test case with a bad fixture must be counted as one error.
            logger = TestRunLogger();
            suite = TestSuite('BadFixture');
            suite.run(logger);
            assertTrue(isequal(logger.NumErrors, 1));
        end

    end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/TestCaseWithAddPathTest.m
================================================
%TestCaseWithAddPathTest Unit tests for the TestCaseWithAddPath class
%
%   The helper_classes directory next to this file is handed to the
%   TestCaseWithAddPath constructor.

%   Steven L. Eddins
%   Copyright The MathWorks 2008

classdef TestCaseWithAddPathTest < TestCaseWithAddPath

    methods
        function self = TestCaseWithAddPathTest(name)
            helper_dir = fullfile(fileparts(which(mfilename)), 'helper_classes');
            self = self@TestCaseWithAddPath(name, helper_dir);
        end

        function testPath(self)
            % A function stored in helper_classes must be visible as a file
            % (exist returns 2).
            found_as = exist('testFunctionHandlesA', 'file');
            assertEqual(found_as, 2);
        end

        function testRunTestOnPath(self)
            % Build a suite from a file in helper_classes, run it, and
            % expect it to pass.
            suite = TestSuite('testFunctionHandlesA');
            logger = TestRunLogger();
            did_pass = suite.run(logger);
            assertTrue(did_pass);
        end
    end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/TestFuncHandleTests.m
================================================
%TestFuncHandleTests TestCase class used to test function-handle-based tests
%
%   The helper_classes directory next to this file is handed to the
%   TestCaseInDir constructor; the testFunctionHandles* files used below are
%   expected to be found there.

%   Steven L. Eddins
%   Copyright 2008 The MathWorks, Inc.

classdef TestFuncHandleTests < TestCaseInDir

    methods
        function self = TestFuncHandleTests(name)
            helper_dir = fullfile(fileparts(which(mfilename)), 'helper_classes');
            self = self@TestCaseInDir(name, helper_dir);
        end

        function testSuiteNameAndLocation(self)
            % The suite is named after its M-file and located at that file.
            suite = testFunctionHandlesA();
            assertEqual(suite.Name, 'testFunctionHandlesA');
            assertEqual(suite.Location, which('testFunctionHandlesA'));
        end

        function testOutputs(self)
            % Calling the function-handle test M-file must return a
            % TestSuite object holding two test cases.
            suite = testFunctionHandlesA();
            assertTrue(isa(suite, 'TestSuite'));
            assertEqual(suite.numTestCases(), 2);
        end

        function testCaseNames(self)
            % The two components carry the subfunction names, in order.
            suite = testFunctionHandlesA();
            first_case = suite.TestComponents{1};
            second_case = suite.TestComponents{2};
            assertEqual(first_case.Name, 'testA');
            assertEqual(second_case.Name, 'testB');
        end

        function testCaseLocation(self)
            % Both components report the M-file as their Location.
            suite = testFunctionHandlesA();
            m_file = which('testFunctionHandlesA');
            assertEqual(suite.TestComponents{1}.Location, m_file);
            assertEqual(suite.TestComponents{2}.Location, m_file);
        end

        function testPassingTests(self)
            % Run the suite and check the sequence of observer
            % notifications that the logger recorded.
            expected_log = ...
                {'TestRunStarted', 'TestComponentStarted', ...
                'TestComponentStarted', 'TestComponentFinished', ...
                'TestComponentStarted', 'TestComponentFinished', ...
                'TestComponentFinished', 'TestRunFinished'};
            logger = TestRunLogger();
            suite = testFunctionHandlesA;
            suite.run(logger);
            assertEqual(logger.Log, expected_log);
        end

        function testTestFixture(self)
            % Fixture functions that use testData must run without failure
            % or error.  (See test assertions in testFunctionHandlesB.)
            logger = TestRunLogger();
            suite = testFunctionHandlesB;
            suite.run(logger);
            assertEqual(logger.NumFailures, 0);
            assertEqual(logger.NumErrors, 0);
        end

        function testTestFixtureError(self)
            % Running testFunctionHandlesC, whose fixture throws, must
            % record two test errors.
            logger = TestRunLogger();
            suite = testFunctionHandlesC();
            suite.run(logger);
            assertEqual(logger.NumErrors, 2);
        end

        function testFixtureNoTestData(self)
            % When setupFcn returns no output argument, the test functions
            % and the teardown function are called with no inputs.
            % (See test assertions in testFunctionHandlesD.)
            logger = TestRunLogger();
            suite = testFunctionHandlesD();
            suite.run(logger);
            assertEqual(logger.NumFailures, 0);
            assertEqual(logger.NumErrors, 0);
        end

        function testFailingTest(self)
            % A failing test must add a TestCaseFailure notification between
            % the start and finish of its component.
            expected_log = ...
                {'TestRunStarted', 'TestComponentStarted', ...
                'TestComponentStarted', 'TestCaseFailure', 'TestComponentFinished', ...
                'TestComponentFinished', 'TestRunFinished'};
            logger = TestRunLogger();
            suite = testFunctionHandlesE();
            suite.run(logger);
            assertEqual(logger.Log, expected_log);
        end

        function testTeardownFcnButNoSetupFcn(self)
            % A test file with a teardown function but no setup function
            % must still run its single test case cleanly.
            logger = TestRunLogger();
            suite = testFunctionHandlesTeardownNoSetup();
            suite.run(logger);

            assertEqual(logger.NumTestCases, 1);
            assertEqual(logger.NumFailures, 0);
            assertEqual(logger.NumErrors, 0);
        end

    end
end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/TestRunLoggerTest.m
================================================
%TestRunLoggerTest Unit tests for TestRunLogger class
%
%   The helper_classes directory next to this file is handed to the
%   TestCaseInDir constructor; the helper classes used below are expected to
%   be found there.

classdef TestRunLoggerTest < TestCaseInDir

    methods
        function self = TestRunLoggerTest(name)
            helper_dir = fullfile(fileparts(which(mfilename)), 'helper_classes');
            self = self@TestCaseInDir(name, helper_dir);
        end

        function testTwoPassingTests(self)
            % Two passing tests: full notification sequence, two test
            % cases counted, and no faults of any kind.
            expected_log = ...
                {'TestRunStarted', ...
                'TestComponentStarted', ...
                'TestComponentStarted', 'TestComponentFinished', ...
                'TestComponentStarted', 'TestComponentFinished', ...
                'TestComponentFinished', ...
                'TestRunFinished'};

            logger = TestRunLogger;
            suite = TestSuite('TwoPassingTests');
            suite.run(logger);

            assertEqual(logger.Log, expected_log);

            assertEqual(logger.NumTestCases, 2);
            assertEqual(logger.NumFailures, 0);
            assertEqual(logger.NumErrors, 0);
            assertTrue(isempty(logger.Faults));
        end

        function testFailingTestCase(self)
            % One failing test: a TestCaseFailure notification is logged
            % and exactly one fault of type 'failure' is recorded.
            expected_log = ...
                {'TestRunStarted', ...
                'TestComponentStarted', ...
                'TestComponentStarted', 'TestCaseFailure', 'TestComponentFinished', ...
                'TestComponentFinished', ...
                'TestRunFinished'};

            logger = TestRunLogger;
            suite = TestSuite('FailingTestCase');
            suite.run(logger);

            assertEqual(logger.Log, expected_log);

            assertEqual(logger.NumTestCases, 1);
            assertEqual(logger.NumFailures, 1);
            assertEqual(logger.NumErrors, 0);
            assertEqual(numel(logger.Faults), 1);
            assertEqual(logger.Faults(1).Type, 'failure');
        end

    end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/TestSuiteTest.m
================================================
%TestSuiteTest Unit tests for TestSuite class
%
%   The helper_classes directory next to this file is handed to the
%   TestCaseInDir constructor; the helper classes and test files used below
%   are expected to be found there.

classdef TestSuiteTest < TestCaseInDir

    methods
        function self = TestSuiteTest(name)
            helper_dir = fullfile(fileparts(which(mfilename)), 'helper_classes');
            self = self@TestCaseInDir(name, helper_dir);
        end

        function testClassNameIn(self)
            % Syntax check: TestSuite('classname')
            suite = TestSuite('TwoPassingTests');
            num_components = numel(suite.TestComponents);
            assertTrue(num_components == 2, ...
                'TestSuite finds two test methods given class name');
        end

        function testCurrentDirectory(self)
            % Only checks that the no-input syntax executes without error;
            % the resulting suite is not inspected.
            suite = TestSuite();
        end

        function testNoTestMethods(self)
            % TestCase class containing no test methods
            suite = TestSuite('NoTestMethods');
            num_components = numel(suite.TestComponents);
            assertTrue(num_components == 0, ...
                'No test cases when class contains no test methods');
        end

        function test_fromTestCaseClassName(self)
            % Both components must carry one of the two expected method
            % names; their order is not checked.
            allowed_names = {'testMethod1', 'testMethod2'};
            suite = TestSuite.fromTestCaseClassName('TwoPassingTests');
            assertTrue(numel(suite.TestComponents) == 2);
            assertTrue(ismember(suite.TestComponents{1}.Name, allowed_names));
            assertTrue(ismember(suite.TestComponents{2}.Name, allowed_names));
        end

        function test_fromTestCaseClassName_badclass(self)
            % A name that is not a TestCase class must be rejected.
            make_suite = @() TestSuite.fromTestCaseClassName('atan2');
            assertExceptionThrown(make_suite, 'xunit:fromTestCaseClassName');
        end

        function test_fromName_TestCaseSubclass(self)
            % fromName with a TestCase subclass yields its test methods.
            suite = TestSuite.fromName('TwoPassingTests');
            assertTrue(numel(suite.TestComponents) == 2);
            assertEqual(suite.Name, 'TwoPassingTests');
        end

        function test_fromName_notTestCaseSubclass(self)
            % fromName with a class that is not a TestCase subclass yields
            % an empty suite that still carries the name.
            suite = TestSuite.fromName('TestRunMonitor');
            assertTrue(isempty(suite.TestComponents));
            assertEqual(suite.Name, 'TestRunMonitor');
        end

        function test_fromName_simpleTest(self)
            % fromName with a simple function-file test yields one component.
            suite = TestSuite.fromName('testSimple');
            assertEqual(numel(suite.TestComponents), 1);
            assertEqual(suite.Name, 'testSimple');
            assertEqual(suite.Location, which('testSimple'));
        end

        function test_fromName_subfunctions(self)
            % fromName with a subfunction test file yields one component
            % per test subfunction.
            suite = TestSuite.fromName('testFunctionHandlesA');
            assertEqual(numel(suite.TestComponents), 2);
            assertEqual(suite.Name, 'testFunctionHandlesA');
            assertEqual(suite.Location, which('testFunctionHandlesA'));
        end

        function test_fromName_bogus_name(self)
            % fromName with a non-test function name yields an empty suite.
            suite = TestSuite.fromName('atan2');
            assertTrue(isempty(suite.TestComponents));
            assertEqual(suite.Name, 'atan2');
        end

        function test_fromName_with_filter_string(self)
            % 'file:test' keeps only the named test; the suite keeps the
            % file's name.
            suite = TestSuite.fromName('testFunctionHandlesA:testA');
            assertEqual(numel(suite.TestComponents), 1);
            assertEqual(suite.TestComponents{1}.Name, 'testA');
            assertEqual(suite.Name, 'testFunctionHandlesA');
        end

        function test_fromName_with_nonmatching_filter_string(self)
            % A filter that matches no test leaves the suite empty.
            suite = TestSuite.fromName('testFunctionHandlesA:foobar');
            assertTrue(isempty(suite.TestComponents));
        end

        function test_fromName_with_dirname(self)
            % fromName with the full path of the cwd_test directory yields
            % a suite named after the directory with three components.
            xunit_test_dir = fileparts(which('TestSuiteTest'));
            cwd_test_dir = fullfile(xunit_test_dir, 'cwd_test');
            suite = TestSuite.fromName(cwd_test_dir);

            assertEqual(suite.Name, 'cwd_test');
            assertEqual(suite.Location, cwd_test_dir);
            assertEqual(numel(suite.TestComponents), 3);
        end

        function test_fromPwd(self)
            % fromPwd must return a TestSuite object holding 16 test
            % components when run from the helper_classes directory.
            suite = TestSuite.fromPwd();
            assertTrue(isa(suite, 'TestSuite'));
            num_components = numel(suite.TestComponents);
            assertTrue(num_components == 16);
        end

    end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/ThrowsExceptionTest.m
================================================
classdef ThrowsExceptionTest < TestCaseInDir
    % ThrowsExceptionTest checks how assertExceptionThrown outcomes are
    % reported, by running three helper test classes and inspecting what a
    % TestRunLogger recorded.  The helper_classes directory next to this file
    % is handed to the TestCaseInDir constructor.
    
    methods
        function self = ThrowsExceptionTest(methodName)
            self = self@TestCaseInDir(methodName, ...
                fullfile(fileparts(which(mfilename)), 'helper_classes'));
        end
        
        function testPassingTest(self)
            % The helper test is expected to pass: one test case run, no
            % failures and no errors.
            logger = TestRunLogger();
            TestSuite('PassingExceptionTest').run(logger);
            assertTrue((logger.NumTestCases == 1) && ...
                (logger.NumFailures == 0) && ...
                (logger.NumErrors == 0), ...
                'Passing exception test should have no failures or errors');
        end
        
        function testNoExceptionTest(self)
            % The first recorded fault must carry the noException identifier.
            % The assertion message names the identifier that is actually
            % compared.
            logger = TestRunLogger();
            TestSuite('ExceptionNotThrownTest').run(logger);
            assertTrue(strcmp(logger.Faults(1).Exception.identifier, ...
                'assertExceptionThrown:noException'), ...
                'Fault exception should be assertExceptionThrown:noException');
        end
        
        function testWrongExceptionTest(self)
            % The first recorded fault must carry the wrongException
            % identifier.  The assertion message names the identifier that is
            % actually compared.
            logger = TestRunLogger();
            TestSuite('WrongExceptionThrownTest').run(logger);
            assertTrue(strcmp(logger.Faults(1).Exception.identifier, ...
                'assertExceptionThrown:wrongException'), ...
                'Fault exception should be assertExceptionThrown:wrongException');
        end
        
    end
    
    
end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/cwd_test/TestCaseSubclass.m
================================================
%TestCaseSubclass TestCase subclass containing one passing and one failing test
%
%   testA has an empty body and therefore raises nothing; testB fails on
%   purpose.  RuntestsTest selects them individually with the
%   'TestCaseSubclass:testA' and 'TestCaseSubclass:testB' filter strings.

%   Steven L. Eddins
%   Copyright 2008 The MathWorks, Inc.

classdef TestCaseSubclass < TestCase
   methods
       % Constructor: forwards the test method name to TestCase.
       function self = TestCaseSubclass(name)
           self = self@TestCase(name);
       end
       
       % Empty test body; nothing is asserted.
       function testA(self)
       end
       
       function testB(self)
           % Intentionally fail this test case.
           assertFalse(true);
       end
   end
end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/cwd_test/testFoobar.m
================================================
function testFoobar
%testFoobar Passing M-file test
%   The function body is empty, so calling it completes without raising
%   an error.

%   Steven L. Eddins
%   Copyright 2008 The MathWorks, Inc.

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/cwd_test/testSubfunctions.m
================================================
function test_suite = testSubfunctions
%testSubfunctions Contains two passing subfunction tests

%   Steven L. Eddins
%   Copyright 2008 The MathWorks, Inc.

initTestSuite;

% Empty subfunction test: completes without raising an error.
function testSub1

% Empty subfunction test: completes without raising an error.
function testSub2


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/dir1/test_thatPasses.m
================================================
function test_suite = test_thatPasses
%test_thatPasses Test file whose single test case passes
initTestSuite;

function test_case
% assertTrue is given true, so no assertion error is raised.
assertTrue(true);


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/dir2/test_thatFails.m
================================================
function test_suite = test_thatFails
%test_thatFails Test file whose single test case fails
initTestSuite;

function test_case
% assertTrue is given false, so this case always raises an assertion error.
assertTrue(false);


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/empty_file
================================================


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/helper_classes/BadFixture.m
================================================
classdef BadFixture < TestCase
%BadFixture TestCase subclass whose setUp method always throws
%   setUp raises an MException with identifier setUpError:BadFixture.

    methods
        function self = BadFixture(name)
            % Forward the test method name to the TestCase constructor.
            self = self@TestCase(name);
        end

        function setUp(self)
            % Build the exception first, then raise it.
            fixtureError = MException('setUpError:BadFixture', ...
                'BadFixture setUp method always throws exception');
            throw(fixtureError);
        end

        function testMethod(self)
            % Empty test body.
        end
    end
end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/helper_classes/Contents.m
================================================
% Helper Classes for mUnit Test Suite
%
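% TestCase Subclasses
%   BadFixture - Contains setUp method that throws exception
%   ExceptionNotThrownTest - Expects an exception from a function that throws none
%   FailingTestCase - Contains one test method that throws exception
%   LoggingTestCase - Logs calls to setUp, tearDown, and test method
%   NoTestMethods - TestCase subclass that contains no test methods
%   PassingExceptionTest - Expects an exception that is actually thrown
%   TestsToBeDiscovered - Used in TestSuiteTest
%   TwoPassingTests - Contains two passing test methods
%   WrongExceptionThrownTest - Expects one exception while a different one is thrown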

% Steven L. Eddins
% Copyright 2008 The MathWorks, Inc.

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/helper_classes/ExceptionNotThrownTest.m
================================================
classdef ExceptionNotThrownTest < TestCase
%ExceptionNotThrownTest Expects an exception from a function that throws none

    methods
        function self = ExceptionNotThrownTest(methodName)
            % Forward the test method name to the TestCase constructor.
            self = self@TestCase(methodName);
        end

        function testThrowsException(self)
            % The handle returns an empty array and raises nothing, while
            % the assertion asks for an exception with identifier a:b:c.
            expectedId = 'a:b:c';
            quietFcn = @() [];
            assertExceptionThrown(quietFcn, expectedId);
        end
    end
end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/helper_classes/FailingTestCase.m
================================================
% FailingTestCase
% Utility class used by unit tests.

% Steven L. Eddins
% Copyright 2008 The MathWorks, Inc.

classdef FailingTestCase < TestCase

    methods
        function self = FailingTestCase(name)
            % Forward the test method name to the TestCase constructor.
            self = self@TestCase(name);
        end

        function testFail(self)
            % Always raises an MException with identifier
            % testFail:FailingTestCase.
            failure = MException('testFail:FailingTestCase', ...
                'testFail always fails');
            throw(failure);
        end
    end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/helper_classes/LoggingTestCase.m
================================================
% LoggingTestCase
% Utility class used by unit tests.

% Steven L. Eddins
% Copyright 2008 The MathWorks, Inc.

classdef LoggingTestCase < TestCase

    properties
        % Cell array of method names, appended in call order.
        log = {};
    end

    methods
        function self = LoggingTestCase(name)
            % Forward the test method name to the TestCase constructor.
            self = self@TestCase(name);
        end

        function setUp(self)
            % Record that setUp was called.
            self.log{numel(self.log) + 1} = 'setUp';
        end

        function tearDown(self)
            % Record that tearDown was called.
            self.log{numel(self.log) + 1} = 'tearDown';
        end

        function testMethod(self)
            % Record that the test method was called.
            self.log{numel(self.log) + 1} = 'testMethod';
        end

        function testBrokenMethod(self)
            % Always raises an MException with identifier
            % brokenMethod:WasRun.
            broken = MException('brokenMethod:WasRun', ...
                'Call to testBrokenMethod always throws exception');
            throw(broken);
        end
    end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/helper_classes/NoTestMethods.m
================================================
classdef NoTestMethods < TestCase
%NoTestMethods TestCase subclass that defines only a constructor
   methods
      function self = NoTestMethods(name)
         % Forward the name to the TestCase constructor.
         self = self@TestCase(name);
      end
   end
end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/helper_classes/PassingExceptionTest.m
================================================
classdef PassingExceptionTest < TestCase
%PassingExceptionTest Expects an exception that the function really throws

    methods
        function self = PassingExceptionTest(methodName)
            % Forward the test method name to the TestCase constructor.
            self = self@TestCase(methodName);
        end

        function testThrowsException(self)
            % The handle raises an error with identifier a:b:c, which is
            % the same identifier the assertion asks for.
            expectedId = 'a:b:c';
            throwingFcn = @() error('a:b:c', 'error message');
            assertExceptionThrown(throwingFcn, expectedId);
        end
    end
end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/helper_classes/TestsToBeDiscovered.m
================================================
classdef TestsToBeDiscovered < TestCase
%TestsToBeDiscovered TestCase subclass with two test-named methods
%   Declares testMethodA and testMethodB plus one method whose name does
%   not start with "test".

   methods
      function self = TestsToBeDiscovered(name)
         % Forward the name to the TestCase constructor.
         self = self@TestCase(name);
      end
      
      % NOTE(review): the methods below declare no object argument, so they
      % presumably exist only for name-based discovery and are never
      % invoked -- confirm against the tests that use this class.
      function testMethodA
      end
      
      function testMethodB
      end
      
      function notATestMethod
      end

   end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/helper_classes/TwoPassingTests.m
================================================
classdef TwoPassingTests < TestCase
%TwoPassingTests TestCase subclass with two empty test methods
    
    methods
        function self = TwoPassingTests(name)
            % Forward the test method name to the TestCase constructor.
            self = self@TestCase(name);
        end
                
        function testMethod1(self)
            % Empty body: completes without raising an error.
        end
        
        function testMethod2(self)
            % Empty body: completes without raising an error.
        end
    end
    
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/helper_classes/WrongExceptionThrownTest.m
================================================
classdef WrongExceptionThrownTest < TestCase
%WrongExceptionThrownTest Expects one exception while another is thrown

    methods
        function self = WrongExceptionThrownTest(methodName)
            % Forward the test method name to the TestCase constructor.
            self = self@TestCase(methodName);
        end

        function testThrowsException(self)
            % The handle raises d:e:f while the assertion asks for a:b:c.
            expectedId = 'a:b:c';
            throwingFcn = @() error('d:e:f', 'message');
            assertExceptionThrown(throwingFcn, expectedId);
        end
    end
end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/helper_classes/notTestString.m
================================================
function suite = notTestString
% This function exists to help test that the TestSuite.fromPwd() method does not
% pick up function-handle test files that do not match the naming convention.
initTestSuite;

% Empty subfunction: completes without raising an error.
function testA

% Empty subfunction: completes without raising an error.
function testB


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/helper_classes/testFunctionHandlesA.m
================================================
function test_suite = testFunctionHandlesA
%testFunctionHandlesA Test file used by TestFunctionHandlesTest
%   Contains two passing tests.

%   Steven L. Eddins
%   Copyright 2008 The MathWorks, Inc.

initTestSuite;

% Empty subfunction test: completes without raising an error.
function testA

% Empty subfunction test: completes without raising an error.
function testB


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/helper_classes/testFunctionHandlesB.m
================================================
function test_suite = testFunctionHandlesB
%testFunctionHandlesB Test file used by TestFunctionHandlesTest
%   Contains two passing tests that use a test fixture.

%   Steven L. Eddins
%   Copyright 2008 The MathWorks, Inc.

initTestSuite;

function testData = setUpFcn
% Fixture setup: the test data is the scalar 5.
testData = 5;

function testA(testData)
% The fixture value must be 5.
expectedValue = 5;
assertEqual(testData, expectedValue);

function testB(testData)
% The fixture value must be 5.
expectedValue = 5;
assertEqual(testData, expectedValue);

function tearDownFcn(testData)
% The fixture value must still be 5 at teardown.
expectedValue = 5;
assertEqual(testData, expectedValue);


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/helper_classes/testFunctionHandlesC.m
================================================
function test_suite = testFunctionHandlesC
%testFunctionHandlesC Test file used by TestFunctionHandlesTest
%   Contains two passing tests that use a test fixture containing an intentional
%   error.

%   Steven L. Eddins
%   Copyright 2008 The MathWorks, Inc.

initTestSuite;

function testData = setUpFcn
% Fixture setup: the test data is the scalar 5.
testData = 5;

function testA(testData)
% The fixture value must be 5.
expectedValue = 5;
assertEqual(testData, expectedValue);

function testB(testData)
% The fixture value must be 5.
expectedValue = 5;
assertEqual(testData, expectedValue);

function tearDownFcn(testData)
% This assertion is expected to error: the fixture value is 5, not 20.
mismatchedValue = 20;
assertEqual(testData, mismatchedValue);


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/helper_classes/testFunctionHandlesD.m
================================================
function test_suite = testFunctionHandlesD
%testFunctionHandlesD Test file used by TestFunctionHandlesTest
%   Contains two passing tests that use a test fixture with no test data.

%   Steven L. Eddins
%   Copyright 2008 The MathWorks, Inc.

initTestSuite;

function setUpFcn
% Fixture setup that returns no test data.

function testA(varargin)
% No arguments may be passed to the test.
assertTrue(numel(varargin) == 0);

function testB(varargin)
% No arguments may be passed to the test.
assertTrue(numel(varargin) == 0);

function tearDownFcn(varargin)
% No arguments may be passed to the teardown function.
assertTrue(numel(varargin) == 0);


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/helper_classes/testFunctionHandlesE.m
================================================
function test_suite = testFunctionHandlesE
%testFunctionHandlesE Test file used by TestFunctionHandlesTest
%   Contains one failing test.
%
%   The main function is declared with the same name as its file.

%   Steven L. Eddins
%   Copyright 2008 The MathWorks, Inc.

initTestSuite;

function testA
% Always raises an error. The identifier is kept as originally written,
% since code outside this file may match on it.
error('testFunctionHandlesA:expectedFailure', 'Bogus message');


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/helper_classes/testFunctionHandlesTeardownNoSetup.m
================================================
function suite = testFunctionHandlesTeardownNoSetup
% Verify that test file works if it has a teardown function but no setup
% function.
initTestSuite;

function teardown
% Teardown: close all figures.
close('all');

function test_normalCase
% Trivial passing assertion.
one = 1;
assertEqual(one, 1);


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/helper_classes/testSimple.m
================================================
function testSimple
%testSimple Simple M-file test that passes
%   The function body is empty, so calling it completes without raising
%   an error.

%   Steven L. Eddins
%   Copyright 2008 The MathWorks, Inc.

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/testAssertEqual.m
================================================
function test_suite = testAssertEqual
%testAssertEqual Unit tests for assertEqual

%   Steven L. Eddins
%   Copyright 2008 The MathWorks, Inc.

initTestSuite;

function testAssertEqualHappyCase
% Equal scalars are accepted.
assertEqual(5, 5);

function testAssertEqualWithThreeInputs
% A third (message) argument is accepted.
assertEqual(5, 5, 'Scandinavian Defense');

function testAssertEqualHappyCaseString
% Equal character arrays are accepted.
assertEqual('foobar', 'foobar');

function testAssertEqualHappyCaseMatrix
% Equal matrices are accepted.
assertEqual(magic(3), magic(3));

function testInfAndInf
% +Inf equals +Inf.
assertEqual(Inf, Inf);

function testMinusInfAndMinusInf
% -Inf equals -Inf.
assertEqual(-Inf, -Inf);

function testOppositeSignInfs
% Infinities of opposite sign are not equal.
compareFcn = @() assertEqual(-Inf, Inf);
assertExceptionThrown(compareFcn, 'assertEqual:nonEqual');

function testFiniteAndInf
% A finite value is not equal to Inf.
compareFcn = @() assertEqual(1, Inf);
assertExceptionThrown(compareFcn, 'assertEqual:nonEqual');

function testFiniteAndNaN
% A finite value is not equal to NaN.
compareFcn = @() assertEqual(1, NaN);
assertExceptionThrown(compareFcn, 'assertEqual:nonEqual');

function testInfiniteAndNaN
% Inf is not equal to NaN.
compareFcn = @() assertEqual(Inf, NaN);
assertExceptionThrown(compareFcn, 'assertEqual:nonEqual');

function testAssertEqualNotEqual
% Different scalars raise the nonEqual exception.
compareFcn = @() assertEqual(5, 4);
assertExceptionThrown(compareFcn, 'assertEqual:nonEqual');

function testAssertEqualSparsity
% A full value and a sparse value raise the sparsity exception.
compareFcn = @() assertEqual(5, sparse(5));
assertExceptionThrown(compareFcn, 'assertEqual:sparsityNotEqual');

function testAssertEqualNans
% NaNs in matching positions are accepted.
assertEqual([1 NaN 2], [1 NaN 2]);

function testAssertEqualClass
% Values of different classes raise the class exception.
compareFcn = @() assertEqual(5, uint8(5));
assertExceptionThrown(compareFcn, 'assertEqual:classNotEqual');

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/testAssertExceptionThrown.m
================================================
function test_suite = testAssertExceptionThrown
%testAssertExceptionThrown Unit tests for assertExceptionThrown

%   Steven L. Eddins
%   Copyright 2008 The MathWorks, Inc.

initTestSuite;

function test_happyCase
% The thrown identifier matches the expected one.
throwingFcn = @() error('MyProd:MyFun:MyId', 'my message');
assertExceptionThrown(throwingFcn, 'MyProd:MyFun:MyId');

function test_wrongException
% An inner assertion with a mismatched identifier must itself raise
% assertExceptionThrown:wrongException.
throwingFcn = @() error('MyProd:MyFun:MyId', 'my message');
innerAssert = @() assertExceptionThrown(throwingFcn, ...
    'MyProd:MyFun:DifferentId');
assertExceptionThrown(innerAssert, 'assertExceptionThrown:wrongException');

function test_noException
% An inner assertion on a function that throws nothing must itself raise
% assertExceptionThrown:noException.
innerAssert = @() assertExceptionThrown(@() sin(pi), 'foobar');
assertExceptionThrown(innerAssert, 'assertExceptionThrown:noException');


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/testAssertFalse.m
================================================
function test_suite = testAssertFalse
%testAssertFalse Unit tests for assertFalse

%   Steven L. Eddins
%   Copyright 2008 The MathWorks, Inc.

initTestSuite;

function testAssertFalseHappyCase
% A false condition is accepted.
assertFalse(false);

function testAssertFalseHappyCaseWithTwoArgs
% A second (message) argument is accepted.
assertFalse(false, '1.e4 e5 2.Nf3 Nc6');

function testAssertFalseFailed
% Verify exception when true is passed to assertFalse.
failingCall = @() assertFalse(true);
assertExceptionThrown(failingCall, 'assertFalse:trueCondition');

function testAssertFalseNonscalar
% Verify that assertFalse doesn't like nonscalar input.
nonscalarCall = @() assertFalse(logical([0 0]));
assertExceptionThrown(nonscalarCall, 'assertFalse:invalidCondition');

function testAssertFalseNonlogical
% Verify that assertFalse doesn't like nonlogical input.
nonlogicalCall = @() assertFalse(0);
assertExceptionThrown(nonlogicalCall, 'assertFalse:invalidCondition');


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/testAssertTrue.m
================================================
function test_suite = testAssertTrue
%testAssertTrue Unit tests for assertTrue

%   Steven L. Eddins
%   Copyright 2008 The MathWorks, Inc.

initTestSuite;

function testAssertTrueHappyCase
% A true condition is accepted.
assertTrue(true);

function testAssertTrueHappyCaseWithTwoArgs
% A second (message) argument is accepted.
assertTrue(true, '1.e4 e5 2.Nf3 Nc6');

function testAssertTrueFailed
% Verify exception when false is passed to assertTrue.
failingCall = @() assertTrue(false);
assertExceptionThrown(failingCall, 'assertTrue:falseCondition');

function testAssertTrueNonscalar
% Verify that assertTrue doesn't like nonscalar input.
nonscalarCall = @() assertTrue(logical([1 1]));
assertExceptionThrown(nonscalarCall, 'assertTrue:invalidCondition');

function testAssertTrueNonlogical
% Verify that assertTrue doesn't like nonlogical input.
nonlogicalCall = @() assertTrue(5);
assertExceptionThrown(nonlogicalCall, 'assertTrue:invalidCondition');


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/testContainsRegexp.m
================================================
function test_suite = testContainsRegexp
%testContainsRegexp Unit tests for containsRegexp

%   Steven L. Eddins
%   Copyright 2008 The MathWorks, Inc.

initTestSuite;

function testOneStringContains
% A string with upper-case letters matches [A-Z].
hasMatch = xunit.utils.containsRegexp('MATLAB is great', '[A-Z]');
assertTrue(hasMatch);

function testOneStringDoesntContain
% A string without upper-case letters does not match [A-Z].
hasMatch = xunit.utils.containsRegexp('no upper-case letters', '[A-Z]');
assertTrue(~hasMatch);

function testCellArray
% The result for a cell array has the same orientation as the input.
strs = {'MATLAB is great', 'no upper-case letters'};
rowResult = xunit.utils.containsRegexp(strs, '[A-Z]');
assertEqual(rowResult, [true false]);
columnResult = xunit.utils.containsRegexp(strs', '[A-Z]');
assertEqual(columnResult, [true; false]);

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/testIsSetUpString.m
================================================
function test_suite = testIsSetUpString
%testIsSetUpString Unit tests for isSetUpString

%   Steven L. Eddins
%   Copyright 2008 The MathWorks, Inc.

initTestSuite;

function testOneStringIs
% Both spellings are recognized as setup names.
lowerCaseResult = xunit.utils.isSetUpString('setup');
assertTrue(lowerCaseResult);
mixedCaseResult = xunit.utils.isSetUpString('setUp_fixture');
assertTrue(mixedCaseResult);

function testOneStringIsNot
% An unrelated name is not a setup name.
result = xunit.utils.isSetUpString('bogus');
assertFalse(result);

function testCellArray
% The result for a cell array has the same orientation as the input.
strs = {'setup', 'bogus'};
rowResult = xunit.utils.isSetUpString(strs);
assertEqual(rowResult, [true false]);
columnResult = xunit.utils.isSetUpString(strs');
assertEqual(columnResult, [true; false]);

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/testIsTearDownString.m
================================================
function test_suite = testIsTearDownString
%testIsTearDownString Unit tests for isTearDownString

%   Steven L. Eddins
%   Copyright 2008 The MathWorks, Inc.

initTestSuite;

function testOneStringIs
% Both spellings are recognized as teardown names.
lowerCaseResult = xunit.utils.isTearDownString('teardownfoobar');
assertTrue(lowerCaseResult);
mixedCaseResult = xunit.utils.isTearDownString('TearDown_foobar');
assertTrue(mixedCaseResult);

function testOneStringIsNot
% This capitalization is not recognized as a teardown name.
result = xunit.utils.isTearDownString('tEardown');
assertFalse(result);

function testCellArray
% The result for a cell array has the same orientation as the input.
strs = {'teardown', 'tearup'};
rowResult = xunit.utils.isTearDownString(strs);
assertEqual(rowResult, [true false]);
columnResult = xunit.utils.isTearDownString(strs');
assertEqual(columnResult, [true; false]);

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/testIsTestCaseSubclass.m
================================================
function test_suite = testIsTestCaseSubclass
%testIsTestCaseSubclass Unit tests for isTestCaseSubclass

%   Steven L. Eddins
%   Copyright 2008 The MathWorks, Inc.

initTestSuite;

function testTestCase
% TestCase itself is accepted.
result = xunit.utils.isTestCaseSubclass('TestCase');
assertTrue(result);

function testSubclass
% The name 'TestCaseInDir' is accepted.
result = xunit.utils.isTestCaseSubclass('TestCaseInDir');
assertTrue(result);

function testNotASubclass
% The name 'atan2' is rejected.
result = xunit.utils.isTestCaseSubclass('atan2');
assertFalse(result);

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/testIsTestString.m
================================================
function test_suite = testIsTestString
%testIsTestString Unit tests for isTestString

%   Steven L. Eddins
%   Copyright 2008 The MathWorks, Inc.

initTestSuite;

function testOneStringIs
% Both spellings are recognized as test names.
lowerCaseResult = xunit.utils.isTestString('testFoobar');
assertTrue(lowerCaseResult);
upperCaseResult = xunit.utils.isTestString('Test_foobar');
assertTrue(upperCaseResult);

function testOneStringIsNot
% A name without "test" is not a test name.
result = xunit.utils.isTestString('foobar');
assertFalse(result);

function testCellArray
% The result for a cell array has the same orientation as the input.
strs = {'testFoobar', 'foobar_test', 'foobar', 'foobar_Test'};
rowResult = xunit.utils.isTestString(strs);
assertEqual(rowResult, [true true false true]);
columnResult = xunit.utils.isTestString(strs');
assertEqual(columnResult, [true; true; false; true]);

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/testRuntestsWithDirectoryName.m
================================================
function test_suite = testRuntestsWithDirectoryName
%testRuntestsWithDirectoryName Unit test for runtests('dirname') syntax.

initTestSuite;

function testDirName
% Run the tests in the cwd_test directory next to this file, then check
% that runtests reports failure and that the current directory is unchanged.
current_dir = pwd;
target_dir = fullfile(fileparts(which(mfilename)), 'cwd_test');
% The evaluated string refers to the local variable target_dir by name, so
% the two must stay in sync. evalc captures the command-window output in T.
[T, did_pass] = evalc('runtests(target_dir)');
assertFalse(did_pass);
assertEqual(current_dir, pwd);


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/test_TestSuiteInDir.m
================================================
function test_suite = test_TestSuiteInDir
%test_TestSuiteInDir Unit test for TestSuiteInDir class.

%   Steven L. Eddins
%   Copyright 2009 The MathWorks, Inc.

initTestSuite;

function test_constructor
% The suite is named after the directory and located at its full path.
cwd_test_dir = fullfile(fileparts(which(mfilename)), 'cwd_test');
dirSuite = TestSuiteInDir(cwd_test_dir);

assertEqual(dirSuite.Name, 'cwd_test');
assertEqual(dirSuite.Location, cwd_test_dir);

function test_gatherTestCases
% Gathering test cases in cwd_test yields three test components.
cwd_test_dir = fullfile(fileparts(which(mfilename)), 'cwd_test');
dirSuite = TestSuiteInDir(cwd_test_dir);
dirSuite.gatherTestCases();

componentCount = numel(dirSuite.TestComponents);
assertEqual(componentCount, 3);


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/test_arrayToString.m
================================================
function test_suite = test_arrayToString
%test_arrayToString Unit test for arrayToString.

%   Steven L. Eddins
%   Copyright 2009 The MathWorks, Inc.

initTestSuite;

function test_smallInput
% A small array is rendered element by element.
smallArray = [1 2 3];
rendered = strtrim(xunit.utils.arrayToString(smallArray));
assertEqual(rendered, '1     2     3');

function test_largeInput
% A large array is summarized by its size and class.
largeArray = zeros(1000, 1000);
rendered = xunit.utils.arrayToString(largeArray);
assertEqual(rendered, '[1000x1000 double]');

function test_emptyInput
% An empty array is summarized by its size and class.
emptyArray = zeros(1, 0, 2);
rendered = xunit.utils.arrayToString(emptyArray);
assertEqual(rendered, '[1x0x2 double]');


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/test_assertElementsAlmostEqual.m
================================================
function suite = test_assertElementsAlmostEqual
%test_assertElementsAlmostEqual Unit tests for assertElementsAlmostEqual
initTestSuite;

%===============================================================================
function test_happyCase

% All code here should execute with no error.
nearlyOne = 1 + sqrt(eps)/10;
assertElementsAlmostEqual(1, nearlyOne);
assertElementsAlmostEqual(1, nearlyOne, 'custom message');

%===============================================================================
function test_failedAssertion

% This difference must raise tolExceeded.
failingCall = @() assertElementsAlmostEqual(1, 1 + 10*sqrt(eps));
assertExceptionThrown(failingCall, 'assertElementsAlmostEqual:tolExceeded');

%===============================================================================
function test_nonFloatInputs()
% Character inputs are rejected.
charCall = @() assertElementsAlmostEqual('hello', 'world');
assertExceptionThrown(charCall, 'assertElementsAlmostEqual:notFloat');

%===============================================================================
function test_sizeMismatch()
% Inputs of different sizes are rejected.
mismatchCall = @() assertElementsAlmostEqual(1, [1 2]);
assertExceptionThrown(mismatchCall, 'assertElementsAlmostEqual:sizeMismatch');

function test_finiteAndInfinite()
% A finite value against Inf raises tolExceeded.
failingCall = @() assertElementsAlmostEqual(1, Inf);
assertExceptionThrown(failingCall, 'assertElementsAlmostEqual:tolExceeded');

function test_infiniteAndInfinite()
% Inf against Inf is accepted.
assertElementsAlmostEqual(Inf, Inf);

function test_finiteAndNaN()
% A finite value against NaN raises tolExceeded.
failingCall = @() assertElementsAlmostEqual(1, NaN);
assertExceptionThrown(failingCall, 'assertElementsAlmostEqual:tolExceeded');

function test_nanAndNaN()
% NaN against NaN is accepted.
assertElementsAlmostEqual(NaN, NaN);

function test_plusMinusInfinity()
% +Inf against -Inf raises tolExceeded.
failingCall = @() assertElementsAlmostEqual(+Inf, -Inf);
assertExceptionThrown(failingCall, 'assertElementsAlmostEqual:tolExceeded');

function test_infiniteAndNaN()
% Inf against NaN raises tolExceeded.
failingCall = @() assertElementsAlmostEqual(Inf, NaN);
assertExceptionThrown(failingCall, 'assertElementsAlmostEqual:tolExceeded');


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/test_assertFilesEqual.m
================================================
function test_suite = test_assertFilesEqual
%test_assertFilesEqual Unit test for assertFilesEqual

%   Steven L. Eddins
%   Copyright 2009 The MathWorks, Inc.

initTestSuite;

function test_equal
% A file compared with itself is accepted.
assertFilesEqual('black.tif', 'black.tif');

function test_differentSize
% These two files must raise sizeMismatch.
compareCall = @() assertFilesEqual('black.tif', 'black.png');
assertExceptionThrown(compareCall, 'assertFilesEqual:sizeMismatch');

function test_sameSizeButDifferent
% These two files must raise valuesDiffer.
compareCall = @() assertFilesEqual('black.tif', 'almost_black.tif');
assertExceptionThrown(compareCall, 'assertFilesEqual:valuesDiffer');

function test_oneFileEmpty
% An empty file against black.png must raise sizeMismatch.
compareCall = @() assertFilesEqual('empty_file', 'black.png');
assertExceptionThrown(compareCall, 'assertFilesEqual:sizeMismatch');

function test_bothFilesEmpty
% The empty file compared with itself is accepted.
assertFilesEqual('empty_file', 'empty_file');

function test_cannotReadFirstFile
% A bogus first file name must raise readFailure.
compareCall = @() assertFilesEqual('bogus', 'black.png');
assertExceptionThrown(compareCall, 'assertFilesEqual:readFailure');

function test_cannotReadSecondFile
% A bogus second file name must raise readFailure.
compareCall = @() assertFilesEqual('black.png', 'bogus');
assertExceptionThrown(compareCall, 'assertFilesEqual:readFailure');


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/test_assertVectorsAlmostEqual.m
================================================
function suite = test_assertVectorsAlmostEqual
%test_assertVectorsAlmostEqual Unit tests for assertVectorsAlmostEqual
initTestSuite;

%===============================================================================
function test_happyCase

first = [1 1e10];
second = [2 1e10];
% All code here should execute with no error.
assertVectorsAlmostEqual(first, second);
assertVectorsAlmostEqual(first, second, 'custom message');

%===============================================================================
function test_failedAssertion

first = [1 1e6];
second = [2 1e6];

failingCall = @() assertVectorsAlmostEqual(first, second);
assertExceptionThrown(failingCall, 'assertVectorsAlmostEqual:tolExceeded');

%===============================================================================
function test_failedAssertionWithCustomMessage

first = [1 1e6];
second = [2 1e6];
failingCall = @() assertVectorsAlmostEqual(first, second, 'my message');
assertExceptionThrown(failingCall, 'assertVectorsAlmostEqual:tolExceeded');

%===============================================================================
function test_nonFloatInputs()
% Character inputs are rejected.
charCall = @() assertVectorsAlmostEqual('hello', 'world');
assertExceptionThrown(charCall, 'assertVectorsAlmostEqual:notFloat');

%===============================================================================
function test_sizeMismatch()
% Inputs of different sizes are rejected.
mismatchCall = @() assertVectorsAlmostEqual(1, [1 2]);
assertExceptionThrown(mismatchCall, 'assertVectorsAlmostEqual:sizeMismatch');

%===============================================================================
function test_finiteAndInfinite()
% A finite element against Inf raises tolExceeded.
failingCall = @() assertVectorsAlmostEqual([1 2], [1 Inf]);
assertExceptionThrown(failingCall, 'assertVectorsAlmostEqual:tolExceeded');

%===============================================================================
function test_infiniteAndInfinite
% Matching Inf elements still raise tolExceeded.
failingCall = @() assertVectorsAlmostEqual([1 Inf], [1 Inf]);
assertExceptionThrown(failingCall, 'assertVectorsAlmostEqual:tolExceeded');

%===============================================================================
function test_finiteAndNaN
% A finite element against NaN raises tolExceeded.
failingCall = @() assertVectorsAlmostEqual([1 2], [1 NaN]);
assertExceptionThrown(failingCall, 'assertVectorsAlmostEqual:tolExceeded');

%===============================================================================
function test_NanAndNan
% Matching NaN elements still raise tolExceeded.
failingCall = @() assertVectorsAlmostEqual([1 NaN], [1 NaN]);
assertExceptionThrown(failingCall, 'assertVectorsAlmostEqual:tolExceeded');

%===============================================================================
function test_oppositeSignInfs
% Inf elements of opposite sign raise tolExceeded.
failingCall = @() assertVectorsAlmostEqual([1 Inf], [1 -Inf]);
assertExceptionThrown(failingCall, 'assertVectorsAlmostEqual:tolExceeded');


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/test_compareFloats.m
================================================
function suite = test_compareFloats
%test_compareFloats Unit tests for xunit.utils.compareFloats
initTestSuite;

%===============================================================================
function test_elementwiseRelativeTolerance

relTol = 0.1;
floorTol = 0.01;

insideTol = xunit.utils.compareFloats([10 20], [11 20], 'elementwise', ...
    'relative', relTol, floorTol);
assertTrue(insideTol);

outsideTol = xunit.utils.compareFloats([10 20], [11.2 20], 'elementwise', ...
    'relative', relTol, floorTol);
assertFalse(outsideTol);

% Verify floor tolerance
flooredResult = xunit.utils.compareFloats([0.001 1], [0.010 1], ...
    'elementwise', 'relative', relTol, floorTol);
assertTrue(flooredResult);

%===============================================================================
function test_elementwiseAbsoluteTolerance

insideTol = xunit.utils.compareFloats([10 20], [10.1 20], 'elementwise', ...
    'absolute', 0.1);
assertTrue(insideTol);

outsideTol = xunit.utils.compareFloats([10 20], [10.1001 20], ...
    'elementwise', 'absolute', 0.1);
assertFalse(outsideTol);

%===============================================================================
function test_vectorRelativeTolerance

% The pair below would fail an elementwise test.
reference = [1 10];
candidate = [1.5 10];
relTol = 0.05;

insideTol = xunit.utils.compareFloats(reference, candidate, 'vector', ...
    'relative', relTol);
assertTrue(insideTol);

candidate = [1.6 10];
outsideTol = xunit.utils.compareFloats(reference, candidate, 'vector', ...
    'relative', relTol);
assertFalse(outsideTol);

%===============================================================================
function test_vectorAbsoluteTolerance

reference = [1 10];
candidate = [1.4 10];

looseResult = xunit.utils.compareFloats(reference, candidate, 'vector', ...
    'absolute', 0.5);
assertTrue(looseResult);

tightResult = xunit.utils.compareFloats(reference, candidate, 'vector', ...
    'absolute', 0.3);
assertFalse(tightResult);

%===============================================================================
function test_NaNs

% NaNs in the same spots are OK.
reference = [1 1 1 NaN 1 1 1 NaN 1];
sameSpots = [1 1 1 NaN 1 1 1 NaN 1];

sameSpotsResult = xunit.utils.compareFloats(reference, sameSpots);
assertTrue(sameSpotsResult);

% NaNs in different spots are not OK.
otherSpots = [1 1 NaN NaN 1 1 1 NaN 1];
otherSpotsResult = xunit.utils.compareFloats(reference, otherSpots);
assertFalse(otherSpotsResult);

%===============================================================================
function test_Infs

% Infinities in the same locations are OK if they have the same sign.
withPlusInf = [1 2 3 Inf 4 5];
withMinusInf = [1 2 3 -Inf 4 5];

assertTrue(xunit.utils.compareFloats(withPlusInf, withPlusInf));
assertTrue(xunit.utils.compareFloats(withMinusInf, withMinusInf));

oppositeSigns = xunit.utils.compareFloats(withPlusInf, withMinusInf, ...
    'elementwise', 'absolute');
assertFalse(oppositeSigns);

%===============================================================================
function test_complexInput

% Real and imaginary parts are compared separately.
absTol = 0.1;
insideTol = xunit.utils.compareFloats(1, 1+0.09i, 'elementwise', ...
    'absolute', absTol);
assertTrue(insideTol);

outsideTol = xunit.utils.compareFloats(1, 1+0.11i, 'elementwise', ...
    'absolute', absTol);
assertFalse(outsideTol);

%===============================================================================
function test_comparisonTypeSpecified

% Verify handling of third input argument, the comparison type.  The rest of the
% input syntax is handled by parseFloatAssertInputs and tested by the unit test
% for that function.

% The pair below fails using elementwise comparison but passes using vector
% comparison.
first = [1.5 10];
second = [1 10];
relTol = 0.1;

elementwiseResult = xunit.utils.compareFloats(first, second, ...
    'elementwise', 'relative', relTol);
assertFalse(elementwiseResult);

vectorResult = xunit.utils.compareFloats(first, second, 'vector', ...
    'relative', relTol);
assertTrue(vectorResult);


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/test_comparisonMessage.m
================================================
function test_suite = test_comparisonMessage
%test_comparisonMessage Unit test for comparisonMessage.

%   Steven L. Eddins
%   Copyright 2009 The MathWorks, Inc.

initTestSuite;

function test_happyCase
%test_happyCase Message built from a user message, a numeric and a char input.

expected_lines = {'user message'; ...
    'assertion message'; ...
    ''; ...
    'First input:'; ...
    '     1     2     3'; ...
    ''; ...
    'Second input:'; ...
    'hello'};

message = xunit.utils.comparisonMessage('user message', 'assertion message', ...
    [1 2 3], 'hello');
actual_lines = xunit.utils.stringToCellArray(message);

assertEqual(actual_lines, expected_lines);

    
================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/test_packageName.m
================================================
function test_suite = test_packageName
%test_packageName Unit test for TestSuite.fromPackageName.
%   Entry point of this test file; the test cases are the subfunctions below.

% initTestSuite is not defined in this file.  Presumably it is a script that
% runs in this function's workspace and assigns the output variable from the
% test subfunctions below -- TODO confirm against initTestSuite.m.
initTestSuite;

function test_happyCase
%test_happyCase Suite built from the xunit.mocktests package.
%   Checks the number of top-level components, then for each one (in order)
%   the number of children it holds and its name.

expected_names = {'xunit.mocktests.subpkg', ...
    'xunit.mocktests.A', ...
    'xunit.mocktests.FooTest', ...
    'test_that', ...
    'xunit.mocktests.test_this'};
expected_child_counts = [1 2 1 2 1];

suite = TestSuite.fromPackageName('xunit.mocktests');
assertEqual(numel(suite.TestComponents), 5);

for k = 1:numel(expected_names)
    component = suite.TestComponents{k};
    assertEqual(numel(component.TestComponents), expected_child_counts(k));
    assertEqual(component.Name, expected_names{k});
end

function test_badPackageName
%test_badPackageName An unknown package name raises invalidName.

build_bogus_suite = @() TestSuite.fromPackageName('bogus');
assertExceptionThrown(build_bogus_suite, 'xunit:fromPackageName:invalidName');


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/test_parseFloatAssertInputs.m
================================================
function suite = test_parseFloatAssertInputs
%test_parseFloatAssertInputs Unit test for parseFloatAssertInputs.
%   Entry point of this test file; the test cases are the subfunctions below.

% initTestSuite is not defined in this file.  Presumably it is a script that
% runs in this function's workspace and builds the returned suite from the
% test subfunctions below -- TODO confirm against initTestSuite.m.
initTestSuite;

%===============================================================================
function test_tooFewInputs()
%test_tooFewInputs Calling the parser with no arguments is rejected.

call_without_inputs = @() xunit.utils.parseFloatAssertInputs();
assertExceptionThrown(call_without_inputs, 'MATLAB:nargchk:notEnoughInputs');

%===============================================================================
function test_tooManyInputs()
%test_tooManyInputs Seven arguments is one more than the parser accepts.

call_with_seven_inputs = @() xunit.utils.parseFloatAssertInputs(1,2,3,4,5,6,7);
assertExceptionThrown(call_with_seven_inputs, 'MATLAB:nargchk:tooManyInputs');

%===============================================================================
function test_twoInputs()
%test_twoInputs With only A and B supplied, every option takes its default.

default_tol = sqrt(eps);
parsed = xunit.utils.parseFloatAssertInputs(1, 2);

assertEqual(parsed.A, 1);
assertEqual(parsed.B, 2);
assertEqual(parsed.ToleranceType, 'relative');
assertEqual(parsed.Tolerance, default_tol);
assertEqual(parsed.FloorTolerance, default_tol);
assertEqual(parsed.Message, '');

%===============================================================================
function test_threeInputs()
%test_threeInputs Third argument is either a tolerance type or a message.

default_tol = sqrt(eps);
expected = struct('A', 1, 'B', 2, 'ToleranceType', 'relative', ...
    'Tolerance', default_tol, 'FloorTolerance', default_tol, 'Message', '');

% Explicit 'relative' gives the same result as the defaults.
assertEqual(xunit.utils.parseFloatAssertInputs(1, 2, 'relative'), expected);

% Explicit 'absolute' changes only the tolerance type.
expected.ToleranceType = 'absolute';
assertEqual(xunit.utils.parseFloatAssertInputs(1, 2, 'absolute'), expected);

% Any other string is taken as the message; the type stays at its default.
expected.ToleranceType = 'relative';
expected.Message = 'message';
assertEqual(xunit.utils.parseFloatAssertInputs(1, 2, 'message'), expected);

%===============================================================================
function test_fourInputs()
%test_fourInputs Fourth argument is either the tolerance or a message.

default_tol = sqrt(eps);
expected = struct('A', 1, 'B', 2, 'ToleranceType', 'absolute', ...
    'Tolerance', default_tol, 'FloorTolerance', default_tol, 'Message', '');

% Numeric fourth argument: used as the tolerance.
expected.Tolerance = 0.1;
assertEqual(xunit.utils.parseFloatAssertInputs(1, 2, 'absolute', 0.1), expected);

% String fourth argument: used as the message, tolerance stays default.
expected.Tolerance = default_tol;
expected.Message = 'message';
assertEqual(xunit.utils.parseFloatAssertInputs(1, 2, 'absolute', 'message'), ...
    expected);

%===============================================================================
function test_fiveInputs()
%test_fiveInputs Fifth argument is either the floor tolerance or a message.

expected = struct('A', 1, 'B', 2, 'ToleranceType', 'absolute', ...
    'Tolerance', 0.1, 'FloorTolerance', 0.05, 'Message', '');

% Numeric fifth argument: used as the floor tolerance.
assertEqual(xunit.utils.parseFloatAssertInputs(1, 2, 'absolute', 0.1, 0.05), ...
    expected);

% String fifth argument: used as the message, floor tolerance stays default.
expected.FloorTolerance = sqrt(eps);
expected.Message = 'message';
assertEqual(xunit.utils.parseFloatAssertInputs(1, 2, 'absolute', 0.1, 'message'), ...
    expected);

%===============================================================================
function test_sixInputs()
%test_sixInputs All optional arguments supplied explicitly.

expected = struct('A', 1, 'B', 2, 'ToleranceType', 'absolute', ...
    'Tolerance', 0.1, 'FloorTolerance', 0.05, 'Message', 'message');

parsed = xunit.utils.parseFloatAssertInputs(1, 2, 'absolute', 0.1, 0.05, ...
    'message');
assertEqual(parsed, expected);

%===============================================================================
function test_twoSingleInputs()
%test_twoSingleInputs Default tolerances for two single-precision inputs.

single_tol = sqrt(eps('single'));
expected = struct('A', 1, 'B', 2, 'ToleranceType', 'relative', ...
    'Tolerance', single_tol, 'FloorTolerance', single_tol, 'Message', '');

parsed = xunit.utils.parseFloatAssertInputs(single(1), single(2));
assertEqual(parsed, expected);

%===============================================================================
function test_twoSingleAndDoubleInputs()
%test_twoSingleAndDoubleInputs Mixed precision uses the single-based defaults.

single_tol = sqrt(eps('single'));
expected = struct('A', 1, 'B', 2, 'ToleranceType', 'relative', ...
    'Tolerance', single_tol, 'FloorTolerance', single_tol, 'Message', '');

parsed = xunit.utils.parseFloatAssertInputs(single(1), double(2));
assertEqual(parsed, expected);


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/tests/test_stringToCellArray.m
================================================
function test_suite = test_stringToCellArray
%test_stringToCellArray Unit test for stringToCellArray
%   Entry point of this test file; the individual test cases are the
%   subfunctions that follow.

%   Steven L. Eddins
%   Copyright 2009 The MathWorks, Inc.

% initTestSuite is not defined in this file.  Presumably it is a script that
% runs in this function's workspace and assigns the output variable from the
% test subfunctions below -- TODO confirm against initTestSuite.m.
initTestSuite;

function test_happyCase
%test_happyCase Two lines separated by a newline become two cells.

two_lines = sprintf('Hello\nWorld');
expected = {'Hello' ; 'World'};
assertEqual(xunit.utils.stringToCellArray(two_lines), expected);

function test_emptyInput
%test_emptyInput The empty string converts to a 0-by-1 cell array.

converted = xunit.utils.stringToCellArray('');
assertEqual(converted, cell(0, 1));

function test_spacesInFront
%test_spacesInFront Leading spaces on each line are preserved.

indented = sprintf('    Hello\n  World\n');
expected = {'    Hello' ; '  World'};
assertEqual(xunit.utils.stringToCellArray(indented), expected);

function test_spacesAtEnd
%test_spacesAtEnd Trailing spaces on each line are preserved.

padded = sprintf('Hello  \nWorld     ');
expected = {'Hello  ' ; 'World     '};
assertEqual(xunit.utils.stringToCellArray(padded), expected);


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/xunit/+xunit/+utils/Contents.m
================================================
% UTILS Utility package for MATLAB xUnit Test Framework
%
% Array Comparison
%   compareFloats            - Compare floating-point arrays using tolerance
%   isAlmostEqual            - Equality test using relative tolerance
%
% Test Case Discovery Functions
%   isTestCaseSubclass       - True for name of TestCase subclass
%
% String Functions
%   arrayToString            - Convert array to string for display
%   comparisonMessage        - Assertion message string for comparing two arrays
%   containsRegexp           - True if string contains regular expression
%   isSetUpString            - True for string that looks like a setup function
%   isTearDownString         - True for string that looks like teardown function
%   isTestString             - True for string that looks like a test function
%   stringToCellArray        - Convert string to cell array of strings
%
% Miscellaneous Functions
%   generateDoc              - Publish the example scripts in the doc directory
%   parseFloatAssertInputs   - Common input-parsing logic for several functions

%   Steven L. Eddins
%   Copyright 2008-2009 The MathWorks, Inc.


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/xunit/+xunit/+utils/arrayToString.m
================================================
function s = arrayToString(A)
%arrayToString Convert array to string for display.
%   S = arrayToString(A) returns a string representation of A intended for
%   use in assertion messages.  Arrays below the size threshold are rendered
%   with disp(A); larger arrays are rendered the way disp shows the value of
%   a structure field.

%   Steven L. Eddins
%   Copyright 2009 The MathWorks, Inc.

if ~isTooBigToDisp(A)
    s = dispAsArray(A);
else
    s = dispAsStructField(A);
end

%===============================================================================
function tf = isTooBigToDisp(A)
%   Heuristic: A is "too big" when its size in bytes, as reported by the whos
%   function, exceeds 1000.

% whos looks the variable up by name, so the argument must stay named A.
info = whos('A');
tf = info.bytes > 1000;

%===============================================================================
function s = dispAsArray(A)
%   Render A with disp and strip leading/trailing blank lines.  If disp
%   produces no output at all, use the structure-field rendering instead.

% The evaluated command refers to the variable by name, so it must be A.
raw = evalc('disp(A)');
if ~isempty(raw)
    s = postprocessDisp(raw);
else
    % disp displays nothing for some kinds of empty arrays.
    s = dispAsStructField(A);
end

%===============================================================================
function s = dispAsStructField(A)
%   Render A the way disp shows it as the value of a structure field.

% Wrap A in a cell so that struct() stores it as a single field value even
% when A is itself a cell array.  The evaluated command refers to b by name.
b = struct('A', {A});
s = postprocessStructDisp(evalc('disp(b)'));

%===============================================================================
function out = postprocessDisp(in)
%   Remove leading and trailing blank lines from input string.  Don't include a
%   newline at the end.  Returns the empty string if every line is blank.

lines = xunit.utils.stringToCellArray(in);

% Remove leading blank lines.
lines = removeLeadingBlankLines(lines);

% Remove trailing blank lines.
while ~isempty(lines) && isBlankLine(lines{end})
    lines(end) = [];
end

% If the display consisted only of blank lines (for example disp(' ')),
% nothing is left; return an empty string instead of indexing lines{end},
% which would raise an indexing error.
if isempty(lines)
    out = '';
    return
end

% Convert cell of strings to single string with newlines.  Don't put a newline
% at the end.
out = sprintf('%s\n', lines{1:end-1});
out = [out, lines{end}];

%===============================================================================
function out = postprocessStructDisp(in)
%   Return the portion of the display string to the right of the colon in the
%   output of the first structure field.  Input is a string.

lines = xunit.utils.stringToCellArray(in);

% Remove leading blank lines
lines = removeLeadingBlankLines(lines);

line = lines{1};

% Only the first colon separates the field name from its value; the value
% itself may contain further colons (e.g. a displayed string).  Asking find
% for a single match keeps idx scalar, so it is a valid colon-operator
% operand below.
idx = find(line == ':', 1);
out = line((idx+2):end);  % struct fields display with blank space following colon

%===============================================================================
function out = removeLeadingBlankLines(in)
%   Drop blank entries from the front of a cell array of strings, stopping at
%   the first non-blank entry.  Input and output are cell arrays of strings.

out = in;
while true
    if isempty(out) || ~isBlankLine(out{1})
        break;
    end
    out(1) = [];
end

%===============================================================================
function tf = isBlankLine(line)
%   True when the input string has no non-whitespace character (this includes
%   the empty string).

tf = ~any(~isspace(line));


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/xunit/+xunit/+utils/compareFloats.m
================================================
function result = compareFloats(varargin)
%compareFloats Compare floating-point arrays using tolerance.
%   result = compareFloats(A, B, compare_type, tol_type, tol, floor_tol)
%   compares the floating-point arrays A and B using a tolerance.  compare_type
%   is either 'elementwise' or 'vector'.  tol_type is either 'relative' or
%   'absolute'.  tol and floor_tol are the scalar tolerance values.
%
%   There are four different tolerance tests used, depending on the comparison
%   type and the tolerance type:
%
%   1. Comparison type: 'elementwise'     Tolerance type: 'relative'
%
%       all( abs(A(:) - B(:)) <= tol * max(abs(A(:)), abs(B(:))) + floor_tol )
%
%   2. Comparison type: 'elementwise'     Tolerance type: 'absolute'
%
%       all( abs(A(:) - B(:)) <= tol )
%
%   3. Comparison type: 'vector'          Tolerance type: 'relative'
%
%       norm(A(:) - B(:)) <= tol * max(norm(A(:)), norm(B(:))) + floor_tol
%
%   4. Comparison type: 'vector'          Tolerance type: 'absolute'
%
%       norm(A(:) - B(:)) <= tol
%
%   Note that floor_tol is not used when the tolerance type is 'absolute'.
%
%   compare_type, tol_type, tol, and floor_tol are all optional inputs.  The
%   default value for compare_type is 'elementwise'.  The remaining defaults
%   are supplied by xunit.utils.parseFloatAssertInputs: tol_type defaults to
%   'relative'; if both A and B are double, tol and floor_tol both default to
%   sqrt(eps); if either A or B is single, they both default to
%   sqrt(eps('single')).
%
%   If A or B is complex, then the tolerance test is applied independently to
%   the real and imaginary parts.
%
%   For elementwise comparisons, compareFloats returns true for two elements
%   that are both NaN, or for two infinite elements that have the same sign.
%   For vector comparisons, compareFloats returns false if any input elements
%   are infinite or NaN.
%
%   An unrecognized compare_type raises compareFloats:unrecognizedCompareType;
%   an unrecognized tol_type raises compareFloats:unrecognizedToleranceType.

%   Steven L. Eddins
%   Copyright 2008-2009 The MathWorks, Inc.

if nargin >= 3
    % compare_type specified.  Grab it and then use parseFloatAssertInputs to
    % process the remaining input arguments.
    compare_type = varargin{3};
    varargin(3) = [];
    % strcmp against a cell array returns one logical per candidate, so the
    % result is never empty; the value is valid only if some entry is true.
    if ~any(strcmp(compare_type, {'elementwise', 'vector'}))
        error('compareFloats:unrecognizedCompareType', ...
            'COMPARE_TYPE must be ''elementwise'' or ''vector''.');
    end
else
    compare_type = 'elementwise';
end

params = xunit.utils.parseFloatAssertInputs(varargin{:});

% Work on column vectors regardless of the input shapes.
A = params.A(:);
B = params.B(:);

% Magnitude function: per-element abs for elementwise, norm for vector.
switch compare_type
    case 'elementwise'
        magFcn = @abs;
        
    case 'vector'
        magFcn = @norm;
        
    otherwise
        error('compareFloats:unrecognizedCompareType', ...
            'COMPARE_TYPE must be ''elementwise'' or ''vector''.');
end

switch params.ToleranceType
    case 'relative'
        coreCompareFcn = @(A, B) magFcn(A - B) <= ...
              params.Tolerance * max(magFcn(A), magFcn(B)) + ...
              params.FloorTolerance;
        
    case 'absolute'
        coreCompareFcn = @(A, B) magFcn(A - B) <= params.Tolerance;
        
    otherwise
        error('compareFloats:unrecognizedToleranceType', ...
            'TOL_TYPE must be ''relative'' or ''absolute''.');
end

if strcmp(compare_type, 'elementwise')
    % Matching NaNs and same-signed infinities count as equal; an infinity
    % paired with a finite value or with an opposite-signed infinity does not.
    compareFcn = @(A, B) ( coreCompareFcn(A, B) | bothNaN(A, B) | sameSignInfs(A, B) ) & ...
        ~oppositeSignInfs(A, B) & ...
        ~finiteAndInfinite(A, B);
else
    % A non-finite norm on either side fails the vector comparison.
    compareFcn = @(A, B)  coreCompareFcn(A, B) & ...
        isfinite(magFcn(A)) & ...
        isfinite(magFcn(B));
end

if isreal(A) && isreal(B)
    result = compareFcn(A, B);
else
    % Complex input: both the real and the imaginary parts must pass.
    result = compareFcn(real(A), real(B)) & compareFcn(imag(A), imag(B));
end

result = all(result);

%===============================================================================
function out = bothNaN(A, B)
%   True where the corresponding elements of A and B are both NaN.

nan_in_A = isnan(A);
nan_in_B = isnan(B);
out = nan_in_A & nan_in_B;

%===============================================================================
function out = oppositeSignInfs(A, B)
%   True where both elements are infinite and their signs differ.

both_infinite = isinf(A) & isinf(B);
signs_differ = sign(A) ~= sign(B);
out = both_infinite & signs_differ;

%===============================================================================
function out = sameSignInfs(A, B)
%   True where both elements are infinite and their signs agree.

both_infinite = isinf(A) & isinf(B);
signs_agree = sign(A) == sign(B);
out = both_infinite & signs_agree;

%===============================================================================
function out = finiteAndInfinite(A, B)
%   True where exactly one of the two corresponding elements is infinite.

out = isinf(A) ~= isinf(B);


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/xunit/+xunit/+utils/comparisonMessage.m
================================================
function msg = comparisonMessage(user_message, assertion_message, A, B)
%comparisonMessage Generate assertion message when comparing two arrays.
%   msg = comparisonMessage(user_message, assertion_message, A, B) builds the
%   text for an assertion function that compares two arrays A and B to pass
%   to throw.
%
%   The returned string is laid out as follows:
%
%       <user_message>
%       <assertion_message>
%
%       First input:
%       <string representation of value of A>
%
%       Second input:
%       <string representation of value of B>
%
%   When user_message is the empty string, '', the first line is omitted.

%   Steven L. Eddins
%   Copyright 2009 The MathWorks, Inc.

first_as_text = xunit.utils.arrayToString(A);
second_as_text = xunit.utils.arrayToString(B);

body = sprintf('%s\n\n%s\n%s\n\n%s\n%s', ...
    assertion_message, ...
    'First input:', ...
    first_as_text, ...
    'Second input:', ...
    second_as_text);

if isempty(user_message)
    msg = body;
else
    msg = sprintf('%s\n%s', user_message, body);
end
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/xunit/+xunit/+utils/containsRegexp.m
================================================
function tf = containsRegexp(str, exp)
%containsRegexp True if string contains regular expression
%   TF = containsRegexp(str, exp) returns true if the regular expression exp
%   matches somewhere in the string str.  If str is a cell array of strings,
%   each string is tested and TF is a logical array with the same size as str.

%   Steven L. Eddins
%   Copyright 2008-2009 The MathWorks, Inc.

% Normalize the input so that a single string is handled as a one-element
% cell array.
if iscell(str)
    candidates = str;
else
    candidates = {str};
end

% regexp returns, per candidate, the match start indices; an empty result
% means the expression did not match.
match_starts = regexp(candidates, exp);
tf = ~cellfun(@isempty, match_starts);


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/xunit/+xunit/+utils/generateDoc.m
================================================
function generateDoc
%generateDoc Publish the example scripts in the doc directory
%   The doc directory is located relative to the folder containing runtests
%   (its sibling 'doc' folder).  Every *.m file found there is passed to
%   publish.  The doc directory is added to the path and is left as the
%   current folder when the function returns.

%   Steven L. Eddins
%   Copyright 2008-2009 The MathWorks, Inc.

doc_dir = fullfile(fileparts(which('runtests')), '..', 'doc');
addpath(doc_dir);
cd(doc_dir)

listing = dir('*.m');
script_names = {listing.name};
for k = 1:numel(script_names)
    publish(script_names{k});
    % Return to the doc directory after each script is published.
    cd(doc_dir)
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/xunit/+xunit/+utils/isAlmostEqual.m
================================================
function same = isAlmostEqual(A, B, reltol)
%isAlmostEqual Equality test using relative tolerance
%   same = isAlmostEqual(A, B, reltol), for two floating-point arrays A and B,
%   tests A and B for equality using the specified relative tolerance.
%   isAlmostEqual returns true if A and B have the same size and the
%   following relationship holds for all values in A and B:
%
%       abs(A - B) ./ max(max(abs(A), abs(B)), 1) <= reltol
%
%   The denominator is never smaller than 1, so for values of magnitude below
%   1 the test acts as an absolute tolerance.  Pairs that are both zero, both
%   NaN, or both the same signed infinity are treated as equal.
%
%   same = isAlmostEqual(A, B) uses the following value for the relative
%   tolerance:
%
%       100 * max(eps(class(A)), eps(class(B)))
%
%   If either A or B is not a floating-point array, then isAlmostEqual returns
%   the result of isequal(A, B).

%   Steven L. Eddins
%   Copyright 2008-2009 The MathWorks, Inc.

if ~(isfloat(A) && isfloat(B))
    same = isequal(A, B);
    return
end

if nargin < 3
    reltol = 100 * max(eps(class(A)), eps(class(B)));
end

if ~isequal(size(A), size(B))
    same = false;
    return
end

a = A(:);
b = B(:);

scale = max(max(abs(a), abs(b)), 1);
delta = abs(a - b) ./ scale;

% Some floating-point values require special handling: these pairs count as
% an exact match even where the quotient above evaluates to NaN.
both_zero = (a == 0) & (b == 0);
both_nan = isnan(a) & isnan(b);
both_pos_inf = (a == Inf) & (b == Inf);
both_neg_inf = (a == -Inf) & (b == -Inf);
delta(both_zero | both_nan | both_pos_inf | both_neg_inf) = 0;

same = all(delta <= reltol);


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/xunit/+xunit/+utils/isSetUpString.m
================================================
function tf = isSetUpString(str)
%isSetUpString True if string looks like the name of a setup function
%   tf = isSetUpString(str) returns true if str begins with 'setup', where
%   the 's' and the 'u' may each be upper or lower case.  If str is a cell
%   array of strings, each string is tested and tf is a logical array with
%   the same size as str.

%   Steven L. Eddins
%   Copyright 2008-2009 The MathWorks, Inc.

tf = xunit.utils.containsRegexp(str, '^[sS]et[uU]p');


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/xunit/+xunit/+utils/isTearDownString.m
================================================
function tf = isTearDownString(str)
%isTearDownString True if string looks like the name of a teardown function
%   tf = isTearDownString(str) returns true if str begins with 'teardown',
%   where the 't' and the 'd' may each be upper or lower case.  If str is a
%   cell array of strings, each string is tested and tf is a logical array
%   with the same size as str.

%   Steven L. Eddins
%   Copyright 2008-2009 The MathWorks, Inc.

teardown_exp = '^[tT]ear[dD]own';
tf = xunit.utils.containsRegexp(str, teardown_exp);


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/xunit/+xunit/+utils/isTestCaseSubclass.m
================================================
function tf = isTestCaseSubclass(name)
%isTestCaseSubclass True for name of a TestCase subclass
%   tf = isTestCaseSubclass(name) returns true if the string name names the
%   class TestCase itself or a class that has TestCase among its ancestors.
%   It returns false if name is not the name of a class.

%   Steven L. Eddins
%   Copyright 2008-2009 The MathWorks, Inc.

class_meta = meta.class.fromName(name);

% An empty result means name is not a class.  Otherwise the helper checks the
% class itself first and then walks up its superclasses.
tf = ~isempty(class_meta) && isMetaTestCaseSubclass(class_meta);

function tf = isMetaTestCaseSubclass(class_meta)
%   True if the class described by class_meta is named 'TestCase' or has such
%   a class anywhere among its superclasses.

tf = strcmp(class_meta.Name, 'TestCase');
if tf
    return;
end

% Search the parent classes recursively, stopping at the first hit.
parents = class_meta.SuperClasses;
k = 1;
while ~tf && k <= numel(parents)
    tf = isMetaTestCaseSubclass(parents{k});
    k = k + 1;
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/xunit/+xunit/+utils/isTestString.m
================================================
function tf = isTestString(str)
%isTestString True if string looks like the name of a test
%   tf = isTestString(str) returns true if str begins or ends with 'test'
%   (the leading 't' may be upper or lower case).  If str is a cell array of
%   strings, each string is tested and tf is a logical array with the same
%   size as str.

%   Steven L. Eddins
%   Copyright 2008-2009 The MathWorks, Inc.

starts_with_test = xunit.utils.containsRegexp(str, '^[tT]est');
ends_with_test = xunit.utils.containsRegexp(str, '[tT]est$');

tf = starts_with_test | ends_with_test;


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/xunit/+xunit/+utils/parseFloatAssertInputs.m
================================================
function params = parseFloatAssertInputs(varargin)
%parseFloatAssertInputs Parse inputs for floating-point assertion functions.
%   params = parseFloatAssertInputs(A, B, tol_type, tol, floor_tol, message)
%   parses the input arguments shared by assertElementsAlmostEqual,
%   assertVectorsAlmostEqual, and compareFloats.  Between 2 and 6 inputs are
%   accepted.  It returns a parameter struct containing the fields:
%
%       A    B    Message    ToleranceType    Tolerance    FloorTolerance
%
%   A trailing string other than 'relative' or 'absolute' is taken as the
%   message.  Defaults: ToleranceType is 'relative', Message is '', and
%   Tolerance and FloorTolerance are both sqrt(epsilon), where epsilon is the
%   larger of eps(class(A)) and eps(class(B)), or eps if that cannot be
%   evaluated.

%   Steven L. Eddins
%   Copyright 2008-2009 The MathWorks, Inc.

error(nargchk(2, 6, nargin, 'struct'));

% The first two input arguments are always A and B.  Values are wrapped in
% cells so struct() stores them as single field values.
params = struct('A', {varargin{1}}, 'B', {varargin{2}}, ...
    'ToleranceType', {'relative'}, 'Tolerance', {[]}, ...
    'FloorTolerance', {[]}, 'Message', {''});
options = varargin(3:end);

% If the last argument is a message string, record it and drop it from the
% option list.
has_message = ~isempty(options) && ischar(options{end}) && ...
    ~any(strcmp(options{end}, {'relative', 'absolute'}));
if has_message
    params.Message = options{end};
    options(end) = [];
end

% eps(class(...)) fails for some input classes; use double eps in that case.
try
    epsilon = max(eps(class(params.A)), eps(class(params.B)));
catch
    epsilon = eps;
end
default_tol = sqrt(epsilon);

% The remaining options are positional: tol_type, tol, floor_tol.
num_options = numel(options);

if num_options >= 1
    params.ToleranceType = options{1};
end

if num_options >= 2
    params.Tolerance = options{2};
else
    params.Tolerance = default_tol;
end

if num_options >= 3
    params.FloorTolerance = options{3};
else
    params.FloorTolerance = default_tol;
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/xunit/+xunit/+utils/stringToCellArray.m
================================================
function c = stringToCellArray(s)
%stringToCellArray Convert string with newlines to cell array of strings.
%   C = stringToCellArray(S) splits the string S at newline characters and
%   returns the pieces as a column cell array of strings.  An empty S
%   yields a 0-by-1 cell array.

%   Steven L. Eddins
%   Copyright 2009 The MathWorks, Inc.

% Start from the empty-input answer and return early when there is
% nothing to split.
c = cell(0, 1);
if isempty(s)
    return;
end

% With 'Whitespace' set to '' textscan keeps leading/trailing blanks of
% each line; only the newline acts as a separator.
scanned = textscan(s, '%s', 'Delimiter', '\n', 'Whitespace', '');
c = scanned{1};


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/xunit/CommandWindowTestRunDisplay.m
================================================
classdef CommandWindowTestRunDisplay < TestRunDisplay
    %CommandWindowTestRunDisplay Print test suite execution results to Command Window.
    %   CommandWindowTestRunDisplay is a TestRunDisplay that is always
    %   constructed with file identifier 1.  Pass an instance to the run
    %   method of a TestComponent (such as a TestSuite or a TestCase) to have
    %   progress and results printed as the test run proceeds.
    %
    %   This class adds nothing beyond its constructor; the methods and
    %   properties listed below are the ones inherited from TestRunDisplay.
    %
    %   CommandWindowTestRunDisplay methods:
    %       testComponentStarted  - Update Command Window display
    %       testComponentFinished - Update Command Window display
    %       testCaseFailure       - Log test failure information
    %       testCaseError         - Log test error information
    %
    %   CommandWindowTestRunDisplay properties:
    %       TestCaseCount         - Number of test cases executed
    %       Faults                - Struct array of test fault info
    %
    %   See also TestRunDisplay, TestRunLogger, TestRunMonitor, TestSuite
    
    %   Steven L. Eddins
    %   Copyright 2008-2010 The MathWorks, Inc.
    
    methods
        function self = CommandWindowTestRunDisplay
            %CommandWindowTestRunDisplay Constructor
            %   Takes no arguments; forwards file identifier 1 to the
            %   TestRunDisplay constructor.
            commandWindowFid = 1;
            self = self@TestRunDisplay(commandWindowFid);
        end
    end
    
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/xunit/Contents.m
================================================
% MATLAB xUnit Test Framework
% Version 3.1 (R2010b) 19-Nov-2010
%
% Running Unit Tests
%   runtests                  - Run unit tests
%
% Writing Unit Tests
%   assertElementsAlmostEqual - Assert floating-point array elements almost equal
%   assertEqual               - Assert that inputs are equal
%   assertFilesEqual          - Assert that two files have the same content
%   assertExceptionThrown     - Assert that specified exception is thrown
%   assertFalse               - Assert that input condition is false
%   assertTrue                - Assert that input condition is true
%   assertVectorsAlmostEqual  - Assert floating-point vectors almost equal in norm sense
%   initTestSuite             - Utility script used for subfunction-based tests
%
% Framework Classes
%   CommandWindowTestRunDisplay - Print test suite results to command window
%   FunctionHandleTestCase    - Test case based on a function handle
%   TestCase                  - Class defining interface for test cases
%   TestCaseInDir             - Test case requiring temporary directory change
%   TestCaseWithAddPath       - Test case requiring temporary path modification
%   TestComponent             - Abstract base class for TestCase and TestSuite
%   TestComponentInDir        - Test component requiring temporary directory change
%   TestLogger                - Collect data (silently) from running test suite
%   TestRunDisplay            - Print test suite execution results
%   TestRunMonitor            - Abstract base class for monitoring test suite
%   TestSuite                 - Collection of TestComponent objects
%   TestSuiteInDir            - Test suite requiring temporary directory change
%   %VerboseTestRunDisplay    - Print test suite execution results

% Steven L. Eddins
% Copyright 2008-2010 The MathWorks, Inc.

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/xunit/FunctionHandleTestCase.m
================================================
classdef FunctionHandleTestCase < TestCase
%FunctionHandleTestCase Test case based on a function handle
%   FunctionHandleTestCase is a TestCase subclass whose test body is a
%   function handle rather than a method of a TestCase subclass.  Optional
%   setup and teardown function handles provide the test fixture.
%
%   FunctionHandleTestCase methods:
%       FunctionHandleTestCase - Constructor
%       runTestCase            - Run function handle test
%       setUp                  - Run test-fixture setup function
%       tearDown               - Run test-fixture teardown function
%
%   FunctionHandleTestCase properties:
%       TestFcn     - Function handle of test function
%       SetupFcn    - Function handle of setup function
%       TeardownFcn - Function handle of teardown function
%       TestData    - Data needed by test function or teardown function
%
%   See also TestCase, TestSuite

%   Steven L. Eddins
%   Copyright 2008-2009 The MathWorks, Inc.

    properties (SetAccess = protected, GetAccess = protected, Hidden = true)
        %TestFcn - Function handle of test function
        %   Called as TestFcn(data) when SetupFcn is nonempty and reports
        %   at least one output argument, where data is the value returned
        %   by SetupFcn.  Otherwise called as TestFcn().
        TestFcn

        %SetupFcn - Function handle of setup function
        %   Called as data = SetupFcn() when it reports at least one output
        %   argument (data is then stored in TestData).  Otherwise called
        %   as SetupFcn().  May be empty.
        SetupFcn

        %TeardownFcn - Function handle of teardown function
        %   Called as TeardownFcn(data) when SetupFcn is nonempty and
        %   reports at least one output argument, where data is the value
        %   returned by SetupFcn.  Otherwise called as TeardownFcn().  May
        %   be empty.
        TeardownFcn

        %TestData - Value returned by SetupFcn, handed to TestFcn and
        %   TeardownFcn.
        TestData
    end

    methods
        function self = FunctionHandleTestCase(testFcn, setupFcn, teardownFcn)
            %FunctionHandleTestCase Constructor
            %   FunctionHandleTestCase(testFcn, setupFcn, teardownFcn)
            %   creates a TestCase object that executes by running the
            %   function handle testFcn.  setupFcn runs before testFcn and
            %   teardownFcn runs after it; either of the two may be empty.
            %
            %   When setupFcn reports at least one output argument the
            %   calling sequence is
            %
            %       testData = setupFcn();
            %       testFcn(testData);
            %       teardownFcn(testData);
            %
            %   otherwise all three are called without arguments:
            %
            %       setupFcn();
            %       testFcn();
            %       teardownFcn();

            % The base class is told which method of this class runs the
            % test: runTestCase, which in turn invokes TestFcn.
            self = self@TestCase('runTestCase');

            self.TestFcn = testFcn;
            self.SetupFcn = setupFcn;
            self.TeardownFcn = teardownFcn;

            % Name the test case after the function and record the M-file
            % it lives in.  Anonymous functions have no M-file.
            handleInfo = functions(testFcn);
            if strcmp(handleInfo.type, 'anonymous')
                sourceFile = '';
            else
                sourceFile = handleInfo.file;
            end
            self.Name = handleInfo.function;
            self.Location = sourceFile;
        end

        function runTestCase(self)
            %runTestCase Run function handle test
            %   Calls TestFcn.  TestData is passed as the single input when
            %   a nonempty SetupFcn with at least one output argument was
            %   provided; otherwise TestFcn is called with no inputs.
            if isempty(self.SetupFcn) || nargout(self.SetupFcn) <= 0
                self.TestFcn();
                return;
            end
            self.TestFcn(self.TestData);
        end

        function setUp(self)
            %setUp Run test-fixture setup function
            %   Does nothing when SetupFcn is empty.  Otherwise runs
            %   SetupFcn; if it reports at least one output argument, its
            %   first output is stored in TestData.  A warning is issued
            %   when it reports more than one output, because only the
            %   first is ever captured.
            if isempty(self.SetupFcn)
                return;
            end

            numSetupOutputs = nargout(self.SetupFcn);
            if numSetupOutputs <= 0
                self.SetupFcn();
                return;
            end

            if numSetupOutputs > 1
                message = sprintf(['A test fixture setup function returns more than one output argument. ', ...
                    'The test harness only calls the setup function with one output argument. ', ...
                    'Return a struct or a cell array from your setup function if you need to bundle several parts together.', ...
                    '\nTest name: %s\nTest location: %s'], ...
                    self.Name, self.Location);
                warning('xunit:FunctionHandleTestCase:TooManySetupOutputs', ...
                    '%s', message);
            end
            self.TestData = self.SetupFcn();
        end

        function tearDown(self)
            %tearDown Run test-fixture teardown function
            %   Does nothing when TeardownFcn is empty.  Otherwise runs
            %   TeardownFcn, passing TestData when a nonempty SetupFcn with
            %   at least one output argument was provided, and no inputs
            %   otherwise.
            if isempty(self.TeardownFcn)
                return;
            end

            if isempty(self.SetupFcn) || nargout(self.SetupFcn) <= 0
                self.TeardownFcn();
            else
                self.TeardownFcn(self.TestData);
            end
        end
    end
end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/xunit/TestCase.m
================================================
%TestCase Class defining interface for test cases
%   The TestCase class defines an individual test case.
%
%   Normally a test writer will create their own test class that is a subclass
%   of TestCase.  Each instance of the TestCase subclass that gets created will
%   be associated with a single test method.
%
%   If a test fixture is needed, override the setUp() and tearDown() methods.
%
%   TestSuite(subclass_name), where subclass_name is the name of a TestCase
%   subclass, creates a test suite containing one TestCase instance per test
%   method contained in the subclass.
%
%   A simpler test-writing alternative is to use subfunction-based M-file tests.
%   See the MATLAB xUnit documentation.
%
%   TestCase methods:
%       TestCase - Constructor
%       run      - Execute the test case
%
%   TestCase properties:
%       Location - Location of M-file containing the test case
%       Name     - Name of test case
%
%   See also TestComponent, TestSuite

%   Steven L. Eddins
%   Copyright 2008-2010 The MathWorks, Inc.

classdef TestCase < TestComponent
    
    properties
        %MethodName - Name (string) of the method that run() executes
        MethodName
    end

    methods
        function self = TestCase(testMethod)
            %TestCase Constructor
            %   TestCase(testMethod) constructs a TestCase object for the
            %   test method named by the string testMethod.  The name is
            %   stored in both MethodName and Name; Location is set to the
            %   result of which() for the object's class.
            
            self.MethodName = testMethod;
            self.Name = testMethod;
            self.Location = which(class(self));
        end
        
        function did_pass = run(self, monitor)
            %run Execute the test case
            %    did_pass = test_case.run(monitor) calls setUp(), then the
            %    test method named by MethodName, then tearDown(), and
            %    reports progress to monitor:
            %
            %      - testComponentStarted(test_case) before anything runs
            %      - testCaseFailure(test_case, exception) if the test
            %        method throws; tearDown() still runs afterwards
            %      - testCaseError(test_case, exception) if setUp(),
            %        resolving the method name, or tearDown() throws
            %      - testComponentFinished(test_case, did_pass) at the end
            %
            %    did_pass is true only when no failure or error occurred.
            %
            %    test_case.run() with no monitor uses a
            %    CommandWindowTestRunDisplay object, so that test execution
            %    information is printed to the Command Window.
            
            if nargin < 2
                monitor = CommandWindowTestRunDisplay();
            end
            
            did_pass = true;
            monitor.testComponentStarted(self);
            
            try
                self.setUp();
                testMethodHandle = str2func(self.MethodName);
                
                % Exceptions from the test method itself are failures;
                % they are handled here so the fixture is still torn down.
                try
                    testMethodHandle(self);
                catch testFailure
                    monitor.testCaseFailure(self, testFailure);
                    did_pass = false;
                end
                
                self.tearDown();
                
            catch fixtureError
                % Anything thrown outside the test method call is an error.
                monitor.testCaseError(self, fixtureError);
                did_pass = false;
            end
            
            monitor.testComponentFinished(self, did_pass);
        end
        
        function num = numTestCases(self)
            %numTestCases Number of test cases in this component
            %   Always 1.
            num = 1;
        end
           
        function print(self, numLeadingBlanks)
            %print Display the test case name in the Command Window
            %   test_case.print(numLeadingBlanks) prints Name preceded by
            %   numLeadingBlanks spaces (default 0) and followed by a
            %   newline.
            if nargin < 2
                numLeadingBlanks = 0;
            end
            indent = blanks(numLeadingBlanks);
            fprintf('%s%s\n', indent, self.Name);
        end
        
    end
    
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/xunit/TestCaseInDir.m
================================================
%TestCaseInDir Test case requiring temporary directory change
%   The TestCaseInDir class defines a test case that has to be run by first
%   changing to a specified directory.
%
%   The setUp method adds the starting directory to the path and then uses cd to 
%   change into the specified directory.  The tearDown method restores the
%   original path and directory.
%
%   TestCaseInDir is used by MATLAB xUnit's own test suite in order to test itself.
%
%   TestCaseInDir methods:
%       TestCaseInDir - Constructor
%
%   See also TestCase, TestCaseWithAddPath, TestComponent

%   Steven L. Eddins
%   Copyright 2008-2009 The MathWorks, Inc.

classdef TestCaseInDir < TestCase & TestComponentInDir

    methods
        function self = TestCaseInDir(methodName, testDirectory)
            %TestCaseInDir Constructor
            %   TestCaseInDir(methodName, testDirectory) constructs a test
            %   case for the test method named methodName that is to be run
            %   from the directory testDirectory.
            %
            %   methodName is forwarded to the TestCase constructor and
            %   testDirectory to the TestComponentInDir constructor; this
            %   class adds no state or behavior of its own.
            self = self@TestCase(methodName);
            self = self@TestComponentInDir(testDirectory);
        end
    end
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/xunit/TestCaseWithAddPath.m
================================================
%TestCaseWithAddPath Test case requiring temporary path modification
%   The TestCaseWithAddPath class defines a test case that has to be run by
%   first adding a specific directory to the path.
%
%   The setUp method adds the directory to the path, and the tearDown method
%   restores the original path.
%
%   TestCaseWithAddPath is used by MATLAB xUnit's own test suite in order to test
%   itself. 
%
%   TestCaseWithAddPath methods:
%       TestCaseWithAddPath - Constructor
%       setUp               - Add test directory to MATLAB path
%       tearDown            - Restore original MATLAB path
%
%   See also TestCase, TestCaseInDir

%   Steven L. Eddins
%   Copyright 2008-2009 The MathWorks, Inc.

classdef TestCaseWithAddPath < TestCase
    properties (SetAccess = private, GetAccess = private)
        %TestDirectory - Directory that setUp adds to the MATLAB path
        TestDirectory
        
        %OriginalPath - MATLAB path as it was when setUp was entered
        OriginalPath
    end

    methods
        function self = TestCaseWithAddPath(methodName, testDirectory)
            %TestCaseWithAddPath Constructor
            %   TestCaseWithAddPath(methodName, testDirectory) constructs a
            %   test case for the test method named methodName.
            %   testDirectory is the directory that is added to the MATLAB
            %   path while the test case runs.
            self = self@TestCase(methodName);
            self.TestDirectory = testDirectory;
        end

        function setUp(self)
            %setUp Add test directory to MATLAB path.
            %   test_case.setUp() records the current path in the
            %   OriginalPath property and then adds TestDirectory to the
            %   MATLAB path.
            pathBeforeTest = path;
            self.OriginalPath = pathBeforeTest;
            addpath(self.TestDirectory);
        end

        function tearDown(self)
            %tearDown Restore original MATLAB path
            %   test_case.tearDown() sets the MATLAB path back to the value
            %   recorded by setUp.
            path(self.OriginalPath);
        end
    end
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/xunit/TestComponent.m
================================================
classdef TestComponent < handle
%TestComponent Abstract base class for TestCase and TestSuite
%   TestComponent is a handle class.  It declares the abstract interface
%   (print, run, numTestCases) that concrete test components implement, and
%   supplies do-nothing setUp and tearDown fixture hooks that subclasses can
%   override.
%
%   TestComponent methods:
%       run          - Run all test cases in test component
%       print        - Display summary of test component to Command Window
%       numTestCases - Number of test cases in test component
%       setUp        - Initialize test fixture
%       tearDown     - Clean up test fixture
%
%   TestComponent properties:
%       Name - Name of test component
%       Location - Location where test component is defined
%
%   See TestCase, TestSuite

%   Steven L. Eddins
%   Copyright 2008-2009 The MathWorks, Inc.

    properties
       %Name - Name of test component (empty string by default)
       Name = '';
       %Location - Where the test component is defined (empty string by
       %   default).  TestCase fills this with the result of
       %   which(class(self)).
       Location = '';
    end
    
    properties (Access = 'protected')
        %PrintIndentationSize - presumably the number of blanks used per
        %   nesting level by print implementations; not used in this class
        %   -- confirm against the subclasses.
        PrintIndentationSize = 4
    end
    
    methods (Abstract)
       print()
       %print Display summary of test component to Command Window
       %   obj.print() displays information about the test component to the
       %   Command Window.
       
       run()
       %run Execute test cases
       %   obj.run() executes all the test cases in the test component
       
       numTestCases()
       %numTestCases Number of test cases in test component
    end
    
    methods
        function setUp(self)
            %setUp Set up test fixture
            %   test_component.setUp() is called at the beginning of the run()
            %   method.  Test writers can override setUp if necessary to
            %   initialize a test fixture.  This base implementation does
            %   nothing.
        end
        
        function tearDown(self)
            %tearDown Tear down test fixture
            %   test_component.tearDown() is called at the end of the run()
            %   method.  Test writers can override tearDown if necessary to
            %   clean up a test fixture.  This base implementation does
            %   nothing.
        end
        
    end
end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/xunit/TestComponentInDir.m
================================================
%TestComponentInDir Test component requiring temporary directory change
%   The TestComponentInDir class defines a test component that has to be run by
%   first changing to a specified directory.
%
%   The setUp method adds the starting directory to the path and then uses cd to 
%   change into the specified directory.  The tearDown method restores the
%   original path and directory.
%
%   TestComponentInDir methods:
%       TestComponentInDir - Constructor
%       setUp              - Add starting directory to MATLAB path and cd to test directory
%       tearDown           - Restore original directory and MATLAB path
%
%   See also TestComponent

%   Steven L. Eddins
%   Copyright 2008-2009 The MathWorks, Inc.

classdef TestComponentInDir < TestComponent
    properties (SetAccess = private, GetAccess = protected)
        %TestDirectory - Directory to change to in the test fixture
        TestDirectory
        
        %OriginalPath  - Path prior to adding the starting directory
        OriginalPath
        
        %OriginalDirectory - Starting directory
        OriginalDirectory
    end

    methods
        function self = TestComponentInDir(testDirectory)
            %TestComponentInDir Constructor
            %   TestComponentInDir(testDirectory) constructs a test
            %   component whose fixture changes into testDirectory before
            %   the component runs.
            self.TestDirectory = testDirectory;
        end

        function setUp(self)
            %setUp Add starting directory to MATLAB path and change directory
            %   test_component.setUp() saves the current directory in the
            %   OriginalDirectory property, saves the current path in the
            %   OriginalPath property, adds the current directory to the
            %   path, and then uses cd to change into TestDirectory.
            %
            %   If the cd fails (for example, TestDirectory does not
            %   exist), the path is put back to its saved value before the
            %   error is rethrown.  A caller that skips tearDown after a
            %   failed setUp (as TestCase.run does) would otherwise be left
            %   with a permanently modified path.
            self.OriginalDirectory = pwd;
            self.OriginalPath = path;
            addpath(pwd);
            try
                cd(self.TestDirectory);
            catch changeDirError
                % Undo the addpath above; the directory never changed.
                path(self.OriginalPath);
                rethrow(changeDirError);
            end
        end

        function tearDown(self)
            %tearDown Restore original MATLAB path and directory
            %   test_component.tearDown() changes back to the directory
            %   saved by setUp and then restores the saved path.
            cd(self.OriginalDirectory);
            path(self.OriginalPath);
        end
    end
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/xunit/TestRunDisplay.m
================================================
classdef TestRunDisplay < TestRunMonitor
%TestRunDisplay Print test suite execution results.
%   TestRunDisplay is a subclass of TestRunMonitor.  If a TestRunDisplay
%   object is passed to the run method of a TestComponent, such as a
%   TestSuite or a TestCase, it will print information to the Command
%   Window (or specified file handle) as the test run proceeds.
%
%   TestRunDisplay methods:
%       testComponentStarted  - Update Command Window display
%       testComponentFinished - Update Command Window display
%       testCaseFailure       - Log test failure information
%       testCaseError         - Log test error information
%
%   TestRunDisplay properties:
%       TestCaseCount         - Number of test cases executed
%       Faults                - Struct array of test fault info
%
%   See also TestRunLogger, TestRunMonitor, TestSuite

%   Steven L. Eddins
%   Copyright 2008-2010 The MathWorks, Inc.
    
    properties (SetAccess = private)
        %TestCaseCount - Number of test cases executed
        %   Reset to 0 when a test run starts.
        TestCaseCount
        
        %Faults - Struct array of test fault info
        %   Faults is a struct array with these fields:
        %       Type      - either 'failure' or 'error'
        %       TestCase  - the TestCase object that suffered the fault
        %       Exception - the MException thrown when the fault occurred
        Faults = struct('Type', {}, 'TestCase', {}, 'Exception', {});
        
    end
    
    properties (SetAccess = private, GetAccess = private)
        %InitialTic - Output of tic at beginning of test run
        InitialTic
        
        %InitialComponent First test component executed
        %   InitialComponent is set to the first test component executed in the
        %   test run.  This component is saved so that the end of the test run
        %   can be identified.
        InitialComponent = []   
        
    end
    
    properties (Access = protected)
        %FileHandle - Handle used by fprintf for displaying results.
        %             Default value of 1 displays to Command Window.
        FileHandle = 1
    end
        
    
    methods
        function self = TestRunDisplay(output)
            %TestRunDisplay Constructor
            %   TestRunDisplay() prints to file identifier 1.
            %
            %   TestRunDisplay(output), where output is a string, opens the
            %   file named output for writing and prints there.  An error
            %   with identifier xunit:TestRunDisplay:FileOpenError is
            %   thrown if the file cannot be opened.
            %
            %   TestRunDisplay(output), where output is not a string, uses
            %   output directly as the file identifier passed to fprintf.
            if nargin > 0
                if ischar(output)
                    fid = fopen(output, 'w');
                    if fid < 0
                        % Report the name the caller passed in.
                        error('xunit:TestRunDisplay:FileOpenError', ...
                            'Could not open file "%s" for writing.', ...
                            output);
                    end
                    self.FileHandle = fid;
                else
                    self.FileHandle = output;
                end
            end
        end
        
        function testComponentStarted(self, component)
            %testComponentStarted Update Command Window display
            %    If the InitialComponent property is not yet set, 
            %    obj.testComponentStarted(component) sets the property and calls
            %    obj.testRunStarted(component).
            
            if isempty(self.InitialComponent)
                self.InitialComponent = component;
                self.testRunStarted(component);
            end
        end    
            
        function testComponentFinished(self, component, did_pass)
            %testComponentFinished Update Command Window display
            %    If component is a TestCase object, then 
            %    obj.testComponentFinished(component, did_pass) increments
            %    TestCaseCount and prints '.' for a pass or 'F' otherwise,
            %    starting a new output line after every 20 test cases.
            %
            %    If component is the InitialComponent, then
            %    obj.testRunFinished(did_pass) is called.
            
            if isa(component, 'TestCase')
                self.TestCaseCount = self.TestCaseCount + 1;
                if did_pass
                    fprintf(self.FileHandle, '.');
                else
                    fprintf(self.FileHandle, 'F');
                end
                line_length = 20;
                if mod(self.TestCaseCount, line_length) == 0
                    fprintf(self.FileHandle, '\n');
                end
            end
            
            if isequal(component, self.InitialComponent)
                self.testRunFinished(did_pass);
            end
        end
               
        function testCaseFailure(self, test_case, failure_exception)
            %testCaseFailure Log test failure information
            %    obj.testCaseFailure(test_case, failure_exception) logs the test
            %    case failure information.
            
            self.logFault('failure', test_case, ...
                failure_exception);
        end
        
        function testCaseError(self, test_case, error_exception)
            %testCaseError Log test error information
            %    obj.testCaseError(test_case, error_exception) logs the test
            %    case error information.
            
            self.logFault('error', test_case, ...
                error_exception);
        end
        
    end
    
    methods (Access = protected)
        function testRunStarted(self, component)
            %testRunStarted Update Command Window display
            %    obj.testRunStarted(component) starts the run timer, resets
            %    TestCaseCount to 0, and prints the number of test cases in
            %    component.
            
            self.InitialTic = tic;
            self.TestCaseCount = 0;
            num_cases = component.numTestCases();
            if num_cases == 1
                str = 'case';
            else
                str = 'cases';
            end
            fprintf(self.FileHandle, 'Starting test run with %d test %s.\n', ...
                num_cases, str);
        end
        
        function testRunFinished(self, did_pass)
            %testRunFinished Update Command Window display
            %    obj.testRunFinished(did_pass) prints PASSED or FAILED
            %    together with the elapsed time since the run started, and
            %    then prints the details of every logged fault.
            
            if did_pass
                result = 'PASSED';
            else
                result = 'FAILED';
            end
            
            fprintf(self.FileHandle, '\n%s in %.3f seconds.\n', result, toc(self.InitialTic));
            
            self.displayFaults();
        end
        

        function logFault(self, type, test_case, exception)
            %logFault Log test fault information
            %    obj.logFault(type, test_case, exception) appends an entry
            %    to Faults. type is either 'failure' or 'error'. test_case is a
            %    TestCase object.  exception is an MException object.
            
            self.Faults(end + 1).Type = type;
            self.Faults(end).TestCase = test_case;
            self.Faults(end).Exception = exception;
        end
        
        function displayFaults(self)
            %displayFaults Display test fault info
            %    obj.displayFaults() prints, for each entry in Faults, a
            %    header with the test case location and name, the filtered
            %    stack trace, and the exception message.
            for k = 1:numel(self.Faults)
                faultData = self.Faults(k);
                if strcmp(faultData.Type, 'failure')
                    str = 'Failure';
                else
                    str = 'Error';
                end
                fprintf(self.FileHandle, '\n===== Test Case %s =====\nLocation: %s\nName:     %s\n\n', str, ...
                    faultData.TestCase.Location, faultData.TestCase.Name);
                displayStack(filterStack(faultData.Exception.stack), ...
                    self.FileHandle);
                fprintf(self.FileHandle, '\n%s\n', faultData.Exception.message);

                fprintf(self.FileHandle, '\n');
            end
        end
        
    end
    
end

function displayStack(stack, file_handle)
%displayStack Display stack trace from MException instance
%   displayStack(stack, file_handle) writes one line per element of the
%   struct array stack to file_handle.  Each line names the file and wraps
%   the line number in a hyperlink whose target is a
%   "matlab: opentoline(...)" command for that file and line.  Each element
%   of stack is read through its file and line fields.

% Walk the frames as a row so the loop variable is one frame at a time.
for frame = reshape(stack, 1, [])
    openCommand = sprintf('matlab: opentoline(''%s'',%d)', frame.file, frame.line);
    fprintf(file_handle, '%s at <a href="%s">line %d</a>\n', frame.file, openCommand, frame.line);
end
end

function new_stack = filterStack(stack)
%filterStack Remove unmeaningful stack trace calls
%    new_stack = filterStack(stack) returns the leading portion of the
%    input stack trace, dropping trailing calls into framework functions
%    and methods that are unlikely to be meaningful to the user.

% A testing stack trace usually has three parts:
%
% 1. An assert function located in the framework directory (useful).
%
% 2. User-written test functions/methods and the code under test (useful).
%
% 3. Methods of the framework classes that drove the test run (clutter).
%
% The filtering strategy is therefore: once the trace has left the framework
% directory, cut it off at the point where it re-enters that directory.

mtest_directory = fileparts(which('runtests'));
num_frames = numel(stack);

% Mark each frame according to whether its file is in the framework directory.
in_framework = false(1, num_frames);
for idx = 1:num_frames
    in_framework(idx) = strcmp(fileparts(stack(idx).file), mtest_directory);
end

last_keeper = num_frames;
first_outside = find(~in_framework, 1);
if ~isempty(first_outside)
    % Look for the first framework frame after the trace has left.
    reentry_offset = find(in_framework(first_outside + 1:end), 1);
    if ~isempty(reentry_offset)
        last_keeper = first_outside + reentry_offset - 1;
    end
end

new_stack = stack(1:last_keeper);

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/xunit/TestRunLogger.m
================================================
%TestRunLogger Collect data (silently) from running test suite
%   TestRunLogger is a subclass of TestRunMonitor that is used to collect information
%   from an executing test component (either a test case or a test suite).
%   It maintains a record of event notifications received, as well as any test
%   failures or test errors.
%
%   TestRunLogger methods:
%       testComponentStarted  - Log test component started
%       testComponentFinished - Log test component finished
%       testCaseFailure       - Log test case failure
%       testCaseError         - Log test case error
%
%   TestRunLogger properties:
%       Log          - Cell array of test notification strings
%       NumFailures  - Number of test failures during execution
%       NumErrors    - Number of test errors during execution
%       NumTestCases - Total number of test cases executed
%       Faults       - Struct array of test fault information
%
%   See also CommandWindowTestRunDisplay, TestRunMonitor, TestSuite

%   Steven L. Eddins
%   Copyright 2008-2009 The MathWorks, Inc.

classdef TestRunLogger < TestRunMonitor

    properties (SetAccess = protected)
        %Log Cell array of test notification strings
        %   The notification strings recorded are 'TestRunStarted',
        %   'TestRunFinished', 'TestComponentStarted',
        %   'TestComponentFinished', 'TestCaseFailure', and 'TestCaseError'.
        Log
        
        %NumFailures Number of test failures during execution
        NumFailures = 0
        
        %NumErrors Number of test errors during execution
        NumErrors = 0
        
        %NumTestCases Total number of test cases executed
        NumTestCases = 0
        
        %Faults Struct array of test fault information
        %   Each element has the fields Type, TestCase, and Exception.  Type
        %   is either 'failure' or 'error'.  TestCase is the test case object
        %   that triggered the fault.  Exception is the MException object
        %   thrown during the fault.
        Faults = struct('Type', {}, 'TestCase', {}, 'Exception', {});
    end
    
    properties (SetAccess = private, GetAccess = private)
        % First component reported to testComponentStarted; used to
        % recognize the end of the whole run.
        InitialTestComponent = []
    end

    methods
        
        function testComponentStarted(self, component)
            %testComponentStarted Log test component started
            %   The first component seen is remembered and also produces a
            %   'TestRunStarted' entry.  TestCase components are counted.
            is_first_component = isempty(self.InitialTestComponent);
            if is_first_component
                self.InitialTestComponent = component;
                self.appendToLog('TestRunStarted');
            end
            
            self.appendToLog('TestComponentStarted');
            
            is_test_case = isa(component, 'TestCase');
            if is_test_case
                self.NumTestCases = self.NumTestCases + 1;
            end
        end
            
        function testComponentFinished(self, component, did_pass)
            %testComponentFinished Log test component finished
            %   A 'TestRunFinished' entry is added when the component equals
            %   the one that started the run.  did_pass is not used.
            self.appendToLog('TestComponentFinished');
            
            run_is_over = isequal(component, self.InitialTestComponent);
            if run_is_over
                self.appendToLog('TestRunFinished');
            end
        end
        
        function testCaseFailure(self, test_case, failure_exception)
            %testCaseFailure Log test case failure
            self.appendToLog('TestCaseFailure');
            self.NumFailures = self.NumFailures + 1;
            self.logFault('failure', test_case, failure_exception);
        end
        
        function testCaseError(self, test_case, error_exception)
            %testCaseError Log test case error
            self.appendToLog('TestCaseError');
            self.NumErrors = self.NumErrors + 1;
            self.logFault('error', test_case, error_exception);
        end
    end
    
    methods (Access = private)
        function appendToLog(self, item)
            % Add one notification string to the end of Log.
            next_entry = numel(self.Log) + 1;
            self.Log{next_entry} = item;
        end
        
        function logFault(self, type, test_case, exception)
            % Add one record to the end of the Faults struct array.
            slot = numel(self.Faults) + 1;
            self.Faults(slot).Type = type;
            self.Faults(slot).TestCase = test_case;
            self.Faults(slot).Exception = exception;
        end
    end
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/xunit/TestRunMonitor.m
================================================
%TestRunMonitor Abstract base class for monitoring a running test suite
%   The abstract TestRunMonitor class defines an object that can observe and
%   record the results of running a test suite.  The run() method of a
%   TestComponent object takes a TestRunMonitor object as an input argument.
%
%   Different test suite logging or reporting functionality can be achieved by
%   subclassing TestRunMonitor.  For example, see the TestRunLogger and the
%   CommandWindowTestRunDisplay classes.
%
%   TestRunMonitor methods:
%       TestRunMonitor        - Constructor
%       testComponentStarted  - Called at beginning of test component run
%       testComponentFinished - Called when test component run finished
%       testCaseFailure       - Called when a test case fails
%       testCaseError         - Called when a test case causes an error
%
%   See also CommandWindowTestRunDisplay, TestRunLogger, TestCase, TestSuite

%   Steven L. Eddins
%   Copyright 2008-2009 The MathWorks, Inc.

classdef TestRunMonitor < handle

    % Every method is abstract: a concrete subclass must implement all four
    % notification hooks declared below.
    methods (Abstract)
        
        % Called at the beginning of a test component run; component is
        % the component being started.
        testComponentStarted(self, component)
            
        % Called when a test component run has finished; did_pass is the
        % pass/fail result reported for that component.
        testComponentFinished(self, component, did_pass)
        
        % Called when a test case fails; failure_exception is the
        % exception associated with the failure.
        testCaseFailure(self, test_case, failure_exception)
        
        % Called when a test case causes an error; error_exception is the
        % exception associated with the error.
        testCaseError(self, test_case, error_exception)
        
    end
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/xunit/TestSuite.m
================================================
%TestSuite Collection of TestComponent objects
%   The TestSuite class defines a collection of TestComponent objects.
%
%   TestSuite methods:
%       TestSuite             - Constructor
%       add                   - Add test component to test suite
%       print                 - Display test suite summary to Command Window
%       run                   - Run the test suite
%       keepMatchingTestCase  - Keep only the named test component
%       fromName              - Construct test suite from directory or MATLAB function file name
%       fromTestCaseClassName - Construct test suite from TestCase class name
%       fromPackageName       - Construct test suite from package name
%       fromPwd               - Construct test suite from present directory
%
%   TestSuite properties:
%       TestComponents - Cell array of TestComponent objects
%
%   Examples
%   --------
%   Run all the test cases in the SampleTests1 class.  Display test suite
%   progress and a summary of results in the Command Window.
%
%       TestSuite('SampleTests1').run()
%
%   Construct a test suite from all test components found in the current
%   directory.
%
%       suite = TestSuite.fromPwd();
%
%   Construct a test suite from all test components found in the package
%   'mytool.tests'. (Note that the "+" character at the beginning of the package
%   folder name on disk is not part of the package name.)
%
%       suite = TestSuite.fromPackageName('mytool.tests');
%
%   Run all the test cases in the SampleTests1 class.  Display no output to the
%   Command Window.  Upon completion, query the number of test failures and test
%   errors.
%
%       logger = TestRunLogger();
%       TestSuite('SampleTests1').run(logger);
%       numFailures = logger.NumFailures
%       numErrors = logger.NumErrors
%
%   See also CommandWindowTestRunDisplay, TestCase, TestComponent, TestRunLogger

%   Steven L. Eddins
%   Copyright 2008-2010 The MathWorks, Inc.

classdef TestSuite < TestComponent
    
    properties (SetAccess = protected)
        %TestComponents Cell array of TestComponent objects in this suite
        TestComponents = {};
    end
    
    methods
        
        function self = TestSuite(name)
            %TestSuite Constructor
            %   suite = TestSuite constructs an empty test suite. suite =
            %   TestSuite(name) constructs a test suite by passing name to
            %   TestSuite.fromName.
            
            if nargin >= 1
                self = TestSuite.fromName(name);
            end
        end
        
        function did_pass_out = run(self, monitor)
            %run Execute test cases in test suite
            %   did_pass = suite.run() executes all test cases in the test
            %   suite, returning a logical value indicating whether or not all
            %   test cases passed.
            %
            %   did_pass = suite.run(monitor) reports progress to the given
            %   monitor object.  When monitor is omitted, a
            %   CommandWindowTestRunDisplay object is used.
            
            if nargin < 2
                monitor = CommandWindowTestRunDisplay();
            end
            
            monitor.testComponentStarted(self);
            did_pass = true;
            
            self.setUp();
            
            for k = 1:numel(self.TestComponents)
                this_component_passed = self.TestComponents{k}.run(monitor);
                did_pass = did_pass && this_component_passed;
            end
            
            self.tearDown();
            
            monitor.testComponentFinished(self, did_pass);
            
            % Only assign the output when requested, so that calling
            % suite.run() as a statement does not display a result.
            if nargout > 0
                did_pass_out = did_pass;
            end
        end
        
        function num = numTestCases(self)
            %numTestCases Number of test cases in test suite
            %   num = suite.numTestCases() returns the sum of numTestCases()
            %   over all components contained in the suite.
            
            num = 0;
            for k = 1:numel(self.TestComponents)
                component_k = self.TestComponents{k};
                num = num + component_k.numTestCases();
            end
        end
        
        function print(self, numLeadingBlanks)
            %print Display test suite summary to Command Window
            %   test_suite.print() displays a summary of the test suite to the
            %   Command Window.
            %
            %   test_suite.print(numLeadingBlanks) indents the suite name by
            %   the given number of blanks; contained components are printed
            %   with PrintIndentationSize additional blanks.
            
            if nargin < 2
                numLeadingBlanks = 0;
            end
            fprintf('%s%s\n', blanks(numLeadingBlanks), self.Name);
            for k = 1:numel(self.TestComponents)
                self.TestComponents{k}.print(numLeadingBlanks + ...
                    self.PrintIndentationSize);
            end
        end
        
        function add(self, component)
            %add Add test component to test suite
            %   test_suite.add(component) adds the TestComponent object to the
            %   test suite.  If component is a cell array, each of its
            %   elements is appended to the suite.
            
            if iscell(component)
                self.TestComponents((1:numel(component)) + end) = component;
            else
                self.TestComponents{end + 1} = component;
            end
        end
        
        function keepMatchingTestCase(self, name)
            %keepMatchingTestCase Keep only the named test component
            %   test_suite.keepMatchingTestCase(name) keeps only the first
            %   test component with a matching name and discards the rest.
            %   If no component matches, the suite becomes empty.
            
            idx = [];
            for k = 1:numel(self.TestComponents)
                if strcmp(self.TestComponents{k}.Name, name)
                    idx = k;
                    break;
                end
            end
            if isempty(idx)
                self.TestComponents = {};
            else
                self.TestComponents = self.TestComponents(idx);
            end
        end
        
    end
    
    methods (Static)
        function suite = fromTestCaseClassName(class_name)
            %fromTestCaseClassName Construct test suite from TestCase class name
            %   suite = TestSuite.fromTestCaseClassName(name) constructs a
            %   TestSuite object from the name of a TestCase subclass.  One
            %   test case is added for each non-constructor method whose name
            %   satisfies xunit.utils.isTestString.
            %
            %   An error with identifier 'xunit:fromTestCaseClassName' is
            %   thrown if name is not the name of a TestCase subclass.
            
            if ~xunit.utils.isTestCaseSubclass(class_name)
                error('xunit:fromTestCaseClassName', ...
                    'Input string "%s" is not the name of a TestCase class.', ...
                    class_name);
            end
            
            suite = TestSuite;
            suite.Name = class_name;
            suite.Location = which(class_name);
            
            methods = getClassMethods(class_name);
            for k = 1:numel(methods)
                if methodIsConstructor(methods{k})
                    continue
                end
                
                method_name = methods{k}.Name;
                if xunit.utils.isTestString(method_name)
                    suite.add(feval(class_name, method_name));
                end
            end
            
        end
        
        function suite = fromName(name)
            %fromName Construct test suite from M-file name
            %   test_suite = TestSuite.fromName(name) constructs a TestSuite
            %   object from an M-file with the given name.  The name can be of a
            %   directory, a TestCase subclass, a package, or an M-file
            %   containing a simple test or containing subfunction-based tests.
            %
            %   Optionally, name can contain a colon (':') followed by filter
            %   string.  The filter string is used to select a particular named
            %   test case.  For example, TestSuite.fromName('MyTests:testA')
            %   constructs a TestSuite object containing only the test case
            %   named 'testA' found in the TestCase subclass MyTests.
            
            % Directories are handled first; no filter string is applied.
            if isdir(name)
                suite = TestSuiteInDir(name);
                suite.gatherTestCases();
                return;
            end
            
            [name, filter_string] = strtok(name, ':');
            if ~isempty(filter_string)
                % Drop the leading ':' left in the remainder by strtok.
                filter_string = filter_string(2:end);
            end
            
            if xunit.utils.isTestCaseSubclass(name)
                suite = TestSuite.fromTestCaseClassName(name);
                
            elseif ~isempty(meta.class.fromName(name))
                % Input is the name of a class that is not a TestCase subclass.
                % Return an empty test suite.
                suite = TestSuite();
                suite.Name = name;
                
            elseif isPackage(name)
                suite = TestSuite.fromPackageName(name);
                
            else
                
                try
                    if nargout(name) == 0
                        % A function with no outputs is treated as a simple
                        % test and wrapped in a single test case.
                        suite = TestSuite();
                        suite.Name = name;
                        suite.add(FunctionHandleTestCase(str2func(name), [], []));
                        suite.Location = which(name);
                        
                    else
                        % A function with outputs is called and expected to
                        % return the suite itself.
                        suite = feval(name);
                        if ~isa(suite, 'TestSuite')
                            error('Function did not return a TestSuite object.');
                        end
                    end
                    
                catch
                    % Ordinary function does not appear to contain tests.
                    % Return an empty test suite.
                    suite = TestSuite();
                    suite.Name = name;
                end
            end
            
            if ~isempty(filter_string)
                suite.keepMatchingTestCase(filter_string);
            end
        end
        
        function test_suite = fromPwd()
            %fromPwd Construct test suite from present directory
            %   test_suite = TestSuite.fromPwd() constructs a TestSuite object
            %   from all the test components in the present working directory.
            %   All TestCase subclasses will be found, as well as M-files whose
            %   names satisfy xunit.utils.isTestString and that yield at least
            %   one test component.
            
            test_suite = TestSuite();
            test_suite.Name = pwd;
            test_suite.Location = pwd;
            
            mfiles = dir(fullfile('.', '*.m'));
            for k = 1:numel(mfiles)
                % Only the base name is needed; the first output is named so
                % that it does not shadow the built-in path function.
                [unused_dir, name] = fileparts(mfiles(k).name); %#ok<ASGLU>
                if xunit.utils.isTestCaseSubclass(name)
                    test_suite.add(TestSuite.fromTestCaseClassName(name));
                elseif xunit.utils.isTestString(name)
                    suite_k = TestSuite.fromName(name);
                    if ~isempty(suite_k.TestComponents)
                        test_suite.add(suite_k);
                    end
                end
            end
        end
        
        function test_suite = fromPackageName(name)
            %fromPackageName Construct test suite from package name
            %   test_suite = TestSuite.fromPackageName(name) constructs a
            %   TestSuite object from all the test components found in the
            %   specified package, including its sub-packages.
            %
            %   An error with identifier 'xunit:fromPackageName:invalidName'
            %   is thrown if name is not the name of a package.

            package_info = meta.package.fromName(name);
            if isempty(package_info)
                error('xunit:fromPackageName:invalidName', ...
                    'Input string "%s" is not the name of a package.', ...
                    name);
            end
            test_suite = TestSuite();
            test_suite.Name = name;
            test_suite.Location = 'Package';
            
            for k = 1:numel(package_info.Packages)
                pkg_name = package_info.Packages{k}.Name;
                pkg_suite = TestSuite.fromPackageName(pkg_name);
                if ~isempty(pkg_suite.TestComponents)
                    % Reuse the suite built for the emptiness check rather
                    % than traversing the sub-package a second time.
                    test_suite.add(pkg_suite);
                end
            end
            
            class_names = cell(1, numel(package_info.Classes));
            for k = 1:numel(package_info.Classes)
                class_name = package_info.Classes{k}.Name;
                class_names{k} = class_name;
                if xunit.utils.isTestCaseSubclass(class_name)
                    test_suite.add(TestSuite.fromTestCaseClassName(class_name));
                end
            end
            
            for k = 1:numel(package_info.Functions)
                function_name = package_info.Functions{k}.Name;
                if xunit.utils.isTestString(function_name)
                    full_function_name = [package_info.Name '.' package_info.Functions{k}.Name];
                    % Skip names already handled above as classes.
                    if ~ismember(full_function_name, class_names)
                        suite_k = TestSuite.fromName(full_function_name);
                        if ~isempty(suite_k.TestComponents)
                            test_suite.add(suite_k);
                        end
                    end
                end
            end
        end
    end
end

function tf = isPackage(name)
% True when meta.package.fromName finds a package with the given name.
package_info = meta.package.fromName(name);
tf = ~isempty(package_info);
end

function methods = getClassMethods(class_name)
% Return the Methods property of the meta.class object for class_name.
class_info = meta.class.fromName(class_name);
methods = class_info.Methods;
end

function result = methodIsConstructor(method)
% True when the method's name, qualified with the name of the package
% containing its defining class (if any), equals the defining class name.
defining_class = method.DefiningClass;
containing_package = defining_class.ContainingPackage;
if isempty(containing_package)
    qualified_name = method.Name;
else
    qualified_name = [containing_package.Name, '.', ...
        method.Name];
end
result = strcmp(qualified_name, defining_class.Name);
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/xunit/TestSuiteInDir.m
================================================
%TestSuiteInDir Test suite requiring temporary directory change
%   The TestSuiteInDir class defines a test suite that has to be run by first
%   changing to a specified directory.
%
%   The setUp method adds the starting directory to the path and then uses cd to
%   change into the specified directory.  The tearDown method restores the
%   original path and directory.
%
%   TestSuiteInDir methods:
%       TestSuiteInDir  - Constructor
%       gatherTestCases - Add test cases found in the target directory
%
%   See also TestSuite

%   Steven L. Eddins
%   Copyright 2009 The MathWorks, Inc.

classdef TestSuiteInDir < TestSuite & TestComponentInDir
    
    methods
        function self = TestSuiteInDir(testDirectory)
            %TestSuiteInDir Constructor
            %   TestSuiteInDir(testDirectory) constructs a test suite located
            %   in the specified directory.  If testDirectory is '.', the
            %   suite name and location are both set to the present working
            %   directory.  Otherwise the name is the last component of
            %   testDirectory and the location is testDirectory itself.
            self = self@TestComponentInDir(testDirectory);
            
            if strcmp(testDirectory, '.')
                self.Name = pwd;
                self.Location = pwd;
            else
                % fileparts returns an empty name for a path that ends with
                % a file separator (for example 'tests/'), so strip trailing
                % separators before extracting the directory name.
                trimmed = testDirectory;
                while ischar(trimmed) && numel(trimmed) > 1 && ...
                        (trimmed(end) == '/' || trimmed(end) == filesep)
                    trimmed(end) = [];
                end
                [pathstr, name] = fileparts(trimmed); %#ok<ASGLU>
                self.Name = name;
                self.Location = testDirectory;
            end
        end
        
        function gatherTestCases(self)
            %gatherTestCases Add test cases found in the target directory
            %   suite.gatherTestCases() automatically finds all the test cases
            %   in the directory specified in the constructor call and makes
            %   them the components of the suite.
            
            % Return to the starting directory when this function exits,
            % including when an error occurs.
            current_dir = pwd;
            c = onCleanup(@() cd(current_dir));
            
            cd(self.TestDirectory);
            tmp = TestSuite.fromPwd();
            self.TestComponents = tmp.TestComponents;
        end
    end
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/xunit/VerboseTestRunDisplay.m
================================================
classdef VerboseTestRunDisplay < TestRunDisplay
%VerboseTestRunDisplay Print test suite execution results.
%   VerboseTestRunDisplay is a subclass of TestRunDisplay that prints one
%   line, including the elapsed time, for every test component.  It supports
%   the -verbose option of runtests.
%
%   Overridden methods:
%       testComponentStarted  - Update Command Window display
%       testComponentFinished - Update Command Window display
%       testRunFinished       - Update Command Window display at end of run
%
%   See also TestRunDisplay, TestRunLogger, TestRunMonitor, TestSuite

%   Steven L. Eddins
%   Copyright 2010 The MathWorks, Inc.     
    
    properties (SetAccess = private, GetAccess = private)
        % Stack of tic values, one per component currently running.
        TicStack = uint64([])
    end
    
    methods
        function self = VerboseTestRunDisplay(output)
            %VerboseTestRunDisplay Constructor
            %   output is forwarded to the TestRunDisplay constructor and
            %   defaults to 1 when omitted.
            if nargin < 1
                output = 1;
            end
            
            self = self@TestRunDisplay(output);
        end
        
        function testComponentStarted(self, component)
            %testComponentStarted Update Command Window display
            %   Starts a timer for the component and prints its indented
            %   name.  Test cases are followed by leader dots on the same
            %   line; other components are surrounded by newlines.
            
            self.pushTic();
            
            if isa(component, 'TestCase')
                fprintf(self.FileHandle, '%s%s', self.indentationSpaces(), component.Name);
                fprintf(self.FileHandle, ' %s ', self.leaderDots(component.Name));
            else
                fprintf(self.FileHandle, '\n');
                fprintf(self.FileHandle, '%s%s', self.indentationSpaces(), component.Name);
                fprintf(self.FileHandle, '\n');
            end
        end    
            
        function testComponentFinished(self, component, did_pass)
            %testComponentFinished Update Command Window display
            %   Prints the pass/fail status and elapsed time of the
            %   component.  When the last running component finishes,
            %   testRunFinished is called.

            is_container = ~isa(component, 'TestCase');
            
            % The name line must be printed before the timer is popped,
            % because the indentation depends on the timer stack depth.
            if is_container
                fprintf(self.FileHandle, '%s%s %s ', self.indentationSpaces(), component.Name, ...
                    self.leaderDots(component.Name));
            end
            
            elapsed_seconds = toc(self.popTic());
            
            if did_pass
                status_format = 'passed in %12.6f seconds\n';
            else
                status_format = 'FAILED in %12.6f seconds\n';
            end
            fprintf(self.FileHandle, status_format, elapsed_seconds);
            
            if is_container
                fprintf(self.FileHandle, '\n');
            end
            
            no_components_running = isempty(self.TicStack);
            if no_components_running
                self.testRunFinished();
            end
                
        end
        
    end
    
    methods (Access = protected)
        function testRunFinished(self)
            %testRunFinished Update Command Window display
            %    obj.testRunFinished() displays information about any test
            %    faults by calling displayFaults.
            
            self.displayFaults();
        end
    end
    
    methods (Access = private)
        function pushTic(self)
            % Start a new timer and put it on top of the stack.
            self.TicStack(end+1) = tic;
        end
        
        function t1 = popTic(self)
            % Remove and return the most recently started timer.
            top = numel(self.TicStack);
            t1 = self.TicStack(top);
            self.TicStack(top) = [];
        end
        
        function str = indentationSpaces(self)
            % Blank string whose length matches the current indentation.
            width = self.numIndentationSpaces();
            str = repmat(' ', 1, width);
        end
        
        function n = numIndentationSpaces(self)
            % Three spaces per nesting level; the outermost level is 0.
            n = 3 * (numel(self.TicStack) - 1);
        end
        
        function str = leaderDots(self, name)
            % Dots that pad the indented name out to 60 columns.
            remaining = 60 - self.numIndentationSpaces() - numel(name);
            str = repmat('.', 1, max(0, remaining));
        end
        
    end
    
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/xunit/assertElementsAlmostEqual.m
================================================
function assertElementsAlmostEqual(varargin)
%assertElementsAlmostEqual Assert floating-point array elements almost equal.
%   assertElementsAlmostEqual(A, B, tol_type, tol, floor_tol) asserts that all
%   elements of floating-point arrays A and B are equal within some tolerance.
%   tol_type can be 'relative' or 'absolute'.  tol and floor_tol are scalar
%   tolerance values.
%
%   If the tolerance type is 'relative', then the tolerance test used is:
%
%       all( abs(A(:) - B(:)) <= tol * max(abs(A(:)), abs(B(:))) + floor_tol )
%
%   If the tolerance type is 'absolute', then the tolerance test used is:
%
%       all( abs(A(:) - B(:)) <= tol )
%
%   tol_type, tol, and floor_tol are all optional.  The default value for
%   tol_type is 'relative'.  If both A and B are double, then the default value
%   for tol and floor_tol is sqrt(eps).  If either A or B is single, then the
%   default value for tol and floor_tol is sqrt(eps('single')).
%
%   If A or B is complex, then the tolerance test is applied independently to
%   the real and imaginary parts.
%
%   Corresponding elements in A and B that are both NaN, or are both infinite
%   with the same sign, are considered to pass the tolerance test.
%
%   assertElementsAlmostEqual(A, B, ..., msg) prepends the string msg to the
%   output message if A and B fail the tolerance test.
%
%   The checks are made in this order: equal sizes, both inputs
%   floating-point, then the element-wise tolerance test.

%   Steven L. Eddins
%   Copyright 2008-2010 The MathWorks, Inc.

parsed = xunit.utils.parseFloatAssertInputs(varargin{:});
lhs = parsed.A;
rhs = parsed.B;

% Check 1: the inputs must have the same size.
sizes_match = isequal(size(lhs), size(rhs));
if ~sizes_match
    failure_text = xunit.utils.comparisonMessage(parsed.Message, ...
        'Inputs are not the same size.', ...
        lhs, rhs);
    throwAsCaller(MException('assertElementsAlmostEqual:sizeMismatch', ...
        '%s', failure_text));
end

% Check 2: both inputs must be floating-point.
if ~isfloat(lhs) || ~isfloat(rhs)
    failure_text = xunit.utils.comparisonMessage(parsed.Message, ...
        'Inputs are not both floating-point.', ...
        lhs, rhs);
    throwAsCaller(MException('assertElementsAlmostEqual:notFloat', ...
        '%s', failure_text));
end

% Check 3: every element must be within tolerance.
within_tolerance = xunit.utils.compareFloats(lhs, rhs, 'elementwise', ...
    parsed.ToleranceType, parsed.Tolerance, parsed.FloorTolerance);
if ~within_tolerance
    tolerance_text = sprintf('Input elements are not all equal within %s tolerance: %g', ...
        parsed.ToleranceType, parsed.Tolerance);
    failure_text = xunit.utils.comparisonMessage(parsed.Message, tolerance_text, ...
        lhs, rhs);
    
    throwAsCaller(MException('assertElementsAlmostEqual:tolExceeded', ...
        '%s', failure_text));
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/xunit/assertEqual.m
================================================
function assertEqual(A, B, custom_message)
%assertEqual Assert that inputs are equal
%   assertEqual(A, B) throws an exception if A and B are not equal.  A and B
%   must have the same class and sparsity to be considered equal.  NaN
%   values in corresponding positions are treated as equal.
%
%   assertEqual(A, B, MESSAGE) prepends the string MESSAGE to the assertion
%   message if A and B are not equal.
%
%   Examples
%   --------
%   % This call returns silently.
%   assertEqual([1 NaN 2], [1 NaN 2]);
%
%   % This call throws an error.
%   assertEqual({'A', 'B', 'C'}, {'A', 'foo', 'C'});
%
%   See also assertElementsAlmostEqual, assertVectorsAlmostEqual

%   Steven L. Eddins
%   Copyright 2008-2010 The MathWorks, Inc.

if nargin < 3
    custom_message = '';
end

% Sparsity is compared first, then class, then value.
sparsity_differs = issparse(A) ~= issparse(B);
if sparsity_differs
    failure_text = xunit.utils.comparisonMessage(custom_message, ...
        'One input is sparse and the other is not.', A, B);
    throwAsCaller(MException('assertEqual:sparsityNotEqual', '%s', failure_text));
end

classes_match = strcmp(class(A), class(B));
if ~classes_match
    failure_text = xunit.utils.comparisonMessage(custom_message, ...
        'The inputs differ in class.', A, B);
    throwAsCaller(MException('assertEqual:classNotEqual', '%s', failure_text));
end

values_match = isequalwithequalnans(A, B);
if ~values_match
    failure_text = xunit.utils.comparisonMessage(custom_message, ...
        'Inputs are not equal.', A, B);
    throwAsCaller(MException('assertEqual:nonEqual', '%s', failure_text));
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/xunit/assertExceptionThrown.m
================================================
function assertExceptionThrown(f, expectedId, custom_message)
%assertExceptionThrown Assert that a specific exception is thrown
%   assertExceptionThrown(F, expectedId) calls the function handle F with no
%   input arguments and inspects the outcome:
%
%     * F throws an exception whose identifier is expectedId: returns silently.
%     * F throws nothing: throws 'assertExceptionThrown:noException'.
%     * F throws an exception with another identifier: throws
%       'assertExceptionThrown:wrongException'.
%
%   assertExceptionThrown(F, expectedId, msg) prepends the string msg, on its
%   own line, to the assertion message.
%
%   Example
%   -------
%   % This call returns silently.
%   f = @() error('a:b:c', 'error message');
%   assertExceptionThrown(f, 'a:b:c');
%
%   % This call returns silently.
%   assertExceptionThrown(@() sin, 'MATLAB:minrhs');
%
%   % This call throws an error because calling sin(pi) does not error.
%   assertExceptionThrown(@() sin(pi), 'MATLAB:foo');

%   Steven L. Eddins
%   Copyright 2008-2010 The MathWorks, Inc.

% Run F and only record what happened; the verdict is formed afterwards.
did_throw = true;
thrown_id = '';
try
    f();
    did_throw = false;
catch caught
    thrown_id = caught.identifier;
end

failure_id = '';
if ~did_throw
    failure_id = 'assertExceptionThrown:noException';
    message = sprintf('Expected exception "%s", but none thrown.', ...
        expectedId);
elseif ~strcmp(thrown_id, expectedId)
    failure_id = 'assertExceptionThrown:wrongException';
    message = sprintf('Expected exception %s but got exception %s.', ...
        expectedId, thrown_id);
end

if ~isempty(failure_id)
    if nargin >= 3
        message = sprintf('%s\n%s', custom_message, message);
    end
    throwAsCaller(MException(failure_id, '%s', message));
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/xunit/assertFalse.m
================================================
function assertFalse(condition, message)
%assertFalse Assert that input condition is false
%   assertFalse(CONDITION) throws an exception if CONDITION is not false.
%   CONDITION must be a scalar logical value; anything else is rejected with
%   the identifier 'assertFalse:invalidCondition'.
%
%   assertFalse(CONDITION, MESSAGE) uses the string MESSAGE as the text of the
%   'assertFalse:trueCondition' exception thrown when CONDITION is true.
%
%   Examples
%   --------
%   assertFalse(isreal(sqrt(-1)))
%
%   assertFalse(isreal(sqrt(-1)), ...
%       'Expected isreal(sqrt(-1)) to be false.')
%
%   See also assertTrue

%   Steven L. Eddins
%   Copyright 2008-2010 The MathWorks, Inc.

if nargin < 2
    message = 'Asserted condition is not false.';
end

if ~(isscalar(condition) && islogical(condition))
    throwAsCaller(MException('assertFalse:invalidCondition', ...
        'CONDITION must be a scalar logical value.'));
elseif condition
    throwAsCaller(MException('assertFalse:trueCondition', '%s', message));
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/xunit/assertFilesEqual.m
================================================
function assertFilesEqual(filename1, filename2, user_message)
%assertFilesEqual Assert that files contain the same contents.
%   assertFilesEqual(filename1, filename2) throws an exception if the two
%   specified files do not contain the same bytes.  The files are read and
%   compared in blocks, so they are never held in memory as a whole.
%
%   assertFilesEqual(filename1, filename2, message) prepends the specified
%   message string to the assertion message.
%
%   Exception identifiers:
%       assertFilesEqual:readFailure    a file could not be opened for reading
%       assertFilesEqual:sizeMismatch   the files have different lengths
%       assertFilesEqual:valuesDiffer   same length so far, but a byte differs;
%                                       the message reports the first such byte

%   Steven L. Eddins
%   Copyright 2009-2010 The MathWorks, Inc.

if nargin < 3
    user_message = '';
end

fid1 = fopen(filename1, 'r');
if (fid1 < 0)
    message = sprintf('%s\nCould not open file for reading: %s', ...
        user_message, filename1);
    throwAsCaller(MException('assertFilesEqual:readFailure', ...
        '%s', message));
else
    % Close the file however this function is left.
    c1 = onCleanup(@() fclose(fid1));
end

fid2 = fopen(filename2, 'r');
if (fid2 < 0)
    message = sprintf('%s\nCould not open file for reading: %s', ...
        user_message, filename2);
    throwAsCaller(MException('assertFilesEqual:readFailure', '%s', message));
else
    c2 = onCleanup(@() fclose(fid2));
end

block_size = 100000;
num_blocks = 0;
done = false;
while ~done
    block_from_file1 = fread(fid1, block_size, '*uint8');
    block_from_file2 = fread(fid2, block_size, '*uint8');
    
    if numel(block_from_file1) ~= numel(block_from_file2)
        % Seek to the end of both files so ftell reports their total sizes.
        fseek(fid1, 0, 'eof');
        fseek(fid2, 0, 'eof');
        message = sprintf('The two files are not the same size. File "%s" has %d bytes and file "%s" has %d bytes', ...
            filename1, ftell(fid1), filename2, ftell(fid2));
        if ~isempty(user_message)
            message = sprintf('%s\n%s', user_message, message);
        end
        throwAsCaller(MException('assertFilesEqual:sizeMismatch', '%s', message));
    end
    
    if ~isequal(block_from_file1, block_from_file2)
        % Ask find for the FIRST mismatch only.  Without the count argument
        % it returns every differing index, which made the values below
        % vectors and caused sprintf to repeat its format string.
        first_difference_in_block = find(block_from_file1 ~= block_from_file2, 1);
        first_difference = num_blocks * block_size + first_difference_in_block;
        
        message = sprintf('Files are not equal. First difference is at byte %d, where file "%s" contains 0x%X and file "%s" contains 0x%X', ...
            first_difference, filename1, block_from_file1(first_difference_in_block), ...
            filename2, block_from_file2(first_difference_in_block));
        if ~isempty(user_message)
            message = sprintf('%s\n%s', user_message, message);
        end
        throwAsCaller(MException('assertFilesEqual:valuesDiffer', '%s', message));
    end
    
    % A short read means the end of both files has been reached.
    done = numel(block_from_file1) < block_size;
    num_blocks = num_blocks + 1;
end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/xunit/assertTrue.m
================================================
function assertTrue(condition, message)
%assertTrue Assert that input condition is true
%   assertTrue(CONDITION) throws an exception if CONDITION is not true.
%   CONDITION must be a scalar logical value; anything else is rejected with
%   the identifier 'assertTrue:invalidCondition'.
%
%   assertTrue(CONDITION, MESSAGE) uses the string MESSAGE as the text of the
%   'assertTrue:falseCondition' exception thrown when CONDITION is false.
%
%   Examples
%   --------
%   % This call returns silently.
%   assertTrue(rand < 1, 'Expected output of rand to be less than 1')
%
%   % This call throws an error.
%   assertTrue(sum(sum(magic(3))) == 0, ...
%       'Expected sum of elements of magic(3) to be 0')
%
%   See also assertEqual, assertFalse

%   Steven L. Eddins
%   Copyright 2008-2010 The MathWorks, Inc.

if nargin < 2
    message = 'Asserted condition is not true.';
end

if ~(isscalar(condition) && islogical(condition))
    throwAsCaller(MException('assertTrue:invalidCondition', ...
        'CONDITION must be a scalar logical value.'));
elseif ~condition
    throwAsCaller(MException('assertTrue:falseCondition', '%s', message));
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/xunit/assertVectorsAlmostEqual.m
================================================
function assertVectorsAlmostEqual(varargin)
%assertVectorsAlmostEqual Assert floating-point vectors almost equal in norm sense.
%   assertVectorsAlmostEqual(A, B, tol_type, tol, floor_tol) asserts that the
%   vectors A and B are equal, in the L2-norm sense and within some tolerance.
%   tol_type can be 'relative' or 'absolute'.  tol and floor_tol are scalar
%   tolerance values.
%
%   Tolerance test for tol_type 'relative':
%
%       all( norm(A - B) <= tol * max(norm(A), norm(B)) + floor_tol )
%
%   Tolerance test for tol_type 'absolute':
%
%       all( norm(A - B) <= tol )
%
%   tol_type, tol, and floor_tol are all optional.  The default value for
%   tol_type is 'relative'.  If both A and B are double, then the default value
%   for tol and floor_tol is sqrt(eps).  If either A or B is single, then the
%   default value for tol and floor_tol is sqrt(eps('single')).
%
%   If A or B is complex, then the tolerance test is applied independently to
%   the real and imaginary parts.
%
%   Any infinite or NaN element of A or B will cause an assertion failure.
%
%   assertVectorsAlmostEqual(A, B, ..., msg) prepends the string msg to the
%   assertion message if A and B fail the tolerance test.
%
%   Exception identifiers:
%       assertVectorsAlmostEqual:sizeMismatch   size(A) differs from size(B)
%       assertVectorsAlmostEqual:notFloat       A or B is not floating-point
%       assertVectorsAlmostEqual:tolExceeded    the tolerance test failed

%   Steven L. Eddins
%   Copyright 2008-2010 The MathWorks, Inc.

% Argument parsing and defaults are delegated to the shared helper.
params = xunit.utils.parseFloatAssertInputs(varargin{:});

% The checks are tried in this order and only the first failing one is
% reported.
failure_id = '';
if ~isequal(size(params.A), size(params.B))
    failure_id = 'assertVectorsAlmostEqual:sizeMismatch';
    reason = 'Inputs are not the same size.';
elseif ~isfloat(params.A) || ~isfloat(params.B)
    failure_id = 'assertVectorsAlmostEqual:notFloat';
    reason = 'Inputs are not both floating-point.';
elseif ~xunit.utils.compareFloats(params.A, params.B, 'vector', ...
        params.ToleranceType, params.Tolerance, params.FloorTolerance)
    failure_id = 'assertVectorsAlmostEqual:tolExceeded';
    reason = sprintf('Inputs are not equal within %s vector tolerance: %g', ...
        params.ToleranceType, params.Tolerance);
end

% throwAsCaller stays in this function so the error is attributed to the
% code that called the assertion.
if ~isempty(failure_id)
    message = xunit.utils.comparisonMessage(params.Message, reason, ...
        params.A, params.B);
    throwAsCaller(MException(failure_id, '%s', message));
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/xunit/initTestSuite.m
================================================
%initTestSuite Utility script used for subfunction-based tests
%   This file is a script that is called at the top of M-files containing
%   subfunction-based tests.  It collects the test subfunctions of the calling
%   file into a TestSuite and either returns that suite or runs it.
%
%   The top of a typical M-file using this script looks like this:
%
%       function test_suite = testFeatureA
%
%       initTestSuite;
%
%   IMPORTANT NOTE
%   --------------
%   The output variable name for an M-file using this script must be test_suite.
%
%   The error identifiers below still use the prefix 'findSubfunctionTests'.

%   Steven L. Eddins
%   Copyright 2008-2009 The MathWorks, Inc.

% ST(I) is the frame of this script, so ST(I + 1) is the M-file that
% invoked it.
[ST,I] = dbstack('-completenames');
caller_name = ST(I + 1).name;
caller_file = ST(I + 1).file;
% NOTE(review): relies on which('-subfun', ...) returning a cell array with
% the names of the functions defined in caller_file -- confirm.
subFcns = which('-subfun', caller_file);

% At most one setup subfunction is allowed; [] means "no setup".
setup_fcn_name = subFcns(xunit.utils.isSetUpString(subFcns));
if numel(setup_fcn_name) > 1
    error('findSubfunctionTests:tooManySetupFcns', ...
        'Found more than one setup subfunction.')
elseif isempty(setup_fcn_name)
    setup_fcn = [];
else
    setup_fcn = str2func(setup_fcn_name{1});
end

% Same rule for the teardown subfunction.
teardown_fcn_name = subFcns(xunit.utils.isTearDownString(subFcns));
if numel(teardown_fcn_name) > 1
    error('findSubfunctionTests:tooManyTeardownFcns', ...
        'Found more than one teardown subfunction.')
elseif isempty(teardown_fcn_name)
    teardown_fcn = [];
else
    teardown_fcn = str2func(teardown_fcn_name{1});
end

% Turn every subfunction recognized as a test into a function handle.
test_fcns = cellfun(@str2func, subFcns(xunit.utils.isTestString(subFcns)), ...
    'UniformOutput', false);

% One FunctionHandleTestCase per test, all sharing the same setup/teardown.
suite = TestSuite;
suite.Name = caller_name;
suite.Location = which(caller_file);
for k = 1:numel(test_fcns)
    suite.add(FunctionHandleTestCase(test_fcns{k}, setup_fcn, teardown_fcn));
end

% A script shares the workspace of the function that invoked it, so nargout
% here is that function's nargout and test_suite is its output variable:
% return the suite when an output was requested, otherwise run it.
if nargout > 0
    test_suite = suite;
else
    suite.run();
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/matlab_xunit/xunit/runtests.m
================================================
function out = runtests(varargin)
%runtests Run unit tests
%   runtests runs all the test cases that can be found in the current directory
%   and summarizes the results in the Command Window.
%
%   Test cases can be found in the following places in the current directory:
%
%       * An M-file function whose name starts or ends with "test" or
%         "Test" and that returns no output arguments.
%
%       * An M-file function whose name starts or ends with "test" or
%         "Test" and that contains subfunction tests and uses the
%         initTestSuite script to return a TestSuite object.
%
%       * An M-file defining a subclass of TestCase.
%
%   runtests(dirname) runs all the test cases found in the specified directory.
%
%   runtests(packagename) runs all the test cases found in the specified
%   package. (This option requires R2009a or later).
%
%   runtests(mfilename) runs test cases found in the specified function or class
%   name. The function or class needs to be in the current directory or on the
%   MATLAB path.
%
%   runtests('mfilename:testname') runs the specific test case named 'testname'
%   found in the function or class 'mfilename'.
%
%   Several directories or file names can be given at once, either as separate
%   arguments, runtests(name1, name2, ...), or collected in a cell array,
%   runtests({name1, name2, ...}, ...).
%
%   runtests(..., '-verbose') displays the name, result, and time taken for
%   each test case to the Command Window.
%
%   runtests(..., '-logfile', filename) directs the output of runtests to
%   the specified log file instead of to the Command Window.
%
%   out = runtests(...) returns a logical value that is true if all the
%   tests passed.
%
%   Examples
%   --------
%   Find and run all the test cases in the current directory.
%
%       runtests
%
%   Find and run all the test cases in the current directory. Display more
%   detailed information to the Command Window as the test cases are run.
%
%       runtests -verbose
%
%   Save verbose runtests output to a log file.
%
%       runtests -verbose -logfile my_test_log.txt
%
%   Find and run all the test cases contained in the M-file myfunc.
%
%       runtests myfunc
%
%   Find and run all the test cases contained in the TestCase subclass
%   MyTestCase.
%
%       runtests MyTestCase
%
%   Run the test case named 'testFeature' contained in the M-file myfunc.
%
%       runtests myfunc:testFeature
%
%   Run all the tests in a specific directory.
%
%       runtests c:\Work\MyProject\tests
%
%   Run all the tests in two directories.
%
%       runtests c:\Work\MyProject\tests c:\Work\Book\tests

%   Steven L. Eddins
%   Copyright 2009-2010 The MathWorks, Inc.

% Separate test names from option flags; with no arguments everything keeps
% its default.
name_list = {};
verbose = false;
logfile = '';
if nargin >= 1
    [name_list, verbose, logfile] = getInputNames(varargin{:});
end

% No names: current directory.  One name: that suite.  Several names: a
% wrapper suite holding one child suite per name.
switch numel(name_list)
    case 0
        suite = TestSuite.fromPwd();
    case 1
        suite = TestSuite.fromName(name_list{1});
    otherwise
        suite = TestSuite();
        for idx = 1:numel(name_list)
            suite.add(TestSuite.fromName(name_list{idx}));
        end
end

if isempty(suite.TestComponents)
    error('xunit:runtests:noTestCasesFound', 'No test cases found.');
end

% File identifier 1 writes to the Command Window.
logfile_handle = 1;
if ~isempty(logfile)
    logfile_handle = fopen(logfile, 'w');
    if logfile_handle < 0
        error('xunit:runtests:FileOpenFailed', ...
            'Could not open "%s" for writing.', logfile);
    end
    % Close the log file whenever this function is left.
    close_log = onCleanup(@() fclose(logfile_handle));
end

% Report header: suite name, its location when that differs from the name,
% and a time stamp.
fprintf(logfile_handle, 'Test suite: %s\n', suite.Name);
if ~strcmp(suite.Name, suite.Location)
    fprintf(logfile_handle, 'Test suite location: %s\n', suite.Location);
end
fprintf(logfile_handle, '%s\n\n', datestr(now));

if verbose
    monitor = VerboseTestRunDisplay(logfile_handle);
else
    monitor = TestRunDisplay(logfile_handle);
end
did_pass = suite.run(monitor);

% Assign the output only on request so a plain "runtests" prints no ans.
if nargout > 0
    out = did_pass;
end

function [name_list, verbose, logfile] = getInputNames(varargin)
%getInputNames Split the runtests arguments into test names and options
%   [name_list, verbose, logfile] = getInputNames(arg1, arg2, ...)
%
%   name_list   1-by-N cell array with every test name, in argument order.
%               A cell array argument contributes all of its elements.
%   verbose     true if '-verbose' was given, false otherwise.
%   logfile     the argument following '-logfile', or '' if not given.
%
%   Any other argument starting with '-' triggers the warning
%   'runtests:unrecognizedOption' and is ignored.  '-logfile' as the last
%   argument raises 'xunit:runtests:MissingLogfile'.
name_list = {};
verbose = false;
logfile = '';
k = 1;
while k <= numel(varargin)
    arg = varargin{k};
    if iscell(arg)
        % Append as a row.  The previous vertical concatenation failed with
        % a dimension mismatch whenever the cell array and the names
        % collected so far had different column counts, e.g.
        % runtests('a', {'b', 'c'}).
        name_list = [name_list, reshape(arg, 1, [])];
    elseif ~isempty(arg) && (arg(1) == '-')
        if strcmp(arg, '-verbose')
            verbose = true;
        elseif strcmp(arg, '-logfile')
            if k == numel(varargin)
                error('xunit:runtests:MissingLogfile', ...
                    'The option -logfile must be followed by a filename.');
            else
                % Consume the file name together with the flag.
                logfile = varargin{k+1};
                k = k + 1;
            end
        else
            warning('runtests:unrecognizedOption', 'Unrecognized option: %s', arg);
        end
    else
        name_list{end+1} = arg;
    end
    k = k + 1;
end
    

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/octave/assertEqual.m
================================================
function [] = assertEqual(a, b)
  % Report a failed test (by printing a line) unless a and b are equal.
  %
  % isequal gives one true/false answer for inputs of any size.  The earlier
  % element-wise "a != b" under "if" only fired when EVERY element differed,
  % so partly different arrays passed silently, and inputs of different
  % sizes raised an error instead of reporting a failure.
  if (~isequal(a, b))
    testFailed;
  end

function [] = testFailed()
  % Print "FAILED: <name>", where <name> is the function that called
  % assertEqual.  dbstack(2) drops the two innermost frames (this function
  % and assertEqual), leaving that caller in ST(1).
  [ST, I] = dbstack(2);
  if (isempty(ST))
    % No enclosing function, e.g. when called from the command line.
    caller = '(unknown)';
  else
    caller = ST(1).name;
  end
  % Bracket concatenation keeps the space after the colon; strcat removes
  % trailing whitespace from character arguments.
  disp(['FAILED: ', caller]);


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/octave/assertVectorsAlmostEqual.m
================================================
function [] = assertVectorsAlmostEqual(a, b, comparetype, tolerance)
  % Report a failed test (by printing) unless a and b have the same size and
  % every element of a is within "tolerance" of the matching element of b.
  %
  % comparetype is accepted so calls look like the matlab_xunit function of
  % the same name, but it is not used: the comparison is always element-wise
  % and absolute.
  if (~isequal(size(a), size(b)))
    % Checked first: subtracting arrays of different sizes would raise an
    % error (or broadcast) before any failure could be reported.
    testFailed(a, b);
  else
    deviation = abs(reshape(a - b, [], 1));
    % Written as ~(x <= tol) so that NaN deviations count as failures;
    % max() skips NaN values and used to let them pass.
    if (any(~(deviation <= tolerance)))
      testFailed(a, b);
    end
  end

function [] = testFailed(a, b)
  % Print "FAILED: <name>" followed by both inputs, where <name> is the
  % function that called assertVectorsAlmostEqual.  dbstack(2) drops the two
  % innermost frames (this function and the assertion).
  [ST, I] = dbstack(2);
  if (isempty(ST))
    % No enclosing function, e.g. when called from the command line.
    caller = '(unknown)';
  else
    caller = ST(1).name;
  end
  % Bracket concatenation keeps the space after the colon; strcat removes
  % trailing whitespace from character arguments.
  disp(['FAILED: ', caller]);
  disp(a)
  disp('--')
  disp(b)


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/octave/runtests.m
================================================
% Octave test driver: runs every test file in this directory, one after
% another.  Each test function prints its own progress and failures.
%
% The relative directory ../../bin is put at the front of the search path
% first, so this script has to be started from the directory it lives in.
% NOTE(review): ../../bin presumably holds the toolbox functions under test
% -- confirm.
path('../../bin', path)
test_mdwt
test_midwt
test_mirdwt
test_mrdwt
test_makesig
test_denoise
test_setopt


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/octave/test_denoise.m
================================================
function test_denoise
  % Print the group name, then run every denoise test case of this file in
  % a fixed order.
  disp("denoise")
  cases = {@test_denoise_default, ...
           @test_denoise_2d, ...
           @test_denoise_threshold_low, ...
           @test_denoise_thresh_multiplier, ...
           @test_denoise_std, ...
           @test_denoise_hard, ...
           @test_denoise_levels, ...
           @test_denoise_actual_thresh};
  for k = 1:numel(cases)
    feval(cases{k});
  end
  
function test_denoise_default
  % denoise called without an option vector on makesig('Doppler', 32) plus a
  % fixed disturbance; the result must match the stored reference values.
  disturbance = [1.54421189550395 0.0859311331754255 -1.49159031063761 -0.742301837259857 -1.06158173331999 2.35045722400204 -0.615601881466894 0.748076783703985 -0.192418510588264 0.888610425420721 -0.764849236567874 -1.40226896933876 -1.42237592509150 0.488193909859941 -0.177375156618825 -0.196053487807333 1.41931015064255 0.291584373984183 0.197811053464361 1.58769908997406 -0.804465956349547 0.696624415849607 0.835088165072682 -0.243715140377952 0.215670086403744 -1.16584393148205 -1.14795277889859 0.104874716016494 0.722254032225002 2.58549125261624 -0.666890670701386 0.187331024578940];
  expected = [0.0741827688375062 0.0791701902526268 0.0760842615272340 0.0750476831774179 0.111279774779568 0.163475053283544 -0.0498263815350539 0.0946073088237311 0.135126562486911 -0.0186090620958193 -0.0748812479991294 -0.103470206059426 0.0234254843251780 0.239772540836257 0.0920583398962312 -0.152180640366891 -0.116682073306156 -0.0459389850762785 -0.00245240039778375 0.0755739164104836 0.102548333512214 0.121099911744184 0.177390507921620 0.240386041553093 0.231105933317157 0.198210924493273 0.175672812990725 0.138822049613034 0.127491615387826 0.121409597186325 0.0994935320130783 0.0760019340865427];
  noisy = makesig('Doppler', 32) + disturbance / 10;
  filt = daubcqf(6);
  % All three outputs are requested, as before; only the first is checked.
  [actual, removed, used_options] = denoise(noisy, filt);
  assertVectorsAlmostEqual(actual, expected, 'relative', 0.0001);

function test_denoise_2d
  % denoise called without an option vector on a 4-by-4 matrix; the result
  % must match the stored reference matrix.
  image_in = [1 2 3 4; 5 6 7 8 ; 9 10.09 11 12; 13 13.91 15 16];
  expected = [1.093495801587334   2.052784169768518   3.036985129109070   4.014510779767102;  5.037416383975946   6.006178652683398   6.994963120759174   7.978382656683513;  9.047593546684929  10.003998510025589  10.977825887256145  11.94698494275469; 13.009489364401729  13.937038667522501  14.939852728547271  15.9224996584731398];
  filt = daubcqf(4);
  % All three outputs are requested, as before; only the first is checked.
  [actual, removed, used_options] = denoise(image_in, filt);
  assertVectorsAlmostEqual(actual, expected, 'relative', 0.0001);

function test_denoise_threshold_low
  % denoise with third argument 0 and option vector [1 3.0 0 2 0 0] on
  % makesig('Doppler', 32) plus a fixed disturbance; the result must match
  % the stored reference values.
  disturbance = [1.54421189550395 0.0859311331754255 -1.49159031063761 -0.742301837259857 -1.06158173331999 2.35045722400204 -0.615601881466894 0.748076783703985 -0.192418510588264 0.888610425420721 -0.764849236567874 -1.40226896933876 -1.42237592509150 0.488193909859941 -0.177375156618825 -0.196053487807333 1.41931015064255 0.291584373984183 0.197811053464361 1.58769908997406 -0.804465956349547 0.696624415849607 0.835088165072682 -0.243715140377952 0.215670086403744 -1.16584393148205 -1.14795277889859 0.104874716016494 0.722254032225002 2.58549125261624 -0.666890670701386 0.187331024578940];
  expected = [0.0187742354278351 0.0237616568429558 0.0206757281175629 0.0196391497677469 0.0558712413698966 0.108066519873873 -0.105234914944725 0.0391987754140600 0.0797180290772401 -0.0740175955054904 -0.130289781408801 -0.158878739469097 -0.0319830490844931 0.184364007426586 0.0366498064865601 -0.207589173776562 -0.172090606715827 -0.101347518485950 -0.0578609338074549 0.0201653830008125 0.0471398001025425 0.0656913783345127 0.121981974511949 0.184977508143422 0.175697399907486 0.142802391083602 0.120264279581054 0.0834135162033633 0.0720830819781554 0.0660010637766539 0.0440849986034073 0.0205934006768717];
  noisy = makesig('Doppler', 32) + disturbance / 10;
  filt = daubcqf(6);
  % All three outputs are requested, as before; only the first is checked.
  [actual, removed, used_options] = denoise(noisy, filt, 0, [1 3.0 0 2 0 0]);
  assertVectorsAlmostEqual(actual, expected, 'relative', 0.0001);

function test_denoise_thresh_multiplier
  % denoise with third argument 0 and option vector [1 3.5 0 2 0 0] on
  % makesig('Doppler', 32) plus a fixed disturbance; the result must match
  % the stored reference values.
  disturbance = [1.54421189550395 0.0859311331754255 -1.49159031063761 -0.742301837259857 -1.06158173331999 2.35045722400204 -0.615601881466894 0.748076783703985 -0.192418510588264 0.888610425420721 -0.764849236567874 -1.40226896933876 -1.42237592509150 0.488193909859941 -0.177375156618825 -0.196053487807333 1.41931015064255 0.291584373984183 0.197811053464361 1.58769908997406 -0.804465956349547 0.696624415849607 0.835088165072682 -0.243715140377952 0.215670086403744 -1.16584393148205 -1.14795277889859 0.104874716016494 0.722254032225002 2.58549125261624 -0.666890670701386 0.187331024578940];
  expected = [0.00563527074803461 0.0110853052404048 0.0101590193471916 0.0116789518546074 0.0354625658443208 0.0691904606426981 -0.0647010252187970 0.0393485097012034 0.0302297746478269 -0.0658230296401878 -0.0947938063374137 -0.147943151851009 -0.0355607514547514 0.143027827800490 0.0126752977970079 -0.200577663821584 -0.149059259007655 -0.0564432101940217 -0.0281365070661950 0.0201021371871464 0.0438412772787373 0.0596866399869512 0.0967101937989458 0.136451641917565 0.130716307107088 0.109146914388131 0.0925200849653435 0.0657607417363412 0.0550584910898860 0.0469636231448182 0.0277268486177313 0.00667135407398081];
  noisy = makesig('Doppler', 32) + disturbance / 10;
  filt = daubcqf(6);
  % All three outputs are requested, as before; only the first is checked.
  [actual, removed, used_options] = denoise(noisy, filt, 0, [1 3.5 0 2 0 0]);
  assertVectorsAlmostEqual(actual, expected, 'relative', 0.0001);

function test_denoise_std
  % denoise with third argument 0 and option vector [0 3.0 1 2 0 0] on
  % makesig('Doppler', 32) plus a fixed disturbance; the result must match
  % the stored reference values.
  disturbance = [1.54421189550395 0.0859311331754255 -1.49159031063761 -0.742301837259857 -1.06158173331999 2.35045722400204 -0.615601881466894 0.748076783703985 -0.192418510588264 0.888610425420721 -0.764849236567874 -1.40226896933876 -1.42237592509150 0.488193909859941 -0.177375156618825 -0.196053487807333 1.41931015064255 0.291584373984183 0.197811053464361 1.58769908997406 -0.804465956349547 0.696624415849607 0.835088165072682 -0.243715140377952 0.215670086403744 -1.16584393148205 -1.14795277889859 0.104874716016494 0.722254032225002 2.58549125261624 -0.666890670701386 0.187331024578940];
  expected = [0.0686926069658060 0.0706216045196474 0.0719769032529757 0.0743568305131058 0.0754251996534692 0.0763549103855611 0.0783972750744446 0.0807092136475563 0.0763109954998047 0.0693017683604205 0.0628697537191382 0.0547492531677562 0.0755519478401559 0.107931256046656 0.0859959791464885 0.0494376118339224 0.0602059364595448 0.0785077229738383 0.0791999606842265 0.0809410605777517 0.0844652184548917 0.0873749084881920 0.0911535278085727 0.0952027332951270 0.0936316016468421 0.0898878427420561 0.0866734185917041 0.0820709685744921 0.0793481432323076 0.0768306965269240 0.0727995727792393 0.0684196591566048];
  noisy = makesig('Doppler', 32) + disturbance / 10;
  filt = daubcqf(6);
  % All three outputs are requested, as before; only the first is checked.
  [actual, removed, used_options] = denoise(noisy, filt, 0, [0 3.0 1 2 0 0]);
  assertVectorsAlmostEqual(actual, expected, 'relative', 0.0001);

function test_denoise_hard
  % denoise with third argument 0 and option vector [0 3.0 0 1 0 0] on
  % makesig('Doppler', 32) plus a fixed disturbance; the result must match
  % the stored reference values.
  disturbance = [1.54421189550395 0.0859311331754255 -1.49159031063761 -0.742301837259857 -1.06158173331999 2.35045722400204 -0.615601881466894 0.748076783703985 -0.192418510588264 0.888610425420721 -0.764849236567874 -1.40226896933876 -1.42237592509150 0.488193909859941 -0.177375156618825 -0.196053487807333 1.41931015064255 0.291584373984183 0.197811053464361 1.58769908997406 -0.804465956349547 0.696624415849607 0.835088165072682 -0.243715140377952 0.215670086403744 -1.16584393148205 -1.14795277889859 0.104874716016494 0.722254032225002 2.58549125261624 -0.666890670701386 0.187331024578940];
  expected = [0.0977394160103721 0.0994161560983385 0.0832447407807381 0.0666983311697188 0.177420971595413 0.340230583897110 -0.354597069671295 0.0250017872275015 0.394418485343238 -0.0595745304374512 -0.452401570793399 -0.175707560852101 -0.00622320325130765 0.437867065411816 0.187485346584306 -0.241060664687049 -0.306285896120773 -0.373946536466370 -0.246165924475657 0.00210496326791051 0.0528629966064817 0.0967383656953347 0.275410693617439 0.487298926169970 0.454985253718689 0.348603331393631 0.288205743942248 0.186806596496260 0.172147260405660 0.180050851714681 0.142136445826288 0.104484725401481];
  noisy = makesig('Doppler', 32) + disturbance / 10;
  filt = daubcqf(6);
  % All three outputs are requested, as before; only the first is checked.
  [actual, removed, used_options] = denoise(noisy, filt, 0, [0 3.0 0 1 0 0]);
  assertVectorsAlmostEqual(actual, expected, 'relative', 0.0001);

function test_denoise_levels
  % denoise with third argument 0 and option vector [0 3.0 0 2 4 0] on
  % makesig('Doppler', 32) plus a fixed disturbance; the result must match
  % the stored reference values.
  disturbance = [1.54421189550395 0.0859311331754255 -1.49159031063761 -0.742301837259857 -1.06158173331999 2.35045722400204 -0.615601881466894 0.748076783703985 -0.192418510588264 0.888610425420721 -0.764849236567874 -1.40226896933876 -1.42237592509150 0.488193909859941 -0.177375156618825 -0.196053487807333 1.41931015064255 0.291584373984183 0.197811053464361 1.58769908997406 -0.804465956349547 0.696624415849607 0.835088165072682 -0.243715140377952 0.215670086403744 -1.16584393148205 -1.14795277889859 0.104874716016494 0.722254032225002 2.58549125261624 -0.666890670701386 0.187331024578940];
  expected = [0.164259992817262 0.156379071218712 0.142212685671703 0.125038963573761 0.150297815252073 0.191536767978636 -0.0381639580765735 0.0881092032192094 0.119629284458486 -0.0406090725365491 -0.105645426731493 -0.141820831994602 -0.0280318977202704 0.173171960129832 0.0117537437282443 -0.247115729957293 -0.206759297285911 -0.123147866042363 -0.0685808245422524 0.0255826360141400 0.0635302930397082 0.0930381970490923 0.165728084463140 0.246884147157615 0.246603211345582 0.220210934934003 0.206436991723089 0.177172675548210 0.178948997433275 0.188010177892750 0.179798128181065 0.170937023676945];
  noisy = makesig('Doppler', 32) + disturbance / 10;
  filt = daubcqf(6);
  % All three outputs are requested, as before; only the first is checked.
  [actual, removed, used_options] = denoise(noisy, filt, 0, [0 3.0 0 2 4 0]);
  assertVectorsAlmostEqual(actual, expected, 'relative', 0.0001);

function test_denoise_actual_thresh
  % denoise with third argument 0 and option vector [0 3.0 0 2 0 0.5] on
  % makesig('Doppler', 32) plus a fixed disturbance; the result must match
  % the stored reference values.
  disturbance = [1.54421189550395 0.0859311331754255 -1.49159031063761 -0.742301837259857 -1.06158173331999 2.35045722400204 -0.615601881466894 0.748076783703985 -0.192418510588264 0.888610425420721 -0.764849236567874 -1.40226896933876 -1.42237592509150 0.488193909859941 -0.177375156618825 -0.196053487807333 1.41931015064255 0.291584373984183 0.197811053464361 1.58769908997406 -0.804465956349547 0.696624415849607 0.835088165072682 -0.243715140377952 0.215670086403744 -1.16584393148205 -1.14795277889859 0.104874716016494 0.722254032225002 2.58549125261624 -0.666890670701386 0.187331024578940];
  expected = [0.0607099183942295 0.0654351521193524 0.0684154759800610 0.0742018934148454 0.0758845005390013 0.0769511530643110 0.0810856606730252 0.0858023375316036 0.0704706443350518 0.0472060906047587 0.0254329679518446 -0.00154590940405266 0.0598455182579352 0.156556707841878 0.0864272987162393 -0.0287835335280487 0.00606017120154721 0.0659592575432934 0.0713958080495586 0.0812891735076492 0.0953701981347179 0.107554576791239 0.123739146895592 0.141180422640726 0.137085044622601 0.124838366760086 0.114852957437233 0.0997294000571788 0.0922174665178409 0.0857758976557685 0.0737052631031342 0.0605470542090229];
  noisy = makesig('Doppler', 32) + disturbance / 10;
  filt = daubcqf(6);
  % All three outputs are requested, as before; only the first is checked.
  [actual, removed, used_options] = denoise(noisy, filt, 0, [0 3.0 0 2 0 0.5]);
  assertVectorsAlmostEqual(actual, expected, 'relative', 0.0001);


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/octave/test_makesig.m
================================================
function test_suite = test_makesig
  % Print the group name, then run every makesig test case of this file in
  % a fixed order.  The declared output test_suite is never assigned, so
  % this function has to be called without requesting an output.
  disp("makesig")
  cases = {@test_makesig_heavisine, ...
           @test_makesig_bumps, ...
           @test_makesig_blocks, ...
           @test_makesig_doppler, ...
           @test_makesig_ramp, ...
           @test_makesig_cusp, ...
           @test_makesig_sing, ...
           @test_makesig_hisine, ...
           @test_makesig_losine, ...
           @test_makesig_linchirp, ...
           @test_makesig_twochirp, ...
           @test_makesig_quadchirp, ...
           @test_makesig_mishmash, ...
           @test_makesig_wernersorrows, ...
           @test_makesig_leopold};
  for k = 1:numel(cases)
    feval(cases{k});
  end

function test_makesig_heavisine
  % makesig('HeaviSine', 8) must reproduce the stored reference samples.
  expected = [4.0000    0.0000   -6.0000   -2.0000    2.0000    0.0000   -4.0000   -0.0000];
  actual = makesig('HeaviSine', 8);
  assertVectorsAlmostEqual(actual, expected, 'relative', 0.0001);

function test_makesig_bumps
  % makesig('Bumps', 8) must reproduce the stored reference samples.
  expected = [0.3206    5.0527    0.3727    0.0129    0.0295    0.0489    0.0004    0.0000];
  actual = makesig('Bumps', 8);
  assertVectorsAlmostEqual(actual, expected, 'relative', 0.0001);

function test_makesig_blocks
  % makesig('Blocks', 8) must reproduce the stored reference samples.
  expected = [4.0000    0.5000    3.0000    0.9000    0.9000    5.2000   -0.0000   -0.0000];
  actual = makesig('Blocks', 8);
  assertVectorsAlmostEqual(actual, expected, 'relative', 0.0001);

function test_makesig_doppler
  % makesig('Doppler', 12) must reproduce the stored reference samples.
  expected = [-0.1954 -0.3067 0.0000 -0.4703 0.4930 -0.2703 -0.4127 0.1025 0.4001 0.3454 0.1425 0];
  actual = makesig('Doppler', 12);
  assertVectorsAlmostEqual(actual, expected, 'relative', 0.0001);

function test_makesig_ramp
  % makesig('Ramp', 8) must reproduce the stored reference samples.
  expected = [0.1250    0.2500   -0.6250   -0.5000   -0.3750   -0.2500   -0.1250         0];
  actual = makesig('Ramp', 8);
  assertVectorsAlmostEqual(actual, expected, 'relative', 0.0001);

function test_makesig_cusp
  % makesig('Cusp', 8) must reproduce the stored reference samples.
  expected = [0.4950    0.3464    0.0707    0.3606    0.5050    0.6164    0.7106    0.7937];
  actual = makesig('Cusp', 8);
  assertVectorsAlmostEqual(actual, expected, 'relative', 0.0001);

function test_makesig_sing
  % makesig('Sing', 8) must reproduce the stored reference samples.
  expected = [5.3333   16.0000   16.0000    5.3333    3.2000    2.2857    1.7778    1.4545];
  actual = makesig('Sing', 8);
  assertVectorsAlmostEqual(actual, expected, 'relative', 0.0001);

function test_makesig_hisine
  % makesig('HiSine', 8) must reproduce the stored reference samples.
  expected = [0.8267   -0.9302    0.2200    0.6827   -0.9882    0.4292    0.5053   -0.9977];
  actual = makesig('HiSine', 8);
  assertVectorsAlmostEqual(actual, expected, 'relative', 0.0001);

function test_makesig_losine
  % makesig('LoSine', 8) must reproduce the stored reference samples.
  expected = [0.8660    0.8661    0.0003   -0.8658   -0.8663   -0.0006    0.8657    0.8664];
  actual = makesig('LoSine', 8);
  assertVectorsAlmostEqual(actual, expected, 'relative', 0.0001);

function test_makesig_linchirp
  % makesig('LinChirp', 8) must reproduce the stored reference samples.
  expected = [0.0491    0.1951    0.4276    0.7071    0.9415    0.9808    0.6716    0.0000];
  actual = makesig('LinChirp', 8);
  assertVectorsAlmostEqual(actual, expected, 'relative', 0.0001);

function test_makesig_twochirp
  % makesig('TwoChirp', 8) must reproduce the stored reference samples.
  expected = [0.5132    1.5000    0.5412    0.8660   -0.5132         0    0.5132    0.8660];
  actual = makesig('TwoChirp', 8);
  assertVectorsAlmostEqual(actual, expected, 'relative', 0.0001);

function test_makesig_quadchirp
  % The 8-sample 'QuadChirp' signal must match the stored reference values.
  expected = [0.0164, 0.1305, 0.4276, 0.8660, 0.8895, -0.3827, -0.6217, 0.8660];
  actual = makesig('QuadChirp', 8);
  assertVectorsAlmostEqual(actual, expected, 'relative', 0.0001);

function test_makesig_mishmash
  % The 8-sample 'MishMash' signal must match the stored reference values.
  expected = [0.8922, -0.6046, 1.0751, 2.2558, 0.8429, 1.0273, 0.5551, -0.1317];
  actual = makesig('MishMash', 8);
  assertVectorsAlmostEqual(actual, expected, 'relative', 0.0001);

function test_makesig_wernersorrows
  % The 8-sample 'WernerSorrows' signal must match the stored reference values.
  expected = [1.5545, 5.3175, 0.8252, 1.6956, -1.2678, 0.6466, 1.7332, -0.9977];
  actual = makesig('WernerSorrows', 8);
  assertVectorsAlmostEqual(actual, expected, 'relative', 0.0001);

function test_makesig_leopold
  % The 8-sample 'Leopold' signal is expected to be a single 1 in the second
  % position and zeros elsewhere.
  expected = [0, 1, 0, 0, 0, 0, 0, 0];
  actual = makesig('Leopold', 8);
  assertVectorsAlmostEqual(actual, expected, 'relative', 0.0001);


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/octave/test_mdwt.m
================================================
function test_suite = test_mdwt
  % Octave entry point for the mdwt tests: print the suite label, then run
  % each test subfunction in order.
  % test_suite is never assigned here, so call this without requesting an
  % output.
  disp("mdwt");
  test_mdwt_1D();
  test_mdwt_2D();
  test_mdwt_compute_L1();
  test_mdwt_compute_L2();
  test_mdwt_compute_L3();

function test_mdwt_1D
  % Forward DWT of an 8-sample LinChirp with an explicitly requested level
  % count; both the coefficients and the returned level count are checked.
  signal = makesig('LinChirp', 8);
  filt = daubcqf(4, 'min');
  % The level count is passed explicitly as 2. (When it is omitted,
  % test_mdwt_compute_L3 expects mdwt to report L = 3 for 8 samples.)
  requested_levels = 2;
  [coeffs, levels] = mdwt(signal, filt, requested_levels);
  expected_coeffs = [1.1097, 0.8767, 0.8204, -0.5201, -0.0339, 0.1001, 0.2201, -0.1401];
  expected_levels = 2;
  assertVectorsAlmostEqual(coeffs, expected_coeffs, 'relative', 0.001);
  assertEqual(levels, expected_levels);

function test_mdwt_2D
  % Forward DWT of a 4x4 matrix using the default level count.
  input_matrix = [ 1,  2,  3,  4;
                   5,  6,  7,  8;
                   9, 10, 11, 12;
                  13, 14, 15, 16];
  filt = daubcqf(4);
  coeffs = mdwt(input_matrix, filt);
  expected = [ 34.0000, -3.4641,  0.0000, -2.0000;
              -13.8564,  0.0000,  0.0000, -2.0000;
               -0.0000,  0.0000, -0.0000, -0.0000;
               -8.0000, -8.0000,  0.0000, -0.0000];
  assertVectorsAlmostEqual(coeffs, expected, 'relative', 0.001);

function test_mdwt_compute_L1
  % With no level argument, a 2-sample input must yield L = 1.
  filt_input = [1, 2];
  filt = daubcqf(4, 'min');
  [coeffs, levels] = mdwt(filt_input, filt);  % coeffs is not inspected
  assertEqual(levels, 1);

function test_mdwt_compute_L2
  % With no level argument, a 4-sample input must yield L = 2.
  filt_input = [1, 2, 3, 4];
  filt = daubcqf(4, 'min');
  [coeffs, levels] = mdwt(filt_input, filt);  % coeffs is not inspected
  assertEqual(levels, 2);

function test_mdwt_compute_L3
  % With no level argument, an 8-sample input must yield L = 3.
  filt_input = [1, 2, 3, 4, 5, 6, 7, 8];
  filt = daubcqf(4, 'min');
  [coeffs, levels] = mdwt(filt_input, filt);  % coeffs is not inspected
  assertEqual(levels, 3);


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/octave/test_midwt.m
================================================
function test_suite = test_midwt
  % Octave entry point for the midwt tests: print the suite label, then run
  % each test subfunction in order.
  % test_suite is never assigned here, so call this without requesting an
  % output.
  disp("midwt");
  test_midwt_1D();
  test_midwt_2D();

function test_midwt_1D
  % Round trip: mdwt followed by midwt must reproduce the 8-sample LinChirp.
  original = makesig('LinChirp', 8);
  filt = daubcqf(4, 'min');
  levels = 2;
  [coeffs, levels] = mdwt(original, filt, levels);
  % The level count reported by mdwt is what gets handed to midwt.
  [restored, levels] = midwt(coeffs, filt, levels);
  assertVectorsAlmostEqual(original, restored, 'relative', 0.0001);

function test_midwt_2D
  % Round trip on the lena512 image with default level counts: midwt must
  % report L = 9 and reproduce the input.
  % The data file is addressed relative to the current directory.
  load('../lena512');
  image_in = lena512;
  filt = daubcqf(6);
  [coeffs, levels] = mdwt(image_in, filt);
  [image_out, levels] = midwt(coeffs, filt);
  assertEqual(levels, 9);
  assertVectorsAlmostEqual(image_in, image_out, 'relative', 0.0001);


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/octave/test_mirdwt.m
================================================
function test_suite = test_mirdwt
  % Octave entry point for the mirdwt tests: print the suite label, then run
  % each test subfunction in order.
  % test_suite is never assigned here, so call this without requesting an
  % output.
  % The label used to read "mrdwt", the same text test_mrdwt prints, which
  % made the two suites indistinguishable in the output.
  disp("mirdwt")
  test_mirdwt_1
  test_mirdwt_2

function test_mirdwt_1
  % Round trip: mrdwt followed by mirdwt on the 8-sample Leopold signal must
  % preserve both the level count and the signal.
  original = makesig('Leopold', 8);
  filt = daubcqf(4, 'min');
  requested_levels = 1;
  [lowpass, highpass, levels] = mrdwt(original, filt, requested_levels);
  [restored, levels] = mirdwt(lowpass, highpass, filt, levels);

  assertEqual(levels, requested_levels);
  assertVectorsAlmostEqual(restored, original, 'relative', 0.0001);

function test_mirdwt_2
  % Round trip on the lena512 image with default level counts: both
  % transforms must report L = 9 and the image must be reproduced.
  % The data file is addressed relative to the current directory.
  load('../lena512');
  image_in = lena512;
  filt = daubcqf(6);
  [lowpass, highpass, levels] = mrdwt(image_in, filt);
  assertEqual(levels, 9);   % checked before the inverse overwrites levels
  [image_out, levels] = mirdwt(lowpass, highpass, filt);
  assertEqual(levels, 9);
  assertVectorsAlmostEqual(image_in, image_out, 'relative', 0.0001);


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/octave/test_mrdwt.m
================================================
function test_suite = test_mrdwt
  % Octave entry point for the mrdwt tests: print the suite label, then run
  % each test subfunction in order.
  % test_suite is never assigned here, so call this without requesting an
  % output.
  disp("mrdwt");
  test_mrdwt_1();
  test_mrdwt_2();
  test_mrdwt_2L2();
  test_mrdwt_2L2

function test_mrdwt_1
  % One-level mrdwt of the 8-sample Leopold signal: check the low-pass and
  % high-pass outputs and the returned level count.
  signal = makesig('Leopold', 8);
  filt = daubcqf(4, 'min');
  levels = 1;
  [lowpass, highpass, levels] = mrdwt(signal, filt, levels);
  expected_low  = [0.8365, 0.4830, 0, 0, 0, 0, -0.1294, 0.2241];
  expected_high = [-0.2241, -0.1294, 0, 0, 0, 0, -0.4830, 0.8365];
  expected_levels = 1;
  assertVectorsAlmostEqual(lowpass, expected_low, 'relative', 0.001);
  assertVectorsAlmostEqual(highpass, expected_high, 'relative', 0.001);
  assertEqual(levels, expected_levels);

function test_mrdwt_2
  % One-level mrdwt of a 4x4 matrix: the low-pass output is 4x4 and the
  % high-pass output is 4x12 in the reference data.
  input_matrix = [1, 3, 5, 2;
                  3, 4, 8, 1;
                  3, 9, 2, 0;
                  1, 2, 3, 0];
  filt = daubcqf(4, 'min');
  [lowpass, highpass, levels] = mrdwt(input_matrix, filt, 1);  % levels is not inspected
  expected_low = [
       9.0111,  10.7799,   5.8795,   4.1107;
      11.1393,   8.7766,   2.5502,   4.9130;
       6.9465,   5.7578,   1.6630,   2.8517;
       4.8182,   7.7611,   4.9922,   2.0494];
  expected_high = [
       4.5724,   0.4285,  -1.8828,   2.2611,   4.8714,  -3.1026,  -1.7978,   0.0290,  -2.9620,  -1.1818,  -1.1295,   5.2733;
      -2.4441,  -2.4318,  -1.4465,  -1.4587,   1.8861,  -4.2488,  -1.9776,   4.3403,  -0.0233,   0.0356,   0.9498,  -0.9620;
      -1.7488,  -0.5870,   0.5592,  -0.6026,   1.1663,  -2.3550,  -1.7398,   2.9285,  -0.6965,   1.8583,  -0.7120,  -0.4498;
      -0.3795,   2.5903,   2.7700,  -0.1998,   4.1516,  -1.2087,  -1.5601,  -1.3828,   3.6818,  -0.7120,   0.8917,  -3.8615];
  assertVectorsAlmostEqual(lowpass, expected_low, 'relative', 0.001);
  assertVectorsAlmostEqual(highpass, expected_high, 'relative', 0.001);

function test_mrdwt_2L2
  % Two-level mrdwt of the same 4x4 matrix used by test_mrdwt_2. The
  % reference high-pass output is 4x24; it is assembled here from two 4x12
  % halves, the first of which equals the one-level reference.
  input_matrix = [1, 3, 5, 2;
                  3, 4, 8, 1;
                  3, 9, 2, 0;
                  1, 2, 3, 0];
  filt = daubcqf(4, 'min');
  [lowpass, highpass, levels] = mrdwt(input_matrix, filt, 2);  % levels is not inspected

  % Every low-pass entry is 11.75 in the reference data.
  expected_low = 11.75 * ones(4, 4);

  first_half = [
       4.5724,   0.4285,  -1.8828,   2.2611,   4.8714,  -3.1026,  -1.7978,   0.0290,  -2.9620,  -1.1818,  -1.1295,   5.2733;
      -2.4441,  -2.4318,  -1.4465,  -1.4587,   1.8861,  -4.2488,  -1.9776,   4.3403,  -0.0233,   0.0356,   0.9498,  -0.9620;
      -1.7488,  -0.5870,   0.5592,  -0.6026,   1.1663,  -2.3550,  -1.7398,   2.9285,  -0.6965,   1.8583,  -0.7120,  -0.4498;
      -0.3795,   2.5903,   2.7700,  -0.1998,   4.1516,  -1.2087,  -1.5601,  -1.3828,   3.6818,  -0.7120,   0.8917,  -3.8615];
  second_half = [
       3.1405,   3.1405,   3.1405,   3.1405,   4.2075,   4.7877,  -4.2075,  -4.7877,  -1.0760,   1.8816,   1.0760,  -1.8816;
       1.9396,   1.9396,   1.9396,   1.9396,   4.2075,   4.7877,  -4.2075,  -4.7877,   4.3816,  -0.9240,  -4.3816,   0.9240;
      -3.1405,  -3.1405,  -3.1405,  -3.1405,   4.2075,   4.7877,  -4.2075,  -4.7877,   1.0760,  -1.8816,  -1.0760,   1.8816;
      -1.9396,  -1.9396,  -1.9396,  -1.9396,   4.2075,   4.7877,  -4.2075,  -4.7877,  -4.3816,   0.9240,   4.3816,  -0.9240];
  expected_high = [first_half, second_half];

  assertVectorsAlmostEqual(lowpass, expected_low, 'relative', 0.001);
  assertVectorsAlmostEqual(highpass, expected_high, 'relative', 0.001);


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/octave/test_setopt.m
================================================
function test_suite = test_setopt
  % Octave entry point for the setopt tests: print the suite label, then run
  % each test subfunction in order.
  % test_suite is never assigned here, so call this without requesting an
  % output.
  disp("setopt");
  test_setopt_all_defaults();
  test_setopt_nonzero_becomes_zero();

function test_setopt_all_defaults
  % An empty option vector must come back as the full default vector.
  defaults = [5, 6, 7, 8];
  user_opts = [];
  merged = setopt(user_opts, defaults);
  expected = [5, 6, 7, 8];
  assertVectorsAlmostEqual(merged, expected, 'relative', 0.0001);

function test_setopt_nonzero_becomes_zero
  % A zero entry in the user options is expected to be replaced by the
  % default at that position (here the 0 in slot 2 becomes 6), and the
  % missing fourth entry is expected to be filled from the defaults.
  defaults = [5, 6, 7, 8];
  user_opts = [1, 0, 3];
  merged = setopt(user_opts, defaults);
  expected = [1, 6, 3, 8];
  % Keeping the explicit zero, i.e. [1 0 3 8], would be more intuitive, but
  % that is not what this test expects.
  assertVectorsAlmostEqual(merged, expected, 'relative', 0.0001);


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/readme
================================================
test


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/runtests.m
================================================
% Runs every rwt test file in sequence.
% Both path entries are relative, so this script presumably has to be run
% from the directory that contains it (TODO confirm).
% Append the toolbox binaries and the xunit framework to the search path.
path(path, '../bin')
path(path, 'matlab_xunit/xunit')
% Each name below is a test file invoked with no output requested.
test_mdwt
test_midwt
test_mirdwt
test_mrdwt
test_makesig
test_denoise
test_setopt
test_daubcqf


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/test_daubcqf.m
================================================
function test_suite = test_daubcqf
% Entry point for the daubcqf tests in this file.
% initTestSuite is not defined here; runtests.m adds 'matlab_xunit/xunit' to
% the path, so it presumably comes from that framework and builds test_suite
% from the test_* subfunctions below (TODO confirm).
initTestSuite;

function test_daubcqf_min
  % daubcqf(4) with no type argument: check both returned filters against
  % the stored coefficients.
  [scaling, wavelet] = daubcqf(4);
  expected_scaling = [0.482962913144534, 0.836516303737808, 0.224143868042013, -0.129409522551260];
  expected_wavelet = [0.129409522551260, 0.224143868042013, -0.836516303737808, 0.482962913144534];
  assertVectorsAlmostEqual(scaling, expected_scaling, 'relative', 0.001);
  assertVectorsAlmostEqual(wavelet, expected_wavelet, 'relative', 0.001);

function test_daubcqf_max
  % daubcqf(4, 'max'): check both returned filters against the stored
  % coefficients.
  [scaling, wavelet] = daubcqf(4, 'max');
  expected_scaling = [-0.129409522551260, 0.224143868042013, 0.836516303737808, 0.482962913144534];
  expected_wavelet = [-0.482962913144534, 0.836516303737808, -0.224143868042013, -0.129409522551260];
  assertVectorsAlmostEqual(scaling, expected_scaling, 'relative', 0.001);
  assertVectorsAlmostEqual(wavelet, expected_wavelet, 'relative', 0.001);

function test_daubcqf_mid_even_k
  % daubcqf(4, 'mid'): the reference coefficients are the same ones used in
  % test_daubcqf_min for daubcqf(4).
  [scaling, wavelet] = daubcqf(4, 'mid');
  expected_scaling = [0.482962913144534, 0.836516303737808, 0.224143868042013, -0.129409522551260];
  expected_wavelet = [0.129409522551260, 0.224143868042013, -0.836516303737808, 0.482962913144534];
  assertVectorsAlmostEqual(scaling, expected_scaling, 'relative', 0.001);
  assertVectorsAlmostEqual(wavelet, expected_wavelet, 'relative', 0.001);

function test_daubcqf_mid_odd_k
  % daubcqf(6, 'mid'): check both length-6 filters against the stored
  % coefficients.
  [scaling, wavelet] = daubcqf(6, 'mid');
  expected_scaling = [0.332670552950083, 0.806891509311093, 0.459877502118491, ...
                      -0.135011020010255, -0.085441273882027, 0.035226291885710];
  expected_wavelet = [-0.035226291885710, -0.085441273882027, 0.135011020010255, ...
                      0.459877502118491, -0.806891509311093, 0.332670552950083];
  assertVectorsAlmostEqual(scaling, expected_scaling, 'relative', 0.001);
  assertVectorsAlmostEqual(wavelet, expected_wavelet, 'relative', 0.001);

function test_daubcqf_odd
  % An odd filter length (9) must make daubcqf raise an error. The empty
  % string is passed as the expected error identifier.
  assertExceptionThrown(@() daubcqf(9), '');


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/test_denoise.m
================================================
function test_suite = test_denoise
% Entry point for the denoise tests in this file.
% initTestSuite is not defined here; runtests.m adds 'matlab_xunit/xunit' to
% the path, so it presumably comes from that framework and builds test_suite
% from the test_* subfunctions below (TODO confirm).
initTestSuite;

% We could throw an exception if someone specified zero for SoftTH vs HardTH but currently we don't

%function test_denoise_old_invalid_arg_dwt
%  signal = makesig('Doppler', 32);
%  h = daubcqf(6);
%  badarg_handle = @() denoise(signal, h, 0, [0 3.0 0 0 0 0]);
%assertExceptionThrown(badarg_handle, '');
 
%function test_denoise_old_invalid_arg_udwt
%  signal = makesig('Doppler', 32);
%  h = daubcqf(6);
%  badarg_handle = @() denoise(signal, h, 1, [0 3.6 0 0 0 0]);
%assertExceptionThrown(badarg_handle, '');
  
function test_denoise_default
  % denoise called with only the signal and the filter (no transform type,
  % no option vector) on a 32-sample Doppler signal plus fixed noise.
  clean = makesig('Doppler', 32);
  % Fixed perturbation so the test is repeatable; it is scaled by 1/10 below.
  perturbation = [1.54421189550395 0.0859311331754255 -1.49159031063761 -0.742301837259857 -1.06158173331999 2.35045722400204 -0.615601881466894 0.748076783703985 -0.192418510588264 0.888610425420721 -0.764849236567874 -1.40226896933876 -1.42237592509150 0.488193909859941 -0.177375156618825 -0.196053487807333 1.41931015064255 0.291584373984183 0.197811053464361 1.58769908997406 -0.804465956349547 0.696624415849607 0.835088165072682 -0.243715140377952 0.215670086403744 -1.16584393148205 -1.14795277889859 0.104874716016494 0.722254032225002 2.58549125261624 -0.666890670701386 0.187331024578940];
  noisy = clean + perturbation / 10;
  filt = daubcqf(6);
  % Only the first output is checked.
  [denoised, removed, used_opts] = denoise(noisy, filt);
  expected = [0.0741827688375062 0.0791701902526268 0.0760842615272340 0.0750476831774179 0.111279774779568 0.163475053283544 -0.0498263815350539 0.0946073088237311 0.135126562486911 -0.0186090620958193 -0.0748812479991294 -0.103470206059426 0.0234254843251780 0.239772540836257 0.0920583398962312 -0.152180640366891 -0.116682073306156 -0.0459389850762785 -0.00245240039778375 0.0755739164104836 0.102548333512214 0.121099911744184 0.177390507921620 0.240386041553093 0.231105933317157 0.198210924493273 0.175672812990725 0.138822049613034 0.127491615387826 0.121409597186325 0.0994935320130783 0.0760019340865427];
  assertVectorsAlmostEqual(denoised, expected, 'relative', 0.0001);

function test_denoise_2d
  % denoise with default arguments on a 4x4 matrix.
  input_matrix = [ 1,  2,     3,  4;
                   5,  6,     7,  8;
                   9, 10.09, 11, 12;
                  13, 13.91, 15, 16];
  filt = daubcqf(4);
  % Only the first output is checked.
  [denoised, removed, used_opts] = denoise(input_matrix, filt);
  expected = [
     1.093495801587334,   2.052784169768518,   3.036985129109070,   4.014510779767102;
     5.037416383975946,   6.006178652683398,   6.994963120759174,   7.978382656683513;
     9.047593546684929,  10.003998510025589,  10.977825887256145,  11.94698494275469;
    13.009489364401729,  13.937038667522501,  14.939852728547271,  15.9224996584731398];
  assertVectorsAlmostEqual(denoised, expected, 'relative', 0.0001);

function test_denoise_udwt
  % denoise with third argument 1 and no option vector. Judging by the test
  % name, 1 selects the undecimated transform (confirm against denoise.m).
  clean = makesig('Doppler', 32);
  % Fixed perturbation so the test is repeatable; it is scaled by 1/10 below.
  perturbation = [1.54421189550395 0.0859311331754255 -1.49159031063761 -0.742301837259857 -1.06158173331999 2.35045722400204 -0.615601881466894 0.748076783703985 -0.192418510588264 0.888610425420721 -0.764849236567874 -1.40226896933876 -1.42237592509150 0.488193909859941 -0.177375156618825 -0.196053487807333 1.41931015064255 0.291584373984183 0.197811053464361 1.58769908997406 -0.804465956349547 0.696624415849607 0.835088165072682 -0.243715140377952 0.215670086403744 -1.16584393148205 -1.14795277889859 0.104874716016494 0.722254032225002 2.58549125261624 -0.666890670701386 0.187331024578940];
  noisy = clean + perturbation / 10;
  filt = daubcqf(6);
  % Only the first output is checked.
  [denoised, removed, used_opts] = denoise(noisy, filt, 1);
  expected = [0.126244615385152 0.0952319712425300 0.0671343607152503 0.0513902979722585 0.0430402732682634 0.0586932575131794 0.0861069751902698 0.0989949047763016 0.0908418658128637 -0.0141454670119059 -0.144791527437026 -0.0185533166035902 0.278351613782131 0.279033706376659 -0.0205012032054263 -0.212367658407976 -0.241484343697995 -0.248582298831059 -0.213374214781743 -0.101963712141109 0.0454248851310567 0.181104333949749 0.275294407293259 0.309076259882059 0.298600450385073 0.259080737796607 0.211123535801718 0.183021783525739 0.171966340866576 0.171616812586097 0.168720006300193 0.151066428184072];
  assertVectorsAlmostEqual(denoised, expected, 'relative', 0.0001);

function test_denoise_udwt_2d
  % denoise with third argument 1 on a 4x4 matrix. Judging by the test name,
  % 1 selects the undecimated transform (confirm against denoise.m).
  input_matrix = [ 1,  2,     3,  4;
                   5,  6,     7,  8;
                   9, 10.09, 11, 12;
                  13, 13.91, 15, 16];
  filt = daubcqf(4);
  % Only the first output is checked.
  [denoised, removed, used_opts] = denoise(input_matrix, filt, 1);
  expected = [
     1.007040488866197,   1.993405274521765,   3.006268404030089,   3.996424654030090;
     4.995935171857875,   6.002401216530091,   7.001252328142127,   8.005847881693983;
     9.009508189685661,  10.059981743374523,  11.001190131625481,  11.999030274521770;
    12.987516149590270,  13.944211765573623,  14.991289136202310,  15.998697189754166];
  assertVectorsAlmostEqual(denoised, expected, 'relative', 0.0001);

function test_denoise_threshold_low
  % denoise with third argument 0 and option vector [1 3.0 0 2 0 0]; the
  % first option entry is set to 1 here. Judging by the test name it enables
  % thresholding of the low-pass part (confirm against denoise.m).
  clean = makesig('Doppler', 32);
  % Fixed perturbation so the test is repeatable; it is scaled by 1/10 below.
  perturbation = [1.54421189550395 0.0859311331754255 -1.49159031063761 -0.742301837259857 -1.06158173331999 2.35045722400204 -0.615601881466894 0.748076783703985 -0.192418510588264 0.888610425420721 -0.764849236567874 -1.40226896933876 -1.42237592509150 0.488193909859941 -0.177375156618825 -0.196053487807333 1.41931015064255 0.291584373984183 0.197811053464361 1.58769908997406 -0.804465956349547 0.696624415849607 0.835088165072682 -0.243715140377952 0.215670086403744 -1.16584393148205 -1.14795277889859 0.104874716016494 0.722254032225002 2.58549125261624 -0.666890670701386 0.187331024578940];
  noisy = clean + perturbation / 10;
  filt = daubcqf(6);
  opts = [1 3.0 0 2 0 0];
  % Only the first output is checked.
  [denoised, removed, used_opts] = denoise(noisy, filt, 0, opts);
  expected = [0.0187742354278351 0.0237616568429558 0.0206757281175629 0.0196391497677469 0.0558712413698966 0.108066519873873 -0.105234914944725 0.0391987754140600 0.0797180290772401 -0.0740175955054904 -0.130289781408801 -0.158878739469097 -0.0319830490844931 0.184364007426586 0.0366498064865601 -0.207589173776562 -0.172090606715827 -0.101347518485950 -0.0578609338074549 0.0201653830008125 0.0471398001025425 0.0656913783345127 0.121981974511949 0.184977508143422 0.175697399907486 0.142802391083602 0.120264279581054 0.0834135162033633 0.0720830819781554 0.0660010637766539 0.0440849986034073 0.0205934006768717];
  assertVectorsAlmostEqual(denoised, expected, 'relative', 0.0001);

function test_denoise_thresh_multiplier
  % Same setup as test_denoise_threshold_low except that the second option
  % entry is 3.5 instead of 3.0. Judging by the test name this entry is the
  % threshold multiplier (confirm against denoise.m).
  clean = makesig('Doppler', 32);
  % Fixed perturbation so the test is repeatable; it is scaled by 1/10 below.
  perturbation = [1.54421189550395 0.0859311331754255 -1.49159031063761 -0.742301837259857 -1.06158173331999 2.35045722400204 -0.615601881466894 0.748076783703985 -0.192418510588264 0.888610425420721 -0.764849236567874 -1.40226896933876 -1.42237592509150 0.488193909859941 -0.177375156618825 -0.196053487807333 1.41931015064255 0.291584373984183 0.197811053464361 1.58769908997406 -0.804465956349547 0.696624415849607 0.835088165072682 -0.243715140377952 0.215670086403744 -1.16584393148205 -1.14795277889859 0.104874716016494 0.722254032225002 2.58549125261624 -0.666890670701386 0.187331024578940];
  noisy = clean + perturbation / 10;
  filt = daubcqf(6);
  opts = [1 3.5 0 2 0 0];
  % Only the first output is checked.
  [denoised, removed, used_opts] = denoise(noisy, filt, 0, opts);
  expected = [0.00563527074803461 0.0110853052404048 0.0101590193471916 0.0116789518546074 0.0354625658443208 0.0691904606426981 -0.0647010252187970 0.0393485097012034 0.0302297746478269 -0.0658230296401878 -0.0947938063374137 -0.147943151851009 -0.0355607514547514 0.143027827800490 0.0126752977970079 -0.200577663821584 -0.149059259007655 -0.0564432101940217 -0.0281365070661950 0.0201021371871464 0.0438412772787373 0.0596866399869512 0.0967101937989458 0.136451641917565 0.130716307107088 0.109146914388131 0.0925200849653435 0.0657607417363412 0.0550584910898860 0.0469636231448182 0.0277268486177313 0.00667135407398081];
  assertVectorsAlmostEqual(denoised, expected, 'relative', 0.0001);

function test_denoise_std
  % denoise with third argument 0 and option vector [0 3.0 1 2 0 0]; the
  % third option entry is set to 1 here. Judging by the test name it selects
  % a standard-deviation-based estimate (confirm against denoise.m).
  clean = makesig('Doppler', 32);
  % Fixed perturbation so the test is repeatable; it is scaled by 1/10 below.
  perturbation = [1.54421189550395 0.0859311331754255 -1.49159031063761 -0.742301837259857 -1.06158173331999 2.35045722400204 -0.615601881466894 0.748076783703985 -0.192418510588264 0.888610425420721 -0.764849236567874 -1.40226896933876 -1.42237592509150 0.488193909859941 -0.177375156618825 -0.196053487807333 1.41931015064255 0.291584373984183 0.197811053464361 1.58769908997406 -0.804465956349547 0.696624415849607 0.835088165072682 -0.243715140377952 0.215670086403744 -1.16584393148205 -1.14795277889859 0.104874716016494 0.722254032225002 2.58549125261624 -0.666890670701386 0.187331024578940];
  noisy = clean + perturbation / 10;
  filt = daubcqf(6);
  opts = [0 3.0 1 2 0 0];
  % Only the first output is checked.
  [denoised, removed, used_opts] = denoise(noisy, filt, 0, opts);
  expected = [0.0686926069658060 0.0706216045196474 0.0719769032529757 0.0743568305131058 0.0754251996534692 0.0763549103855611 0.0783972750744446 0.0807092136475563 0.0763109954998047 0.0693017683604205 0.0628697537191382 0.0547492531677562 0.0755519478401559 0.107931256046656 0.0859959791464885 0.0494376118339224 0.0602059364595448 0.0785077229738383 0.0791999606842265 0.0809410605777517 0.0844652184548917 0.0873749084881920 0.0911535278085727 0.0952027332951270 0.0936316016468421 0.0898878427420561 0.0866734185917041 0.0820709685744921 0.0793481432323076 0.0768306965269240 0.0727995727792393 0.0684196591566048];
  assertVectorsAlmostEqual(denoised, expected, 'relative', 0.0001);

function test_denoise_hard
  % denoise with third argument 0 and option vector [0 3.0 0 1 0 0]; the
  % fourth option entry is 1 here (it is 2 in the neighbouring tests that
  % pass 0 as the third argument).
  % Judging by the test name, 1 selects hard thresholding (confirm against
  % denoise.m).
  clean = makesig('Doppler', 32);
  % Fixed perturbation so the test is repeatable; it is scaled by 1/10 below.
  perturbation = [1.54421189550395 0.0859311331754255 -1.49159031063761 -0.742301837259857 -1.06158173331999 2.35045722400204 -0.615601881466894 0.748076783703985 -0.192418510588264 0.888610425420721 -0.764849236567874 -1.40226896933876 -1.42237592509150 0.488193909859941 -0.177375156618825 -0.196053487807333 1.41931015064255 0.291584373984183 0.197811053464361 1.58769908997406 -0.804465956349547 0.696624415849607 0.835088165072682 -0.243715140377952 0.215670086403744 -1.16584393148205 -1.14795277889859 0.104874716016494 0.722254032225002 2.58549125261624 -0.666890670701386 0.187331024578940];
  noisy = clean + perturbation / 10;
  filt = daubcqf(6);
  opts = [0 3.0 0 1 0 0];
  % Only the first output is checked.
  [denoised, removed, used_opts] = denoise(noisy, filt, 0, opts);
  expected = [0.0977394160103721 0.0994161560983385 0.0832447407807381 0.0666983311697188 0.177420971595413 0.340230583897110 -0.354597069671295 0.0250017872275015 0.394418485343238 -0.0595745304374512 -0.452401570793399 -0.175707560852101 -0.00622320325130765 0.437867065411816 0.187485346584306 -0.241060664687049 -0.306285896120773 -0.373946536466370 -0.246165924475657 0.00210496326791051 0.0528629966064817 0.0967383656953347 0.275410693617439 0.487298926169970 0.454985253718689 0.348603331393631 0.288205743942248 0.186806596496260 0.172147260405660 0.180050851714681 0.142136445826288 0.104484725401481];
  assertVectorsAlmostEqual(denoised, expected, 'relative', 0.0001);

function test_denoise_levels
  % denoise with third argument 0 and option vector [0 3.0 0 2 4 0]; the
  % fifth option entry is set to 4 here. Judging by the test name it is the
  % number of decomposition levels (confirm against denoise.m).
  clean = makesig('Doppler', 32);
  % Fixed perturbation so the test is repeatable; it is scaled by 1/10 below.
  perturbation = [1.54421189550395 0.0859311331754255 -1.49159031063761 -0.742301837259857 -1.06158173331999 2.35045722400204 -0.615601881466894 0.748076783703985 -0.192418510588264 0.888610425420721 -0.764849236567874 -1.40226896933876 -1.42237592509150 0.488193909859941 -0.177375156618825 -0.196053487807333 1.41931015064255 0.291584373984183 0.197811053464361 1.58769908997406 -0.804465956349547 0.696624415849607 0.835088165072682 -0.243715140377952 0.215670086403744 -1.16584393148205 -1.14795277889859 0.104874716016494 0.722254032225002 2.58549125261624 -0.666890670701386 0.187331024578940];
  noisy = clean + perturbation / 10;
  filt = daubcqf(6);
  opts = [0 3.0 0 2 4 0];
  % Only the first output is checked.
  [denoised, removed, used_opts] = denoise(noisy, filt, 0, opts);
  expected = [0.164259992817262 0.156379071218712 0.142212685671703 0.125038963573761 0.150297815252073 0.191536767978636 -0.0381639580765735 0.0881092032192094 0.119629284458486 -0.0406090725365491 -0.105645426731493 -0.141820831994602 -0.0280318977202704 0.173171960129832 0.0117537437282443 -0.247115729957293 -0.206759297285911 -0.123147866042363 -0.0685808245422524 0.0255826360141400 0.0635302930397082 0.0930381970490923 0.165728084463140 0.246884147157615 0.246603211345582 0.220210934934003 0.206436991723089 0.177172675548210 0.178948997433275 0.188010177892750 0.179798128181065 0.170937023676945];
  assertVectorsAlmostEqual(denoised, expected, 'relative', 0.0001);

function test_denoise_actual_thresh
  % denoise with third argument 0 and option vector [0 3.0 0 2 0 0.5]; the
  % sixth option entry is set to 0.5 here. Judging by the test name it is an
  % explicit threshold value (confirm against denoise.m).
  clean = makesig('Doppler', 32);
  % Fixed perturbation so the test is repeatable; it is scaled by 1/10 below.
  perturbation = [1.54421189550395 0.0859311331754255 -1.49159031063761 -0.742301837259857 -1.06158173331999 2.35045722400204 -0.615601881466894 0.748076783703985 -0.192418510588264 0.888610425420721 -0.764849236567874 -1.40226896933876 -1.42237592509150 0.488193909859941 -0.177375156618825 -0.196053487807333 1.41931015064255 0.291584373984183 0.197811053464361 1.58769908997406 -0.804465956349547 0.696624415849607 0.835088165072682 -0.243715140377952 0.215670086403744 -1.16584393148205 -1.14795277889859 0.104874716016494 0.722254032225002 2.58549125261624 -0.666890670701386 0.187331024578940];
  noisy = clean + perturbation / 10;
  filt = daubcqf(6);
  opts = [0 3.0 0 2 0 0.5];
  % Only the first output is checked.
  [denoised, removed, used_opts] = denoise(noisy, filt, 0, opts);
  expected = [0.0607099183942295 0.0654351521193524 0.0684154759800610 0.0742018934148454 0.0758845005390013 0.0769511530643110 0.0810856606730252 0.0858023375316036 0.0704706443350518 0.0472060906047587 0.0254329679518446 -0.00154590940405266 0.0598455182579352 0.156556707841878 0.0864272987162393 -0.0287835335280487 0.00606017120154721 0.0659592575432934 0.0713958080495586 0.0812891735076492 0.0953701981347179 0.107554576791239 0.123739146895592 0.141180422640726 0.137085044622601 0.124838366760086 0.114852957437233 0.0997294000571788 0.0922174665178409 0.0857758976557685 0.0737052631031342 0.0605470542090229];
  assertVectorsAlmostEqual(denoised, expected, 'relative', 0.0001);

function test_denoise_udwt_threshold_low
  % denoise with third argument 1 and option vector [1 3.0 0 1 0 0]; the
  % first option entry is set to 1 here. Judging by the test name it enables
  % thresholding of the low-pass part (confirm against denoise.m).
  clean = makesig('Doppler', 32);
  % Fixed perturbation so the test is repeatable; it is scaled by 1/10 below.
  perturbation = [1.54421189550395 0.0859311331754255 -1.49159031063761 -0.742301837259857 -1.06158173331999 2.35045722400204 -0.615601881466894 0.748076783703985 -0.192418510588264 0.888610425420721 -0.764849236567874 -1.40226896933876 -1.42237592509150 0.488193909859941 -0.177375156618825 -0.196053487807333 1.41931015064255 0.291584373984183 0.197811053464361 1.58769908997406 -0.804465956349547 0.696624415849607 0.835088165072682 -0.243715140377952 0.215670086403744 -1.16584393148205 -1.14795277889859 0.104874716016494 0.722254032225002 2.58549125261624 -0.666890670701386 0.187331024578940];
  noisy = clean + perturbation / 10;
  filt = daubcqf(6);
  opts = [1 3.0 0 1 0 0];
  % Only the first output is checked.
  [denoised, removed, used_opts] = denoise(noisy, filt, 1, opts);
  expected = [0.135039400483741 0.117805175604609 0.0967709584177031 0.0142060292567307 -0.0239840294603812 0.323425861331697 -0.212285200125643 0.166066657685731 0.136653739821785 -0.0361708285655289 -0.244622217319313 -0.0751486112344819 0.279128997196628 0.299915294672821 0.00822389077239383 -0.232180770499244 -0.330137263335199 -0.293955318206172 -0.175538926380835 -0.0733568677543535 0.049241196655251 0.200165899490694 0.304615650610263 0.337325376378116 0.325593984310807 0.282048956150932 0.228861081870546 0.196656880842149 0.180959366486141 0.175210410022406 0.169828050229736 0.155033256209497];
  assertVectorsAlmostEqual(denoised, expected, 'relative', 0.0001);

function test_denoise_udwt_thresh_multiplier
  % Same setup as test_denoise_udwt_threshold_low except that the second
  % option entry is 3.5 instead of 3.0. Judging by the test name this entry
  % is the threshold multiplier (confirm against denoise.m).
  clean = makesig('Doppler', 32);
  % Fixed perturbation so the test is repeatable; it is scaled by 1/10 below.
  perturbation = [1.54421189550395 0.0859311331754255 -1.49159031063761 -0.742301837259857 -1.06158173331999 2.35045722400204 -0.615601881466894 0.748076783703985 -0.192418510588264 0.888610425420721 -0.764849236567874 -1.40226896933876 -1.42237592509150 0.488193909859941 -0.177375156618825 -0.196053487807333 1.41931015064255 0.291584373984183 0.197811053464361 1.58769908997406 -0.804465956349547 0.696624415849607 0.835088165072682 -0.243715140377952 0.215670086403744 -1.16584393148205 -1.14795277889859 0.104874716016494 0.722254032225002 2.58549125261624 -0.666890670701386 0.187331024578940];
  noisy = clean + perturbation / 10;
  filt = daubcqf(6);
  opts = [1 3.5 0 1 0 0];
  % Only the first output is checked.
  [denoised, removed, used_opts] = denoise(noisy, filt, 1, opts);
  expected = [0.0479478506866607 0.0160653046305043 -0.012660890293452 -0.0292521383561941 -0.0383355043751224 -0.0239494802109215 0.00200042536526626 0.0135636610003902 0.00399637041195728 -0.100521378500944 -0.229923524965501 -0.102614225576592 0.195850596270724 0.197593413336102 -0.100882406775293 -0.291163630119251 -0.318524834100706 -0.324752887320235 -0.288916218874243 -0.176658530913858 -0.028536592326759 0.108409816572649 0.204063702017061 0.239170248556769 0.230108690684778 0.190119394184444 0.14091827822899 0.11174543739754 0.0991301032767805 0.0977198505254529 0.0937639547688583 0.0745251447941448];
  assertVectorsAlmostEqual(denoised, expected, 'relative', 0.0001);

function test_denoise_udwt_std
  % denoise with third argument 1 and option vector [0 3.0 1 1 0 0]; the
  % third option entry is set to 1 here. Judging by the test name it selects
  % a standard-deviation-based estimate (confirm against denoise.m).
  clean = makesig('Doppler', 32);
  % Fixed perturbation so the test is repeatable; it is scaled by 1/10 below.
  perturbation = [1.54421189550395 0.0859311331754255 -1.49159031063761 -0.742301837259857 -1.06158173331999 2.35045722400204 -0.615601881466894 0.748076783703985 -0.192418510588264 0.888610425420721 -0.764849236567874 -1.40226896933876 -1.42237592509150 0.488193909859941 -0.177375156618825 -0.196053487807333 1.41931015064255 0.291584373984183 0.197811053464361 1.58769908997406 -0.804465956349547 0.696624415849607 0.835088165072682 -0.243715140377952 0.215670086403744 -1.16584393148205 -1.14795277889859 0.104874716016494 0.722254032225002 2.58549125261624 -0.666890670701386 0.187331024578940];
  noisy = clean + perturbation / 10;
  filt = daubcqf(6);
  opts = [0 3.0 1 1 0 0];
  % Only the first output is checked.
  [denoised, removed, used_opts] = denoise(noisy, filt, 1, opts);
  expected = [0.0847626939447046 0.0648669375488877 0.0505127048998841 0.0431477690668965 0.0443458995091662 0.0638361516754724 0.0926698200065443 0.122716357496751 0.135591683864019 0.0377466753027189 -0.0889166586897228 -0.0310700016943258 0.16530654803759 0.237349858169585 0.0577692051497442 -0.137751577705709 -0.18354744395111 -0.188205427540335 -0.157902857480421 -0.055391323576937 0.0791892398460303 0.198068185997372 0.271471422836112 0.282275886815228 0.246689293630916 0.205546705496588 0.16546007731141 0.145130898382968 0.1471329636038 0.142472749823065 0.132163448290946 0.111958195551385];
  assertVectorsAlmostEqual(denoised, expected, 'relative', 0.0001);

function test_denoise_udwt_soft
  % denoise with third argument 1 and option vector [0 3.0 0 2 0 0]; the
  % fourth option entry is 2 here (it is 1 in the other tests that pass 1 as
  % the third argument).
  % Judging by the test name, 2 selects soft thresholding (confirm against
  % denoise.m).
  clean = makesig('Doppler', 32);
  % Fixed perturbation so the test is repeatable; it is scaled by 1/10 below.
  perturbation = [1.54421189550395 0.0859311331754255 -1.49159031063761 -0.742301837259857 -1.06158173331999 2.35045722400204 -0.615601881466894 0.748076783703985 -0.192418510588264 0.888610425420721 -0.764849236567874 -1.40226896933876 -1.42237592509150 0.488193909859941 -0.177375156618825 -0.196053487807333 1.41931015064255 0.291584373984183 0.197811053464361 1.58769908997406 -0.804465956349547 0.696624415849607 0.835088165072682 -0.243715140377952 0.215670086403744 -1.16584393148205 -1.14795277889859 0.104874716016494 0.722254032225002 2.58549125261624 -0.666890670701386 0.187331024578940];
  noisy = clean + perturbation / 10;
  filt = daubcqf(6);
  opts = [0 3.0 0 2 0 0];
  % Only the first output is checked.
  [denoised, removed, used_opts] = denoise(noisy, filt, 1, opts);
  expected = [0.086668016749428   0.078090652632278   0.070455842749544   0.062824684205684  0.064249795534642   0.086899924318641   0.053549539548214   0.100644175366308  0.100726560037458   0.051479406046214  -0.011299945211104   0.036115394710961  0.147624998547612   0.159516308766960   0.059119062682569  -0.020817294484415 -0.042170912413038  -0.046825168298822  -0.027179285827824   0.017379645805457  0.071225126011476   0.123532780238470   0.153926034241219   0.160138755049699  0.153562168658336   0.138748019440599   0.123707805352361   0.115223425612607  0.110890877355381   0.107909648973443   0.103630954238181   0.095849084980685];
  assertVectorsAlmostEqual(denoised, expected, 'relative', 0.0001);

function test_denoise_udwt_levels
  % denoise with third argument 1 and option vector [0 3.0 0 1 4 0]; the
  % fifth option entry is set to 4 here. Judging by the test name it is the
  % number of decomposition levels (confirm against denoise.m).
  clean = makesig('Doppler', 32);
  % Fixed perturbation so the test is repeatable; it is scaled by 1/10 below.
  perturbation = [1.54421189550395 0.0859311331754255 -1.49159031063761 -0.742301837259857 -1.06158173331999 2.35045722400204 -0.615601881466894 0.748076783703985 -0.192418510588264 0.888610425420721 -0.764849236567874 -1.40226896933876 -1.42237592509150 0.488193909859941 -0.177375156618825 -0.196053487807333 1.41931015064255 0.291584373984183 0.197811053464361 1.58769908997406 -0.804465956349547 0.696624415849607 0.835088165072682 -0.243715140377952 0.215670086403744 -1.16584393148205 -1.14795277889859 0.104874716016494 0.722254032225002 2.58549125261624 -0.666890670701386 0.187331024578940];
  noisy = clean + perturbation / 10;
  filt = daubcqf(6);
  opts = [0 3.0 0 1 4 0];
  % Only the first output is checked.
  [denoised, removed, used_opts] = denoise(noisy, filt, 1, opts);
  expected = [0.137633389000662   0.120676804147327   0.099782758215143   0.015698574020267 -0.025118098815379   0.319788331991522  -0.217919217670089   0.160238201773756  0.131270340429534  -0.041415802797292  -0.249853610380694  -0.080126740883778  0.275034335985338   0.296982831400265   0.006200146572810  -0.234309647934845 -0.332731251852120  -0.296826946748889  -0.178550726178275  -0.074849412517890  0.050375266010248   0.203803428830869   0.310249668154709   0.343153832290091  0.330977383703058   0.287293930382695   0.234092474931927   0.201635010491445  0.185054027697432   0.178142873294961   0.171851794429319   0.157162133645098];
  assertVectorsAlmostEqual(denoised, expected, 'relative', 0.0001);

function test_denoise_udwt_actual_thresh
% Denoise a noisy 32-sample Doppler signal with the option vector
% [0 3.0 0 1 0 0.5] and compare against a stored reference output.
% NOTE(review): judging by the test name, the sixth option entry (0.5) is an
% explicitly supplied threshold -- confirm in denoise.m.
  wavelet = daubcqf(6);
  clean = makesig('Doppler', 32);
  % Frozen noise realisation so the test is deterministic.
  perturbation = [1.54421189550395 0.0859311331754255 -1.49159031063761 -0.742301837259857 -1.06158173331999 2.35045722400204 -0.615601881466894 0.748076783703985 -0.192418510588264 0.888610425420721 -0.764849236567874 -1.40226896933876 -1.42237592509150 0.488193909859941 -0.177375156618825 -0.196053487807333 1.41931015064255 0.291584373984183 0.197811053464361 1.58769908997406 -0.804465956349547 0.696624415849607 0.835088165072682 -0.243715140377952 0.215670086403744 -1.16584393148205 -1.14795277889859 0.104874716016494 0.722254032225002 2.58549125261624 -0.666890670701386 0.187331024578940];
  noisy = clean + perturbation / 10;
  % All three outputs are requested (as in the original test) even though
  % only the denoised signal is checked.
  [denoised, removed_noise, used_options] = denoise(noisy, wavelet, 1, [0 3.0 0 1 0 0.5]);
  expected = [0.126244615385152 0.09523197124253 0.0671343607152503 0.0513902979722585 0.0430402732682634 0.0586932575131794 0.0861069751902698 0.0989949047763016 0.0908418658128637 -0.0141454670119059 -0.144791527437026 -0.0185533166035902 0.278351613782131 0.279033706376659 -0.0205012032054263 -0.212367658407976 -0.241484343697995 -0.248582298831059 -0.213374214781743 -0.101963712141109 0.0454248851310567 0.181104333949749 0.275294407293258 0.309076259882059 0.298600450385073 0.259080737796607 0.211123535801717 0.183021783525739 0.171966340866576 0.171616812586097 0.168720006300193 0.151066428184072];
  assertVectorsAlmostEqual(denoised, expected, 'relative', 0.0001);


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/test_makesig.m
================================================
function test_suite = test_makesig
% Entry point of the makesig test file. The initTestSuite script is expected
% to populate test_suite from the test_* subfunctions below
% (NOTE(review): xUnit-style convention -- confirm against the test framework in use).
initTestSuite;

function test_makesig_heavisine
% makesig('HeaviSine', 8) must match the stored 8-sample reference.
  expected = [4.0000    0.0000   -6.0000   -2.0000    2.0000    0.0000   -4.0000   -0.0000];
  actual = makesig('HeaviSine', 8);
  assertVectorsAlmostEqual(actual, expected, 'relative', 0.0001);

function test_makesig_bumps
% makesig('Bumps', 8) must match the stored 8-sample reference.
  expected = [0.3206    5.0527    0.3727    0.0129    0.0295    0.0489    0.0004    0.0000];
  actual = makesig('Bumps', 8);
  assertVectorsAlmostEqual(actual, expected, 'relative', 0.0001);

function test_makesig_blocks
% makesig('Blocks', 8) must match the stored 8-sample reference.
  expected = [4.0000    0.5000    3.0000    0.9000    0.9000    5.2000   -0.0000   -0.0000];
  actual = makesig('Blocks', 8);
  assertVectorsAlmostEqual(actual, expected, 'relative', 0.0001);

function test_makesig_doppler
% makesig('Doppler', 12) must match the stored 12-sample reference.
  expected = [-0.1954 -0.3067 0.0000 -0.4703 0.4930 -0.2703 -0.4127 0.1025 0.4001 0.3454 0.1425 0];
  actual = makesig('Doppler', 12);
  assertVectorsAlmostEqual(actual, expected, 'relative', 0.0001);

function test_makesig_ramp
% makesig('Ramp', 8) must match the stored 8-sample reference.
  expected = [0.1250    0.2500   -0.6250   -0.5000   -0.3750   -0.2500   -0.1250         0];
  actual = makesig('Ramp', 8);
  assertVectorsAlmostEqual(actual, expected, 'relative', 0.0001);

function test_makesig_cusp
% makesig('Cusp', 8) must match the stored 8-sample reference.
  expected = [0.4950    0.3464    0.0707    0.3606    0.5050    0.6164    0.7106    0.7937];
  actual = makesig('Cusp', 8);
  assertVectorsAlmostEqual(actual, expected, 'relative', 0.0001);

function test_makesig_sing
% makesig('Sing', 8) must match the stored 8-sample reference.
  expected = [5.3333   16.0000   16.0000    5.3333    3.2000    2.2857    1.7778    1.4545];
  actual = makesig('Sing', 8);
  assertVectorsAlmostEqual(actual, expected, 'relative', 0.0001);

function test_makesig_hisine
% makesig('HiSine', 8) must match the stored 8-sample reference.
  expected = [0.8267   -0.9302    0.2200    0.6827   -0.9882    0.4292    0.5053   -0.9977];
  actual = makesig('HiSine', 8);
  assertVectorsAlmostEqual(actual, expected, 'relative', 0.0001);

function test_makesig_losine
% makesig('LoSine', 8) must match the stored 8-sample reference.
% This test uses a tighter relative tolerance (1e-7) than its siblings.
  expected = [0.865973039158459   0.866130104544730   0.000314159260191  -0.865815888304075  -0.866287084447387  -0.000628318489377   0.865658651997088   0.866443978850937];
  actual = makesig('LoSine', 8);
  assertVectorsAlmostEqual(actual, expected, 'relative', 0.0000001);

function test_makesig_linchirp
% makesig('LinChirp', 8) must match the stored 8-sample reference.
  expected = [0.0491    0.1951    0.4276    0.7071    0.9415    0.9808    0.6716    0.0000];
  actual = makesig('LinChirp', 8);
  assertVectorsAlmostEqual(actual, expected, 'relative', 0.0001);

function test_makesig_twochirp
% makesig('TwoChirp', 8) must match the stored 8-sample reference.
  expected = [0.5132    1.5000    0.5412    0.8660   -0.5132         0    0.5132    0.8660];
  actual = makesig('TwoChirp', 8);
  assertVectorsAlmostEqual(actual, expected, 'relative', 0.0001);

function test_makesig_quadchirp
% makesig('QuadChirp', 8) must match the stored 8-sample reference.
  expected = [0.0164    0.1305    0.4276    0.8660    0.8895   -0.3827   -0.6217    0.8660];
  actual = makesig('QuadChirp', 8);
  assertVectorsAlmostEqual(actual, expected, 'relative', 0.0001);

function test_makesig_mishmash
% makesig('MishMash', 8) must match the stored 8-sample reference.
  expected = [0.8922   -0.6046    1.0751    2.2558    0.8429    1.0273    0.5551   -0.1317];
  actual = makesig('MishMash', 8);
  assertVectorsAlmostEqual(actual, expected, 'relative', 0.0001);

function test_makesig_wernersorrows
% makesig('WernerSorrows', 8) must match the stored 8-sample reference.
  expected = [1.5545    5.3175    0.8252    1.6956   -1.2678    0.6466    1.7332   -0.9977];
  actual = makesig('WernerSorrows', 8);
  assertVectorsAlmostEqual(actual, expected, 'relative', 0.0001);

function test_makesig_leopold
% makesig('Leopold', 8) must be a unit impulse at the second sample.
  expected = [0     1     0     0     0     0     0     0];
  actual = makesig('Leopold', 8);
  assertVectorsAlmostEqual(actual, expected, 'relative', 0.0001);


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/test_mdwt.m
================================================
function test_suite = test_mdwt
% Entry point of the mdwt test file. The initTestSuite script is expected
% to populate test_suite from the test_* subfunctions below
% (NOTE(review): xUnit-style convention -- confirm against the test framework in use).
initTestSuite;

function test_mdwt_1D
% Two-level mdwt of an 8-sample LinChirp signal with a length-4 'min'
% Daubechies filter must match the stored coefficients and echo back L = 2.
  filt = daubcqf(4, 'min');
  sig = makesig('LinChirp', 8);
  requested_levels = 2;  % For 8 values in the signal we would normally use L=2
  [coeffs, used_levels] = mdwt(sig, filt, requested_levels);
  expected_coeffs = [1.1097 0.8767 0.8204 -0.5201 -0.0339 0.1001 0.2201 -0.1401];
  expected_levels = 2;
  assertVectorsAlmostEqual(coeffs, expected_coeffs, 'relative', 0.001);
  assertEqual(used_levels, expected_levels);

function test_mdwt_2D
% mdwt of a 4x4 matrix, with the level count left to mdwt, must match the
% stored 4x4 coefficient matrix.
  filt = daubcqf(4);
  img = [1 2 3 4; 5 6 7 8 ; 9 10 11 12; 13 14 15 16];
  coeffs = mdwt(img, filt);
  expected = [34.0000 -3.4641 0.0000 -2.0000; -13.8564 0.0000 0.0000 -2.0000; -0.0000 0.0000 -0.0000 -0.0000; -8.0000 -8.0000 0.0000 -0.0000];
  assertVectorsAlmostEqual(coeffs, expected, 'relative', 0.001);

function test_mdwt_compute_L1
% With no level argument, mdwt must report L = 1 for a 2-sample input.
  filt = daubcqf(4, 'min');
  sig = [1 2];
  [coeffs, used_levels] = mdwt(sig, filt);
  assertEqual(used_levels, 1);

function test_mdwt_compute_L2
% With no level argument, mdwt must report L = 2 for a 4-sample input.
  filt = daubcqf(4, 'min');
  sig = [1 2 3 4];
  [coeffs, used_levels] = mdwt(sig, filt);
  assertEqual(used_levels, 2);

function test_mdwt_compute_L3
% With no level argument, mdwt must report L = 3 for an 8-sample input.
  filt = daubcqf(4, 'min');
  sig = [1 2 3 4 5 6 7 8];
  [coeffs, used_levels] = mdwt(sig, filt);
  assertEqual(used_levels, 3);

function test_mdwt_compute_bad_L
% mdwt must throw when called without a level argument on a 9-sample input.
% NOTE(review): presumably no valid level can be derived from an odd
% length -- confirm against mdwt.
% The former local "L = -1" was never passed to mdwt and has been removed;
% this test exercises the automatic level computation only.
  x = [1 2 3 4 5 6 7 8 9];
  h = daubcqf(4, 'min');
  mdwtHandle = @() mdwt(x, h);
  % The empty identifier is passed through exactly as in the original test.
  assertExceptionThrown(mdwtHandle, '');

function test_mdwt_empty_input
% mdwt must throw when handed an empty signal.
  call_with_empty = @() mdwt([], [0 0 0 0]);
  assertExceptionThrown(call_with_empty, '');


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/test_midwt.m
================================================
function test_suite = test_midwt
% Entry point of the midwt test file. The initTestSuite script is expected
% to populate test_suite from the test_* subfunctions below
% (NOTE(review): xUnit-style convention -- confirm against the test framework in use).
initTestSuite;


function test_midwt_1D
% Round trip: midwt(mdwt(x)) with two levels must reproduce an 8-sample
% LinChirp signal.
  filt = daubcqf(4,'min');
  original = makesig('LinChirp',8);
  levels = 2;
  [coeffs,levels] = mdwt(original,filt,levels);
  [restored,levels] = midwt(coeffs,filt,levels);
  assertVectorsAlmostEqual(original, restored,'relative',0.0001);

function test_midwt_2D
% Round trip on the lena512 test image: midwt(mdwt(x)) must reproduce the
% image and midwt must report 9 levels.
  load('lena512');   % brings the variable lena512 into this workspace
  img = lena512;
  filt = daubcqf(6);
  [coeffs,levels] = mdwt(img,filt);
  [restored,levels] = midwt(coeffs,filt);
  assertEqual(levels,9);
  assertVectorsAlmostEqual(img, restored,'relative',0.0001);


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/test_mirdwt.m
================================================
function test_suite = test_mirdwt
% Entry point of the mirdwt test file. The initTestSuite script is expected
% to populate test_suite from the test_* subfunctions below
% (NOTE(review): xUnit-style convention -- confirm against the test framework in use).
initTestSuite;

function test_mirdwt_1
% Round trip: mirdwt(mrdwt(x)) at one level must reproduce an 8-sample
% Leopold signal and report the same level count.
  filt = daubcqf(4,'min');
  original = makesig('Leopold',8);
  levels_in = 1;
  [lowpass,highpass,levels] = mrdwt(original,filt,levels_in);
  [restored,levels] = mirdwt(lowpass,highpass,filt,levels);

  assertEqual(levels,levels_in);
  assertVectorsAlmostEqual(restored, original,'relative',0.0001);

function test_mirdwt_2D
% Round trip on the lena512 test image: both mrdwt and mirdwt must report
% 9 levels and the reconstruction must match the input.
  load('lena512');   % brings the variable lena512 into this workspace
  img = lena512;
  filt = daubcqf(6);
  [lowpass,highpass,levels] = mrdwt(img,filt);
  assertEqual(levels,9);
  [restored,levels] = mirdwt(lowpass,highpass,filt);
  assertEqual(levels,9);
  assertVectorsAlmostEqual(img, restored,'relative',0.0001);


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/test_mrdwt.m
================================================
function test_suite = test_mrdwt
% Entry point of the mrdwt test file. The initTestSuite script is expected
% to populate test_suite from the test_* subfunctions below
% (NOTE(review): xUnit-style convention -- confirm against the test framework in use).
initTestSuite;

function test_mrdwt_1
% One-level mrdwt of an 8-sample Leopold signal must match the stored
% lowpass/highpass outputs and report L = 1.
  filt = daubcqf(4,'min');
  sig = makesig('Leopold',8);
  levels = 1;
  [lowpass, highpass, levels] = mrdwt(sig, filt, levels);
  expected_low = [0.8365  0.4830 0 0 0 0 -0.1294 0.2241];
  expected_high = [-0.2241 -0.1294 0 0 0 0 -0.4830 0.8365];
  expected_levels = 1;
  assertVectorsAlmostEqual(lowpass, expected_low, 'relative', 0.001);
  assertVectorsAlmostEqual(highpass, expected_high, 'relative', 0.001);
  assertEqual(levels, expected_levels);

function test_mrdwt_2
% One-level mrdwt of a 4x4 matrix must match the stored 4x4 lowpass and
% 4x12 highpass reference outputs.
  filt = daubcqf(4, 'min');
  img = [1 3 5 2; 3 4 8 1; 3 9 2 0; 1 2 3 0];
  % The level output is requested (as in the original test) but not checked.
  [lowpass, highpass, levels] = mrdwt(img, filt, 1);
  expected_low = [
      9.0111   10.7799    5.8795    4.1107;
     11.1393    8.7766    2.5502    4.9130;
      6.9465    5.7578    1.6630    2.8517;
      4.8182    7.7611    4.9922    2.0494];
  expected_high = [
      4.5724    0.4285   -1.8828    2.2611    4.8714   -3.1026   -1.7978    0.0290   -2.9620   -1.1818   -1.1295    5.2733;
     -2.4441   -2.4318   -1.4465   -1.4587    1.8861   -4.2488   -1.9776    4.3403   -0.0233    0.0356    0.9498   -0.9620;
     -1.7488   -0.5870    0.5592   -0.6026    1.1663   -2.3550   -1.7398    2.9285   -0.6965    1.8583   -0.7120   -0.4498;
     -0.3795    2.5903    2.7700   -0.1998    4.1516   -1.2087   -1.5601   -1.3828    3.6818   -0.7120    0.8917   -3.8615];
  assertVectorsAlmostEqual(lowpass, expected_low, 'relative', 0.001);
  assertVectorsAlmostEqual(highpass, expected_high, 'relative', 0.001);

function test_mrdwt_2L2
% Two-level mrdwt of a 4x4 matrix must match the stored 4x4 lowpass and
% 4x24 highpass reference outputs.
  filt = daubcqf(4, 'min');
  img = [1 3 5 2; 3 4 8 1; 3 9 2 0; 1 2 3 0];
  % The level output is requested (as in the original test) but not checked.
  [lowpass, highpass, levels] = mrdwt(img, filt, 2);
  expected_low = [
   11.7500   11.7500   11.7500   11.7500;
   11.7500   11.7500   11.7500   11.7500;
   11.7500   11.7500   11.7500   11.7500;
   11.7500   11.7500   11.7500   11.7500];
  % Each matrix row spans two source lines joined by "...".
  expected_high = [
    4.5724    0.4285   -1.8828    2.2611    4.8714   -3.1026   -1.7978    0.0290   -2.9620   -1.1818   -1.1295    5.2733 ...
    3.1405    3.1405    3.1405    3.1405    4.2075    4.7877   -4.2075   -4.7877   -1.0760    1.8816    1.0760   -1.8816;
   -2.4441   -2.4318   -1.4465   -1.4587    1.8861   -4.2488   -1.9776    4.3403   -0.0233    0.0356    0.9498   -0.9620 ...
    1.9396    1.9396    1.9396    1.9396    4.2075    4.7877   -4.2075   -4.7877    4.3816   -0.9240   -4.3816    0.9240;
   -1.7488   -0.5870    0.5592   -0.6026    1.1663   -2.3550   -1.7398    2.9285   -0.6965    1.8583   -0.7120   -0.4498 ...
   -3.1405   -3.1405   -3.1405   -3.1405    4.2075    4.7877   -4.2075   -4.7877    1.0760   -1.8816   -1.0760    1.8816;
   -0.3795    2.5903    2.7700   -0.1998    4.1516   -1.2087   -1.5601   -1.3828    3.6818   -0.7120    0.8917   -3.8615 ...
   -1.9396   -1.9396   -1.9396   -1.9396    4.2075    4.7877   -4.2075   -4.7877   -4.3816    0.9240    4.3816   -0.9240];
  assertVectorsAlmostEqual(lowpass, expected_low, 'relative', 0.001);
  assertVectorsAlmostEqual(highpass, expected_high, 'relative', 0.001);


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/PWMBF/rwt/tests/test_setopt.m
================================================
function test_suite = test_setopt
% Entry point of the setopt test file. The initTestSuite script is expected
% to populate test_suite from the test_* subfunctions below
% (NOTE(review): xUnit-style convention -- confirm against the test framework in use).
initTestSuite;

function test_setopt_all_defaults
% An empty option vector must come back as the defaults, unchanged.
  defaults = [5 6 7 8];
  user_opts = [];
  merged = setopt(user_opts, defaults);
  expected = [5 6 7 8];
  assertVectorsAlmostEqual(merged, expected, 'relative', 0.0001);

function test_setopt_nonzero_becomes_zero
% Pins the current setopt behaviour: a zero entry in the user options is
% replaced by the corresponding default (6), and the missing fourth entry
% is filled from the defaults (8).
  defaults = [5 6 7 8];
  user_opts = [1 0 3];
  merged = setopt(user_opts, defaults);
  expected = [1 6 3 8];
  % [1 0 3 8] would be the more intuitive result, but is not what setopt returns.
  assertVectorsAlmostEqual(merged, expected, 'relative', 0.0001);


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Quality_Indices/D_lambda.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description: 
%           Quality with No Reference (QNR). Spectral distortion index. 
% 
% Interface:
%           D_lambda_index = D_lambda(I_F,I_MS,I_MS_LR,S,ratio,p)
%
% Inputs:
%           I_F:                Pansharpened image;
%           I_MS:               MS image resampled to panchromatic scale;
%           I_MS_LR:            Original MS image;
%           S:                  Block size (optional); Default value: 32;
%           ratio:              Resolution ratio;
%           p:                  Exponent value (optional); Default value: p = 1.
% 
% Outputs:
%           D_lambda_index:     D_lambda index.
% 
% References:
%           [Alparone08]        L. Alparone, B. Aiazzi, S. Baronti, A. Garzelli, F. Nencini, and M. Selva, "Multispectral and panchromatic data fusion assessment without reference,"
%                               Photogrammetric Engineering and Remote Sensing, vol. 74, no. 2, pp. 193-200, February 2008. 
%           [Vivone14]          G. Vivone, L. Alparone, J. Chanussot, M. Dalla Mura, A. Garzelli, G. Licciardi, R. Restaino, and L. Wald, "A Critical Comparison Among Pansharpening Algorithms," 
%                               IEEE Transactions on Geoscience and Remote Sensing, 2014. (Accepted)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function D_lambda_index = D_lambda(I_F,I_MS,I_MS_LR,S,ratio,p)
% D_LAMBDA  Spectral distortion index of QNR.
%
%   For every pair of bands (i,j), i < j, the block-wise Q index is
%   averaged once on the reference MS image and once on the fused image;
%   the index is the p-norm-style mean of the absolute differences.
%
%   Inputs:
%     I_F      Pansharpened image (N x M x Nb).
%     I_MS     MS image with the same size as I_F.
%     I_MS_LR  Original MS image (only read when flag_orig_paper ~= 0).
%     S        Block size; defaults to 32 when omitted or empty.
%     ratio    Resolution ratio (only read when flag_orig_paper ~= 0).
%     p        Exponent; defaults to 1 when omitted or empty.
%
%   Output:
%     D_lambda_index  Scalar distortion value. With a single band there are
%                     no band pairs and the result is NaN (0/0).
%
%   Errors are raised when I_F and I_MS differ in size or when the image
%   height/width is not a multiple of S.

% Documented defaults (previously not implemented).
if nargin < 6 || isempty(p), p = 1; end
if nargin < 4 || isempty(S), S = 32; end

flag_orig_paper = 0; % if 0, Toolbox 1.0, otherwise, original QNR paper 

% isequal compares the full size vectors. The former
% "if (size(I_F) ~= size(I_MS))" only fired when EVERY dimension differed
% and raised an unrelated error when the number of dimensions differed.
if ~isequal(size(I_F), size(I_MS))
    error('The two input images must have the same dimensions')
end

[N,M,Nb] = size(I_F);

if (rem(N,S) ~= 0)
    error('The number of rows must be multiple of the block size')
end

if (rem(M,S) ~= 0)
    error('The number of columns must be multiple of the block size')
end

D_lambda_index = 0;
for i = 1:Nb-1
    for j = i+1:Nb 
        if flag_orig_paper == 0
            %%%%%%% Opt. 1 (as toolbox 1.0): inter-band Q on the resampled MS
            Q_exp = block_mean_uqi(I_MS(:,:,i), I_MS(:,:,j), S);
        else
            %%%%%%% Opt. 2 (as paper QNR): inter-band Q on the original MS,
            %%%%%%% with the block size reduced by the resolution ratio
            Q_exp = block_mean_uqi(I_MS_LR(:,:,i), I_MS_LR(:,:,j), S/ratio);
        end

        % Same inter-band Q measured on the fused product.
        Q_fused = block_mean_uqi(I_F(:,:,i), I_F(:,:,j), S);
        D_lambda_index = D_lambda_index + abs(Q_fused-Q_exp)^p;
    end
end
s = ((Nb^2)-Nb)/2; % number of band pairs
D_lambda_index = (D_lambda_index/s)^(1/p);

end

%%%%%%% Mean of the block-wise Q index between two equally sized bands
function Q_mean = block_mean_uqi(band1, band2, bs)

% blockproc hands over each bs-by-bs block of band1 together with its
% top-left location; the matching block of band2 is cut out by hand.
fun_uqi = @(blk) uqi(blk.data,...
    band2(blk.location(1):blk.location(1)+bs-1,...
    blk.location(2):blk.location(2)+bs-1));
Q_mean = mean2(blockproc(band1,[bs bs],fun_uqi));

end

%%%%%%% Q-index on x and y images
function Q = uqi(x,y)
% UQI  Q index between two blocks.
%   Both inputs are flattened to double column vectors; the result combines
%   their means and the entries of their 2x2 covariance matrix.

a = double(x(:));
b = double(y(:));

mu_a = mean(a);
mu_b = mean(b);
covar = cov(a,b);   % [var(a) cov(a,b); cov(a,b) var(b)]

% Evaluation order is kept exactly as in the original expression.
Q = 4 * covar(1,2) * mu_a * mu_b / (covar(1,1)+covar(2,2)) / (mu_a^2 + mu_b^2);

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Quality_Indices/D_lambda_K.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description: 
%           Spectral distortion index of the Hybrid Quality with No Reference (HQNR).  
% 
% Interface:
%           Dl = D_lambda_K(fused,msexp,ratio,sensor,S)
%
% Inputs:
%           fused:              Pansharpened image;
%           msexp:              MS image resampled to panchromatic scale;
%           sensor:             Type of sensor;
%           ratio:              Resolution ratio;
%           S:                  Block size (optional); Default value: 32.
% 
% Outputs:
%           Dl:                 D_lambda index.
% 
% Reference:
%           [Khan09]            M. M. Khan, L. Alparone, and J. Chanussot, "Pansharpening quality assessment using the modulation transfer functions of instruments,"
%                               IEEE Transactions on Geoscience and Remote Sensing, vol. 47, no. 11, pp. 3880-3891, 2009.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function Dl = D_lambda_K(fused,msexp,ratio,sensor,S)
% D_LAMBDA_K  Spectral distortion index of HQNR.
%
%   The fused image is passed through MTF(fused,sensor,ratio) and compared
%   with msexp through q2n; the index is 1 minus the resulting Q2n value.
%
%   Inputs:
%     fused   Pansharpened image.
%     msexp   MS image with the same number of rows and columns as fused.
%     ratio   Resolution ratio, forwarded to MTF.
%     sensor  Sensor identifier, forwarded to MTF.
%     S       Block size; defaults to 32 when omitted or empty.
%
%   Errors are raised when the first two dimensions of fused and msexp
%   differ or when they are not multiples of S.

% Documented default (previously a missing S raised "not enough input
% arguments").
if nargin < 5 || isempty(S), S = 32; end

if (size(fused,1) ~= size(msexp,1) || size(fused,2) ~= size(msexp,2))
    error('The two images must have the same dimensions')
end

[N,M,~] = size(fused);
if (rem(N,S) ~= 0)
    error('number of rows must be multiple of the block size')
end
if (rem(M,S) ~= 0)
    error('number of columns must be multiple of the block size')
end

fused_degraded = MTF(fused,sensor,ratio);

% S is passed twice, as the third and fourth argument of q2n.
[Q2n_index,~] = q2n(msexp,fused_degraded,S,S);
Dl = 1-Q2n_index;

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Quality_Indices/D_s.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description: 
%           Quality with No Reference (QNR). Spatial distortion index.
% 
% Interface:
%           D_s_index = D_s(I_F,I_MS,I_MS_LR,I_PAN,ratio,S,q)
%
% Inputs:
%           I_F:                Pansharpened image;
%           I_MS:               MS image resampled to panchromatic scale;
%           I_MS_LR:            Original MS image;
%           I_PAN:              Panchromatic image;
%           ratio:              Scale ratio between MS and PAN. Pre-condition: Integer value;
%           S:                  Block size (optional); Default value: 32;
%           q:                  Exponent value (optional); Default value: q = 1.
% 
% Outputs:
%           D_s_index:          D_s index.
% 
% References:
%           [Alparone08]        L. Alparone, B. Aiazzi, S. Baronti, A. Garzelli, F. Nencini, and M. Selva, "Multispectral and panchromatic data fusion assessment without reference,"
%                               Photogrammetric Engineering and Remote Sensing, vol. 74, no. 2, pp. 193-200, February 2008. 
%           [Vivone14]          G. Vivone, L. Alparone, J. Chanussot, M. Dalla Mura, A. Garzelli, G. Licciardi, R. Restaino, and L. Wald, "A Critical Comparison Among Pansharpening Algorithms," 
%                               IEEE Transactions on Geoscience and Remote Sensing, 2014. (Accepted)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function D_s_index = D_s(I_F,I_MS,I_MS_LR,I_PAN,ratio,S,q)
% D_S  Spatial distortion index of QNR.
%
%   For every band the block-wise Q index is averaged once between the
%   fused band and the PAN image, and once between the reference MS band
%   and a low-pass version of PAN; the index is the q-norm-style mean of
%   the absolute differences.
%
%   Inputs:
%     I_F      Pansharpened image (N x M x Nb).
%     I_MS     MS image with the same size as I_F.
%     I_MS_LR  Original MS image (only read when flag_orig_paper ~= 0).
%     I_PAN    Panchromatic image.
%     ratio    Scale ratio between MS and PAN.
%     S        Block size; defaults to 32 when omitted or empty.
%     q        Exponent; defaults to 1 when omitted or empty.
%
%   Output:
%     D_s_index  Scalar distortion value.
%
%   Errors are raised when I_F and I_MS differ in size or when the image
%   height/width is not a multiple of S.

% Documented defaults (previously not implemented).
if nargin < 7 || isempty(q), q = 1; end
if nargin < 6 || isempty(S), S = 32; end

flag_orig_paper = 0; % if 0, Toolbox 1.0, otherwise, original QNR paper 

% isequal compares the full size vectors. The former
% "if (size(I_F) ~= size(I_MS))" only fired when EVERY dimension differed
% and raised an unrelated error when the number of dimensions differed.
if ~isequal(size(I_F), size(I_MS))
    error('The two images must have the same dimensions')
end

[N, M, Nb] = size(I_F);

if (rem(N,S) ~= 0)
    error('number of rows must be multiple of the block size')
end

if (rem(M,S) ~= 0)
    error('number of columns must be multiple of the block size')
end

if flag_orig_paper == 0
    %%%%%%% Opt. 1 (as toolbox 1.0): PAN is downsampled, then interpolated
    %%%%%%% back by interp23tap
    pan_filt = interp23tap(imresize(I_PAN,1./ratio),ratio);
else
    %%%%%%% Opt. 2 (as paper QNR): PAN is only downsampled
    pan_filt = imresize(I_PAN,1./ratio);
end

D_s_index = 0;
for i = 1:Nb
    % Q between the fused band and the PAN image.
    Q_high = block_mean_uqi(I_F(:,:,i), I_PAN, S);

    if flag_orig_paper == 0
        %%%%%%% Opt. 1 (as toolbox 1.0)
        Q_low = block_mean_uqi(I_MS(:,:,i), pan_filt, S);
    else
        %%%%%%% Opt. 2 (as paper QNR): block size reduced by the ratio
        Q_low = block_mean_uqi(I_MS_LR(:,:,i), pan_filt, S/ratio);
    end

    D_s_index = D_s_index + abs(Q_high-Q_low)^q;
end

D_s_index = (D_s_index/Nb)^(1/q);

end

%%%%%%% Mean of the block-wise Q index between two equally sized bands
function Q_mean = block_mean_uqi(band1, band2, bs)

% blockproc hands over each bs-by-bs block of band1 together with its
% top-left location; the matching block of band2 is cut out by hand.
fun_uqi = @(blk) uqi(blk.data,...
    band2(blk.location(1):blk.location(1)+bs-1,...
    blk.location(2):blk.location(2)+bs-1));
Q_mean = mean2(blockproc(band1,[bs bs],fun_uqi));

end

%%%%%%% Q-index on x and y images
function Q = uqi(x,y)
% UQI  Q index between two blocks.
%   Both inputs are flattened to double column vectors; the result combines
%   their means and the entries of their 2x2 covariance matrix.

a = double(x(:));
b = double(y(:));

mu_a = mean(a);
mu_b = mean(b);
covar = cov(a,b);   % [var(a) cov(a,b); cov(a,b) var(b)]

% Evaluation order is kept exactly as in the original expression.
Q = 4 * covar(1,2) * mu_a * mu_b / (covar(1,1)+covar(2,2)) / (mu_a^2 + mu_b^2);

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Quality_Indices/ERGAS.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description: 
%           Erreur Relative Globale Adimensionnelle de Synthese (ERGAS).
% 
% Interface:
%           ERGAS_index = ERGAS(I1,I2,ratio)
%
% Inputs:
%           I1:             First multispectral image;
%           I2:             Second multispectral image;
%           ratio:          Scale ratio between MS and PAN. Pre-condition: Integer value.
% 
% Outputs:
%           ERGAS_index:    ERGAS index.
% References:
%           [Ranchin00]     T. Ranchin and L. Wald, "Fusion of high spatial and spectral resolution images: the ARSIS concept and its implementation,"
%                           Photogrammetric Engineering and Remote Sensing, vol. 66, no. 1, pp. 49-61, January 2000.
%           [Vivone20]      G. Vivone, M. Dalla Mura, A. Garzelli, R. Restaino, G. Scarpa, M.O. Ulfarsson, L. Alparone, and J. Chanussot, "A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting pansharpening with classical and emerging pansharpening methods", 
%                           IEEE Geoscience and Remote Sensing Magazine, doi: 10.1109/MGRS.2020.3019315.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function ERGAS_index = ERGAS(I1,I2,ratio)
% ERGAS  Relative dimensionless global error between two multiband images.
%   Per band, the mean squared difference is divided by the squared mean
%   of the corresponding band of I1; the band average is square-rooted and
%   scaled by 100/ratio.

reference = double(I1);
residual = reference - double(I2);
num_bands = size(residual,3);

band_sum = 0;
for b = 1:num_bands
    mse_b = mean2(residual(:,:,b).^2);
    mean_b = mean2(reference(:,:,b));
    band_sum = band_sum + mse_b/(mean_b)^2;
end

ERGAS_index = (100/ratio) * sqrt((1/num_bands) * band_sum);

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Quality_Indices/HQNR.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description: 
%           Hybrid Quality with No Reference (HQNR) index. 
% 
% Interface:
%           [HQNR_value,Dl,Ds] = HQNR(ps_ms,ms,msexp,pan,S,sensor,ratio)
%
% Inputs:
%           ps_ms:              Pansharpened image;
%           ms:                 Original MS image;
%           msexp:              MS image resampled to panchromatic scale;
%           pan:                Panchromatic image;
%           S:                  Block size (optional); Default value: 32;
%           sensor:             String for type of sensor (e.g. 'WV2','IKONOS');
%           ratio:              Scale ratio between MS and PAN. Pre-condition: Integer value.
% 
% Outputs:
%           HQNR_value:          HQNR index;
%           Dl:                  D_lambda index;
%           Ds:                  D_s index.
% 
% References:
%           [Alparone08]        L. Alparone, B. Aiazzi, S. Baronti, A. Garzelli, F. Nencini, and M. Selva, "Multispectral and panchromatic data fusion assessment without reference,"
%                               Photogrammetric Engineering and Remote Sensing, vol. 74, no. 2, pp. 193-200, February 2008. 
%           [Khan09]            M. M. Khan, L. Alparone, and J. Chanussot, "Pansharpening quality assessment using the modulation transfer functions of instruments", 
%                               IEEE Trans. Geosci. Remote Sens., vol. 47, no. 11, pp. 3880-3891, Nov. 2009.
%           [Aiazzi14]          B. Aiazzi, L. Alparone, S. Baronti, R. Carla, A. Garzelli, and L. Santurri, 
%                               "Full scale assessment of pansharpening methods and data products", 
%                               in SPIE Remote Sensing, pp. 924 402-924 402, 2014.
%           [Vivone20]          G. Vivone, M. Dalla Mura, A. Garzelli, R. Restaino, G. Scarpa, M.O. Ulfarsson, L. Alparone, and J. Chanussot, "A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting pansharpening with classical and emerging pansharpening methods", 
%                               IEEE Geoscience and Remote Sensing Magazine, doi: 10.1109/MGRS.2020.3019315.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [HQNR_value,Dl,Ds] = HQNR(ps_ms,ms,msexp,pan,S,sensor,ratio)
% HQNR  Hybrid QNR: product of (1 - Dl) and (1 - Ds).
%   Dl comes from D_lambda_K (spectral term) and Ds from D_s (spatial
%   term); D_s is called with its exponent fixed to 1.

spatial_exponent = 1;

% Spectral distortion.
Dl = D_lambda_K(ps_ms,msexp,ratio,sensor,S);

% Spatial distortion.
Ds = D_s(ps_ms,msexp,ms,pan,ratio,S,spatial_exponent);

HQNR_value = (1-Dl)*(1-Ds);

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Quality_Indices/Q.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description: 
%           Q/SSIM averaged on all bands.
% 
% Interface:
%           Q_avg = Q(I1,I2,L)
%
% Inputs:
%           I1:         First multispectral image;
%           I2:         Second multispectral image;
%           L:          Radiometric resolution.
%
% Outputs:
%           Q_avg:      Q index averaged on all bands.
% 
% References:
%           [Wang02]    Z. Wang and A. C. Bovik, "A universal image quality index," IEEE Signal Processing Letters, vol. 9, no. 3, pp. 81-84, March 2002.
%           [Vivone20]  G. Vivone, M. Dalla Mura, A. Garzelli, R. Restaino, G. Scarpa, M.O. Ulfarsson, L. Alparone, and J. Chanussot, "A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting pansharpening with classical and emerging pansharpening methods", 
%                       IEEE Geoscience and Remote Sensing Magazine, doi: 10.1109/MGRS.2020.3019315.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function Q_avg = Q(I1,I2,L)
% Q  Band-averaged Q index between two multiband images.
%   Each band pair is scored by img_qi with a fixed block size of 32 and
%   the scores are averaged. The argument L is accepted but not used by
%   the current implementation.

num_bands = size(I1,3);
band_scores = zeros(1,num_bands);

for b = 1:num_bands
    band_scores(b) = img_qi(I1(:,:,b), I2(:,:,b), 32);
end

Q_avg = mean(band_scores);

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Quality_Indices/QNR.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description: 
%           Quality with No Reference (QNR) index. 
% 
% Interface:
%           [QNR_index,D_lambda_index,D_s_index] = QNR(I_F,I_MS,I_MS_LR,I_PAN,ratio,S,p,q,alpha,beta)
%
% Inputs:
%           I_F:                Pansharpened image;
%           I_MS:               MS image resampled to panchromatic scale;
%           I_MS_LR:            Original MS image;
%           I_PAN:              Panchromatic image;
%           ratio:              Scale ratio between MS and PAN. Pre-condition: Integer value;
%           S:                  Block size (optional); Default value: 32;
%           p, q, alpha, beta:  Exponent values (optional); Default values: p = q = alpha = beta = 1.
% 
% Outputs:
%           QNR_index:          QNR index;
%           D_lambda_index:     D_lambda index;
%           D_s_index:          D_s index.
% 
% References:
%           [Alparone08]        L. Alparone, B. Aiazzi, S. Baronti, A. Garzelli, F. Nencini, and M. Selva, "Multispectral and panchromatic data fusion assessment without reference,"
%                               Photogrammetric Engineering and Remote Sensing, vol. 74, no. 2, pp. 193-200, February 2008. 
%           [Vivone15]          G. Vivone, L. Alparone, J. Chanussot, M. Dalla Mura, A. Garzelli, G. Licciardi, R. Restaino, and L. Wald, "A Critical Comparison Among Pansharpening Algorithms," 
%                               IEEE Transactions on Geoscience and Remote Sensing, vol. 53, no. 5, pp. 2565-2586, May 2015.
%           [Vivone20]          G. Vivone, M. Dalla Mura, A. Garzelli, R. Restaino, G. Scarpa, M.O. Ulfarsson, L. Alparone, and J. Chanussot, "A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting pansharpening with classical and emerging pansharpening methods", 
%                               IEEE Geoscience and Remote Sensing Magazine, doi: 10.1109/MGRS.2020.3019315.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [QNR_index,D_lambda_index,D_s_index] = QNR(I_F,I_MS,I_MS_LR,I_PAN,ratio,S,p,q,alpha,beta)
% QNR  Quality with No Reference index.
%   QNR_index = (1 - D_lambda)^alpha * (1 - D_s)^beta, where D_lambda and
%   D_s are computed by the functions of the same name.
%
%   Optional arguments (omitted or empty -> default):
%     S = 32, p = 1, q = 1, alpha = 1, beta = 1.

% Argument positions: S is #6, p #7, q #8, alpha #9, beta #10.
% The previous thresholds were all one too high, so beta was always reset
% to 1 and the last argument actually supplied (alpha, q, p or S) was
% silently overwritten by its default.
if nargin < 10 || isempty(beta),  beta = 1;  end
if nargin < 9  || isempty(alpha), alpha = 1; end
if nargin < 8  || isempty(q),     q = 1;     end
if nargin < 7  || isempty(p),     p = 1;     end
if nargin < 6  || isempty(S),     S = 32;    end

D_lambda_index = D_lambda(I_F,I_MS,I_MS_LR,S,ratio,p);

D_s_index = D_s(I_F,I_MS,I_MS_LR,I_PAN,ratio,S,q);

QNR_index = (1-D_lambda_index)^alpha * (1-D_s_index)^beta;

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Quality_Indices/SAM.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description: 
%           Spectral Angle Mapper (SAM).
% 
% Interface:
%           [SAM_index,SAM_map] = SAM(I1,I2)
%
% Inputs:
%           I1:         First multispectral image;
%           I2:         Second multispectral image.
% 
% Outputs:
%           SAM_index:  SAM index;
%           SAM_map:    Image of SAM values.
% 
% References:
%           [Yuhas92]   R. H. Yuhas, A. F. H. Goetz, and J. W. Boardman, "Discrimination among semi-arid landscape endmembers using the Spectral Angle Mapper (SAM) algorithm," 
%                       in Proceeding Summaries 3rd Annual JPL Airborne Geoscience Workshop, 1992, pp. 147-149.
%           [Vivone20]  G. Vivone, M. Dalla Mura, A. Garzelli, R. Restaino, G. Scarpa, M.O. Ulfarsson, L. Alparone, and J. Chanussot, "A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting pansharpening with classical and emerging pansharpening methods", 
%                       IEEE Geoscience and Remote Sensing Magazine, doi: 10.1109/MGRS.2020.3019315.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [SAM_index,SAM_map] = SAM(I1,I2)
% SAM  Spectral angle between two multiband images.
%   SAM_map holds the per-pixel angle in radians (zero-norm pixels use eps
%   as denominator). SAM_index is the mean angle in degrees over the pixels
%   whose norm product is non-zero.

[rows,cols,~] = size(I2);

% Per-pixel inner product and squared norms along the band dimension.
inner = dot(I1,I2,3);
sq_norm_1 = dot(I1,I1,3);
sq_norm_2 = dot(I2,I2,3);
norm_product = sqrt(sq_norm_1.*sq_norm_2);

% Angle map: guard the division where the norm product vanishes.
safe_denominator = norm_product;
safe_denominator(safe_denominator==0) = eps;
SAM_map = acos(inner./safe_denominator);

% Global index: drop zero-norm pixels, then average.
inner_col = reshape(inner,rows*cols,1);
norm_col = reshape(norm_product,rows*cols,1);
keep = (norm_col ~= 0);
inner_col = inner_col(keep);
norm_col = norm_col(keep);

mean_angle = sum(sum(acos(inner_col./norm_col)))/(size(norm_col,1));
SAM_index = real(mean_angle)*180/pi;

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Quality_Indices/SCC.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description: 
%           spatial Correlation Coefficient (sCC).
% 
% Interface:
%           [sCC,SCCMap] = SCC(I_F,I_GT)
%
% Inputs:
%           I_F:        Fused image;
%           I_GT:       Ground-truth image.
% 
% Outputs:
%           sCC:        spatial correlation coefficient;
%           SCCMap:     Image of sCC values.
% 
% Reference:
%           [Vivone15]  G. Vivone, L. Alparone, J. Chanussot, M. Dalla Mura, A. Garzelli, G. Licciardi, R. Restaino, and L. Wald, "A Critical Comparison Among Pansharpening Algorithms," 
%                       IEEE Transactions on Geoscience and Remote Sensing, vol. 53, no. 5, pp. 2565-2586, May 2015.
%           [Vivone20]  G. Vivone, M. Dalla Mura, A. Garzelli, R. Restaino, G. Scarpa, M.O. Ulfarsson, L. Alparone, and J. Chanussot, "A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting pansharpening with classical and emerging pansharpening methods", 
%                       IEEE Geoscience and Remote Sensing Magazine, doi: 10.1109/MGRS.2020.3019315.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [sCC,SCCMap]=SCC(I_F,I_GT)
% SCC  Spatial correlation coefficient between fused and reference image.
%
% For every band the one-pixel border is dropped, horizontal and vertical
% Sobel responses are computed and combined into a gradient magnitude
% (the variables keep their historical "Lap" names). sCC is the
% normalised inner product of the two gradient-magnitude cubes.
%
% Outputs:
%   sCC:     scalar correlation coefficient;
%   SCCMap:  per-pixel contribution to sCC (summed over bands), so that
%            sum(SCCMap(:)) equals sCC.

Im_Lap_F = zeros(size(I_F,1)-2,size(I_F,2)-2,size(I_F,3));
for idim=1:size(I_F,3)
    Im_Lap_F_y= imfilter(I_F(2:end-1,2:end-1,idim),fspecial('sobel'));
    Im_Lap_F_x= imfilter(I_F(2:end-1,2:end-1,idim),fspecial('sobel')');
    Im_Lap_F(:,:,idim) = sqrt(Im_Lap_F_y.^2+Im_Lap_F_x.^2);
end

Im_Lap_GT = zeros(size(I_GT,1)-2,size(I_GT,2)-2,size(I_GT,3));
for idim=1:size(I_GT,3)
    Im_Lap_GT_y= imfilter(I_GT(2:end-1,2:end-1,idim),fspecial('sobel'));
    Im_Lap_GT_x= imfilter(I_GT(2:end-1,2:end-1,idim),fspecial('sobel')');
    Im_Lap_GT(:,:,idim) = sqrt(Im_Lap_GT_y.^2+Im_Lap_GT_x.^2);
end

norm_F  = sqrt(sum(Im_Lap_F(:).^2));
norm_GT = sqrt(sum(Im_Lap_GT(:).^2));

sCC=sum(sum(sum(Im_Lap_F.*Im_Lap_GT)));
sCC = sCC/norm_F;
sCC = sCC/norm_GT;

% The map must use the same normalisation as sCC (norm of F times norm of
% GT); it was previously divided twice by the GT norm.
SCCMap=sum(Im_Lap_F.*Im_Lap_GT,3)/norm_F/norm_GT;

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Quality_Indices/img_qi.m
================================================
function [quality, quality_map] = img_qi(img1, img2, block_size)

%========================================================================
%
%Copyright (c) 2001 The University of Texas at Austin
%All Rights Reserved.
% 
%This program is free software; you can redistribute it and/or modify
%it under the terms of the GNU General Public License as published by
%the Free Software Foundation; either version 2 of the License, or
%(at your option) any later version.
% 
%This program is distributed in the hope that it will be useful,
%but WITHOUT ANY WARRANTY; without even the implied warranty of
%MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
%GNU General Public License for more details.
% 
%The GNU Public License is available in the file LICENSE, or you
%can write to the Free Software Foundation, Inc., 59 Temple Place -
%Suite 330, Boston, MA 02111-1307, USA, or you can find it on the
%World Wide Web at http://www.fsf.org.
%
%Author  : Zhou Wang 
%Version : 1.0
% 
%The authors are with the Laboratory for Image and Video Engineering
%(LIVE), Department of Electrical and Computer Engineering, The
%University of Texas at Austin, Austin, TX.
%
%Kindly report any suggestions or corrections to zwang@ece.utexas.edu
%
%Acknowledgement:
%The author would like to thank Mr. Umesh Rajashekar, the Matlab master
%in our lab, for spending his precious time and giving his kind help
%on writing this program. Without his help, this program would not
%achieve its current efficiency.
%
%========================================================================
%
%This is an efficient implementation of the algorithm for calculating
%the universal image quality index proposed by Zhou Wang and Alan C. 
%Bovik. Please refer to the paper "A Universal Image Quality Index"
%by Zhou Wang and Alan C. Bovik, published in IEEE Signal Processing
%Letters, 2001. In order to run this function, you must have Matlab's
%Image Processing Toolbox.
%
%Input : an original image and a test image of the same size
%Output: (1) an overall quality index of the test image, with a value
%            range of [-1, 1].
%        (2) a quality map of the test image. The map has a smaller
%            size than the input images. The actual size is
%            img_size - BLOCK_SIZE + 1.
%        If the number of inputs is wrong or the two images differ in
%        size, quality is -Inf and quality_map is filled with -1.
%
%Usage:
%
%1. Load the original and the test images into two matrices
%   (say img1 and img2)
%
%2. Run this function in one of the two ways:
%
%   % Choice 1 (suggested):
%   [qi qi_map] = img_qi(img1, img2);
%
%   % Choice 2:
%   [qi qi_map] = img_qi(img1, img2, BLOCK_SIZE);
%
%   The default BLOCK_SIZE is 8 (Choice 1). Otherwise, you can specify
%   it by yourself (Choice 2).
%
%3. See the results:
%
%   qi                    %Gives the overall quality index.
%   imshow((qi_map+1)/2)  %Shows the quality map as an image.
%
%========================================================================

if (nargin < 2 || nargin > 3)
   quality = -Inf;
   if (nargin >= 1)
      quality_map = -1*ones(size(img1));
   else
      quality_map = -1;
   end
   return;
end

% isequal compares the complete size vectors. An element-wise "~=" inside
% an "if" is true only when EVERY dimension differs, and it errors when
% the two images have a different number of dimensions.
if (~isequal(size(img1), size(img2)))
   quality = -Inf;
   quality_map = -1*ones(size(img1));
   return;
end

if (nargin == 2)
   block_size = 8;
end

N = block_size.^2;
sum2_filter = ones(block_size);

img1_sq   = img1.*img1;
img2_sq   = img2.*img2;
img12 = img1.*img2;

% Sliding-window sums over block_size x block_size neighbourhoods.
img1_sum   = filter2(sum2_filter, img1, 'valid');
img2_sum   = filter2(sum2_filter, img2, 'valid');
img1_sq_sum = filter2(sum2_filter, img1_sq, 'valid');
img2_sq_sum = filter2(sum2_filter, img2_sq, 'valid');
img12_sum = filter2(sum2_filter, img12, 'valid');

img12_sum_mul = img1_sum.*img2_sum;
img12_sq_sum_mul = img1_sum.*img1_sum + img2_sum.*img2_sum;
numerator = 4*(N*img12_sum - img12_sum_mul).*img12_sum_mul;
denominator1 = N*(img1_sq_sum + img2_sq_sum) - img12_sq_sum_mul;
denominator = denominator1.*img12_sq_sum_mul;

% Windows where the full denominator vanishes keep the default value 1,
% unless only the variance term is zero, in which case the mean-based
% ratio is used.
quality_map = ones(size(denominator));
index = (denominator1 == 0) & (img12_sq_sum_mul ~= 0);
quality_map(index) = 2*img12_sum_mul(index)./img12_sq_sum_mul(index);
index = (denominator ~= 0);
quality_map(index) = numerator(index)./denominator(index);

quality = mean2(quality_map);

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Quality_Indices/norm_blocco.m
================================================
%%%%%%%%%%%%%% Q2n aux. function
function [y,a,c] = norm_blocco(x)
% NORM_BLOCCO  Standardise a block and shift it by one (Q2n helper).
%
% Outputs:
%   y:  (x - a)/c + 1;
%   a:  mean of all elements of x (mean2);
%   c:  standard deviation of all elements of x (std2), with an exact
%       zero replaced by eps so that the division is always defined.

a = mean2(x);
c = std2(x);

% Guard against flat blocks.
c(c==0) = eps;

y = (x - a)/c + 1;

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Quality_Indices/onion_mult.m
================================================
%%%%%%%%%%%%%% Q2n aux. function
function ris=onion_mult(onion1,onion2)
% ONION_MULT  Recursive product of two hypercomplex numbers (Q2n helper).
%
% onion1 and onion2 are row vectors of components. Vectors of length 1
% are multiplied directly, length 2 uses the closed form below, and
% longer vectors are split in halves and combined recursively.

n = length(onion1);

% Scalar (or empty) case: plain product.
if n <= 1
    ris = onion1*onion2;
    return
end

half = n/2;

% Split both operands; the upper halves have all components but the
% first negated.
p_lo = onion1(1:half);
p_hi = onion1(half+1:end);
p_hi = [p_hi(1), -p_hi(2:end)];
q_lo = onion2(1:half);
q_hi = onion2(half+1:end);
q_hi = [q_hi(1), -q_hi(2:end)];

if n == 2
    ris = [p_lo*q_lo - q_hi*p_hi, p_lo*q_hi + q_lo*p_hi];
    return
end

t1 = onion_mult(p_lo, q_lo);
t2 = onion_mult(q_hi, [p_hi(1), -p_hi(2:end)]);
t3 = onion_mult([p_lo(1), -p_lo(2:end)], q_hi);
t4 = onion_mult(q_lo, p_hi);

ris = [t1 - t2, t3 + t4];

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Quality_Indices/onion_mult2D.m
================================================
%%%%%%%%%%%%%% Q2n aux. function
function ris = onion_mult2D(onion1,onion2)
% ONION_MULT2D  Pixel-wise hypercomplex product (Q2n helper).
%
% Same recursion as onion_mult, but the components are stacked along the
% third dimension and every product is element-wise, so that one call
% multiplies a whole block of hypercomplex pixels.

[~,~,nb] = size(onion1);

% Single component: element-wise product.
if nb <= 1
    ris = onion1.*onion2;
    return
end

half = nb/2;

% Split both operands; the upper halves have all components but the
% first negated.
p_lo = onion1(:,:,1:half);
p_hi = onion1(:,:,half+1:end);
p_hi = cat(3, p_hi(:,:,1), -p_hi(:,:,2:end));
q_lo = onion2(:,:,1:half);
q_hi = onion2(:,:,half+1:end);
q_hi = cat(3, q_hi(:,:,1), -q_hi(:,:,2:end));

if nb == 2
    ris = cat(3, p_lo.*q_lo - q_hi.*p_hi, p_lo.*q_hi + q_lo.*p_hi);
    return
end

t1 = onion_mult2D(p_lo, q_lo);
t2 = onion_mult2D(q_hi, cat(3, p_hi(:,:,1), -p_hi(:,:,2:end)));
t3 = onion_mult2D(cat(3, p_lo(:,:,1), -p_lo(:,:,2:end)), q_hi);
t4 = onion_mult2D(q_lo, p_hi);

ris = cat(3, t1 - t2, t3 + t4);

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Quality_Indices/onions_quality.m
================================================
%%%%%%%%%%%%%% Q2n aux. function
function q = onions_quality(dat1,dat2,size1)
% ONIONS_QUALITY  Hypercomplex quality value of one image block (Q2n helper).
%
% Inputs:
%   dat1, dat2:  two blocks with the bands along the third dimension
%                (size1 x size1 pixels are assumed by the accumulators);
%   size1:       side of the square block.
%
% Output:
%   q:  one value per band; 1 x N3 in the general case, 1 x 1 x N3 (only
%       the last entry set) when the variance term is exactly zero.

dat1 = double(dat1);
dat2 = double(dat2);
% All bands of dat2 but the first are negated.
dat2 = cat(3, dat2(:,:,1), -dat2(:,:,2:end));
[~,~,nb] = size(dat1);
size2 = size1;

% n/(n-1) factor applied to every second-order statistic below.
bessel = (size1*size2)/((size1*size2)-1);

% Block normalization: dat1 is standardised band by band and the very
% same mean/std are applied to dat2 (whose bands 2..nb are temporarily
% sign-flipped back for the operation).
for k = 1:nb
    [blk, mu, sd] = norm_blocco(squeeze(dat1(:,:,k)));
    dat1(:,:,k) = blk;
    if k == 1
        band = dat2(:,:,k);
        if mu == 0
            dat2(:,:,k) = band - mu + 1;
        else
            dat2(:,:,k) = ((band - mu)/sd) + 1;
        end
    else
        band = -dat2(:,:,k);
        if mu == 0
            dat2(:,:,k) = -(band - mu + 1);
        else
            dat2(:,:,k) = -(((band - mu)/sd) + 1);
        end
    end
end

% Band means, their squared modulus and the per-pixel squared modulus.
m1 = zeros(1,nb);
m2 = zeros(1,nb);
mod_q1m = 0;
mod_q2m = 0;
mod_q1 = zeros(size1,size2);
mod_q2 = zeros(size1,size2);

for k = 1:nb
    b1 = squeeze(dat1(:,:,k));
    b2 = squeeze(dat2(:,:,k));
    m1(k) = mean2(b1);
    m2(k) = mean2(b2);
    mod_q1m = mod_q1m + (m1(k)^2);
    mod_q2m = mod_q2m + (m2(k)^2);
    mod_q1 = mod_q1 + (b1.^2);
    mod_q2 = mod_q2 + (b2.^2);
end

mod_q1m = sqrt(mod_q1m);
mod_q2m = sqrt(mod_q2m);
mod_q1 = sqrt(mod_q1);
mod_q2 = sqrt(mod_q2);

termine2 = (mod_q1m*mod_q2m);
termine4 = ((mod_q1m^2)+(mod_q2m^2));
int1 = bessel*mean2(mod_q1.^2);
int2 = bessel*mean2(mod_q2.^2);
termine3 = int1+int2-bessel*((mod_q1m^2)+(mod_q2m^2));

mean_bias = 2*termine2/termine4;

if termine3 == 0
    % Degenerate block: only the mean-bias term is returned.
    q = zeros(1,1,nb);
    q(:,:,nb) = mean_bias;
else
    cbm = 2/termine3;
    qu = onion_mult2D(dat1,dat2);
    qm = onion_mult(m1,m2);

    qv = zeros(1,nb);
    for k = 1:nb
        qv(k) = bessel*mean2(squeeze(qu(:,:,k)));
    end
    q = qv-bessel*qm;

    q = q*mean_bias*cbm;
end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Quality_Indices/q2n.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description: 
%           Q2n index. 
% 
% Interface:
%           [Q2n_index, Q2n_index_map] = q2n(I_GT, I_F, Q_blocks_size, Q_shift)
%
% Inputs:
%           I_GT:               Ground-Truth image;
%           I_F:                Fused Image;
%           Q_blocks_size:      Block size of the Q-index locally applied;
%           Q_shift:            Block shift of the Q-index locally applied.
%
% Outputs:
%           Q2n_index:          Q2n index;
%           Q2n_index_map:      Map of Q2n values.
%
% References:
%           [Garzelli09]        A. Garzelli and F. Nencini, "Hypercomplex quality assessment of multi/hyper-spectral images," 
%                               IEEE Geoscience and Remote Sensing Letters, vol. 6, no. 4, pp. 662-665, October 2009.
%           [Vivone20]          G. Vivone, M. Dalla Mura, A. Garzelli, R. Restaino, G. Scarpa, M.O. Ulfarsson, L. Alparone, and J. Chanussot, "A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting pansharpening with classical and emerging pansharpening methods", 
%                               IEEE Geoscience and Remote Sensing Magazine, doi: 10.1109/MGRS.2020.3019315.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [Q2n_index, Q2n_index_map] = q2n(I_GT, I_F, Q_blocks_size, Q_shift)
% Q2N  Block-wise Q2n index between a reference and a fused image.
% The image is scanned with Q_blocks_size x Q_blocks_size windows moved
% by Q_shift pixels; onions_quality is evaluated on every window.

[N1,N2,N3]=size(I_GT);
size2=Q_blocks_size;

% Number of window positions along rows (stepx) and columns (stepy).
stepx=ceil(N1/Q_shift);
stepy=ceil(N2/Q_shift);

% NOTE(review): only stepy is tested here, yet both counters are reset.
if stepy<=0
    stepy=1;
    stepx=1;
end

% Rows/columns missing for the last window to fit inside the image.
est1=(stepx-1)*Q_shift+Q_blocks_size-N1;
est2=(stepy-1)*Q_shift+Q_blocks_size-N2;

% Extend both images, band by band, by mirroring their last columns and
% rows. NOTE(review): the index ranges below presume est1, est2 >= 0 -
% confirm for Q_shift > Q_blocks_size.
if sum([(est1~=0),(est2~=0)])>0
  refref=[];
  fusfus=[];
  
  for i=1:N3
      % The first remaining band is processed, then peeled off below.
      a1=squeeze(I_GT(:,:,1));
    
      ia1=zeros(N1+est1,N2+est2);
      ia1(1:N1,1:N2)=a1;
      ia1(:,N2+1:N2+est2)=ia1(:,N2:-1:N2-est2+1);
      ia1(N1+1:N1+est1,:)=ia1(N1:-1:N1-est1+1,:);
      refref=cat(3,refref,ia1);
      
      if i<N3
          I_GT=I_GT(:,:,2:end);
      end
  end

  I_GT=refref;
  clear refref
  
  for i=1:N3
      a2=squeeze(I_F(:,:,1));
      
      ia2=zeros(N1+est1,N2+est2);
      ia2(1:N1,1:N2)=a2;
      ia2(:,N2+1:N2+est2)=ia2(:,N2:-1:N2-est2+1);
      ia2(N1+1:N1+est1,:)=ia2(N1:-1:N1-est1+1,:);
      fusfus=cat(3,fusfus,ia2);
      
      if i<N3
          I_F=I_F(:,:,2:end);
      end
  end
  
  I_F=fusfus;
  clear fusfus a1 a2 ia1 ia2

end

% Both images are converted to uint16 before the block-wise evaluation.
% NOTE(review): this rounds fractional data and saturates values outside
% the uint16 range - inputs are presumably expected in digital numbers.
I_F=uint16(I_F);
I_GT=uint16(I_GT);

[N1,N2,N3]=size(I_GT);

% Append all-zero bands until the band count is a power of two.
if ((ceil(log2(N3)))-log2(N3))~=0
    Ndif=(2^(ceil(log2(N3))))-N3;
    dif=zeros(N1,N2,Ndif);
    dif=uint16(dif);
    I_GT=cat(3,I_GT,dif);
    I_F=cat(3,I_F,dif);
end
[~,~,N3]=size(I_GT);

valori=zeros(stepx,stepy,N3);

% One onions_quality evaluation per window position.
for j=1:stepx
    for i=1:stepy
        o=onions_quality(I_GT(((j-1)*Q_shift)+1:((j-1)*Q_shift)+Q_blocks_size,((i-1)*Q_shift)+1:((i-1)*Q_shift)+size2,:),I_F(((j-1)*Q_shift)+1:((j-1)*Q_shift)+Q_blocks_size,((i-1)*Q_shift)+1:((i-1)*Q_shift)+size2,:),Q_blocks_size);
        valori(j,i,:)=o;    
    end
end

% Modulus over the band axis gives the map; its mean is the index.
Q2n_index_map=sqrt(sum((valori.^2),3));

Q2n_index=mean2(Q2n_index_map);

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Quality_Indices/ssim.m
================================================
function [mssim, ssim_map] = ssim(img1, img2, K, window, L)

% ========================================================================
% SSIM Index with automatic downsampling, Version 1.0
% Copyright(c) 2009 Zhou Wang
% All Rights Reserved.
%
% ----------------------------------------------------------------------
% Permission to use, copy, or modify this software and its documentation
% for educational and research purposes only and without fee is hereby
% granted, provided that this copyright notice and the original authors'
% names appear on all copies and supporting documentation. This program
% shall not be used, rewritten, or adapted as the basis of a commercial
% software or hardware product without first obtaining permission of the
% authors. The authors make no representations about the suitability of
% this software for any purpose. It is provided "as is" without express
% or implied warranty.
%----------------------------------------------------------------------
%
% This is an implementation of the algorithm for calculating the
% Structural SIMilarity (SSIM) index between two images
%
% Please refer to the following paper and the website with suggested usage
%
% Z. Wang, A. C. Bovik, H. R. Sheikh, and E. P. Simoncelli, "Image
% quality assessment: From error visibility to structural similarity,"
% IEEE Transactions on Image Processing, vol. 13, no. 4, pp. 600-612,
% Apr. 2004.
%
% http://www.ece.uwaterloo.ca/~z70wang/research/ssim/
%
% Note: This program is different from ssim_index.m, where no automatic
% downsampling is performed. (downsampling was done in the above paper
% and was described as suggested usage in the above website.)
%
% Kindly report any suggestions or corrections to zhouwang@ieee.org
%
%----------------------------------------------------------------------
%
%Input : (1) img1: the first image being compared
%        (2) img2: the second image being compared
%        (3) K: constants in the SSIM index formula (see the above
%            reference). default value: K = [0.01 0.03]
%        (4) window: local window for statistics (see the above
%            reference). default window is Gaussian given by
%            window = fspecial('gaussian', 11, 1.5);
%        (5) L: dynamic range of the images. default: L = 255
%
%Output: (1) mssim: the mean SSIM index value between 2 images.
%            If one of the images being compared is regarded as 
%            perfect quality, then mssim can be considered as the
%            quality measure of the other image.
%            If img1 = img2, then mssim = 1.
%        (2) ssim_map: the SSIM index map of the test image. The map
%            has a smaller size than the input images. The actual size
%            depends on the window size and the downsampling factor.
%        Both outputs are -Inf when the arguments are invalid (wrong
%        argument count, images of different size, image smaller than
%        the window, or K not a pair of non-negative values).
%
%Basic Usage:
%   Given 2 test images img1 and img2, whose dynamic range is 0-255
%
%   [mssim, ssim_map] = ssim(img1, img2);
%
%Advanced Usage:
%   User defined parameters. For example
%
%   K = [0.05 0.05];
%   window = ones(8);
%   L = 100;
%   [mssim, ssim_map] = ssim(img1, img2, K, window, L);
%
%Visualize the results:
%
%   mssim                        %Gives the mssim value
%   imshow(max(0, ssim_map).^4)  %Shows the SSIM index map
%========================================================================


% Failure sentinel; every early "return" below leaves these values.
mssim = -Inf;
ssim_map = -Inf;

if (nargin < 2 || nargin > 5)
   return;
end

% isequal compares the complete size vectors. An element-wise "~=" inside
% an "if" is true only when EVERY dimension differs, and it errors when
% the two images have a different number of dimensions.
if (~isequal(size(img1), size(img2)))
   return;
end

[M N] = size(img1);

% Window: default 11x11 Gaussian (image must be at least 11x11), or the
% user-supplied one (at least 4 elements and no larger than the image).
if (nargin < 4)
   if ((M < 11) || (N < 11))
      return
   end
   window = fspecial('gaussian', 11, 1.5);
else
   [H W] = size(window);
   if ((H*W) < 4 || (H > M) || (W > N))
      return
   end
end

% Stabilisation constants: defaults, or two non-negative user values.
if (nargin < 3)
   K = [0.01 0.03];
elseif (length(K) ~= 2 || K(1) < 0 || K(2) < 0)
   return;
end

% Dynamic range.
if (nargin < 5)
   L = 255;
end


img1 = double(img1);
img2 = double(img2);

% automatic downsampling
f = max(1,round(min(M,N)/256));
%downsampling by f
%use a simple low-pass filter 
if(f>1)
    lpf = ones(f,f);
    lpf = lpf/sum(lpf(:));
    img1 = imfilter(img1,lpf,'symmetric','same');
    img2 = imfilter(img2,lpf,'symmetric','same');

    img1 = img1(1:f:end,1:f:end);
    img2 = img2(1:f:end,1:f:end);
end

C1 = (K(1)*L)^2;
C2 = (K(2)*L)^2;
window = window/sum(sum(window));

mu1   = filter2(window, img1, 'valid');
mu2   = filter2(window, img2, 'valid');
mu1_sq = mu1.*mu1;
mu2_sq = mu2.*mu2;
mu1_mu2 = mu1.*mu2;
sigma1_sq = filter2(window, img1.*img1, 'valid') - mu1_sq;
sigma2_sq = filter2(window, img2.*img2, 'valid') - mu2_sq;
sigma12 = filter2(window, img1.*img2, 'valid') - mu1_mu2;

if (C1 > 0 && C2 > 0)
   ssim_map = ((2*mu1_mu2 + C1).*(2*sigma12 + C2))./((mu1_sq + mu2_sq + C1).*(sigma1_sq + sigma2_sq + C2));
else
   % With a zero constant the denominators may vanish: evaluate the ratio
   % only where it is defined and leave 1 elsewhere.
   numerator1 = 2*mu1_mu2 + C1;
   numerator2 = 2*sigma12 + C2;
   denominator1 = mu1_sq + mu2_sq + C1;
   denominator2 = sigma1_sq + sigma2_sq + C2;
   ssim_map = ones(size(mu1));
   index = (denominator1.*denominator2 > 0);
   ssim_map(index) = (numerator1(index).*numerator2(index))./(denominator1(index).*denominator2(index));
   index = (denominator1 ~= 0) & (denominator2 == 0);
   ssim_map(index) = numerator1(index)./denominator1(index);
end

mssim = mean2(ssim_map);

return

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/RRpansharp.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description: 
%     This method performs pansharpening. We assume that
%     the noisy satellite images yi, i=1,...,L, where y1 is the PAN image
%     and yi, i=2,...,L are the observed MS images, are related to the full
%     resolution target images by
% 
%       yi = Mi*Bi*xi + ni, i=1,...,L 
% 
%     where Mi is a downsampling operator, Bi is a circulant blurring matrix,
%     and ni is noise.  The method solves
%            min (1/2) sum_{i=1}^L || y_i - Mi*Bi*G*fi ||^2  + lambda * phi(G)
%            F, G
%     where phi is a regularizer function.
%     The function returns Xhat=G*F'. See [1] and [2] for details.
% 
% Interface:
%       Xhat_im = RRpansharp(Yim,varargin)
% 
% Inputs:
%         Yim : 1xL cell array containing the observed images the first image
%               is the PAN image and the last L-1 images are the MS images;
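%       CDiter: Number of cyclic descent iterations. 
%               CDiter=10 is the default;
%            r: The subspace dimension, r=7 is the default;
%       lambda: The regularization parameter, lambda=0.005 is the 
%               default;
%            q: penalty weights (vector of length r);
%           X0: Initial value for X = G * F';
%            d: (required) subsampling factor of each image in Yim with
%               respect to the finest grid, e.g. 1 for the PAN image;
%          mtf: (required) one value per image in Yim, from which the
%               standard deviation of each Gaussian blur is derived as
%               d.*sqrt(-2*log(mtf)/pi^2).
%       The options after Yim are passed as name-value pairs, e.g.
%       RRpansharp(Yim,'d',d,'mtf',mtf,'lambda',0.005).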
% 
% Outputs:
%    Xhat_im: estimated image (3D) at high resolution for each 
%             spectral channel.
% 
% References:
%           [Ulfarsson19]   M.O. Ulfarsson, F. Palsson, M.Dalla Mura, J.R. Sveinsson, "Sentinel-2 Sharpening using a Reduced-Rank Method", 
%                           IEEE Transactions on Geoscience and Remote Sensing, vol. 57, no. 9, pp. 6408-6420, 2019.
%           [Palsson19]     F. Palsson, MO. Ulfarsson, and JR. Sveinsson, "Model-Based Reduced-Rank Pansharpening", 
%                           IEEE Geoscience and Remote Sensing Letters, 2019
%           [Vivone20]      G. Vivone, M. Dalla Mura, A. Garzelli, R. Restaino, G. Scarpa, M.O. Ulfarsson, L. Alparone, and J. Chanussot, "A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting pansharpening with classical and emerging pansharpening methods", 
%                           IEEE Geoscience and Remote Sensing Magazine, doi: 10.1109/MGRS.2020.3019315.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function Xhat_im = RRpansharp(Yim,varargin)
% RRPANSHARP  Reduced-rank pansharpening by cyclic descent.
%
% Yim is a cell array of images (the first one is dropped from the
% output, see the last line); the remaining arguments are name-value
% pairs: 'CDiter', 'r', 'lambda', 'q', 'X0', 'tolgradnorm',
% 'Gstep_only', 'GCV', 'd', 'mtf'. 'd' and 'mtf' are mandatory.
% Returns the sharpened bands 2..L as an nl x nc x (L-1) array.

    % import the manopt optimizer
    addpath('./manopt')
    p1=pwd;
    cd('./manopt');
    importmanopt
    cd(p1)
    % initialization
    CDiter=10;
    r=7;
    lambda=0.005;
    X0 = '';
    tolgradnorm = 0.1;
    % q, d and mtf are resolved after parsing: the default of q depends
    % on the final value of r, and d/mtf have no default at all.
    q = [];
    d = [];
    mtf = [];
    Gstep_only=0;
    GCV = 0;
    for i=1:2:(length(varargin)-1)
        switch varargin{i}
            case 'CDiter'
                CDiter=varargin{i+1};
            case 'r'
                r=varargin{i+1};
            case 'lambda'
                lambda=varargin{i+1};
            case 'q'
                q=varargin{i+1};
            case 'X0'
                X0 = varargin{i+1};
            case 'tolgradnorm'
                tolgradnorm = varargin{i+1};
            case 'Gstep_only'
                Gstep_only = varargin{i+1};
            case 'GCV'
                GCV = varargin{i+1};
            case 'd'
                d = varargin{i+1};
            case 'mtf'
                mtf = varargin{i+1};
        end
    end
    % Default penalty weights, chosen for the rank actually requested
    % (previously they were fixed for r=7 before the options were read,
    % so passing 'r' alone always failed the length check below).
    if isempty(q)
        if(r==7)
            q = [1, 1.5, 4, 8, 15, 15, 20 ]';
        else
            q = ones(r,1);
        end
    end
    if isempty(d) || isempty(mtf)
        error('RRpansharp:missingParameter', ...
            'The ''d'' and ''mtf'' name-value parameters are required');
    end
    tic;
    if(length(q)~=r), error('The length of q has to match r'); end
    % dimensions of the inputs
    L=length(Yim);
    for i=1:L, Yim{i}=double(Yim{i}); end
    [nl,nc] = size(Yim{1});
    n = nl*nc;
    [Yim2, av] = normaliseData(Yim);
    % Sequence of bands
    % [B1 B2 B3 B4 B5 B6 B7 B8 B8A B9 B11 B12]
    % subsampling factors (in pixels)
    %d = [6 1 1 1 2 2 2 1 2 6 2 2]';
    % convolution  operators (Gaussian convolution filters), taken from ref [5]
    %mtf = [ .32 .26 .28 .24 .38 .34 .34 .26 .33 .26 .22 .23];
    sdf = d.*sqrt(-2*log(mtf)/pi^2)';
    % Do not sharpen high-res bands
    sdf(d==1) = 0;
    % remove border for computing the subspace and the result (because of
    % circular assumption
    limsub = 2;
    % kernel filter support
    dx = 12;
    dy = 12;
    % Define blurring operators
    FBM = createConvKernel(sdf,d,nl,nc,L,dx,dy);
    % IMPORTANT!!!
    % Note that the blur kernels are shifted to accomodate the co-registration
    % of real images with different resolutions.
    [Y,M,F]=initialization(Yim2,sdf,nl,nc,L,dx,dy,d,limsub,r);
    Mask=reshape(M,[n,L])';
    % CD
    if isempty(X0)
        Z = zeros(r,n); 
    else
        % Warm start: factor the supplied image with a rank-r SVD.
        [X0, ~] = normaliseData(X0);
        X0 = reshape(X0,[n,L])';
        [F,D,V]=svd(X0,'econ');
        F = F(:,1:r);
        Z = D(1:r,1:r)*V(:,1:r)';
    end
    % Operators for differences
    [FDH,FDV,FDHC,FDVC] = createDiffkernels(nl,nc,r);
    % Compute weights
    sigmas = 1;
    W = computeWeights(Y,d,sigmas,nl);
    Whalf=W.^(1/2);
    if( GCV == 1), Gstep_only=1; end
    if( Gstep_only ~= 0), CDiter=1; end
    for jCD=1:CDiter
       [Z,Jcost(jCD),options]=Zstep(Y,FBM,F,lambda,nl,nc,Z,Mask,q,FDH,FDV,FDHC,FDVC,W,Whalf,tolgradnorm);              
       if(Gstep_only==0) 
           F1=Fstep(F,Z,Y,FBM,nl,nc,Mask);  
           F=F1;
       end
       if( GCV==1 )
            % Generalised cross-validation terms. NOTE: num and den are
            % computed but not used or returned by this function.
            Ynoise = ( abs(Y) > 0 ) .* randn( size(Y) );
            [Znoise]=Zstep(Ynoise,FBM,F,lambda,nl,nc,Z,Mask,q,FDH,FDV,FDHC,FDVC,W,Whalf,tolgradnorm);
            HtHBXnoise = Mask.*ConvCM(F*Znoise,FBM,nl);
            Ynoise = Ynoise([2:end],:); 
            HtHBXnoise = HtHBXnoise([2:end],:);
            den = trace(Ynoise*(Ynoise - HtHBXnoise)');           
            HtHBX=Mask.*ConvCM(F*Z,FBM,nl); 
            num = norm( Y([2:end],:) - HtHBX([2:end],:) , 'fro')^2;         
       end
    end
    
    Xhat_im = conv2im(F*Z,nl,nc,L);
    Xhat_im = unnormaliseData(Xhat_im,av);
    % Drop the first band (the first image of Yim).
    Xhat_im = Xhat_im(:,:,2:end);
end

function [Y,M,F]=initialization(Yim2,sdf,nl,nc,L,dx,dy,d,limsub,r)
% INITIALIZATION  Initial subspace and sampled observations.
%
% Every image of Yim2 is resized by its factor d(i), blurred with the
% subspace kernels, trimmed by limsub pixels on each side, and the first
% r left singular vectors of the resulting L x pixels matrix are
% returned in F. M and Y come from createSubsampling.

    FBM2 = createConvKernelSubspace(sdf,nl,nc,L,dx,dy);
    % Generate LR MS image FOR SUBSPACE
    % Upsample image via interpolation
    Ylim = zeros(nl,nc,L);
    for i=1:L
        Ylim(:,:,i) = imresize(Yim2{i},d(i));
    end
    Y2im=real(ifft2(fft2(Ylim).*FBM2));
    Y2tr=Y2im(limsub+1:end-limsub,limsub+1:end-limsub,:);
    % The trimmed size follows limsub (it used to be hard-coded for
    % limsub = 2).
    Y2n = reshape(Y2tr,[(nl-2*limsub)*(nc-2*limsub),L]);
    % SVD analysis
    % Y2n is the image for subspace with the removed border
    [F,~,~] = svd(Y2n','econ');
    F=F(:,1:r);
    [M, Y] = createSubsampling(Yim2,d,nl,nc,L);
end


function [Z, xcost,options]=Zstep(Y,FBM,F,tau,nl,nc,Z,Mask,q,FDH,FDV,FDHC,FDVC,W,Whalf,tolgradnorm)
% ZSTEP  Update of the coefficients Z for a fixed basis F.
% Builds the right-hand side F'*B'*Y and hands the problem to CG.
% Whalf and tolgradnorm are accepted but not used here; xcost and
% options are placeholders (1 and []).
    rank_dim = size(F,2);
    rhs = F'*ConvCM(Y,conj(FBM),nl);
    Z = CG(Z,F,Y,rhs,FBM,Mask,nl,nc,rank_dim,tau,q,FDH,FDV,FDHC,FDVC,W);
    options = [];
    xcost = 1;
end

function F1=Fstep(F,Z,Y,FBM,nl,nc,Mask)
% FSTEP  Update of the basis F for fixed coefficients Z.
%
% For each band ii the masked, blurred coefficients MBZT(:,:,ii) are
% built, together with their Gram matrix A(:,:,ii) and their product with
% the observations. F is then optimised with manopt's trustregions on
% the manifold returned by stiefelfactory(L,r,1), starting from the
% current F.
     F0=F;%   U; % initialization
     [L,r]=size(F);
     n=size(Z,2);
     % Preallocate instead of growing inside the loop.
     MBZT=zeros(r,n,L);
     A=zeros(r,r,L);
     ZBMTy=zeros(r,L);
     for ii=1:L
        MBZT(:,:,ii)=repmat(Mask(ii,:),[r,1]).*ConvCM(Z,repmat(FBM(:,:,ii),[1,1,r]),nl);
        A(:,:,ii)=MBZT(:,:,ii)*MBZT(:,:,ii)';
        ZBMTy(:,ii)=MBZT(:,:,ii)*Y(ii,:)';
     end
     ZBYT=ZBMTy';%    BTY*Z';
     manifold = stiefelfactory(L,r,1); %euclideanfactory(L,r); 
     problem.M = manifold;
     problem.cost  = @(F) costF(F,MBZT,Y); 
     problem.egrad = @(F) egrad(F,A,ZBYT);  
     % No Hessian is supplied; silence manopt's approximation warning.
     warning('off', 'manopt:getHessian:approx') 
     options.tolgradnorm = 1e-2;
     options.verbosity=0;
     F1 = trustregions(problem,F0,options);

end

% Cost functions

function [Ju]=costF(F,MBZT,Y)
% COSTF  Data-fit cost of the basis F.
% Sum over the rows of F of 0.5*||MBZT(:,:,i)'*F(i,:)' - Y(i,:)'||^2.
    Ju = 0;
    for band = 1:size(F,1)
        resid = MBZT(:,:,band)'*F(band,:)' - Y(band,:)';
        Ju = Ju + 0.5*norm(resid,'fro')^2;
    end
end

function [Du]=egrad(F,A,ZBYT)
% EGRAD  Euclidean gradient of costF with respect to F.
% Row ii of the result is F(ii,:)*A(:,:,ii)' - ZBYT(ii,:), for every
% slice of A.
    Du = 0*F;
    for band = 1:size(A,3)
        gram = A(:,:,band)';
        Du(band,:) = F(band,:)*gram - ZBYT(band,:);
    end
end


%%% AUXILILARY FUNCTIONS

function [FDH,FDV,FDHC,FDVC] = createDiffkernels(nl,nc,r)
% CREATEDIFFKERNELS  FFTs of the circular first-difference kernels.
%
% The horizontal kernel has +1 at (1,1) and -1 at (1,nc); the vertical
% one has +1 at (1,1) and -1 at (nl,1). Each 2-D FFT is replicated r
% times along the third dimension; FDHC/FDVC are the complex conjugates.

    % Horizontal difference.
    kernel_h = zeros(nl,nc);
    kernel_h(1,1) = 1;
    kernel_h(1,nc) = -1;

    % Vertical difference.
    kernel_v = zeros(nl,nc);
    kernel_v(1,1) = 1;
    kernel_v(nl,1) = -1;

    FDH = repmat(fft2(kernel_h),[1 1 r]);
    FDV = repmat(fft2(kernel_v),[1 1 r]);

    FDHC = conj(FDH);
    FDVC = conj(FDV);
end


function [Yim, av] = normaliseData(Yim)
% NORMALISEDATA  Scale every band to unit mean-squared power.
%
% Input:
%   Yim:  cell array of images (row or column) or a 3-D array whose
%         third dimension indexes the bands.
% Outputs:
%   Yim:  same container and shape; every band is replaced by
%         sqrt(band.^2/av), i.e. the magnitude of the band divided by
%         its RMS value (signs are discarded);
%   av:   nb x 1 column with the mean squared value of every band.

    if iscell(Yim)
        % mean squared power = 1
        nb = length(Yim);
        av = zeros(nb,1);
        for i=1:nb
            av(i,1) = mean2(Yim{i}.^2);
            % Linear indexing on both sides: writing to Yim{i,1} would
            % grow a 1xL row cell into an LxL one and shift the linear
            % indices read by the following iterations.
            Yim{i} = sqrt(Yim{i}.^2/av(i,1));
        end   
    else
        nb = size(Yim,3);
        av = zeros(nb,1);
        for i=1:nb
            av(i,1) = mean2(Yim(:,:,i).^2);
            Yim(:,:,i) = sqrt(Yim(:,:,i).^2/av(i,1));
        end
    end
end

function FBM = createConvKernel(sdf,d,nl,nc,L,dx,dy)
    %--------------------------------------------------------------------------
    %   Build convolution kernels
    %--------------------------------------------------------------------------
    % For every band i with d(i) > 1 a Gaussian of standard deviation
    % sdf(i) is placed around the image centre (offset by d(i)/2-1
    % pixels), circularly centred with fftshift, normalised to unit sum
    % and transformed with fft2. Bands with d(i) <= 1 get a unit impulse
    % (identity filter).
    % dy is the kernel extent along rows, dx along columns.
    % Returns FBM, an nl x nc x L array of kernel FFTs.
    
    middlel=((nl)/2);
    middlec=((nc)/2);
    % kernel filters expanded to size [nl,nc]
    B = zeros(nl,nc,L);
    % fft2 of kernels
    FBM = zeros(nl,nc,L);
    for i=1:L
        if d(i) > 1
            % fspecial takes [rows cols]; the target slice below spans dy
            % rows and dx columns, so the kernel must be dy-by-dx (the
            % two only coincide when dx == dy).
            h = fspecial('gaussian',[dy,dx],sdf(i));
            B((middlel-dy/2+1:middlel+dy/2)-d(i)/2+1,(middlec-dx/2+1:middlec+dx/2)-d(i)/2+1,i) = h; %run
            % circularly center
            B(:,:,i)= fftshift(B(:,:,i));
            % normalize
            B(:,:,i) = B(:,:,i)/sum(sum(B(:,:,i)));
            FBM(:,:,i) = fft2(B(:,:,i));
        else
            B(1,1,i) = 1;
            FBM(:,:,i) = fft2(B(:,:,i));
        end
    end
end

function FBM2 = createConvKernelSubspace(sdf,nl,nc,L,dx,dy)

    %--------------------------------------------------------------------------
    %   Build convolution kernels FOR SUBSPACE!!!!
    %--------------------------------------------------------------------------
    % Every band whose blur sdf(i) is below the largest one, s2, gets the
    % Gaussian of standard deviation sqrt(s2^2 - sdf(i)^2); the bands at
    % the maximum get a unit impulse. Kernels are centred on the image,
    % circularly shifted with fftshift, normalised to unit sum and
    % transformed with fft2.
    % dy is the kernel extent along rows, dx along columns; both are
    % increased by one internally.
    % Returns FBM2, an nl x nc x L array of kernel FFTs.
    %
    middlel=round((nl+1)/2);
    middlec=round((nc+1)/2);

    dx = dx+1;
    dy = dy+1;

    % kernel filters expanded to size [nl,nc]
    B = zeros(nl,nc,L);
    % fft2 of kernels
    FBM2 = zeros(nl,nc,L);

    s2 = max(sdf);
    for i=1:L
        if sdf(i) < s2
            % fspecial takes [rows cols]; the target slice below spans dy
            % rows and dx columns, so the kernel must be dy-by-dx (the
            % two only coincide when dx == dy).
            h = fspecial('gaussian',[dy,dx],sqrt(s2^2-sdf(i)^2));
            B(middlel-(dy-1)/2:middlel+(dy-1)/2,middlec-(dx-1)/2:middlec+(dx-1)/2,i) = h;
    
            %circularly center
            B(:,:,i)= fftshift(B(:,:,i));
    
            % normalize
            B(:,:,i) = B(:,:,i)/sum(sum(B(:,:,i)));
            FBM2(:,:,i) = fft2(B(:,:,i));
        else
            % unit impulse
            B(1,1,i) = 1;
            FBM2(:,:,i) = fft2(B(:,:,i));
        end
    end
end

function X = ConvCM(X,FKM,nl,nc,L)
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % Circular convolution of every band of X, given as a matrix and
    % returned as a matrix.
    % size(X) is [no_bands,n], one row per band.
    % FKM is the cube containing the fft2 of the convolution kernels.
    % With three arguments the number of bands is the row count of X and
    % the number of columns of the image is n/nl.
    % Equivalent one-liner:
    % ConvCM = @(X,FKM)  reshape(real(ifft2(fft2(reshape(X', nl,nc,nb)).*FKM)), nl*nc,nb)';
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

    if nargin == 3
        [L,npix] = size(X);
        nc = npix/nl;
    end

    cube = conv2im(X,nl,nc,L);
    filtered = real(ifft2(fft2(cube).*FKM));
    X = conv2mat(filtered);
end

function X = conv2mat(X,nl,nc,L)
% CONV2MAT  Image cube -> matrix with one row per band.
% A 3-D array nl x nc x L becomes an L x (nl*nc) matrix; an array that
% squeezes to 2-D becomes a single row. Any other input is returned
% untouched. The arguments nl, nc and L are ignored: the sizes are always
% taken from X itself.
    if ndims(X) == 3
        bands = size(X,3);
    elseif ndims(squeeze(X)) == 2
        bands = 1;
    else
        return
    end
    X = reshape(X,numel(X)/bands,bands)';
end

function [M, Y] = createSubsampling(Yim,d,nl,nc,L)
% CREATESUBSAMPLING  Sampling masks and sampled observation matrix.
%
% Outputs:
%   M:  nl x nc x L array; band i is 1 on the top-left pixel of every
%       d(i) x d(i) cell and 0 elsewhere;
%   Y:  matrix with one row per band; the pixels of Yim{i} are written at
%       the linear positions where M(:,:,i) is 1, the rest stays 0.

    % subsampling matrix
    M = zeros(nl,nc,L);

    for band = 1:L
        factor = d(band);

        % One sample in the top-left corner of each factor x factor cell.
        cell_pattern = zeros(factor);
        cell_pattern(1,1) = 1;
        grid = ones(floor(nl/factor),floor(nc/factor));
        M(:,:,band) = kron(grid,cell_pattern);

        samples = find(M(:,:,band) == 1);
        Y(band,samples) = conv2mat(Yim{band},nl/factor,nc/factor,1);
    end
end

function [Yim] = unnormaliseData(Yim, av)
% UNNORMALISEDATA  Undo the scaling applied by normaliseData.
%
% Inputs:
%   Yim:  cell array of images (row or column) or a 3-D array whose
%         third dimension indexes the bands;
%   av:   mean squared value of every band, indexed as av(i,1).
% Output:
%   Yim:  same container and shape; every band is replaced by
%         sqrt(band.^2*av), i.e. its magnitude times sqrt(av) (signs are
%         discarded).

    if iscell(Yim)
        % mean squared power = 1
        nb = length(Yim);    
        for i=1:nb
            % Linear indexing on both sides: writing to Yim{i,1} would
            % grow a 1xL row cell into an LxL one and shift the linear
            % indices read by the following iterations.
            Yim{i} = sqrt(Yim{i}.^2*av(i,1));
        end
    else
        nb = size(Yim,3);
        for i=1:nb
            Yim(:,:,i) = sqrt(Yim(:,:,i).^2*av(i,1));
        end
    end
end


function W = computeWeights(Y,d,sigmas,nl)
% COMPUTEWEIGHTS  Edge-aware pixel weights from the full-resolution bands.
%
% As in eq. (14) and (15): the squared gradient magnitude of every band
% with d == 1 is computed, the per-pixel maximum over those bands is
% taken, normalised by its 95% quantile and mapped through a Gaussian of
% width sigmas. Weights are floored at 0.5. The result is returned as a
% row vector (conv2mat layout).

    full_res = find(d==1)';
    for b = full_res
        % 'prewitt' is an alternative; 'intermediate' gives comparable
        % results.
        grad(:,:,b) = imgradient(conv2im(Y(b,:),nl),'intermediate').^2;
    end

    % Strongest edge response across bands, normalised.
    grad = sqrt(max(grad,[],3));
    grad = grad / quantile(grad(:),0.95);

    Wim = exp(-grad.^2/2/sigmas^2);
    too_small = Wim<0.5;
    Wim(too_small) = 0.5;

    W = conv2mat(Wim,nl);
end

function X = conv2im(X,nl,nc,L)
% CONV2IM  Matrix with one row per band -> image cube nl x nc x L.
% With two arguments (X,nl) the number of bands L is the row count of X
% and nc is derived as size(X,2)/nl.

    % A single-column X is handed to conv2mat (which returns it as a row).
    % NOTE(review): in the two-argument form nc and L are not yet
    % assigned at this point, so this branch looks unusable in that
    % form - confirm.
    if size(X,2)==1
        X = conv2mat(X,nl,nc,L);
    end
    if nargin == 2
        [L,n] = size(X);
        % NOTE(review): same concern as above; additionally n is measured
        % before the conversion, so nc below would be 1/nl - confirm.
        if n==1
            X = conv2mat(X,nl,nc,L);
        end
        nc = n/nl;
    end
    % Each row of X (one band) becomes one nl x nc slice.
    X = reshape(X',nl,nc,L);
end

function [J,gradJ,AtAg] = grad_cost_G(Z,F,Y,UBTMTy,FBM,Mask,nl,nc,r,tau,q,FDH,FDV,FDHC,FDVC,W)
% GRAD_COST_G  Quadratic cost of the Z-step, its gradient and operator.
%
% Outputs:
%   J:      1/2*<Z,AtAg> - <Z,UBTMTy>;
%   gradJ:  AtAg - UBTMTy;
%   AtAg:   the linear operator applied to Z: masked blur term plus the
%           weighted difference penalty scaled by 2*tau*q.
% Y, nc and r are accepted but not used.

    % Data term: blur, mask, then apply the adjoint blur and F'.
    masked_blur = Mask.*ConvCM(F*Z,FBM,nl);

    % Penalty term: weighted horizontal and vertical differences, mapped
    % back with the companion kernels.
    diff_h = ConvCM(Z,FDHC,nl).*W;
    diff_v = ConvCM(Z,FDVC,nl).*W;
    grad_pen = ConvCM(diff_h,FDH,nl)+ConvCM(diff_v,FDV,nl);

    AtAg = F'*ConvCM(masked_blur,conj(FBM),nl)+2*tau*(q*ones(1,nl*nc)).*grad_pen;
    gradJ = AtAg-UBTMTy;
    J = 1/2 * sum( sum( Z .* AtAg ) ) - sum( sum( Z.*UBTMTy ) );
end

function [ Z ] = CG(Z,F,Y,UBTMTy,FBM,Mask,nl,nc,r,tau,q,FDH,FDV,FDHC,FDVC,W)
% CG  Conjugate-gradient iterations on the quadratic objective evaluated
% by grad_cost_G, starting from the supplied Z.
%
%   Z = CG(Z, F, Y, UBTMTy, FBM, Mask, nl, nc, r, tau, q,
%          FDH, FDV, FDHC, FDVC, W)
%
%   All arguments after Z are forwarded unchanged to grad_cost_G. The
%   iteration stops as soon as the residual norm is no longer above 0.1 or
%   after 1000 iterations, whichever comes first.

    maxiter = 1000;
    tolgradnorm = 0.1;%1e-6;

    % Initial residual is the negative gradient at the starting point.
    [~,grad] = grad_cost_G(Z,F,Y,UBTMTy,FBM,Mask,nl,nc,r,tau,q,FDH,FDV,FDHC,FDVC,W);
    gradnorm = norm(grad(:));
    res = -grad;
    rho = res(:).' * res(:);        % squared residual, reused for alpha and beta

    for iter = 1:maxiter
        % Written as ~(a > b) so that a NaN norm also terminates the loop.
        if ~( gradnorm > tolgradnorm )
            break;
        end
        % fprintf('%5d\t%.8e\n', iter, gradnorm);

        % Search direction: the residual on the first pass, afterwards the
        % residual plus beta times the previous direction.
        if iter == 1
            desc_dir = res;
        else
            beta = rho / rho_prev;
            desc_dir = res + beta * desc_dir;
        end

        % Third output of grad_cost_G is the operator applied to desc_dir.
        [~, ~, AtAp] = grad_cost_G(desc_dir,F,Y,UBTMTy,FBM,Mask,nl,nc,r,tau,q,FDH,FDV,FDHC,FDVC,W);
        alpha = rho / ( desc_dir(:).' * AtAp(:) );

        % Update the iterate and the residual.
        Z = Z + alpha * desc_dir;
        res = res - alpha* AtAp;

        rho_prev = rho;
        rho = res(:).' * res(:);
        gradnorm = norm( res(:) );
    end
end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/CLA.txt
================================================
Thank you for your interest in Manopt. The purpose of this Contributor License Agreement is to
clarify the intellectual property license granted with contributions of software from any person or
entity (the "Contributor") to the owners of Manopt. This license is for your protection as a
Contributor of software to Manopt and does not change your right to use your own contributions for
any other purpose.

The owners of Manopt are the copyright holders of Manopt indicated in the license files distributed
with the software.

You and the owners of Manopt hereby accept and agree to the following terms and conditions:

Your "Contributions" means all of your past, present and future contributions of object code, source
code and documentation to Manopt, however submitted to Manopt, excluding any submissions that are
conspicuously marked or otherwise designated in writing by You as "Not a Contribution."

You hereby grant to the owners of Manopt a non-exclusive, irrevocable, worldwide, no-charge,
transferable copyright license to use, execute, prepare derivative works of, and distribute
(internally and externally, in object code and, if included in your Contributions, source code form)
your Contributions. Except for the rights granted to the owners of Manopt in this paragraph, You
reserve all right, title and interest in and to your Contributions.

You represent that you are legally entitled to grant the above license. If your employer(s) have
rights to intellectual property that you create, you represent that you have received permission to
make the Contributions on behalf of that employer, or that your employer has waived such rights for
your Contributions to Manopt.

You represent that, except as disclosed in your Contribution submission(s), each of your
Contributions is your original creation. You represent that your Contribution submission(s)
included complete details of any license or other restriction (including, but not limited to,
related patents and trademarks) associated with any part of your Contribution(s) (including a copy
of any applicable license agreement). You agree to notify the owners of Manopt of any facts or
circumstances of which you become aware that would make Your representations in the Agreement
inaccurate in any respect.

You are not expected to provide support for your Contributions, except to the extent you desire to
provide support. You may provide support for free, for a fee, or not at all. Your Contributions are
provided as-is, with all faults, defects and errors, and without any warranty of any kind (either
express or implied) including, without limitation, any implied warranty of merchantability and
fitness for a particular purpose and any warranty of non-infringement.


This CLA is a modification of the CLA used by the UW Calendar project of the University of
Washington: <http://www.washington.edu/ucal/CLicense.html>


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/COPYING.txt
================================================
                    GNU GENERAL PUBLIC LICENSE
                       Version 3, 29 June 2007

 Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
 Everyone is permitted to copy and distribute verbatim copies
 of this license document, but changing it is not allowed.

                            Preamble

  The GNU General Public License is a free, copyleft license for
software and other kinds of works.

  The licenses for most software and other practical works are designed
to take away your freedom to share and change the works.  By contrast,
the GNU General Public License is intended to guarantee your freedom to
share and change all versions of a program--to make sure it remains free
software for all its users.  We, the Free Software Foundation, use the
GNU General Public License for most of our software; it applies also to
any other work released this way by its authors.  You can apply it to
your programs, too.

  When we speak of free software, we are referring to freedom, not
price.  Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
them if you wish), that you receive source code or can get it if you
want it, that you can change the software or use pieces of it in new
free programs, and that you know you can do these things.

  To protect your rights, we need to prevent others from denying you
these rights or asking you to surrender the rights.  Therefore, you have
certain responsibilities if you distribute copies of the software, or if
you modify it: responsibilities to respect the freedom of others.

  For example, if you distribute copies of such a program, whether
gratis or for a fee, you must pass on to the recipients the same
freedoms that you received.  You must make sure that they, too, receive
or can get the source code.  And you must show them these terms so they
know their rights.

  Developers that use the GNU GPL protect your rights with two steps:
(1) assert copyright on the software, and (2) offer you this License
giving you legal permission to copy, distribute and/or modify it.

  For the developers' and authors' protection, the GPL clearly explains
that there is no warranty for this free software.  For both users' and
authors' sake, the GPL requires that modified versions be marked as
changed, so that their problems will not be attributed erroneously to
authors of previous versions.

  Some devices are designed to deny users access to install or run
modified versions of the software inside them, although the manufacturer
can do so.  This is fundamentally incompatible with the aim of
protecting users' freedom to change the software.  The systematic
pattern of such abuse occurs in the area of products for individuals to
use, which is precisely where it is most unacceptable.  Therefore, we
have designed this version of the GPL to prohibit the practice for those
products.  If such problems arise substantially in other domains, we
stand ready to extend this provision to those domains in future versions
of the GPL, as needed to protect the freedom of users.

  Finally, every program is threatened constantly by software patents.
States should not allow patents to restrict development and use of
software on general-purpose computers, but in those that do, we wish to
avoid the special danger that patents applied to a free program could
make it effectively proprietary.  To prevent this, the GPL assures that
patents cannot be used to render the program non-free.

  The precise terms and conditions for copying, distribution and
modification follow.

                       TERMS AND CONDITIONS

  0. Definitions.

  "This License" refers to version 3 of the GNU General Public License.

  "Copyright" also means copyright-like laws that apply to other kinds of
works, such as semiconductor masks.

  "The Program" refers to any copyrightable work licensed under this
License.  Each licensee is addressed as "you".  "Licensees" and
"recipients" may be individuals or organizations.

  To "modify" a work means to copy from or adapt all or part of the work
in a fashion requiring copyright permission, other than the making of an
exact copy.  The resulting work is called a "modified version" of the
earlier work or a work "based on" the earlier work.

  A "covered work" means either the unmodified Program or a work based
on the Program.

  To "propagate" a work means to do anything with it that, without
permission, would make you directly or secondarily liable for
infringement under applicable copyright law, except executing it on a
computer or modifying a private copy.  Propagation includes copying,
distribution (with or without modification), making available to the
public, and in some countries other activities as well.

  To "convey" a work means any kind of propagation that enables other
parties to make or receive copies.  Mere interaction with a user through
a computer network, with no transfer of a copy, is not conveying.

  An interactive user interface displays "Appropriate Legal Notices"
to the extent that it includes a convenient and prominently visible
feature that (1) displays an appropriate copyright notice, and (2)
tells the user that there is no warranty for the work (except to the
extent that warranties are provided), that licensees may convey the
work under this License, and how to view a copy of this License.  If
the interface presents a list of user commands or options, such as a
menu, a prominent item in the list meets this criterion.

  1. Source Code.

  The "source code" for a work means the preferred form of the work
for making modifications to it.  "Object code" means any non-source
form of a work.

  A "Standard Interface" means an interface that either is an official
standard defined by a recognized standards body, or, in the case of
interfaces specified for a particular programming language, one that
is widely used among developers working in that language.

  The "System Libraries" of an executable work include anything, other
than the work as a whole, that (a) is included in the normal form of
packaging a Major Component, but which is not part of that Major
Component, and (b) serves only to enable use of the work with that
Major Component, or to implement a Standard Interface for which an
implementation is available to the public in source code form.  A
"Major Component", in this context, means a major essential component
(kernel, window system, and so on) of the specific operating system
(if any) on which the executable work runs, or a compiler used to
produce the work, or an object code interpreter used to run it.

  The "Corresponding Source" for a work in object code form means all
the source code needed to generate, install, and (for an executable
work) run the object code and to modify the work, including scripts to
control those activities.  However, it does not include the work's
System Libraries, or general-purpose tools or generally available free
programs which are used unmodified in performing those activities but
which are not part of the work.  For example, Corresponding Source
includes interface definition files associated with source files for
the work, and the source code for shared libraries and dynamically
linked subprograms that the work is specifically designed to require,
such as by intimate data communication or control flow between those
subprograms and other parts of the work.

  The Corresponding Source need not include anything that users
can regenerate automatically from other parts of the Corresponding
Source.

  The Corresponding Source for a work in source code form is that
same work.

  2. Basic Permissions.

  All rights granted under this License are granted for the term of
copyright on the Program, and are irrevocable provided the stated
conditions are met.  This License explicitly affirms your unlimited
permission to run the unmodified Program.  The output from running a
covered work is covered by this License only if the output, given its
content, constitutes a covered work.  This License acknowledges your
rights of fair use or other equivalent, as provided by copyright law.

  You may make, run and propagate covered works that you do not
convey, without conditions so long as your license otherwise remains
in force.  You may convey covered works to others for the sole purpose
of having them make modifications exclusively for you, or provide you
with facilities for running those works, provided that you comply with
the terms of this License in conveying all material for which you do
not control copyright.  Those thus making or running the covered works
for you must do so exclusively on your behalf, under your direction
and control, on terms that prohibit them from making any copies of
your copyrighted material outside their relationship with you.

  Conveying under any other circumstances is permitted solely under
the conditions stated below.  Sublicensing is not allowed; section 10
makes it unnecessary.

  3. Protecting Users' Legal Rights From Anti-Circumvention Law.

  No covered work shall be deemed part of an effective technological
measure under any applicable law fulfilling obligations under article
11 of the WIPO copyright treaty adopted on 20 December 1996, or
similar laws prohibiting or restricting circumvention of such
measures.

  When you convey a covered work, you waive any legal power to forbid
circumvention of technological measures to the extent such circumvention
is effected by exercising rights under this License with respect to
the covered work, and you disclaim any intention to limit operation or
modification of the work as a means of enforcing, against the work's
users, your or third parties' legal rights to forbid circumvention of
technological measures.

  4. Conveying Verbatim Copies.

  You may convey verbatim copies of the Program's source code as you
receive it, in any medium, provided that you conspicuously and
appropriately publish on each copy an appropriate copyright notice;
keep intact all notices stating that this License and any
non-permissive terms added in accord with section 7 apply to the code;
keep intact all notices of the absence of any warranty; and give all
recipients a copy of this License along with the Program.

  You may charge any price or no price for each copy that you convey,
and you may offer support or warranty protection for a fee.

  5. Conveying Modified Source Versions.

  You may convey a work based on the Program, or the modifications to
produce it from the Program, in the form of source code under the
terms of section 4, provided that you also meet all of these conditions:

    a) The work must carry prominent notices stating that you modified
    it, and giving a relevant date.

    b) The work must carry prominent notices stating that it is
    released under this License and any conditions added under section
    7.  This requirement modifies the requirement in section 4 to
    "keep intact all notices".

    c) You must license the entire work, as a whole, under this
    License to anyone who comes into possession of a copy.  This
    License will therefore apply, along with any applicable section 7
    additional terms, to the whole of the work, and all its parts,
    regardless of how they are packaged.  This License gives no
    permission to license the work in any other way, but it does not
    invalidate such permission if you have separately received it.

    d) If the work has interactive user interfaces, each must display
    Appropriate Legal Notices; however, if the Program has interactive
    interfaces that do not display Appropriate Legal Notices, your
    work need not make them do so.

  A compilation of a covered work with other separate and independent
works, which are not by their nature extensions of the covered work,
and which are not combined with it such as to form a larger program,
in or on a volume of a storage or distribution medium, is called an
"aggregate" if the compilation and its resulting copyright are not
used to limit the access or legal rights of the compilation's users
beyond what the individual works permit.  Inclusion of a covered work
in an aggregate does not cause this License to apply to the other
parts of the aggregate.

  6. Conveying Non-Source Forms.

  You may convey a covered work in object code form under the terms
of sections 4 and 5, provided that you also convey the
machine-readable Corresponding Source under the terms of this License,
in one of these ways:

    a) Convey the object code in, or embodied in, a physical product
    (including a physical distribution medium), accompanied by the
    Corresponding Source fixed on a durable physical medium
    customarily used for software interchange.

    b) Convey the object code in, or embodied in, a physical product
    (including a physical distribution medium), accompanied by a
    written offer, valid for at least three years and valid for as
    long as you offer spare parts or customer support for that product
    model, to give anyone who possesses the object code either (1) a
    copy of the Corresponding Source for all the software in the
    product that is covered by this License, on a durable physical
    medium customarily used for software interchange, for a price no
    more than your reasonable cost of physically performing this
    conveying of source, or (2) access to copy the
    Corresponding Source from a network server at no charge.

    c) Convey individual copies of the object code with a copy of the
    written offer to provide the Corresponding Source.  This
    alternative is allowed only occasionally and noncommercially, and
    only if you received the object code with such an offer, in accord
    with subsection 6b.

    d) Convey the object code by offering access from a designated
    place (gratis or for a charge), and offer equivalent access to the
    Corresponding Source in the same way through the same place at no
    further charge.  You need not require recipients to copy the
    Corresponding Source along with the object code.  If the place to
    copy the object code is a network server, the Corresponding Source
    may be on a different server (operated by you or a third party)
    that supports equivalent copying facilities, provided you maintain
    clear directions next to the object code saying where to find the
    Corresponding Source.  Regardless of what server hosts the
    Corresponding Source, you remain obligated to ensure that it is
    available for as long as needed to satisfy these requirements.

    e) Convey the object code using peer-to-peer transmission, provided
    you inform other peers where the object code and Corresponding
    Source of the work are being offered to the general public at no
    charge under subsection 6d.

  A separable portion of the object code, whose source code is excluded
from the Corresponding Source as a System Library, need not be
included in conveying the object code work.

  A "User Product" is either (1) a "consumer product", which means any
tangible personal property which is normally used for personal, family,
or household purposes, or (2) anything designed or sold for incorporation
into a dwelling.  In determining whether a product is a consumer product,
doubtful cases shall be resolved in favor of coverage.  For a particular
product received by a particular user, "normally used" refers to a
typical or common use of that class of product, regardless of the status
of the particular user or of the way in which the particular user
actually uses, or expects or is expected to use, the product.  A product
is a consumer product regardless of whether the product has substantial
commercial, industrial or non-consumer uses, unless such uses represent
the only significant mode of use of the product.

  "Installation Information" for a User Product means any methods,
procedures, authorization keys, or other information required to install
and execute modified versions of a covered work in that User Product from
a modified version of its Corresponding Source.  The information must
suffice to ensure that the continued functioning of the modified object
code is in no case prevented or interfered with solely because
modification has been made.

  If you convey an object code work under this section in, or with, or
specifically for use in, a User Product, and the conveying occurs as
part of a transaction in which the right of possession and use of the
User Product is transferred to the recipient in perpetuity or for a
fixed term (regardless of how the transaction is characterized), the
Corresponding Source conveyed under this section must be accompanied
by the Installation Information.  But this requirement does not apply
if neither you nor any third party retains the ability to install
modified object code on the User Product (for example, the work has
been installed in ROM).

  The requirement to provide Installation Information does not include a
requirement to continue to provide support service, warranty, or updates
for a work that has been modified or installed by the recipient, or for
the User Product in which it has been modified or installed.  Access to a
network may be denied when the modification itself materially and
adversely affects the operation of the network or violates the rules and
protocols for communication across the network.

  Corresponding Source conveyed, and Installation Information provided,
in accord with this section must be in a format that is publicly
documented (and with an implementation available to the public in
source code form), and must require no special password or key for
unpacking, reading or copying.

  7. Additional Terms.

  "Additional permissions" are terms that supplement the terms of this
License by making exceptions from one or more of its conditions.
Additional permissions that are applicable to the entire Program shall
be treated as though they were included in this License, to the extent
that they are valid under applicable law.  If additional permissions
apply only to part of the Program, that part may be used separately
under those permissions, but the entire Program remains governed by
this License without regard to the additional permissions.

  When you convey a copy of a covered work, you may at your option
remove any additional permissions from that copy, or from any part of
it.  (Additional permissions may be written to require their own
removal in certain cases when you modify the work.)  You may place
additional permissions on material, added by you to a covered work,
for which you have or can give appropriate copyright permission.

  Notwithstanding any other provision of this License, for material you
add to a covered work, you may (if authorized by the copyright holders of
that material) supplement the terms of this License with terms:

    a) Disclaiming warranty or limiting liability differently from the
    terms of sections 15 and 16 of this License; or

    b) Requiring preservation of specified reasonable legal notices or
    author attributions in that material or in the Appropriate Legal
    Notices displayed by works containing it; or

    c) Prohibiting misrepresentation of the origin of that material, or
    requiring that modified versions of such material be marked in
    reasonable ways as different from the original version; or

    d) Limiting the use for publicity purposes of names of licensors or
    authors of the material; or

    e) Declining to grant rights under trademark law for use of some
    trade names, trademarks, or service marks; or

    f) Requiring indemnification of licensors and authors of that
    material by anyone who conveys the material (or modified versions of
    it) with contractual assumptions of liability to the recipient, for
    any liability that these contractual assumptions directly impose on
    those licensors and authors.

  All other non-permissive additional terms are considered "further
restrictions" within the meaning of section 10.  If the Program as you
received it, or any part of it, contains a notice stating that it is
governed by this License along with a term that is a further
restriction, you may remove that term.  If a license document contains
a further restriction but permits relicensing or conveying under this
License, you may add to a covered work material governed by the terms
of that license document, provided that the further restriction does
not survive such relicensing or conveying.

  If you add terms to a covered work in accord with this section, you
must place, in the relevant source files, a statement of the
additional terms that apply to those files, or a notice indicating
where to find the applicable terms.

  Additional terms, permissive or non-permissive, may be stated in the
form of a separately written license, or stated as exceptions;
the above requirements apply either way.

  8. Termination.

  You may not propagate or modify a covered work except as expressly
provided under this License.  Any attempt otherwise to propagate or
modify it is void, and will automatically terminate your rights under
this License (including any patent licenses granted under the third
paragraph of section 11).

  However, if you cease all violation of this License, then your
license from a particular copyright holder is reinstated (a)
provisionally, unless and until the copyright holder explicitly and
finally terminates your license, and (b) permanently, if the copyright
holder fails to notify you of the violation by some reasonable means
prior to 60 days after the cessation.

  Moreover, your license from a particular copyright holder is
reinstated permanently if the copyright holder notifies you of the
violation by some reasonable means, this is the first time you have
received notice of violation of this License (for any work) from that
copyright holder, and you cure the violation prior to 30 days after
your receipt of the notice.

  Termination of your rights under this section does not terminate the
licenses of parties who have received copies or rights from you under
this License.  If your rights have been terminated and not permanently
reinstated, you do not qualify to receive new licenses for the same
material under section 10.

  9. Acceptance Not Required for Having Copies.

  You are not required to accept this License in order to receive or
run a copy of the Program.  Ancillary propagation of a covered work
occurring solely as a consequence of using peer-to-peer transmission
to receive a copy likewise does not require acceptance.  However,
nothing other than this License grants you permission to propagate or
modify any covered work.  These actions infringe copyright if you do
not accept this License.  Therefore, by modifying or propagating a
covered work, you indicate your acceptance of this License to do so.

  10. Automatic Licensing of Downstream Recipients.

  Each time you convey a covered work, the recipient automatically
receives a license from the original licensors, to run, modify and
propagate that work, subject to this License.  You are not responsible
for enforcing compliance by third parties with this License.

  An "entity transaction" is a transaction transferring control of an
organization, or substantially all assets of one, or subdividing an
organization, or merging organizations.  If propagation of a covered
work results from an entity transaction, each party to that
transaction who receives a copy of the work also receives whatever
licenses to the work the party's predecessor in interest had or could
give under the previous paragraph, plus a right to possession of the
Corresponding Source of the work from the predecessor in interest, if
the predecessor has it or can get it with reasonable efforts.

  You may not impose any further restrictions on the exercise of the
rights granted or affirmed under this License.  For example, you may
not impose a license fee, royalty, or other charge for exercise of
rights granted under this License, and you may not initiate litigation
(including a cross-claim or counterclaim in a lawsuit) alleging that
any patent claim is infringed by making, using, selling, offering for
sale, or importing the Program or any portion of it.

  11. Patents.

  A "contributor" is a copyright holder who authorizes use under this
License of the Program or a work on which the Program is based.  The
work thus licensed is called the contributor's "contributor version".

  A contributor's "essential patent claims" are all patent claims
owned or controlled by the contributor, whether already acquired or
hereafter acquired, that would be infringed by some manner, permitted
by this License, of making, using, or selling its contributor version,
but do not include claims that would be infringed only as a
consequence of further modification of the contributor version.  For
purposes of this definition, "control" includes the right to grant
patent sublicenses in a manner consistent with the requirements of
this License.

  Each contributor grants you a non-exclusive, worldwide, royalty-free
patent license under the contributor's essential patent claims, to
make, use, sell, offer for sale, import and otherwise run, modify and
propagate the contents of its contributor version.

  In the following three paragraphs, a "patent license" is any express
agreement or commitment, however denominated, not to enforce a patent
(such as an express permission to practice a patent or covenant not to
sue for patent infringement).  To "grant" such a patent license to a
party means to make such an agreement or commitment not to enforce a
patent against the party.

  If you convey a covered work, knowingly relying on a patent license,
and the Corresponding Source of the work is not available for anyone
to copy, free of charge and under the terms of this License, through a
publicly available network server or other readily accessible means,
then you must either (1) cause the Corresponding Source to be so
available, or (2) arrange to deprive yourself of the benefit of the
patent license for this particular work, or (3) arrange, in a manner
consistent with the requirements of this License, to extend the patent
license to downstream recipients.  "Knowingly relying" means you have
actual knowledge that, but for the patent license, your conveying the
covered work in a country, or your recipient's use of the covered work
in a country, would infringe one or more identifiable patents in that
country that you have reason to believe are valid.

  If, pursuant to or in connection with a single transaction or
arrangement, you convey, or propagate by procuring conveyance of, a
covered work, and grant a patent license to some of the parties
receiving the covered work authorizing them to use, propagate, modify
or convey a specific copy of the covered work, then the patent license
you grant is automatically extended to all recipients of the covered
work and works based on it.

  A patent license is "discriminatory" if it does not include within
the scope of its coverage, prohibits the exercise of, or is
conditioned on the non-exercise of one or more of the rights that are
specifically granted under this License.  You may not convey a covered
work if you are a party to an arrangement with a third party that is
in the business of distributing software, under which you make payment
to the third party based on the extent of your activity of conveying
the work, and under which the third party grants, to any of the
parties who would receive the covered work from you, a discriminatory
patent license (a) in connection with copies of the covered work
conveyed by you (or copies made from those copies), or (b) primarily
for and in connection with specific products or compilations that
contain the covered work, unless you entered into that arrangement,
or that patent license was granted, prior to 28 March 2007.

  Nothing in this License shall be construed as excluding or limiting
any implied license or other defenses to infringement that may
otherwise be available to you under applicable patent law.

  12. No Surrender of Others' Freedom.

  If conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License.  If you cannot convey a
covered work so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you may
not convey it at all.  For example, if you agree to terms that obligate you
to collect a royalty for further conveying from those to whom you convey
the Program, the only way you could satisfy both those terms and this
License would be to refrain entirely from conveying the Program.

  13. Use with the GNU Affero General Public License.

  Notwithstanding any other provision of this License, you have
permission to link or combine any covered work with a work licensed
under version 3 of the GNU Affero General Public License into a single
combined work, and to convey the resulting work.  The terms of this
License will continue to apply to the part which is the covered work,
but the special requirements of the GNU Affero General Public License,
section 13, concerning interaction through a network will apply to the
combination as such.

  14. Revised Versions of this License.

  The Free Software Foundation may publish revised and/or new versions of
the GNU General Public License from time to time.  Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.

  Each version is given a distinguishing version number.  If the
Program specifies that a certain numbered version of the GNU General
Public License "or any later version" applies to it, you have the
option of following the terms and conditions either of that numbered
version or of any later version published by the Free Software
Foundation.  If the Program does not specify a version number of the
GNU General Public License, you may choose any version ever published
by the Free Software Foundation.

  If the Program specifies that a proxy can decide which future
versions of the GNU General Public License can be used, that proxy's
public statement of acceptance of a version permanently authorizes you
to choose that version for the Program.

  Later license versions may give you additional or different
permissions.  However, no additional obligations are imposed on any
author or copyright holder as a result of your choosing to follow a
later version.

  15. Disclaimer of Warranty.

  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.

  16. Limitation of Liability.

  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
SUCH DAMAGES.

  17. Interpretation of Sections 15 and 16.

  If the disclaimer of warranty and limitation of liability provided
above cannot be given local legal effect according to their terms,
reviewing courts shall apply local law that most closely approximates
an absolute waiver of all civil liability in connection with the
Program, unless a warranty or assumption of liability accompanies a
copy of the Program in return for a fee.

                     END OF TERMS AND CONDITIONS

            How to Apply These Terms to Your New Programs

  If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.

  To do so, attach the following notices to the program.  It is safest
to attach them to the start of each source file to most effectively
state the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.

    <one line to give the program's name and a brief idea of what it does.>
    Copyright (C) <year>  <name of author>

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.

Also add information on how to contact you by electronic and paper mail.

  If the program does terminal interaction, make it output a short
notice like this when it starts in an interactive mode:

    <program>  Copyright (C) <year>  <name of author>
    This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
    This is free software, and you are welcome to redistribute it
    under certain conditions; type `show c' for details.

The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License.  Of course, your program's commands
might be different; for a GUI interface, you would use an "about box".

  You should also get your employer (if you work as a programmer) or school,
if any, to sign a "copyright disclaimer" for the program, if necessary.
For more information on this, and how to apply and follow the GNU GPL, see
<http://www.gnu.org/licenses/>.

  The GNU General Public License does not permit incorporating your program
into proprietary programs.  If your program is a subroutine library, you
may consider it more useful to permit linking proprietary applications with
the library.  If this is what you want to do, use the GNU Lesser General
Public License instead of this License.  But first, please read
<http://www.gnu.org/philosophy/why-not-lgpl.html>.


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/CREDITS.txt
================================================
The core developers of Manopt are

* Nicolas Boumal
* Bamdev Mishra

Through the RANSO group, Manopt is supported by

* Pierre-Antoine Absil
* Yurii Nesterov
* Rodolphe Sepulchre

We are grateful for the excellent contributions of

* Pierre Borckmans
* Bart Vandereycken
* Hiroyuki Sato
* Roberto Tron
* Sarod Yatawatta
* Hiroyuki Kasai
* Bruno Iannazzo
* Margherita Porcelli
* Jesus Briales
* Changshuo Liu

Furthermore, code written by the following people can be found in Manopt:

* Chris Baker
* Pierre-Antoine Absil
* Kyle Gallivan
* Paolo de Leva
* Wynton Moore
* Michael Kleder

Each person's contribution is marked by their name in the relevant files.
See http://www.manopt.org/about.html for a more precise breakdown.


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/LICENSE.txt
================================================
Manopt, a Matlab toolbox for optimization on manifolds, is copyright by
Nicolas Boumal and is distributed under the terms of the GNU General Public
License (GPL) version 3 (or later). See accompanying file <COPYING.txt> or
<http://www.gnu.org/licenses/gpl.html>.

In short, this means that everyone is free to use Manopt, to modify it and
to redistribute it on a free basis. Manopt is not in the public domain;
it is copyrighted and there are restrictions on its distribution (see the
license). For example, you cannot integrate this version of Manopt (in full
or in parts) in any closed-source software you plan to distribute
(commercially or not). Please contact us for more information.

Contact:
  http://www.manopt.org
  manopttoolbox@gmail.com

The documentation of Manopt (the website) is copyright by Nicolas Boumal,
all rights reserved.


THIRD-PARTY CODE

The following files contain third-party code or extensively rely on
third-party code, and their specific license should be considered before
modifying and/or redistributing them. The license information can be found
either in the comments in the code or in a separate text file in the same
directory as the Matlab files.

/manopt/solvers/trustregions/trustregions.m
/manopt/solvers/trustregions/tCG.m
/manopt/tools/multitransp.m
/manopt/tools/multiprod.m
/manopt/tools/diagsum.m
/manopt/tools/hashmd5.m


CONTRIBUTIONS

Contributions are licensed to the owners of Manopt under the Contributor
License Agreement, see accompanying file <CLA.txt>. Be sure to check the
header comments of Matlab files and look for the "original author" tag.


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/README.txt
================================================
Manopt is a Matlab toolbox for optimization on manifolds.

Installation instructions, documentation and updates are available online:
http://www.manopt.org

Manopt is copyright by Nicolas Boumal (nicolasboumal@gmail.com)
and is distributed under the terms of the GNU General Public License (GPL)
version 3 (or later). See the files LICENSE.txt, COPYING.txt and CREDITS.txt.

Contact: manopttoolbox@gmail.com


Quick installation guide
------------------------

* Unzip and copy the whole manopt directory you just downloaded in a
  location of your choice on disk, say, in /my/directory/.

* Go to /my/directory/manopt/ at the command prompt and execute importmanopt.
  You may save this path for your next Matlab sessions: follow the menu
  File > Set Path... and save.

* Go to /my/directory/manopt/checkinstall/ and run the script basicexample.m.
  If there are no errors, you are done! Otherwise, feel free to contact us.


Feedback
--------

Please let us know how you use Manopt: it helps us develop a better toolbox.

Please cite the Manopt paper in your work (as well as relevant solvers/geometries):
http://jmlr.org/papers/v15/boumal14a.html

@article{manopt,
  author  = {Nicolas Boumal and Bamdev Mishra and P.-A. Absil and Rodolphe Sepulchre},
  title   = {{M}anopt, a {M}atlab Toolbox for Optimization on Manifolds},
  journal = {Journal of Machine Learning Research},
  year    = {2014},
  volume  = {15},
  pages   = {1455--1459},
  url     = {http://www.manopt.org}
}


For more info or help: http://www.manopt.org -- we are active on the forum!

This version:
Manopt 4.0, released Sep. 9, 2017.


GitHub: https://github.com/NicolasBoumal/manopt


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/checkinstall/basicexample.m
================================================
function basicexample()
% Quick sanity check that Manopt is installed and working.
%
% function basicexample()
%
% Builds a random symmetric matrix A of size 1000, defines the cost
% f(x) = -x'*A*x (with its Euclidean gradient and Hessian) on the manifold
% returned by spherefactory(1000), checks the gradient and the Hessian
% numerically (one figure each), runs the trustregions solver and finally
% plots the gradient norm against the iteration number.
%
% An error is raised if spherefactory cannot be found on the Matlab path.
    
    % Verify that Manopt was indeed added to the Matlab path.
    if isempty(which('spherefactory'))
        % error() only converts escape sequences such as \n when it is
        % called with more than one input argument. The two lines of the
        % message are therefore passed through an explicit format string;
        % a single concatenated string would print a literal "\n".
        error('%s\n%s', ...
              'You should first add Manopt to the Matlab path.', ...
              'Please run importmanopt.');
    end
    
    % Generate the problem data: a random symmetric matrix.
    n = 1000;
    A = randn(n);
    A = .5*(A+A');
    
    % Create the problem structure.
    manifold = spherefactory(n);
    problem.M = manifold;
    
    % Define the problem cost function and its gradient.
    % egrad / ehess are the Euclidean gradient and Hessian of the cost.
    problem.cost  = @(x) -x'*(A*x);
    problem.egrad = @(x) -2*A*x;
    problem.ehess = @(x, xdot) -2*A*xdot;
    
    % Numerically check gradient and Hessian consistency.
    figure;
    checkgradient(problem);
    figure;
    checkhessian(problem);
 
    % Solve. Only info is used below; x and xcost are kept for inspection.
    [x, xcost, info] = trustregions(problem);          %#ok<ASGLU>
    
    % Display some statistics.
    figure;
    semilogy([info.iter], [info.gradnorm], '.-');
    xlabel('Iteration #');
    ylabel('Gradient norm');
    title('Convergence of the trust-regions algorithm on the sphere');
    
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/examples/PCA_stochastic.m
================================================
function [X, A] = PCA_stochastic(A, k)
% Demonstrates Manopt's stochastic gradient solver on a PCA problem.
%
% function [X, A] = PCA_stochastic(A, k)
%
% Given a dataset A of size nxd, PCA (principal component analysis) solves
%
%   minimize_X  f(X) = -.5*norm(A*X, 'fro')^2 / n,
%
% over matrices X of size dxk with orthonormal columns. Equivalently, one
% looks for k dominant singular vectors of A, or k top eigenvectors of
% A'*A.
%
% For large n this is expensive. Stochastic gradient algorithms view f(X)
% as a sum of n terms, each involving a single row of A, and only use a
% subset of those terms at each iteration. Individual iterations are then
% very cheap: their cost does not depend on n, because neither f nor its
% full gradient is ever evaluated (f is in fact never evaluated at all).
%
% Inputs (both optional):
%   A: data matrix of size nxd. If omitted or empty, a random dataset with
%      n = 100000 and d = 1000 is generated.
%   k: number of components to compute. Defaults to 3.
%
% Outputs:
%   X: the point returned by stochasticgradient.
%   A: the dataset that was used (useful when it was generated here).
%
% Stochastic gradient algorithms (this implementation in particular) are
% sensitive to proper parameter tuning. See in code.

% This file is part of Manopt and is copyrighted. See the license file.
% 
% Main author: Bamdev Mishra and Nicolas Boumal, Sept. 6, 2017
% Contributors:
% 
% Change log:
% 


    % Use the caller's dataset, or draw nsamples random points in R^dim.
    if nargin < 1 || isempty(A)
        dim = 1000;
        nsamples = 100000;
        fprintf('Generating data...');
        A = randn(nsamples, dim)*diag([[15 10 5], ones(1, dim-3)]);
        fprintf(' done (size: %d x %d).\n', size(A));
    else
        [nsamples, dim] = size(A);
    end

    % Number of components to compute.
    if nargin < 2 || isempty(k)
        k = 3;
    end
    
    % k orthonormal vectors in R^dim: this is a point on a Stiefel manifold.
    problem.M = stiefelfactory(dim, k);
    
    % Stochastic algorithms must be told how many terms the cost sums over.
    problem.ncostterms = nsamples;
    
    % The cost value itself is not needed (stochastic algorithms never use
    % it). What is needed is the gradient of the cost restricted to a
    % subset of its terms (a sample). The handle below returns a partial
    % *Euclidean* gradient (hence the 'e' in partialegrad); Manopt converts
    % it to a proper Riemannian partial gradient in the tangent space at X.
    % With sample = 1:n, the partial gradient is the complete gradient.
    % The function itself is defined at the end of this file.
    problem.partialegrad = @partialegrad;

    % To verify one's work with checkgradient, the cost function must be
    % specified as well, as below.
    % problem.cost = @(X) -.5*norm(A*X, 'fro')^2 / n;
    % checkgradient(problem); pause;

    % options.checkperiod = x makes the solver record statistics every x
    % iterations. Only quantities that are almost free to compute are
    % recorded by default (elapsed time and step size of the last step).
    % More sophisticated quantities go through options.statsfun as usual;
    % the time spent computing them is not counted in the times reported
    % in the info structure returned by the solver.
    options.checkperiod = 10;
    options.statsfun = statsfunhelper('metric', @(X) norm(A*X, 'fro'));
    
    % Solver parameters. Stochastic gradient algorithms tend to be quite
    % sensitive to tuning, especially of the step size selection. See the
    % solver's documentation for details.
    options.maxiter = 200;
    options.batchsize = 10;
    % options.stepsize_type = 'decay';
    options.stepsize_init = 1e2;
    options.stepsize_lambda = 1e-3;
    options.verbosity = 2;
    
    % Run the solver from a random initial point.
    [X, info] = stochasticgradient(problem, [], options);
    
    
    % Plot the metric recorded through options.statsfun.
    iters = [info.iter];
    plot(iters, [info.metric], '.-');
    xlabel('Iteration #');
    ylabel('Frobenius norm of A*X');
    title('Convergence of stochasticgradient on stiefelfactory for PCA');
    
    % Reference line: the globally optimal value, attained by the true
    % dominant singular vectors.
    fprintf('Running svds... ');
    timer = tic();
    [V, ~] = svds(A', k);
    fprintf('done: %g [s] (note: svd may be faster)\n', toc(timer));
    hold all;
    bound = norm(A*V, 'fro');
    plot(iters, bound*ones(size(iters)), '--');
    hold off;
    
    legend('Algorithm', 'SVD bound', 'Location', 'SouthEast');


    % Partial Euclidean gradient of f, restricted to the rows in 'sample'.
    function G = partialegrad(X, sample)
        
        % X is an orthonormal matrix of size dxk.
        % sample is a vector of indices between 1 and n: a subset.
        % Only the selected rows of the dataset are touched.
        rows = A(sample, :);
        
        % Gradient of f restricted to that sample.
        G = (-rows'*(rows*X)) / nsamples;
        
    end
    
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/examples/dominant_invariant_subspace.m
================================================
function [X, info] = dominant_invariant_subspace(A, p)
% Returns an orthonormal basis of the dominant invariant p-subspace of A.
%
% function [X, info] = dominant_invariant_subspace(A, p)
%
% Input: A real, symmetric matrix A of size nxn and an integer p <= n.
%        Both are optional: if A is omitted or empty, a random symmetric
%        matrix of size 128 is generated; if p is omitted or empty, p = 3.
% Output: A real, orthonormal matrix X of size nxp such that trace(X'*A*X)
%         is maximized. That is, the columns of X form an orthonormal basis
%         of a dominant subspace of dimension p of A. These are thus
%         eigenvectors associated with the largest eigenvalues of A (in no
%         particular order). Sign is important: 2 is deemed a larger
%         eigenvalue than -5.
%         info is the third output of the trustregions solver.
%
% The options used by the solver are printed and, if the manifold has
% dimension less than 512, the spectrum of the Hessian at X is plotted.
%
% The optimization is performed on the Grassmann manifold, since only the
% space spanned by the columns of X matters. The implementation is short to
% show how Manopt can be used to quickly obtain a prototype. To make the
% implementation more efficient, one might first try to use the caching
% system, that is, use the optional 'store' arguments in the cost, grad and
% hess functions. Furthermore, using egrad2rgrad and ehess2rhess is quick
% and easy, but not always efficient. Having a look at the formulas
% implemented in these functions can help rewrite the code without them,
% possibly more efficiently.
%
% See also: dominant_invariant_subspace_complex

% This file is part of Manopt and is copyrighted. See the license file.
%
% Main author: Nicolas Boumal, July 5, 2013
% Contributors:
%
% Change log:
%
%   NB Dec. 6, 2013:
%       We specify a max and initial trust region radius in the options.
    
    % Generate some random data to test the function
    if ~exist('A', 'var') || isempty(A)
        A = randn(128);
        A = (A+A')/2;
    end
    if ~exist('p', 'var') || isempty(p)
        p = 3;
    end
    
    % Make sure the input matrix is real, square and symmetric.
    % The squareness check must precede A-A', which requires a square A.
    n = size(A, 1);
    assert(isreal(A), 'A must be real.')
    assert(size(A, 2) == n, 'A must be square.');
    assert(norm(A-A', 'fro') < n*eps, 'A must be symmetric.');
    % The test admits p == n, so the message states exactly that bound.
    assert(p<=n, 'p must be at most n.');
    
    % Define the cost and its derivatives on the Grassmann manifold.
    % The Euclidean gradient and Hessian of X -> trace(X'*A*X) are built
    % from A*X and A*H; egrad2rgrad / ehess2rhess convert them.
    Gr = grassmannfactory(n, p);
    problem.M = Gr;
    problem.cost = @(X)    -trace(X'*A*X);
    problem.grad = @(X)    -2*Gr.egrad2rgrad(X, A*X);
    problem.hess = @(X, H) -2*Gr.ehess2rhess(X, A*X, A*H, H);
    
    % Execute some checks on the derivatives for early debugging.
    % These can be commented out.
    % checkgradient(problem);
    % pause;
    % checkhessian(problem);
    % pause;
    
    % Issue a call to a solver. A random initial guess will be chosen and
    % default options are selected except for the ones we specify here.
    options.Delta_bar = 8*sqrt(p);
    [X, costX, info, options] = trustregions(problem, [], options); %#ok<ASGLU>
    
    fprintf('Options used:\n');
    disp(options);
    
    % For our information, Manopt can also compute the spectrum of the
    % Riemannian Hessian on the tangent space at (any) X. Computing the
    % spectrum at the solution gives us some idea of the conditioning of
    % the problem. If we were to implement a preconditioner for the
    % Hessian, this would also inform us on its performance.
    %
    % Notice that (typically) all eigenvalues of the Hessian at the
    % solution are positive, i.e., we find an isolated minimizer. If we
    % replace the Grassmann manifold by the Stiefel manifold, hence still
    % optimizing over orthonormal matrices but ignoring the invariance
    % cost(XQ) = cost(X) for all Q orthogonal, then we see
    % dim O(p) = p(p-1)/2 zero eigenvalues in the Hessian spectrum, making
    % the optimizer not isolated anymore.
    if Gr.dim() < 512
        evs = hessianspectrum(problem, X);
        stairs(sort(evs));
        title(['Eigenvalues of the Hessian of the cost function ' ...
               'at the solution']);
        xlabel('Eigenvalue number (sorted)');
        ylabel('Value of the eigenvalue');
    end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/examples/dominant_invariant_subspace_complex.m
================================================
function [X, info] = dominant_invariant_subspace_complex(A, p)
% Returns a unitary basis of the dominant invariant p-subspace of A.
%
% function [X, info] = dominant_invariant_subspace_complex(A, p)
%
% Input: A complex, Hermitian matrix A of size nxn and an integer p <= n.
%        Both are optional: if A is omitted or empty, a random Hermitian
%        matrix of size 128 is generated; if p is omitted or empty, p = 3.
% Output: A complex, unitary matrix X of size nxp such that trace(X'*A*X)
%         is maximized. That is, the columns of X form a unitary basis
%         of a dominant subspace of dimension p of A.
%         info is the third output of the trustregions solver.
%
% The options used by the solver are printed and, if the manifold has
% dimension less than 512, the spectrum of the Hessian at X is plotted.
%
% The optimization is performed on the complex Grassmann manifold, since
% only the space spanned by the columns of X matters.
%
% See dominant_invariant_subspace for more details in the real case.
%
% See also: dominant_invariant_subspace grassmanncomplexfactory

% This file is part of Manopt and is copyrighted. See the license file.
%
% Main author: Nicolas Boumal, June 30, 2015
% Contributors:
%
% Change log:
    
    % Generate some random data to test the function
    if ~exist('A', 'var') || isempty(A)
        A = randn(128) + 1i*randn(128);
        A = (A+A')/2;
    end
    if ~exist('p', 'var') || isempty(p)
        p = 3;
    end
    
    % Make sure the input matrix is square and Hermitian.
    % The squareness check must precede A-A', which requires a square A.
    n = size(A, 1);
    assert(size(A, 2) == n, 'A must be square.');
    assert(norm(A-A', 'fro') < n*eps, 'A must be Hermitian.');
    % The test admits p == n, so the message states exactly that bound.
    assert(p<=n, 'p must be at most n.');
    
    % Define the cost and its derivatives on the complex Grassmann manifold.
    % The trace is real in exact arithmetic for Hermitian A; real() strips
    % the imaginary part left by rounding.
    Gr = grassmanncomplexfactory(n, p);
    problem.M = Gr;
    problem.cost  = @(X)    -real(trace(X'*A*X));
    problem.egrad = @(X)    -2*A*X;
    problem.ehess = @(X, H) -2*A*H;
    
    % Execute some checks on the derivatives for early debugging.
    % These can be commented out.
    % checkgradient(problem);
    % pause;
    % checkhessian(problem);
    % pause;
    
    % Issue a call to a solver. A random initial guess will be chosen and
    % default options are selected except for the ones we specify here.
    options.Delta_bar = 8*sqrt(p);
    [X, costX, info, options] = trustregions(problem, [], options); %#ok<ASGLU>
    
    fprintf('Options used:\n');
    disp(options);
    
    % For our information, Manopt can also compute the spectrum of the
    % Riemannian Hessian on the tangent space at (any) X. Computing the
    % spectrum at the solution gives us some idea of the conditioning of
    % the problem. If we were to implement a preconditioner for the
    % Hessian, this would also inform us on its performance.
    %
    % Notice that (typically) all eigenvalues of the Hessian at the
    % solution are positive, i.e., we find an isolated minimizer. If we
    % replace the Grassmann manifold by the Stiefel manifold, hence still
    % optimizing over orthonormal matrices but ignoring the invariance
    % cost(XQ) = cost(X) for all Q orthogonal, then we see
    % dim O(p) = p(p-1)/2 zero eigenvalues in the Hessian spectrum, making
    % the optimizer not isolated anymore.
    % NOTE(review): the paragraph above is worded for the real case; for
    % complex data Q is presumably unitary and the number of zero
    % eigenvalues would differ -- confirm before relying on it.
    if Gr.dim() < 512
        evs = hessianspectrum(problem, X);
        stairs(sort(evs));
        title(['Eigenvalues of the Hessian of the cost function ' ...
               'at the solution']);
        xlabel('Eigenvalue number (sorted)');
        ylabel('Value of the eigenvalue');
    end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/examples/elliptope_SDP.m
================================================
function [Y, problem, S] = elliptope_SDP(A, p, Y0)
% Low-rank solver for semidefinite programs (SDP's) with a unit diagonal.
% 
% function [Y, problem, S] = elliptope_SDP(A)
% function [Y, problem, S] = elliptope_SDP(A, p)
% function [Y, problem, S] = elliptope_SDP(A, p, Y0)
%
% A is a real, symmetric matrix of size n.
%
% A local optimization method from Manopt is applied to the SDP
%
%   min_X  trace(A*X)  s.t.  diag(X) = 1 and X is positive semidefinite.
%
% The symmetric matrix X of size n is never formed: it is parameterized as
% X = Y*Y', with Y of size n x p. By default, p is taken large enough
% (about sqrt(2n)) to ensure that there exists an optimal X whose rank is
% smaller than p, so that the SDP is equivalent to the problem in Y:
%
%   min_Y  trace(Y'*A*Y)  s.t.  diag(Y*Y') = 1.
%
% The constraint says that each row of Y has unit norm, which is why
% Manopt is appropriate software for this problem. An optional initial
% guess can be given via the input Y0.
%
% See the paper below for theory, specifically, for a proof that, for
% almost all A, second-order critical points of the problem in Y are
% globally optimal. In other words: there are no local traps in Y, despite
% non-convexity.
%
% Outputs:
%
%       Y: is the best point found (an nxp matrix with unit norm rows.)
%          To find X, form Y*Y' (or, more efficiently, study X through Y.)
% 
%       problem: is the Manopt problem structure used to produce Y.
% 
%       S: is a dual optimality certificate (a symmetric matrix of size n,
%          sparse if A is sparse). The optimality gap (in the cost
%          function) is at most n*min(eig(S)), for both Y and X = Y*Y'.
%          Hence, if min(eig(S)) is close to zero, Y is close to globally
%          optimal. This can be computed via eigs(S, 1, 'SR').
%          S is only computed when it is requested.
% 
% Paper: https://arxiv.org/abs/1606.04970
%
% @inproceedings{boumal2016bmapproach,
%   author  = {Boumal, N. and Voroninski, V. and Bandeira, A.S.},
%   title   = {The non-convex {B}urer-{M}onteiro approach works on smooth semidefinite programs},
%   booktitle={Neural Information Processing Systems (NIPS 2016)},
%   year    = {2016}
% }
% 
% See also: maxcut elliptope_SDP_complex

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, June 28, 2016
% Contributors:
% Change log:


    % Example mode: without an input matrix, build one from a random
    % Erdos-Renyi graph. This is for illustration purposes only.
    if nargin < 1 || isempty(A)
        nnodes = 100;
        A = triu(rand(nnodes) <= .1, 1);
        A = (A+A.')/(2*nnodes);
    end

    n = size(A, 1);
    assert(n >= 2, 'A must be at least 2x2.');
    assert(isreal(A), 'A must be real.');
    assert(size(A, 2) == n, 'A must be square.');
    
    % Symmetrize A.
    A = (A+A.')/2;
    
    % Default number of columns of Y: sufficiently large.
    if nargin < 2 || isempty(p)
        p = ceil(sqrt(8*n+1)/2);
    end
    
    assert(p >= 2 && p == round(p), 'p must be an integer >= 2.');

    % Manifold of n-by-p matrices with unit norm rows.
    oblique = obliquefactory(p, n, true);
    
    problem.M = oblique;
    
    
    % The three commented lines below would be enough to define the cost
    % function and its derivatives, and are good code to write when
    % prototyping. The nested functions at the end of this file show a
    % more advanced use of Manopt, in which the product A*Y is computed
    % once and shared between the cost and the gradient.
    % % problem.cost  = @(Y) .5*sum(sum((A*Y).*Y));
    % % problem.egrad = @(Y) A*Y;
    % % problem.ehess = @(Y, Ydot) A*Ydot;
    
    % Cost function to be /minimized/, its Riemannian gradient and
    % (optionally) its Riemannian Hessian.
    problem.cost = @cost;
    problem.grad = @grad;
    problem.hess = @hess;


    % Without an initial guess, Manopt is told to pick a random one.
    if nargin < 3 || isempty(Y0)
        Y0 = [];
    end

    % Call your favorite solver.
    solveropts = struct();
    solveropts.verbosity = 0;      % Set to 0 for no output, 2 for normal output
    solveropts.maxinner = 500;     % maximum Hessian calls per iteration
    solveropts.tolgradnorm = 1e-6; % tolerance on gradient norm
    Y = trustregions(problem, Y0, solveropts);
    
    % Produce the optimality certificate only if it was asked for.
    if nargout >= 3
        rowdots = sum((A*Y).*Y, 2);
        S = A - spdiags(rowdots, 0, n, n);
    end


    % Products with A dominate the cost, hence the result is cached in the
    % store and shared among cost, grad and hess. Completely optional.
    function store = cache_products(Y, store)
        if isfield(store, 'AY')
            return;
        end
        AtimesY = A*Y;
        store.AY = AtimesY;
        store.diagAYYt = sum(AtimesY .* Y, 2);
    end

    % Cost: half the sum of the cached row-wise products of A*Y with Y.
    function [f, store] = cost(Y, store)
        store = cache_products(Y, store);
        f = .5*sum(store.diagAYYt);
    end

    % Riemannian gradient.
    function [G, store] = grad(Y, store)
        store = cache_products(Y, store);
        rowscaled = bsxfun(@times, Y, store.diagAYYt);
        G = store.AY - rowscaled;
    end

    % Riemannian Hessian along the direction Ydot.
    function [H, store] = hess(Y, Ydot, store)
        store = cache_products(Y, store);
        unprojected = A*Ydot - bsxfun(@times, Ydot, store.diagAYYt);
        H = oblique.proj(Y, unprojected);
    end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/examples/elliptope_SDP_complex.m
================================================
function [Y, problem, S] = elliptope_SDP_complex(A, p, Y0)
% Low-rank solver for complex semidefinite programs with a unit diagonal.
% 
% function [Y, problem, S] = elliptope_SDP_complex(A)
% function [Y, problem, S] = elliptope_SDP_complex(A, p)
% function [Y, problem, S] = elliptope_SDP_complex(A, p, Y0)
%
% A is a Hermitian matrix of size n.
%
% A local optimization method from Manopt is applied to the SDP
%
%   min_X trace(A*X) s.t. diag(X) = 1, X is complex, positive semidefinite.
%
% The Hermitian matrix X of size n is never formed: it is parameterized as
% X = Y*Y', with Y of size n x p. By default, p is taken large enough
% (that is, sqrt(n)) to ensure that there exists an optimal X whose rank is
% smaller than p, so that the SDP is equivalent to the problem in Y:
%
%   min_Y  trace(Y'*A*Y)  s.t.  diag(Y*Y') = 1, Y complex
%
% The constraint says that each row of Y has unit norm, which is why
% Manopt is appropriate software for this problem. An optional initial
% guess can be given via the input Y0.
%
% See the paper below for theory, specifically, for a proof that, for
% almost all A, second-order critical points of the problem in Y are
% globally optimal. In other words: there are no local traps in Y, despite
% non-convexity.
%
% Outputs:
%
%       Y: is the best point found (an nxp matrix with unit norm rows.)
%          To find X, form Y*Y' (or, more efficiently, study X through Y.)
% 
%       problem: is the Manopt problem structure used to produce Y.
% 
%       S: is a dual optimality certificate (a Hermitian matrix of size n,
%          sparse if A is sparse). The optimality gap (in the cost
%          function) is at most n*min(eig(S)), for both Y and X = Y*Y'.
%          Hence, if min(eig(S)) is close to zero, Y is close to globally
%          optimal. This can be computed via eigs(S, 1, 'SR').
%          S is only computed when it is requested.
% 
% Paper: https://arxiv.org/abs/1606.04970
%
% @inproceedings{boumal2016bmapproach,
%   author  = {Boumal, N. and Voroninski, V. and Bandeira, A.S.},
%   title   = {The non-convex {B}urer-{M}onteiro approach works on smooth semidefinite programs},
%   booktitle={Neural Information Processing Systems (NIPS 2016)},
%   year    = {2016}
% }
% 
% See also: maxcut elliptope_SDP

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Oct. 21, 2016
% Contributors:
% Change log:


    % Example mode: without an input matrix, generate a random complex
    % one. This is for illustration purposes only.
    if nargin < 1 || isempty(A)
        nrand = 100;
        A = randn(nrand) + 1i*randn(nrand);
        A = (A+A')/sqrt(2*nrand);
    end

    n = size(A, 1);
    assert(n >= 2, 'A must be at least 2x2.');
    assert(size(A, 2) == n, 'A must be square.');
    
    % Make A Hermitian.
    A = (A+A')/2;
    
    % Default number of columns of Y: sufficiently large.
    if nargin < 2 || isempty(p)
        p = floor(sqrt(n)+1);
    end
    
    assert(p >= 1 && p == round(p), 'p must be an integer >= 1.');

    % Manifold of complex n-by-p matrices with unit norm rows.
    oblique = obliquecomplexfactory(p, n, true);
    
    problem.M = oblique;
    
    
    % The three commented lines below would be enough to define the cost
    % function and its derivatives, and are good code to write when
    % prototyping. The nested functions at the end of this file show a
    % more advanced use of Manopt, in which the product A*Y is computed
    % once and shared between the cost and the gradient.
    % % problem.cost  = @(Y) .5*sum(sum(real((A*Y).*conj(Y))));
    % % problem.egrad = @(Y) A*Y;
    % % problem.ehess = @(Y, Ydot) A*Ydot;
    
    % Cost function to be /minimized/, its Riemannian gradient and
    % (optionally) its Riemannian Hessian.
    problem.cost = @cost;
    problem.grad = @grad;
    problem.hess = @hess;


    % Without an initial guess, Manopt is told to pick a random one.
    if nargin < 3 || isempty(Y0)
        Y0 = [];
    end

    % Call your favorite solver.
    solveropts = struct();
    solveropts.verbosity = 0;      % Set to 0 for no output, 2 for normal output
    solveropts.maxinner = 500;     % maximum Hessian calls per iteration
    solveropts.tolgradnorm = 1e-6; % tolerance on gradient norm
    Y = trustregions(problem, Y0, solveropts);
    
    % Produce the optimality certificate only if it was asked for.
    if nargout >= 3
        rowdots = sum(real((A*Y).*conj(Y)), 2);
        S = A - spdiags(rowdots, 0, n, n);
    end


    % Products with A dominate the cost, hence the result is cached in the
    % store and shared among cost, grad and hess. Completely optional.
    function store = cache_products(Y, store)
        if isfield(store, 'AY')
            return;
        end
        AtimesY = A*Y;
        store.AY = AtimesY;
        store.diagAYYt = sum(real(AtimesY .* conj(Y)), 2);
    end

    % Cost: half the sum of the cached row-wise real products.
    function [f, store] = cost(Y, store)
        store = cache_products(Y, store);
        f = .5*sum(store.diagAYYt);
    end

    % Riemannian gradient.
    function [G, store] = grad(Y, store)
        store = cache_products(Y, store);
        rowscaled = bsxfun(@times, Y, store.diagAYYt);
        G = store.AY - rowscaled;
    end

    % Riemannian Hessian along the direction Ydot.
    function [H, store] = hess(Y, Ydot, store)
        store = cache_products(Y, store);
        unprojected = A*Ydot - bsxfun(@times, Ydot, store.diagAYYt);
        H = oblique.proj(Y, unprojected);
    end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/examples/essential_svd.m
================================================
function essential_svd
% Example: least-squares fit of essential matrices on the essential manifold.
%
% function essential_svd
%
% Minimizes 0.5 * \sum_{i=1}^N ||E_i-A_i||^2, where the E_i are essential
% matrices and the A_i are randomly generated targets. Essential matrices
% are used in computer vision to represent the epipolar constraint between
% projected points in two perspective views.
%
% Note: essentialfactory works with a quotient R1/R2 representation of
% essential matrices, whereas it is more convenient for a user to write the
% cost, gradient and Hessian in terms of the 3-by-3 matrix E. The auxiliary
% functions essential_costE2cost, essential_egradE2egrad and
% essential_ehessE2ehess convert ingredients expressed in E into their
% R1/R2 counterparts; they are used below for exactly that purpose.
%
% The function takes no input, returns no output, and prints the final
% value of the cost.
%
% See also: essentialfactory essential_costE2cost essential_egradE2egrad
% essential_ehessE2ehess

% This file is part of Manopt: www.manopt.org.
% Original author: Roberto Tron, Aug. 8, 2014
% Contributors: Bamdev Mishra, May 15, 2015.


    % Test data: N target matrices, each one the product of a random
    % rotation, essential_hat3([0; 0; 1]) and another random rotation.
    N = 2;    % Number of matrices processed in parallel.
    A = multiprod(multiprod(randrot(3, N), essential_hat3([0; 0; 1])), randrot(3, N));

    % Ingredients written in terms of the essential matrices E:
    % cost, Euclidean gradient and Euclidean Hessian (along U).
    costE  = @(E) 0.5*sum(multisqnorm(E-A));
    egradE = @(E) E - A;
    ehessE = @(E, U) U;

    % Manopt problem: the manifold, plus the ingredients above converted
    % from the E representation to the representation X of the factory.
    problem.M     = essentialfactory(N);
    problem.cost  = @(X) essential_costE2cost(X, costE);
    problem.egrad = @(X) essential_egradE2egrad(X, egradE);
    problem.ehess = @(X, S) essential_ehessE2ehess(X, egradE, ehessE, S);

    % Uncomment to check the differentials numerically.
    % checkgradient(problem); pause;
    % checkhessian(problem); pause;

    % Solve with the trust-regions method, from a random initial guess.
    Xsol = trustregions(problem);

    % Report the cost reached at the solution.
    residual = essential_costE2cost(Xsol, costE);
    fprintf('Distance between original matrices and decompositions is %e \n', residual);

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/examples/generalized_eigenvalue_computation.m
================================================
function [Xsol, Ssol] = generalized_eigenvalue_computation(A, B, p)
% Returns orthonormal basis of the dominant invariant p-subspace of B^-1 A.
%
% function [Xsol, Ssol] = generalized_eigenvalue_computation(A, B, p)
%
% Inputs (each one is optional: a missing or empty input gets a default):
%   A: real, symmetric matrix of size nxn.
%      Default: a random symmetric matrix with n = 128.
%   B: symmetric positive definite matrix, same size as A.
%      Default: sparse tridiagonal matrix, 2 on the diagonal and -1 on the
%      first sub- and super-diagonals.
%   p: integer such that p <= n. Default: 3.
%
% Outputs:
%   Xsol: real matrix of size nxp, B-orthonormal (Xsol'*B*Xsol = identity),
%         such that trace(X'*A*X) is maximized. Its columns are generalized
%         eigenvectors associated with the largest generalized eigenvalues
%         of B^(-1)*A (in no particular order). Sign is important: 2 is
%         deemed a larger eigenvalue than -5.
%   Ssol: vector with the eigenvalues associated with the columns of Xsol.
%
% In other words, we aim at A*X = B*X*S with S diagonal, holding dominant
% eigenvalues of B^-1 A.
%
% The problem solved is
%
%     maximize trace(X'*A*X) subject to X'*B*X = eye(p).
%
% Its solutions are invariant under X --> XQ with Q a p-by-p orthogonal
% matrix: only the space spanned by the columns of X matters. The search
% space is thus the set of equivalence classes
% [X] = {XQ : X'*B*X = I and Q is orthogonal matrix}, called the
% generalized Grassmann manifold, and the optimization is run there.
% Before returning, a specific Q is picked (from a small p-by-p
% eigenvalue problem) so that the columns of Xsol = XQ are eigenvectors.
%
% See also dominant_invariant_subspace nonlinear_eigenspace


% This file is part of Manopt and is copyrighted. See the license file.
%
% Main author: Bamdev Mishra, June 30, 2015.
% Contributors:
% Change log:
%
%     Aug. 10, 2016 (NB): the eigenvectors Xsol are now rotated by Vsol
%     before they are returned, to ensure the output matches what you would
%     normally expect calling eigs.

    % Substitute test data for every input that is absent or empty.
    if nargin < 1 || isempty(A)
        A = randn(128);
        A = (A+A')/2;
    end
    if nargin < 2 || isempty(B)
        m = size(A, 1);
        v = ones(m, 1);
        B = spdiags([-v 2*v -v], -1:1, m, m); % Symmetric positive definite
    end
    if nargin < 3 || isempty(p)
        p = 3;
    end

    % Input validation: A real, square, symmetric; p compatible with n.
    n = size(A, 1);
    assert(isreal(A), 'A must be real.');
    assert(size(A, 2) == n, 'A must be square.');
    assert(norm(A-A', 'fro') < n*eps, 'A must be symmetric.');
    assert(p <= n, 'p must be smaller than n.');

    % The search space is the generalized Grassmann manifold: column
    % spaces of the matrices X such that X'*B*X is the identity.
    % Only the Euclidean gradient and Hessian of the cost are supplied.
    problem.M     = grassmanngeneralizedfactory(n, p, B);
    problem.cost  = @(X)    -trace(X'*A*X);
    problem.egrad = @(X)    -2*(A*X);
    problem.ehess = @(X, H) -2*(A*H);

    % Optional checks of the derivatives, for early debugging.
    % checkgradient(problem);
    % pause;
    % checkhessian(problem);
    % pause;

    % Solver options; anything not listed keeps its default value.
    % verbosity: 0 silences the solver, 2 gives the normal output.
    options = struct('Delta_bar', 8*sqrt(p), ...
                     'tolgradnorm', 1e-7, ...
                     'verbosity', 2);

    % Run trust-regions from a random initial guess ([]).
    [Xsol, ~, ~] = trustregions(problem, [], options);

    % The eigenvalues come from the small p-by-p symmetric eigenvalue
    % problem associated with the subspace found ...
    [Q, Lambda] = eig(Xsol'*(A*Xsol));
    Ssol = diag(Lambda);

    % ... and rotating the basis by the p-by-p orthogonal matrix Q turns
    % its columns into the corresponding eigenvectors.
    Xsol = Xsol*Q;

    % This quantity should be small.
    % norm(A*Xsol - B*Xsol*diag(Ssol));

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/examples/generalized_procrustes.m
================================================
function [A, R] = generalized_procrustes(A_measure)
% Rotationally align clouds of points (generalized Procrustes problem)
%
% function [A, R] = generalized_procrustes(A_measure)
%
% The input is a 3D matrix A_measure of size nxmxN. Each of the N slices
% A_measure(:, :, i) is a cloud of m points in R^n. These clouds are
% assumed to be (noisy) rotated versions of a reference cloud Atrue.
% This algorithm tries to find the optimal rotations to apply to the
% individual clouds such that they will match each other as much as
% possible following a least-squares cost.
%
% If A_measure is not provided, random test data is generated (n = 3,
% m = 10, N = 50) and the registration error with respect to the known
% reference cloud is printed.
%
% The output A is an estimate of the cloud Atrue (up to rotation). The
% output R is a 3D matrix of size nxnxN containing the rotation matrices
% such that R(:, :, i) * A is approximately equal to A_measure(:, :, i).
%
% If the dimension of the search space is at most 512, the spectrum of the
% Hessian at the solution is also plotted.

% This file is part of Manopt and is copyrighted. See the license file.
%
% Main author: Nicolas Boumal, July 8, 2013
% Contributors:
%
% Change log:
%   
    
    if ~exist('A_measure', 'var')
        % Generate random data to test the method.
        % There are N clouds of m points in R^n. Each of them is a noisy,
        % rotated version of a reference cloud A. Rotations are uniformly
        % random and noise on each rotated cloud is iid normal with
        % standard deviation sigma.
        n = 3;
        m = 10;
        N = 50;
        % The reference cloud
        Atrue = randn(n, m);
        % A 3D matrix containing the N measured clouds
        sigma = .3;
        A_measure = multiprod(randrot(n, N), Atrue) + sigma*randn(n, m, N);
    else
        % Dimensions are read off the data: n-by-m clouds, N of them.
        [n, m, N] = size(A_measure);
    end
    
    % Construct a manifold structure representing the product of groups of
    % rotations with the Euclidean space for A. We optimize simultaneously
    % for the reference cloud and for the rotations that affect each of the
    % measured clouds. Notice that there is a group invariance because
    % there is no way of telling which orientation the reference cloud
    % should be in.
    tuple.R = rotationsfactory(n, N);
    tuple.A = euclideanfactory(n, m);
    M = productmanifold(tuple);

    % NOTE(review): the nested functions below assign to variables named A
    % and R, which are also the names of this function's outputs; these
    % names are presumably shared with the parent workspace. This is
    % harmless here since A and R are overwritten after the solver
    % returns, but keep it in mind before renaming anything.

    % Define the cost function here. Points on the manifold M are
    % structures with fields X.A and X.R, containing matrices of sizes
    % respectively nxm and nxnxN. The store structure (the caching system)
    % is used to keep the residue matrix E in memory, as it is also used in
    % the computation of the gradient and of the Hessian. This way, we
    % prevent redundant computations.
    function [f, store] = cost(X, store)
        if ~isfield(store, 'E')
            R = X.R;
            A = X.A;
            % Residue: rotated reference cloud minus the measurements.
            store.E = multiprod(R, A) - A_measure;
        end
        E = store.E;
        % Half the squared norm of the residue, averaged over the N clouds.
        f = (E(:)'*E(:))/(2*N);
    end

    % Riemannian gradient of the cost function.
    function [g, store] = grad(X, store)
        R = X.R;
        A = X.A;
        % Calling the cost is the way to populate store.E if it is missing.
        if ~isfield(store, 'E')
            [~, store] = cost(X, store);
        end
        E = store.E;
        % Compute the Euclidean gradient of the cost wrt the rotations R
        % and wrt the cloud A,
        egrad.R = multiprod(E, A'/N);
        egrad.A = A - mean(multiprod(multitransp(R), A_measure), 3);
        % then transform this Euclidean gradient into the Riemannian
        % gradient.
        g = M.egrad2rgrad(X, egrad);
        % The Euclidean gradient is cached: the Hessian needs it too.
        store.egrad = egrad;
    end

    % It is not necessary to define the Hessian of the cost. We do it
    % mostly to illustrate how to do it and to study the spectrum of the
    % Hessian at the solution (see further down).
    function [h, store] = hess(X, Xdot, store)
        R = X.R;
        A = X.A;
        % Careful: tangent vectors on the rotation group are represented as
        % skew symmetric matrices. To obtain the corresponding vectors in
        % the ambient space, we need a little transformation. This
        % transformation is typically not needed when we compute the
        % formulas for the gradient and the Hessian directly in Riemannian
        % form instead of resorting to egrad2rgrad and ehess2rhess. These
        % latter tools are convenient for prototyping but are not always
        % the most efficient form to execute the computations.
        Rdot = tuple.R.tangent2ambient(R, Xdot.R);
        Adot = Xdot.A;
        % Calling the gradient populates both store.E and store.egrad.
        if ~isfield(store, 'egrad')
            [~, store] = grad(X, store);
        end
        E = store.E;
        egrad = store.egrad;
        
        % Directional derivatives of egrad.R and egrad.A along (Rdot, Adot).
        ehess.R = multiprod(multiprod(Rdot, A) + multiprod(R, Adot), A') + ...
                  multiprod(E, Adot');
        ehess.R = ehess.R / N;
        ehess.A = Adot-mean(multiprod(multitransp(Rdot), A_measure), 3);
        
        % Convert the Euclidean Hessian into the Riemannian Hessian.
        h = M.ehess2rhess(X, egrad, ehess, Xdot);
    end

    % Setup the problem structure with manifold M and cost+grad functions.
    problem.M = M;
    problem.cost = @cost;
    problem.grad = @grad;
    problem.hess = @hess;

    % For debugging, it's always nice to check the gradient a few times.
    % checkgradient(problem);
    % pause;
    % checkhessian(problem);
    % pause;
    
    % Call a solver on our problem. This can probably be much improved if a
    % clever initial guess is used instead of a random one.
    X = trustregions(problem);
    A = X.A;
    R = X.R;
    
    % To evaluate the performance of the algorithm, see how well Atrue (the
    % reference cloud) matches A (the found cloud). Since the recovery is
    % up to rotation, apply Kabsch algorithm (or standard Procrustes),
    % i.e., compute the polar factorization to best align Atrue and A.
    % Atrue only exists when the test data was generated above.
    if exist('Atrue', 'var')
        [U, ~, V] = svd(Atrue*A');
        Ahat = (U*V')*A;
        fprintf('Registration error: %g.\n', norm(Atrue-Ahat, 'fro'));
    end
    
    % Plot the spectrum of the Hessian at the solution found.
    % Notice that the invariance of f under a rotation yields dim SO(n),
    % that is, n*(n-1)/2 zero eigenvalues in the Hessian spectrum at the
    % solution. This indicates that critical points are not isolated and
    % can theoretically prevent quadratic convergence. One solution to
    % circumvent this would be to fix one rotation arbitrarily. Another
    % solution would be to work on a quotient manifold. Both can be
    % achieved in Manopt: they simply require a little more work on the
    % manifold description side.
    % The plot is skipped when the search space has dimension above 512.
    if M.dim() <= 512
        stairs(sort(hessianspectrum(problem, X)));
        title('Spectrum of the Hessian at the solution found.');
        xlabel('Eigenvalue number (sorted)');
        ylabel('Value of the eigenvalue');
    end
    
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/examples/low_rank_dist_completion.m
================================================
function [Y, infos, problem_description] =  low_rank_dist_completion(problem_description)
% Perform low-rank distance matrix completion w/ automatic rank detection.
%
% function Y = low_rank_dist_completion(problem_description)
% function [Y, infos, out_problem_description] = low_rank_dist_completion(problem_description)
%
% It implements the ideas of Journee, Bach, Absil and Sepulchre, SIOPT, 2010,
% applied to the problem of low-rank Euclidean distance matrix completion.
% The details are in the paper "Low-rank optimization for distance matrix completion",
% B. Mishra, G. Meyer, and R. Sepulchre, IEEE CDC, 2011.
%
% Paper link: http://arxiv.org/abs/1304.6663.
%
% Input:
% -------
%
% problem_description: The problem structure with the description of the problem.
%                      If it is omitted, an empty structure is used, which
%                      triggers the default 3d Helix instance.
%
%
% - problem_description.data_train: Data structure for known distances that are used to learn a low-rank model.
%                                   It contains the 3 fields that are shown
%                                   below. An empty "data_train" structure
%                                   will generate the 3d Helix instance.
%
%       -- data_train.entries:      A column vector consisting of known
%                                   distances. An empty "data_train.entries"
%                                   field will generate the 3d Helix
%                                   instance.
%
%       -- data_train.rows:         The row position of the corresponding
%                                   distances. An empty "data_train.rows"
%                                   field will generate the 3d Helix
%                                   instance.
%
%       -- data_train.cols:         The column position of the corresponding
%                                   distances. An empty "data_train.cols"
%                                   field will generate the 3d Helix
%                                   instance.
%
%
%
% - problem_description.data_test:  Data structure to compute distances for the "unknown" (to the algorithm) distances.
%                                   It contains the 3 fields that are shown
%                                   below. An empty "data_test" structure
%                                   will not compute the test error.
%
%       -- data_test.entries:       A column vector consisting of "unknown" (to the algorithm)
%                                   distances. An empty "data_test.entries"
%                                   field will not compute the test error.
%       -- data_test.rows:          The row position of the corresponding
%                                   distances. An empty "data_test.rows"
%                                   field will not compute the test error.
%       -- data_test.cols:          The column position of the corresponding
%                                   distances. An empty "data_test.cols"
%                                   field will not compute the test error.
%
%
%
% - problem_description.n:          The number of data points. An empty
%                                   "n", but complete "data_train" structure
%                                   will lead to an error, to avoid
%                                   potential data inconsistency.
%
%
%
%
%
% - problem_description.rank_initial: Starting rank. By default, it is 1.
%
%
%
% - problem_description.rank_max:     Maximum rank. By default, it is equal to
%                                     "problem_description.n".
%
%
%
%
% - problem_description.params:  Structure array containing algorithm
%                                parameters for stopping criteria.
%       -- params.abstolcost:    Tolerance on the absolute decrease of the
%                                cost (measured over two iterations of the
%                                fixed-rank solver).
%                                By default, it is 1e-3.
%
%
%       -- params.reltolcost:    Tolerance on the relative change of the
%                                cost (measured over two iterations of the
%                                fixed-rank solver).
%                                By default, it is 1e-3.
%       -- params.tolgradnorm:   Tolerance on the norm of the gradient.
%                                By default, it is 1e-5.
%       -- params.maxiter:       Maximum number of fixed-rank iterations.
%                                By default, it is 100.
%       -- params.tolSmin:       Tolerance on smallest eigenvalue of Sy,
%                                the dual variable: the rank search stops
%                                as soon as that eigenvalue exceeds tolSmin.
%                                By default, it is -1e-3.
%       -- params.tolrankdeficiency:   Tolerance on the
%                                      smallest singular value of Y.
%                                      By default, it is 1e-3.
%       -- params.solver:        Fixed-rank algorithm, given as a function
%                                handle. Options are
%                                @trustregions for trust-regions,
%                                @conjugategradient for conjugate gradients,
%                                @steepestdescent for steepest descent.
%                                 By default, it is @trustregions.
%
%
% Output:
% --------
%
%   Y:                    n-by-r solution matrix of rank r.
%   infos:                Structure array with computed statistics
%                         (fields time, cost, rank, test_error and
%                         rank_change_stats).
%   problem_description:  Structure array with used problem description.
%
%
%
% Please cite the Manopt paper as well as the research paper:
%     @InProceedings{mishra2011dist,
%       Title        = {Low-rank optimization for distance matrix completion},
%       Author       = {Mishra, B. and Meyer, G. and Sepulchre, R.},
%       Booktitle    = {{50th IEEE Conference on Decision and Control}},
%       Year         = {2011},
%       Organization = {{IEEE CDC}}
%     }


% This file is part of Manopt: www.manopt.org.
% Original author: Bamdev Mishra, April 06, 2015.
% Contributors: Nicolas Boumal.
% Change log:  
%   August 30 2016 (BM): 
%                   Corrected some logic flaws while plotting and storing
%                   rank information. A typo was also corrected.

    
    % Check problem description
    if ~exist('problem_description', 'var')
        problem_description = struct();
    end
    problem_description = check_problem_description(problem_description); % Check the problem description;
    
    
    % Common quantities
    data_train = problem_description.data_train;
    data_test =  problem_description.data_test;
    n =  problem_description.n;
    rank_initial = problem_description.rank_initial;
    rank_max =  problem_description.rank_max;
    params =  problem_description.params;
    % NOTE(review): data_train.nentries is not listed among the input
    % fields in the help text above; presumably it is filled in by
    % check_problem_description -- confirm.
    N = data_train.nentries; % Number of known distances
    % Column k of EIJ is e_i - e_j for the k-th known pair (i, j), so that
    % EIJ'*Y lists the differences between the corresponding rows of Y.
    EIJ = speye(n);
    EIJ = EIJ(:, data_train.rows) - EIJ(:, data_train.cols);
    rr = rank_initial; % Starting rank.
    Y = randn(n, rr); % Random starting initialization.
    
    
    % Information
    time = [];               % Time for each iteration per rank
    cost = [];               % Cost at each iteration per rank
    test_error = [];         % Test error at each iteration per rank
    rank = [];               % Rank at each iteration
    rank_change_stats = [];  % Some stats relating the change of ranks
    
    
    % Main loop rank search
    rank_search = 0;
    while (rr <= rank_max), % When r = n a global min is attained for sure.
        rank_search = rank_search + 1;
        
        fprintf('>> Rank %d <<\n', rr);
        
        % Follow the descent direction to compute an iterate in a higher dimension
        % (restartDir is set at the end of the previous pass through this
        % loop, hence it is only read once the rank has been increased.)
        if (rr > rank_initial),
            if isempty(restartDir), % If no restart dir avail. do a random restart
                disp('No restart dir available, random restart is performed');
                Y = randn(n, rr);
                
            else % Perform a simple line-search based on the restart direction
                disp('>> Line-search with restart direction');
                Y(:, rr) = 0; % Append a column of zeroes
                
                % Cost of the previous solution embedded in the new rank.
                Z = Y(data_train.rows, :) - Y(data_train.cols,:);
                estimDists = sum(Z.^2, 2);
                errors = (estimDists - data_train.entries);
                costBefore = 0.5*mean(errors.^2);
                fprintf('>> Cost before = %f\n',costBefore);
                
                % Simple linesearch to maintain monotonicity
                problem.M = symfixedrankYYfactory(n, rr);
                problem.cost = @(Y)  cost_evaluation(Y, data_train);
                % The search direction only moves the newly added column.
                d = zeros(size(Y));
                d(:, rr) = restartDir;
                [unused, Y] = linesearch_decrease(problem, Y, d, costBefore); %#ok<ASGLU>
                
                % Cost reached by the line-search.
                Z = Y(data_train.rows, :) - Y(data_train.cols,:);
                estimDists = sum(Z.^2, 2);
                errors = (estimDists - data_train.entries);
                costAfter = 0.5*mean(errors.^2);
                
                % Check for decrease
                if costAfter >= costBefore - 1e-8
                    disp('Decrease is not sufficient, random restart');
                    Y = randn(n, rr);
                end
                
            end
            
        end
        
        % Fixed-rank optimization with Manopt
        [Y, infos_fixedrank] = low_rank_dist_completion_fixedrank(data_train, data_test, Y, params);

        % Some info logging
        % Times reported for this rank are offset by the time accumulated
        % over the previous ranks, so that infos.time keeps increasing.
        thistime = [infos_fixedrank.time];
        if ~isempty(time)
            thistime = time(end) + thistime;
        end
        time = [time thistime]; %#ok<AGROW>
        cost = [cost [infos_fixedrank.cost]]; %#ok<AGROW>
        rank = [rank [infos_fixedrank.rank]]; %#ok<AGROW>
        rank_change_stats(rank_search).rank = rr; %#ok<AGROW>
        rank_change_stats(rank_search).iter = length([infos_fixedrank.cost]); %#ok<AGROW>
        rank_change_stats(rank_search).Y = Y; %#ok<AGROW>
        if isfield(infos_fixedrank, 'test_error')
            test_error = [test_error [infos_fixedrank.test_error]]; %#ok<AGROW>
        end
        
        
        % Evaluate gradient of the convex cost function (i.e. wrt X).
        Z = Y(data_train.rows, :) - Y(data_train.cols,:);
        estimDists = sum(Z.^2,2);
        errors = (estimDists - data_train.entries);
        
      
        % Dual variable and its minimum eigenvalue that is used to guarantee convergence.
        Sy = (0.5)*EIJ * sparse(1:N,1:N,2 * errors / N,N,N) * EIJ'; % "0.5" comes from 0.5 in cost evaluation 
        
        
        % Compute smallest algebraic eigenvalue of Sy,
        % this gives us a descent direction for the next rank (v)
        % as well as a way to control progress toward the global
        % optimum (s_min).
        
        % Make eigs silent.
        opts.disp = 0;
        opts.issym = true;
        [v, s_min] = eigs(Sy, 1, 'SA', opts);
        
        
        % Check whether Y is rank deficient.
        vp = svd(Y);
        
        % Stopping criterion.
        % Stop if the smallest eigenvalue of Sy is above the tolerance, or
        % if the smallest singular value of Y is below its tolerance.
        fprintf('>> smin = %.3e, and min(vp) = %.3e\n',s_min,min(vp));
        if (s_min  > params.tolSmin) || (min(vp) < params.tolrankdeficiency),
            break;
        end
        
        % Update rank
        rr = rr + 1;
        
        % Compute descent direction
        % The eigenvector v is used only if its eigenvalue is sufficiently
        % negative; otherwise the next rank starts from a random point.
        if (s_min < -1e-10),
            restartDir = v;
        else
            restartDir = [];
        end
    end
    
    
    % Collect relevant statistics
    infos.time = time;
    infos.cost = cost;
    infos.rank = rank;
    infos.test_error = test_error;
    infos.rank_change_stats = rank_change_stats;
    
    % Few plots.
    show_plots(problem_description, infos);
    
end


%% Cost function evaluation.
function val = cost_evaluation(Y, data_train)
% Half the mean squared error between the squared distances produced by
% the rows of Y and the known ones, over the pairs listed in data_train.
%
% For the k-th pair, rows data_train.rows(k) and data_train.cols(k) of Y
% are subtracted, and the squared norm of the difference is compared to
% data_train.entries(k).
    diffs = Y(data_train.rows, :) - Y(data_train.cols, :);
    residuals = sum(diffs.^2, 2) - data_train.entries;
    val = 0.5*mean(residuals.^2);
end


%% Local defaults
function localdefaults = getlocaldefaults()
% Default values of the algorithm parameters (the "params" structure).
%
% Fields: abstolcost, reltolcost, tolSmin, tolrankdeficiency, tolgradnorm,
% maxiter, and solver (a function handle; trust-regions by default).
    localdefaults = struct( ...
        'abstolcost',        1e-3, ...
        'reltolcost',        1e-3, ...
        'tolSmin',           -1e-3, ...
        'tolrankdeficiency', 1e-3, ...
        'tolgradnorm',       1e-5, ...
        'maxiter',           100, ...
        'solver',            @trustregions); % Trust-regions
end


%% Fixed-rank optimization
function [Yopt, infos] = low_rank_dist_completion_fixedrank(data_train, data_test, Y_initial, params)
% Solve the distance completion problem for a fixed rank.
%
% The rank r and the number of points n are the dimensions of Y_initial
% (n-by-r), which is also the initial iterate. The cost is half the mean
% squared error on the known squared distances listed in data_train. If
% data_test is not empty, the same error measure is recorded on data_test
% at every iteration (field test_error of infos). The solver and the
% stopping criteria are taken from params.
%
% Yopt is the point returned by the solver; infos is the solver's info
% structure array, with an extra field rank set to r in every entry.

    [n, r] = size(Y_initial);

    % Column k of EIJ is e_i - e_j for the k-th training pair (i, j):
    % EIJ'*Y stacks the differences between the matching rows of Y.
    I_n = speye(n);
    EIJ = I_n(:, data_train.rows) - I_n(:, data_train.cols);

    % Create problem structure
    problem.M = symfixedrankYYfactory(n,  r);


    % The differences EIJ'*Y are shared by cost, gradient and Hessian:
    % they are computed once per point and kept in the store.
    function store = ensure_xij(Y, store)
        if ~isfield(store, 'xij')
            store.xij = EIJ'*Y;
        end
    end


    % Cost evaluation
    problem.cost = @cost;
    function [f, store] = cost(Y, store)
        store = ensure_xij(Y, store);
        residuals = sum(store.xij.^2, 2) - data_train.entries;
        f = 0.5*mean(residuals.^2);
    end


    % Gradient evaluation
    problem.grad = @grad;
    function [g, store] = grad(Y, store)
        store = ensure_xij(Y, store);
        npairs = data_train.nentries;
        residuals = sum(store.xij.^2, 2) - data_train.entries;
        g = EIJ * sparse(1:npairs, 1:npairs, 2 * residuals / npairs, npairs, npairs) * store.xij;
    end


    % Hessian evaluation
    problem.hess = @hess;
    function [Hess, store] = hess(Y, eta, store)
        store = ensure_xij(Y, store);
        npairs = data_train.nentries;
        zij = EIJ'*eta;
        residuals = sum(store.xij.^2, 2) - data_train.entries;
        crossYZ = 2*sum(store.xij .* zij, 2);
        % Diagonal weights: one built from the residuals, the other from
        % their directional derivatives along eta.
        W_res   = sparse(1:npairs, 1:npairs, 2 * residuals / npairs, npairs, npairs);
        W_cross = sparse(1:npairs, 1:npairs, 2 * crossYZ / npairs, npairs, npairs);
        Hess = (EIJ*W_res)*zij + (EIJ*W_cross)*store.xij;
        Hess = problem.M.proj(Y, Hess);
    end


    %     % Check numerically whether gradient and Hessian are correct
    %     checkgradient(problem);
    %     drawnow;
    %     pause;
    %     checkhessian(problem);
    %     drawnow;
    %     pause;


    % When test data is available, have Manopt record the test error at
    % every iteration.
    if ~isempty(data_test)
        options.statsfun = @compute_test_error;
        EIJ_test = speye(n);
        EIJ_test = EIJ_test(:, data_test.rows) - EIJ_test(:, data_test.cols);
    end
    function stats = compute_test_error(problem, Y, stats) %#ok<INUSL>
        diffs_test = EIJ_test'*Y;
        residuals_test = sum(diffs_test.^2, 2) - data_test.entries;
        stats.test_error = 0.5*mean(residuals_test.^2);
    end


    % Stopping criteria options
    options.stopfun = @mystopfun;
    function stopnow = mystopfun(problem, Y, info, last) %#ok<INUSL>
        % Never stop on the cost criterion before the fifth info entry.
        stopnow = false;
        if last < 5
            return;
        end
        % Stop when the cost decrease over two iterations is small, in
        % absolute terms or relative to the current cost.
        drop = info(last-2).cost - info(last).cost;
        stopnow = (drop < params.abstolcost || abs(drop)/info(last).cost < params.reltolcost);
    end
    options.tolgradnorm = params.tolgradnorm;
    options.maxiter = params.maxiter;


    % Call appropriate algorithm
    options.solver = params.solver;
    [Yopt, ~, infos] = manoptsolve(problem, Y_initial, options);
    % Tag every entry of the info structure array with the rank used.
    [infos.rank] = deal(r);
end


%% 3d Helix problem instance
function problem_description = get_3d_Helix_instance()
% Build the default problem instance: points sampled on a helix in 3d.
%
% The returned structure has the fields data_train, data_test, n,
% rank_initial, rank_max and params (local defaults), plus Yo, the n-by-3
% matrix with the true coordinates of the points.
%
% A fraction of the pairwise squared distances forms the training set. The
% test set has (at most) as many entries and is sampled exclusively among
% the pairs that are NOT in the training set, so that the test error is
% indeed measured on distances unknown to the algorithm.
%
% Requires pdist (Statistics and Machine Learning Toolbox).
    
    % Helix curve in 3d
    tvec = 0:2*pi/100:2*pi;
    tvec = tvec'; % column vector
    xvec = 4*cos(3*tvec);
    yvec = 4*sin(3*tvec);
    zvec = 2*tvec;
    Yo = [xvec, yvec, zvec];
    n = size(Yo, 1); % Number of points
    
    % Fraction of unknown distances
    fractionOfUnknown = 0.85;
    
    % True (squared) distances among points in 3d Helix
    trueDists = pdist(Yo)'.^2;
    
    
    % Add noise (set noise_level = 0 for clean measurements)
    noise_level = 0; % 0.01;
    trueDists = trueDists + noise_level * std(trueDists) * randn(size(trueDists));
    
    
    % Compute all pairs of indices (I > J): the strictly lower triangular
    % part of an n-by-n matrix, read column by column. This is assumed to
    % match the order in which pdist lists the pairs.
    H = tril(true(n), -1);
    [I, J] = ind2sub([n, n], find(H(:)));
    clear 'H';
    
    
    % Train data: a random subset of the pairs
    train = false(length(trueDists), 1);
    train(1:floor(length(trueDists)*(1- fractionOfUnknown))) = true;
    train = train(randperm(length(train)));
    
    data_train.rows = I(train);
    data_train.cols = J(train);
    data_train.entries = trueDists(train);
    data_train.nentries = length(data_train.entries);
    
    
    % Test data: as many pairs as in the training set if possible, picked
    % at random among the pairs that were not selected for training.
    heldout = find(~train);
    heldout = heldout(randperm(length(heldout)));
    ntest = min(data_train.nentries, length(heldout)); % Depends how big data that we can handle.
    test = false(length(trueDists), 1);
    test(heldout(1:ntest)) = true;
    data_test.nentries = ntest;
    data_test.rows = I(test);
    data_test.cols = J(test);
    data_test.entries = trueDists(test);
    
    
    % Rank bounds
    rank_initial = 1; % Starting rank
    rank_max = n; % Maximum rank
    
    
    % Basic parameters used in optimization
    params = struct();
    params = mergeOptions(getlocaldefaults, params);
    
    
    % Problem description
    problem_description.data_train = data_train;
    problem_description.data_test = data_test;
    problem_description.n = n;
    problem_description.rank_initial = rank_initial;
    problem_description.rank_max = rank_max;
    problem_description.params = params;
    problem_description.Yo = Yo; % Store original Helix structure
end


%% Problem description check
function checked_problem_description = check_problem_description(problem_description)
% Validates a problem description and substitutes defaults where needed.
%
% Input:
%   problem_description: structure describing the distance completion
%       problem. The fields read here are data_train (with fields cols,
%       rows and entries), n, rank_initial, rank_max, data_test (same
%       fields as data_train) and params.
%
% Output:
%   checked_problem_description: copy of the input in which rank_initial,
%       rank_max, data_test and params have been validated or replaced by
%       their defaults. If the training data is missing or empty, the
%       default 3d Helix instance is returned instead, with the extra field
%       helix_example set to true.
%
% An error is raised if the training data is usable but the field "n" is
% missing.

    checked_problem_description = problem_description;
    
    % Training data: all of cols, rows and entries must exist and be
    % nonempty. The tests short-circuit in the order written, so nested
    % fields are only accessed once their parent is known to exist.
    train_is_usable = ~isempty(problem_description)...
        && all(isfield(problem_description, {'data_train'}))...
        && all(isfield(problem_description.data_train, {'cols', 'rows', 'entries'}))...
        && ~isempty(problem_description.data_train.cols)...
        && ~isempty(problem_description.data_train.rows)...
        && ~isempty(problem_description.data_train.entries);
    
    if ~train_is_usable
        warning('low_rank_dist_completion:problem_description', ...
            'The training set is empty or not properly defined. We work with the default 3d Helix example.\n');
        checked_problem_description = get_3d_Helix_instance();
        checked_problem_description.helix_example = true;
        return; % The default instance needs no further checking.
    end
    
    
    % Number of data points: mandatory, there is no sensible default.
    if ~isfield(problem_description, 'n')
        error('low_rank_dist_completion:problem_description',...
            'Error. The scalar corresponding to field "n" of problem description must be given. \n');
    end
    
    
    % Initial rank: must be present, nonempty and integer valued.
    starting_rank = 1;
    if isfield(problem_description, 'rank_initial')...
            && ~isempty(problem_description.rank_initial)...
            && floor(problem_description.rank_initial) == problem_description.rank_initial
        starting_rank = problem_description.rank_initial;
    else
        warning('low_rank_dist_completion:problem_description', ...
            'The field "rank_initial" is not properly defined. We work with the default "1".\n');
    end
    checked_problem_description.rank_initial = starting_rank;
    
    
    % Maximum rank: must be present, nonempty, integer valued and not
    % larger than n.
    largest_rank = problem_description.n;
    if isfield(problem_description, 'rank_max')...
            && ~isempty(problem_description.rank_max)...
            && floor(problem_description.rank_max) == problem_description.rank_max...
            && ~(problem_description.rank_max > problem_description.n)
        largest_rank = problem_description.rank_max;
    else
        warning('low_rank_dist_completion:problem_description', ...
            'The field "rank_max" is not properly defined. We work with the default "n".\n');
    end
    checked_problem_description.rank_max = largest_rank;
    
    
    % Test data: optional. Anything incomplete is replaced by [].
    test_is_usable = all(isfield(problem_description, {'data_test'}))...
        && all(isfield(problem_description.data_test, {'cols', 'rows', 'entries'}))...
        && ~isempty(problem_description.data_test.cols)...
        && ~isempty(problem_description.data_test.rows)...
        && ~isempty(problem_description.data_test.entries);
    
    if test_is_usable
        checked_test_set = problem_description.data_test;
    else
        warning('low_rank_dist_completion:problem_description', ...
            'The field "data_test" is not properly defined. We work with the default "[]".\n');
        checked_test_set = [];
    end
    checked_problem_description.data_test = checked_test_set;
    
    
    % Parameters: user supplied values are merged over the local defaults.
    user_params = struct();
    if isfield(problem_description, 'params')
        user_params = problem_description.params;
    end
    checked_problem_description.params = mergeOptions(getlocaldefaults, user_params);
     
end


%% Show plots
function  show_plots(problem_description, infos)
% Plots the optimization history and, for the Helix example, the recovery.
%
% Inputs:
%   problem_description: structure with fields params.solver (a function
%       handle, shown in the legends), data_train.nentries and n. The
%       field Yo (original point coordinates) is read only when the field
%       helix_example is present.
%   infos: structure with fields cost and rank_change_stats (a structure
%       array with fields rank, iter and Y), and optionally test_error.
%
% Up to three figures are produced:
%   1) the training cost (log scale) against the iteration count,
%   2) the test error (log scale), if infos.test_error is nonempty,
%   3) the original and recovered 3D structures, for the Helix example.
% In figures 1 and 2, a second set of axes on top marks the iterations at
% which the rank was changed.
   
    solver = problem_description.params.solver;
    rank_change_stats = infos.rank_change_stats;
    rank_change_stats_rank = [rank_change_stats.rank];
    rank_change_stats_iter = [rank_change_stats.iter];
    % Per-rank iteration counts are accumulated to get positions on the
    % global iteration axis.
    rank_change_stats_iter = cumsum(rank_change_stats_iter);
    N = problem_description.data_train.nentries;
    n = problem_description.n;
    
   
    % Plot: train error
    fs = 20;
    figure('name', 'Training on the known distances');
    
    line(1:length([infos.cost]),log10([infos.cost]),'Marker','O','LineStyle','-','Color','blue','LineWidth',1.5);
    ax1 = gca;
    
    set(ax1,'FontSize',fs);
    xlabel(ax1,'Number of iterations','FontSize',fs);
    ylabel(ax1,'Cost (log scale) on known distances','FontSize',fs);
    
    % Transparent overlay axes: the top x-axis displays the rank.
    ax2 = axes('Position',get(ax1,'Position'),...
        'XAxisLocation','top',...
        'YAxisLocation','right',...
        'Color','none',...
        'XColor','k');
    
    set(ax2,'FontSize',fs);
    line(1:length([infos.cost]),log10([infos.cost]),'Marker','O','LineStyle','-','Color','blue','LineWidth',1.5,'Parent',ax2);
    set(ax2,'XTick',rank_change_stats_iter(1:max(1,end-1)),...
        'XTickLabel',rank_change_stats_rank(1) + 1 : rank_change_stats_rank(max(1,end-1)) + 1,...
        'YTick',[]);
    
    set(ax2,'XGrid','on');
    legend(func2str(solver));
    title('Rank');
    legend 'boxoff';
    
    
    % Plot: test error
    if isfield(infos, 'test_error') && ~isempty(infos.test_error)
        
        fs = 20;
        figure('name','Test error on a set of distances different from the training set');
        
        line(1:length([infos.test_error]),log10([infos.test_error]),'Marker','O','LineStyle','-','Color','blue','LineWidth',1.5);
        ax1 = gca;
        
        set(ax1,'FontSize',fs);
        xlabel(ax1,'Number of iterations','FontSize',fs);
        ylabel(ax1,'Cost (log scale) on testing set','FontSize',fs);
        
        ax2 = axes('Position',get(ax1,'Position'),...
            'XAxisLocation','top',...
            'YAxisLocation','right',...
            'Color','none',...
            'XColor','k');
        
        set(ax2,'FontSize',fs);
        line(1:length([infos.test_error]),log10([infos.test_error]),'Marker','O','LineStyle','-','Color','blue','LineWidth',1.5,'Parent',ax2);
        set(ax2,'XTick',rank_change_stats_iter(1:max(1,end-1)),...
            'XTickLabel',rank_change_stats_rank(1) + 1 : rank_change_stats_rank(max(1,end-1)) + 1,...
            'YTick',[]);
        
        set(ax2,'XGrid','on');
        legend(func2str(solver));
        title('Rank');
        legend 'boxoff';
        
        
    end
    
    
    % Plot: visualize Helix curve
    if isfield(problem_description, 'helix_example')
        % The original coordinates are only needed (and only guaranteed to
        % be stored) for the Helix example, so they are read here rather
        % than in the test error branch above.
        Yo = problem_description.Yo;
        
        % One subplot for the original structure plus one per rank,
        % arranged on two columns.
        jj = ceil((length(rank_change_stats_rank) + 1)/2);
        
        
        figure('name',['3D structure with ', num2str(N/((n^2 -n)/2)),' fraction known distances'])
        fs = 20;
        ax1 = gca;
        set(ax1,'FontSize',fs);
        subplot(jj,2,1);
        plot3(Yo(:,1), Yo(:,2), Yo(:,3),'*','Color', 'b','LineWidth',1.0);
        title('Original 3D structure');
        for kk = 1 : length(rank_change_stats_rank)
            subplot(jj, 2, kk + 1);
            rank_change_stats_kk = rank_change_stats(kk);
            Ykk = rank_change_stats_kk.Y;
            if size(Ykk, 2) == 1
                % Rank 1: points on a line. The ordinate must be a column
                % vector: zeros(m) would be an m-by-m matrix and produce m
                % superimposed line objects.
                plot(Ykk(:,1), zeros(size(Ykk, 1), 1),'*','Color', 'r','LineWidth',1.0);
                legend(func2str(solver))
                title(['Recovery at rank ',num2str(size(Ykk, 2))]);
                
            elseif size(Ykk, 2) == 2
                plot(Ykk(:,1), Ykk(:,2),'*','Color', 'r','LineWidth',1.0);
                title(['Recovery at rank ',num2str(size(Ykk, 2))]);
                
            else  % Project onto dominant 3Dsubspace
                [U1, S1, V1] = svds(Ykk, 3);
                Yhat = U1*S1*V1';
                plot3(Yhat(:,1), Yhat(:,2), Yhat(:,3),'*','Color', 'r','LineWidth',1.0);
                title(['Recovery at rank ',num2str(size(Ykk, 2))]);
            end
            
            axis equal;
            
        end
        
        % Trick to add a global title to the whole subplot collection.
        % HitTest is disabled to make it easier to select the individual
        % subplots (for example, to rotate the viewing angle).
        ha = axes('Position',[0 0 1 1],'Xlim',[0 1],'Ylim',[0 1],'Box','off','Visible','off','Units','normalized', 'clipping' , 'off' );
        set(ha, 'HitTest', 'off');
        text(0.5, 1,['Recovery of Helix from ',num2str(N/((n^2 -n)/2)),' fraction known distances'],'HorizontalAlignment','center','VerticalAlignment', 'top');
    end
       
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/examples/low_rank_matrix_completion.m
================================================
function low_rank_matrix_completion()
% Given partial observation of a low rank matrix, attempts to complete it.
%
% function low_rank_matrix_completion()
%
% This example demonstrates how to use the geometry factory for the
% embedded submanifold of fixed-rank matrices, fixedrankembeddedfactory.
% This geometry is described in the paper
% "Low-rank matrix completion by Riemannian optimization"
% Bart Vandereycken - SIAM Journal on Optimization, 2013.
%
% This can be a starting point for many optimization problems of the form:
%
% minimize f(X) such that rank(X) = k, size(X) = [m, n].
%
% Note that the code is long because it showcases quite a few features of
% Manopt: most of the code is optional.
%
% Input:  None. This example file generates random data.
% 
% Output: None.

% This file is part of Manopt and is copyrighted. See the license file.
% 
% Main author: Nicolas Boumal, July 15, 2014
% Contributors: Bart Vandereycken
% 
% Change log:
% 
    
    % Random data generation. First, choose the size of the problem.
    % We will complete a matrix of size mxn of rank k:
    m = 200;
    n = 500;
    k = 10;
    % Generate a random mxn matrix A of rank k
    L = randn(m, k);
    R = randn(n, k);
    A = L*R';
    % Generate a random mask for observed entries: P(i, j) = 1 if the entry
    % (i, j) of A is observed, and 0 otherwise.
    fraction = 4 * k*(m+n-k)/(m*n);
    P = sparse(rand(m, n) <= fraction);
    % Hence, we know the nonzero entries in PA:
    PA = P.*A;
    
    
    % Pick the manifold of matrices of size mxn of fixed rank k.
    problem.M = fixedrankembeddedfactory(m, n, k);

    % Define the problem cost function. The input X is a structure with
    % fields U, S, V representing a rank k matrix as U*S*V'.
    % f(X) = 1/2 * || P.*(X-A) ||^2
    problem.cost = @cost;
    function f = cost(X)
        % Note that it is very much inefficient to explicitly construct the
        % matrix X in this way. Seen as we only need to know the entries
        % of Xmat corresponding to the mask P, it would be far more
        % efficient to compute those only.
        Xmat = X.U*X.S*X.V';
        f = .5*norm( P.*Xmat - PA , 'fro')^2;
    end

    % Define the Euclidean gradient of the cost function, that is, the
    % gradient of f(X) seen as a standard function of X.
    % nabla f(X) = P.*(X-A)
    problem.egrad = @egrad;
    function G = egrad(X)
        % Same comment here about Xmat.
        Xmat = X.U*X.S*X.V';
        G = P.*Xmat - PA;
    end

    % This is optional, but it's nice if you have it.
    % Define the Euclidean Hessian of the cost at X, along H, where H is
    % represented as a tangent vector: a structure with fields Up, Vp, M.
    % This is the directional derivative of nabla f(X) at X along Xdot:
    % nabla^2 f(X)[Xdot] = P.*Xdot
    problem.ehess = @euclidean_hessian;
    function ehess = euclidean_hessian(X, H)
        % The function tangent2ambient transforms H (a tangent vector) into
        % its equivalent ambient vector representation. The output is a
        % structure with fields U, S, V such that U*S*V' is an mxn matrix
        % corresponding to the tangent vector H. Note that there are no
        % additional guarantees about U, S and V. In particular, U and V
        % are not orthonormal.
        ambient_H = problem.M.tangent2ambient(X, H);
        Xdot = ambient_H.U*ambient_H.S*ambient_H.V';
        % Same comment here about explicitly constructing the ambient
        % vector as an mxn matrix Xdot: we only need its entries
        % corresponding to the mask P, and this could be computed
        % efficiently.
        ehess = P.*Xdot;
    end
    

    % Check consistency of the gradient and the Hessian. Useful if you
    % adapt this example for a new cost function and you would like to make
    % sure there is no mistake.
    % warning('off', 'manopt:fixedrankembeddedfactory:exp');
    % checkgradient(problem); pause;
    % checkhessian(problem); pause;
    
    
    % Compute an initial guess. Points on the manifold are represented as
    % structures with three fields: U, S and V. U and V need to be
    % orthonormal, S needs to be diagonal.
    [U, S, V] = svds(PA, k);
    X0.U = U;
    X0.S = S;
    X0.V = V;
    
    % Minimize the cost function using Riemannian trust-regions, starting
    % from the initial guess X0.
    X = trustregions(problem, X0);
    
    % The reconstructed matrix is X, represented as a structure with fields
    % U, S and V.
    Xmat = X.U*X.S*X.V';
    fprintf('||X-A||_F = %g\n', norm(Xmat - A, 'fro'));
    
    
    % Alternatively, we could decide to use a solver such as
    % steepestdescent or conjugategradient. These solvers need to solve a
    % line-search problem at each iteration. Standard line searches in
    % Manopt have generic purpose systems to do this. But for the problem
    % at hand, it so happens that we can rather accurately guess how far
    % the line-search should look, and it would be a waste to not use that.
    % Look up the paper referenced above for the mathematical explanation
    % of the code below.
    % 
    % To tell Manopt about this special information, we specify the
    % linesearch hint function in the problem structure. Notice that this
    % is not the same thing as specifying a linesearch function in the
    % options structure.
    % 
    % Both the SD and the CG solvers will detect that we
    % specify the hint function below, and they will use an appropriate
    % linesearch algorithm by default, as a result. Typically, they will
    % try the step t*H first, then if it does not satisfy an Armijo
    % criterion, they will decrease t geometrically until satisfaction or
    % failure.
    % 
    % Just like the cost, egrad and ehess functions, the linesearch
    % function could use a store structure if you like. The present code
    % does not use the store structure, which means quite a bit of the
    % computations are made redundantly, and as a result a better method
    % could appear slower. See the Manopt tutorial about caching when you
    % are ready to switch from a proof-of-concept code to an efficient
    % code.
    %
    % The inputs are X (a point on the manifold) and H, a tangent vector at
    % X that is assumed to be a descent direction. That is, there exists a
    % positive t such that f(Retraction_X(tH)) < f(X). The function below
    % is supposed to output a "t" that it is a good "guess" at such a t.
    problem.linesearch = @linesearch_helper;
    function t = linesearch_helper(X, H)
        % Note that you would not usually need the Hessian for this.
        %
        % The entries on the observed set are extracted with the mask P
        % rather than with nonzeros(): the latter silently drops observed
        % entries that happen to be exactly zero, in which case the two
        % vectors below would no longer be aligned entry by entry (or
        % would even have different lengths). Logical indexing traverses
        % the entries in the same column-major order as nonzeros() does.
        residual_mat   = problem.egrad(X);
        dir_mat        = problem.ehess(X, H);
        residual_omega = full(residual_mat(P));
        dir_omega      = full(dir_mat(P));
        % Least-squares solution of min_t || residual_omega + t*dir_omega ||.
        t = - dir_omega \ residual_omega ;
    end

    % Notice that for this solver, the Hessian is not needed.
    [Xcg, xcost, info, options] = conjugategradient(problem, X0); %#ok<ASGLU>
    
    fprintf('Take a look at the options that CG used:\n');
    disp(options);
    fprintf('And see how many trials were made at each line search call:\n');
    info_ls = [info.linesearch];
    disp([info_ls.costevals]);

    
    fprintf('Try it again without the linesearch helper.\n');
    
    % Remove the linesearch helper from the problem structure.
    problem = rmfield(problem, 'linesearch');
    
    [Xcg, xcost, info, options] = conjugategradient(problem, X0); %#ok<ASGLU>
    
    fprintf('Take a look at the options that CG used:\n');
    disp(options);
    fprintf('And see how many trials were made at each line search call:\n');
    info_ls = [info.linesearch];
    disp([info_ls.costevals]);
    
    
    % If the problem has a small enough dimension, we may (for analysis
    % purposes) compute the spectrum of the Hessian at a point X. This may
    % help in studying the conditioning of a problem. If you don't provide
    % the Hessian, Manopt will approximate the Hessian with finite
    % differences of the gradient and try to estimate its "spectrum" (it's
    % not a proper linear operator). This can give some intuition, but
    % should not be relied upon.
    if problem.M.dim() < 100
        fprintf('Computing the spectrum of the Hessian...');
        s = hessianspectrum(problem, X);
        hist(s);
    end
    
    
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/examples/low_rank_tensor_completion.m
================================================
function low_rank_tensor_completion()
% Given partial observation of a low rank tensor, attempts to complete it.
%
% function low_rank_tensor_completion()
%
% This example demonstrates how to use the geometry factory for the
% quotient manifold of fixed-rank tensors, 
% fixedrankfactory_tucker_preconditioned.
%
% This geometry is described in the technical report
% "Riemannian preconditioning for tensor completion"
% Hiroyuki Kasai and Bamdev Mishra, arXiv:1506.02159, 2015.
%
% This can be a starting point for many optimization problems of the form:
%
% minimize f(X) such that rank(X) = [r1 r2 r3], size(X) = [n1, n2, n3].
%
% Input:  None. This example file generates random data.
% 
% Output: None.
%
% Please cite the Manopt paper as well as the research paper:
%     @Techreport{kasai2015,
%       Title   = {{R}iemannian preconditioning for tensor completion},
%       Author  = {Kasai, H. and Mishra, B.},
%       Journal = {Arxiv preprint arXiv:1506.02159},
%       Year    = {2015}
%     }

% This file is part of Manopt and is copyrighted. See the license file.
% 
% Main authors: Hiroyuki Kasai and Bamdev Mishra, June 16, 2015.
% Contributors:
% 
% Change log:
% 
    

    % Random data generation with pseudo-random numbers from a 
    % uniform distribution on [0, 1].
    % First, choose the size of the problem.
    % We will complete a tensor of size n1-by-n2-by-n3 of rank (r1, r2, r3):  
    n1 = 70;
    n2 = 60;
    n3 = 50;
    r1 = 3;
    r2 = 4;
    r3 = 5;
    tensor_dims = [n1 n2 n3];
    core_dims = [r1 r2 r3];
    total_entries = n1*n2*n3;
    
    % Generate a random tensor A of size n1-by-n2-by-n3 of rank (r1, r2, r3).
    [U1,R1] = qr(rand(n1, r1), 0);
    [U2,R2] = qr(rand(n2, r2), 0);
    [U3,R3] = qr(rand(n3, r3), 0);

    Z.U1 = R1;
    Z.U2 = R2;
    Z.U3 = R3;   
    Z.G = rand( core_dims );
    Core = tucker2multiarray(Z); % Converts tucker format tensor to full tensor.

    Y.U1 = U1;
    Y.U2 = U2;
    Y.U3 = U3;
    Y.G = Core;
    A = tucker2multiarray(Y);       
    
    % Generate a random mask P for observed entries: P(i, j, k) = 1 if the entry
    % (i, j, k) of A is observed, and 0 otherwise.    
    % Observation ratio
    fraction = 0.1; % Fraction of known entries.
    nr = round(fraction * total_entries);
    ind = randperm(total_entries);
    ind = ind(1 : nr);
    P = false(tensor_dims);
    P(ind) = true;    
    % Hence, we know the nonzero entries in PA:
    PA = P.*A;  
    

    % Pick the manifold of tensors of size n1-by-n2-by-n3 of rank (r1, r2, r3).
    problem.M = fixedrankfactory_tucker_preconditioned(tensor_dims, core_dims);
    
    
    % Define the problem cost function. The input X is a structure with
    % fields U1, U2, U3, G representing a rank (r1,r2,r3) tensor.
    % f(X) = 1/2 * || P.*(X - A) ||^2
    problem.cost = @cost;
    function f = cost(X)
        Xmultiarray = tucker2multiarray(X);
        Diffmultiarray = P.*Xmultiarray - PA;
        % The Frobenius norm is computed on the mode-1 unfolding, since
        % norm(., 'fro') only accepts matrices.
        Diffmultiarray_flat = reshape(Diffmultiarray, n1, n2*n3);
        f = .5*norm(Diffmultiarray_flat , 'fro')^2;
    end


    % Define the Euclidean gradient of the cost function, that is, the
    % gradient of f(X) seen as a standard function of X.
    % nabla f(X) = P.*(X-A)
    % We only need to give the Euclidean gradient. Manopt converts it
    % internally to the Riemannian counterpart.
    problem.egrad =  @egrad;
    function [g] = egrad(X)
        Xmultiarray = tucker2multiarray(X);
        Smultiarray = P.*Xmultiarray - PA;     

        % BM: computation of S, S1, S2, and S3
        S1multiarray = reshape(Smultiarray, [n1, n2*n3]);
        S2multiarray = reshape(permute(Smultiarray, [2 1 3]),[n2, n1*n3]);
        S3multiarray = reshape(permute(Smultiarray, [3 1 2]),[n3, n1*n2]);

        g.U1 = double(S1multiarray) * kron(X.U3, X.U2) * reshape(X.G, r1, r2*r3)';
        g.U2 = double(S2multiarray) * kron(X.U3, X.U1) * reshape(permute(X.G, [2 1 3]), r2, r1*r3)';
        g.U3 = double(S3multiarray) * kron(X.U2, X.U1) * reshape(permute(X.G, [3 1 2]), r3, r1*r2)';
        g.G = reshape(X.U1' * reshape(double(Smultiarray),n1,n2*n3) * kron(X.U3', X.U2')', r1, r2, r3);  
    end
    
    
    % Define the Euclidean Hessian of the cost at X, along eta, where eta is
    % represented as a tangent vector: a structure with fields U1, U2, U3, G.
    % This is the directional derivative of nabla f(X) at X along Xdot:
    % nabla^2 f(X)[Xdot] = P.*Xdot
    % We only need to give the Euclidean Hessian. Manopt converts it
    % internally to the Riemannian counterpart.
    problem.ehess = @ehess;
    function [Hess] = ehess(X, eta)

        % Computing S, and its unfolding matrices, S1, S2, and S3.
        Xmultiarray = tucker2multiarray(X);
        S = P.*Xmultiarray - PA;     
        S1 = reshape(S, [n1, n2*n3]);
        S2 = reshape(permute(S, [2 1 3]),[n2, n1*n3]);
        S3 = reshape(permute(S, [3 1 2]),[n3, n1*n2]);            

        % Computing Sdot, S1dot, S2dot and S3dot.
        % The first-order variation of the Tucker tensor along eta is a sum
        % of four Tucker tensors (one per perturbed factor). It is built
        % here as a single Tucker tensor with a block-diagonal core G1 and
        % concatenated factor matrices.
        XG = X.G;
        etaG = eta.G;
        G1 = zeros(4*size(X.G));
        G1(1:r1, 1:r2, 1:r3) = XG;
        G1(r1 + 1 : 2*r1, r2 + 1 : 2*r2, r3 + 1 : 2*r3) = XG;
        G1(2*r1 + 1 : 3*r1, 2*r2 + 1 : 3*r2, 2*r3 + 1 : 3*r3) = XG;
        G1(3*r1 + 1 : 4*r1, 3*r2 + 1 : 4*r2, 3*r3 + 1 : 4*r3) = etaG;      
             
        X1.G = G1;
        X1.U1 = [eta.U1 X.U1 X.U1 X.U1];
        X1.U2 = [X.U2 eta.U2 X.U2 X.U2];
        X1.U3 = [X.U3 X.U3 eta.U3 X.U3];
        
        X1multiarray = tucker2multiarray(X1);
        Sdot = P.*X1multiarray;
        S1dot = reshape(Sdot, [n1, n2*n3]);
        S2dot = reshape(permute(Sdot, [2 1 3]),[n2, n1*n3]);
        S3dot = reshape(permute(Sdot, [3 1 2]),[n3, n1*n2]);
        
        % Computing unfolding matrices of X.G and eta.G.
        X_G1 = reshape(double(X.G),r1, r2*r3);
        X_G2 = reshape(permute(double(X.G),[2 1 3]),r2, r1*r3);
        X_G3 = reshape(permute(double(X.G),[3 1 2]),r3, r1*r2);
        eta_G1 = reshape(double(eta.G),r1, r2*r3);
        eta_G2 = reshape(permute(double(eta.G),[2 1 3]),r2, r1*r3);
        eta_G3 = reshape(permute(double(eta.G),[3 1 2]),r3, r1*r2);             

        % Computing Hessians for U1, U2 and U3.
        T1 = double(S1dot) * (kron(X.U3,X.U2)*X_G1') ...
            + double(S1) * (kron(eta.U3,X.U2)*X_G1' ...
            + kron(X.U3,eta.U2)*X_G1' + kron(X.U3,X.U2)*eta_G1');
        
        T2 = double(S2dot) * (kron(X.U3,X.U1)*X_G2') ...
            + double(S2) * (kron(eta.U3,X.U1)*X_G2' ...
            + kron(X.U3,eta.U1)*X_G2' + kron(X.U3,X.U1)*eta_G2');

        T3 = double(S3dot) * (kron(X.U2,X.U1)*X_G3') ...
            + double(S3) * (kron(eta.U2,X.U1)*X_G3' ...
            + kron(X.U2,eta.U1)*X_G3' + kron(X.U2,X.U1)*eta_G3');
        
        Hess.U1 = T1;
        Hess.U2 = T2;
        Hess.U3 = T3;  
        
        % Computing Hessian for G
        N.U1 = X.U1';
        N.U2 = X.U2';
        N.U3 = X.U3';
        N.G = Sdot;
        M0array = tucker2multiarray(N);
        
        M1.U1 = eta.U1';
        M1.U2 = X.U2';
        M1.U3 = X.U3';
        M1.G = S;    
        M1array = tucker2multiarray(M1);
        
        M2.U1 = X.U1';
        M2.U2 = eta.U2';
        M2.U3 = X.U3';
        M2.G = S;    
        M2array = tucker2multiarray(M2); 
        
        M3.U1 = X.U1';
        M3.U2 = X.U2';
        M3.U3 = eta.U3';
        M3.G = S;    
        M3array = tucker2multiarray(M3);   
        
        Hess.G = M0array + M1array + M2array + M3array; 
    end
    

    % Check consistency of the gradient and the Hessian. Useful if you
    % adapt this example for a new cost function and you would like to make
    % sure there is no mistake.
    %
    % Notice that the checkhessian test fails: the slope is not right. 
    % This is because the retraction is not second-order compatible with 
    % the Riemannian exponential on this manifold, making 
    % the checkhessian tool unusable. The Hessian is correct though. 
    % % warning('off', 'manopt:fixedrankfactory_tucker_preconditioned:exp');
    % % checkgradient(problem);
    % % drawnow;
    % % pause;
    % % checkhessian(problem);
    % % drawnow;
    % % pause;
    

    % options
    % These user options are passed unchanged to every solver call below,
    % so that all runs share the same stopping criteria.
    options.maxiter = 200;
    options.maxinner = 30;
    options.maxtime = inf;
    options.tolgradnorm = 1e-5;     


    % Minimize the cost function using Riemannian trust-regions
    Xtr = trustregions(problem, [], options);

    
    % The reconstructed tensor is X, represented as a structure with fields
    % U1, U2, U3 and G.    
    Xtrmultiarray = tucker2multiarray(Xtr);
    fprintf('||X-A||_F = %g\n', norm(reshape(Xtrmultiarray - A, [n1 n2*n3]), 'fro'));   
    
   
    % Alternatively, we could decide to use a solver such as steepestdescent (SD) 
    % or conjugategradient (CG). These solvers need to solve a
    % line-search problem at each iteration. Standard line searches in
    % Manopt have generic purpose systems to do this. But for the problem
    % at hand, we could exploit the least-squares structure to compute an
    % approximate stepsize for the line-search problem. The approximation
    % is obtained by linearizing the nonlinear manifold locally and further
    % approximating it with a degree 2 polynomial approximation.
    % The specific derivation is in the paper referenced above.
    
    problem.linesearch = @linesearch_helper;
    function tmin = linesearch_helper(X, eta)
        
        % term0
        Xmultiarray = tucker2multiarray(X);
        residual_mat = P.*Xmultiarray - PA;     
        residual_vec = residual_mat(:);
        term0 = residual_vec;
        
        % term1
        XG = X.G;
        etaG = eta.G;        
        G1 = zeros(4*size(X.G));
        G1(1:r1, 1:r2, 1:r3) = XG;
        G1(r1 + 1 : 2*r1, r2 + 1 : 2*r2, r3 + 1 : 2*r3) = XG;
        G1(2*r1 + 1 : 3*r1, 2*r2 + 1 : 3*r2, 2*r3 + 1 : 3*r3) = XG;
        G1(3*r1 + 1 : 4*r1, 3*r2 + 1 : 4*r2, 3*r3 + 1 : 4*r3) = etaG;  

        X1.U1 = [eta.U1 X.U1 X.U1 X.U1];
        X1.U2 = [X.U2 eta.U2 X.U2 X.U2];
        X1.U3 = [X.U3 X.U3 eta.U3 X.U3];
        X1.G = G1;
        
        X1multiarray = tucker2multiarray(X1);
        term1_mat = P.*X1multiarray;    
        term1 = term1_mat(:);
        
        % tmin is the solution to the problem argmin a2*t^2 + a1*t, where
        % the coefficients a1 and a2 are shown below.
        a2 = (term1'*term1);
        a1 = 2*(term1'*term0);
        tmin = - 0.5*(a1 / a2);
        
    end    

    % Notice that for this solver, the Hessian is not needed.
    % The fourth output holds the options actually used by CG (the user
    % options merged with the solver defaults). It is stored in a separate
    % variable so that the user options can be reused for the second run.
    [Xcg, costcg, infocg, options_used] = conjugategradient(problem, [], options); %#ok<ASGLU>
    
    fprintf('Take a look at the options that CG used:\n');
    disp(options_used);
    fprintf('And see how many trials were made at each line search call:\n');
    info_ls = [infocg.linesearch];
    disp([info_ls.costevals]); 
    
    
    fprintf('Try it again without the linesearch helper.\n');
    
    % Remove the linesearch helper from the problem structure.
    problem = rmfield(problem, 'linesearch');
    
    % Same user options as for the first run: only the presence of the
    % linesearch helper differs, which makes the two runs comparable.
    [Xcg, xcost, info, options_used] = conjugategradient(problem, [], options); %#ok<ASGLU>
    
    fprintf('Take a look at the options that CG used:\n');
    disp(options_used);
    fprintf('And see how many trials were made at each line search call:\n');
    info_ls = [info.linesearch];
    disp([info_ls.costevals]);
    
    
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/examples/maxcut.m
================================================
function [x, cutvalue, cutvalue_upperbound, Y] = maxcut(L, r)
% Algorithm to (try to) compute a maximum cut of a graph, via SDP approach.
% 
% function x = maxcut(L)
% function [x, cutvalue, cutvalue_upperbound, Y] = maxcut(L, r)
%
% L is the Laplacian matrix describing the graph to cut. The Laplacian of a
% graph is L = D - A, where D is the diagonal degree matrix (D(i, i) is the
% sum of the weights of the edges adjacent to node i) and A is the
% symmetric adjacency matrix of the graph (A(i, j) = A(j, i) is the weight
% of the edge joining nodes i and j). If L is sparse, this will be
% exploited.
%
% If the graph has n nodes, then L is nxn and the output x is a vector of
% length n such that x(i) is +1 or -1. This partitions the nodes of the
% graph in two classes, in an attempt to maximize the sum of the weights of
% the edges that go from one class to the other (MAX CUT problem).
%
% cutvalue is the sum of the weights of the edges 'cut' by the partition x.
%
% If the algorithm reached the global optimum of the underlying SDP
% problem, then it produces an upperbound on the maximum cut value. This
% value is returned in cutvalue_upperbound if it is found. Otherwise, that
% output is set to NaN.
%
% If r is specified (by default, r = n), the algorithm will stop at rank r.
% This may prevent the algorithm from reaching a globally optimal solution
% for the underlying SDP problem (but can greatly help in keeping the
% execution time under control). If a global optimum of the SDP is reached
% before rank r, the algorithm will stop of course. If r < 2, no
% optimization is run at all: the trivial cut x = ones(n, 1) (cut value 0)
% and Y = ones(n, 1) are returned.
%
% Y is a matrix of size nxp, with p <= r, such that X = Y*Y' is the best
% solution found for the underlying SDP problem. If cutvalue_upperbound is
% not NaN, then Y*Y' is optimal for the SDP and cutvalue_upperbound is its
% cut value.
% 
% By Goemans and Williamson 1995, it is known that if the optimal value of
% the SDP is reached, then a cut obtained by random hyperplane rounding
% has, in expectation, a value of at least 0.878 times that of the optimal
% cut (for nonnegative edge weights). This is not exactly valid here
% because we do not use random projection; sign(Y*randn(size(Y, 2), 1))
% will give a cut that respects this statement -- it's usually worse
% though.
%
% The algorithm is essentially that of:
% Journee, Bach, Absil and Sepulchre, SIAM 2010
% Low-rank optimization on the cone of positive semidefinite matrices.
%
% It is itself based on the famous SDP relaxation of MAX CUT:
% Goemans and Williamson, 1995
% Improved approximation algorithms for maximum cut and satisfiability
% problems using semidefinite programming.
%
% Side effect: a plot of the relaxed cut value against the solver time is
% drawn in the current figure.
% 
% See also: elliptope_SDP

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, July 18, 2013
% Contributors:
% Change log:
%   
%   April 3, 2015 (NB):
%       L products now counted with the new shared memory system. This is
%       more reliable and more flexible than using a global variable.


    % If no inputs are provided, generate a random graph Laplacian.
    % This is for illustration purposes only.
    if ~exist('L', 'var') || isempty(L)
        n = 20;
        A = triu(randn(n) <= .4, 1);
        A = A+A';
        D = diag(sum(A, 2));
        L = D-A;
    end


    n = size(L, 1);
    assert(size(L, 2) == n, 'L must be square.');

    if ~exist('r', 'var') || isempty(r) || r > n
        r = n;
    end
    
    % We will let the rank increase. Each rank value will generate a cut.
    % We have to go up in the rank to eventually find a certificate of SDP
    % optimality. This in turn will provide an upperbound on the MAX CUT
    % value and ensure that we're doing well, according to Goemans and
    % Williamson's argument. In practice though, the good cuts often come
    % up for low rank values, so we better keep track of the best one.
    best_x = ones(n, 1);
    best_cutvalue = 0;
    cutvalue_upperbound = NaN;
    
    % Rank-one feasible point of the SDP (Y*Y' has unit diagonal) matching
    % the trivial cut best_x. It is overwritten by the first solver call;
    % it only survives if the loop below does not execute (r < 2), in which
    % case it guarantees that the output Y is assigned.
    Y = ones(n, 1);
    
    time = [];
    cost = [];
    
    for rr = 2 : r
        
        manifold = elliptopefactory(n, rr);
        
        if rr == 2
            
            % At first, for rank 2, generate a random point.
            Y0 = manifold.rand();
             
        else
            
            % To increase the rank, we could just add a column of zeros to
            % the Y matrix. Unfortunately, this lands us in a saddle point.
            % To escape from the saddle, we may compute an eigenvector of
            % Sy associated to a negative eigenvalue: that will yield a
            % (second order) descent direction Z. See Journee et al ; Sy is
            % linked to dual certificates for the SDP.
            Y0 = [Y zeros(n, 1)];
            LY0 = L*Y0;
            Dy = spdiags(sum(LY0.*Y0, 2), 0, n, n);
            Sy = (Dy - L)/4;
            % Find the smallest (the "most negative") eigenvalue of Sy.
            eigsopts.issym = true;
            eigsopts.isreal = true;
            [v, s] = eigs(Sy, 1, 'SA', eigsopts);
            % If there is no negative eigenvalue for Sy, then we are not at
            % a saddle point: we're actually done!
            if s >= -1e-8
                % We can stop here: we found the global optimum of the SDP,
                % and hence the reached cost is a valid upper bound on the
                % maximum cut value. Here, info still holds the statistics
                % of the solve at the previous rank.
                cutvalue_upperbound = max(-[info.cost]);
                break;
            end
            
            % This is our escape direction.
            Z = manifold.proj(Y0, [zeros(n, rr-1) v]);
            
            % % These instructions can be uncommented to see what the cost
            % % function looks like at a saddle point. But will require the
            % % problem structure which is not defined here: see the helper
            % % function.
            % plotprofile(problem, Y0, Z, linspace(-1, 1, 101));
            % drawnow; pause;
            
            % Now make a step in the Z direction to escape from the saddle.
            % It is not obvious that it is ok to do a unit step ... perhaps
            % need to be cautious here with the stepsize. It's not too
            % critical though: the important point is to leave the saddle
            % point. But it's nice to guarantee monotone decrease of the
            % cost, and we can't do that with a constant step (at least,
            % not without a proper argument to back it up).
            stepsize = 1;
            Y0 = manifold.retr(Y0, Z, stepsize);
            
        end
        
        % Use the Riemannian optimization based algorithm lower in this
        % file to reach a critical point (typically a local optimizer) of
        % the max cut cost with fixed rank, starting from Y0.
        [Y, info] = maxcut_fixedrank(L, Y0);
        
        % Some info logging: solver times are accumulated across ranks so
        % that the final plot has a single, increasing time axis.
        thistime = [info.time];
        if ~isempty(time)
            thistime = time(end) + thistime;
        end
        time = [time thistime]; %#ok<AGROW>
        cost = [cost [info.cost]]; %#ok<AGROW>

        % Time to turn the matrix Y into a cut.
        % We can either do the random rounding as follows:
        % x = sign(Y*randn(rr, 1));
        % or extract the "PCA direction" of the points in Y and cut
        % orthogonally to that direction, as follows (seems faster than
        % calling svds):
        [U, ~, ~] = svd(Y, 0);
        u = U(:, 1);
        x = sign(u);
        % sign returns 0 for entries that are exactly zero, which is not a
        % valid class label: assign those nodes to the +1 class.
        x(x == 0) = 1;

        cutvalue = (x'*L*x)/4;
        if cutvalue > best_cutvalue
            best_x = x;
            best_cutvalue = cutvalue;
        end
        
    end
    
    x = best_x;
    cutvalue = best_cutvalue;
    
    plot(time, -cost, '.-');
    xlabel('Time [s]');
    ylabel('Relaxed cut value');
    title('The relaxed cut value is an upper bound on the optimal cut value.');

end


function [Y, info] = maxcut_fixedrank(L, Y)
% Run the trust-region solver on the fixed-rank relaxation of max cut.
%
% L is the nxn graph Laplacian and the input Y (nxr) is the initial guess.
% On return, Y is the point produced by the solver and info is the
% statistics structure array returned by the solver, augmented with the
% field Lproducts (number of products with L executed so far).

    [nnodes, rnk] = size(Y);
    assert(all(size(L) == nnodes));
    
    % Search space: the fixed-rank elliptope, that is, symmetric positive
    % semidefinite matrices of size n and rank r with unit diagonal,
    % represented by a factor Y.
    problem.M = elliptopefactory(nnodes, rnk);
    
    % % To compare against the (conceptually simpler) oblique manifold
    % % geometry, use this manifold instead.
    % problem.M = obliquefactory(rnk, nnodes, true);
    
    % % Rapid prototyping alternative: these three lines are enough to
    % % describe the cost, its Euclidean gradient and Euclidean Hessian.
    % problem.cost  = @(Y)  -trace(Y'*L*Y)/4;
    % problem.egrad = @(Y) -(L*Y)/2;
    % problem.ehess = @(Y, U) -(L*U)/2;
    
    % The versions used here (nested functions at the bottom) rely on the
    % caching system (the store structure) so that exactly the required
    % number of products with L is executed. Those products are counted in
    % store.shared, which is the same memory whichever point Y is visited.
    problem.cost  = @relaxedcost;
    problem.egrad = @eucgradient;
    problem.ehess = @euchessian;

    % After each iteration (and after the evaluation at the initial guess),
    % record the current value of the L-products counter in the stats.
    % Long form of the same thing:
    % options.statsfun = @statsfun;
    % function stats = statsfun(problem, Y, stats, store) %#ok
    %     stats.Lproducts = store.shared.counter;
    % end
    options.statsfun = statsfunhelper('Lproducts', ...
                     @(prob, pt, stats, store) store.shared.counter );

    % % Diagnostics tools, useful at the prototyping stage to validate the
    % % gradient and the Hessian.
    % checkgradient(problem); pause;
    % checkhessian(problem); pause;
    
    % % The spectrum of the Hessian helps in studying the rotational
    % % invariance of the oblique / elliptope geometries, or the saddle
    % % point issue discussed in the master function. This is slow in
    % % large dimensions!
    % stairs(sort(hessianspectrum(problem, Y)));
    % drawnow; pause;
    
    % % If a saddle point is an issue and no sure escape direction is
    % % known, trustregions can be told to run at least miniter iterations
    % % (hence ignoring the zero gradient at the saddle) and to kick start
    % % its inner solve (tCG) with random directions. This is less
    % % efficient than a sure escape direction, but sometimes it is the
    % % best available.
    % options.useRand = true;
    % options.miniter = 5;
    
    options.verbosity = 2;
    [Y, ~, info] = trustregions(problem, Y, options);
    
    fprintf('Products with L: %d\n', max([info.Lproducts]));


    % Make sure the product L*Y is available in the store of the point Y.
    % It is computed (and counted) only if it is not cached yet.
    function store = cacheLY(Y, store)
        if isfield(store, 'LY')
            return;
        end
        store.LY = L*Y;
        % The counter lives in the shared memory: create it on first use.
        if ~isfield(store.shared, 'counter')
            store.shared.counter = 0;
        end
        store.shared.counter = store.shared.counter + 1;
    end

    % Cost: -trace(Y'*L*Y)/4, evaluated as an inner product of the
    % vectorized matrices, which is faster than forming Y'*L*Y.
    function [f, store] = relaxedcost(Y, store)
        store = cacheLY(Y, store);
        f = -(Y(:)'*store.LY(:))/4;
    end

    % Euclidean gradient of the cost: -L*Y/2.
    function [g, store] = eucgradient(Y, store)
        store = cacheLY(Y, store);
        g = -store.LY/2;
    end

    % Euclidean Hessian of the cost along U: -L*U/2. This product with L
    % cannot be cached per point, so it is always executed and counted.
    function [h, store] = euchessian(Y, U, store)
        store = cacheLY(Y, store); % not strictly necessary
        h = -(L*U)/2;
        store.shared.counter = store.shared.counter + 1;
    end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/examples/nonlinear_eigenspace.m
================================================
function Xsol = nonlinear_eigenspace(L, k, alpha)
% Example of nonlinear eigenvalue problem: total energy minimization.
%
% function Xsol = nonlinear_eigenspace(L, k, alpha)
%
% L is a discrete Laplacian operator (n-by-n, invertible),
% alpha is a given constant, and
% k corresponds to the dimension of the least eigenspace sought. 
%
% Defaults, used when an input is missing or empty: L is the 100-by-100
% tridiagonal matrix with stencil (-1, 2, -1), k = min(10, n), alpha = 1.
% The default for k is also used if the supplied k exceeds n.
%
% This example demonstrates how to use the Grassmann geometry factory 
% to solve the nonlinear eigenvalue problem as the optimization problem:
%
% minimize 0.5*trace(X'*L*X) + (alpha/4)*(rho(X)'*(L\rho(X)))
% over X such that X'*X = Identity,
%
% where L is of size n-by-n,
% X is an n-by-k matrix, and
% rho(X) is the diagonal part of X*X' (as a column vector).
%
% The output Xsol is the n-by-k matrix returned by the solver.
%
% This example is motivated in the paper
% "A Riemannian Newton Algorithm for Nonlinear Eigenvalue Problems",
% Zhi Zhao, Zheng-Jian Bai, and Xiao-Qing Jin,
% SIAM Journal on Matrix Analysis and Applications, 36(2), 752-774, 2015.
%


% This file is part of Manopt and is copyrighted. See the license file.
%
% Main author: Bamdev Mishra, June 19, 2015.
% Contributors:
%
% Change log:


    % If no inputs are provided, generate a  discrete Laplacian operator.
    % This is for illustration purposes only.
    % The default example corresponds to Case (c) of Example 6.2 of the
    % above referenced paper.
    
    if ~exist('L', 'var') || isempty(L)
        n = 100;
        L = gallery('tridiag', n, -1, 2, -1);
    end
    
    n = size(L, 1);
    assert(size(L, 2) == n, 'L must be square.');
    
    if ~exist('k', 'var') || isempty(k) || k > n
        % The subspace dimension cannot exceed n: clamp the default so
        % that it remains valid for small operators (n < 10) too.
        k = min(10, n);
    end
    
    if ~exist('alpha', 'var') || isempty(alpha)
        alpha = 1;
    end
    
    
    % Grassmann manifold description
    Gr = grassmannfactory(n, k);
    problem.M = Gr;
    
    % Cost function evaluation
    problem.cost =  @cost;
    function val = cost(X)
        rhoX = sum(X.^2, 2); % diag(X*X'); 
        val = 0.5*trace(X'*(L*X)) + (alpha/4)*(rhoX'*(L\rhoX));
    end
    
    % Euclidean gradient evaluation
    % Note: Manopt automatically converts it to the Riemannian counterpart.
    % The product diag(v)*X is computed as a row scaling of X, to avoid
    % forming a dense n-by-n diagonal matrix.
    problem.egrad = @egrad;
    function g = egrad(X)
        rhoX = sum(X.^2, 2); % diag(X*X');
        g = L*X + bsxfun(@times, alpha*(L\rhoX), X);
    end
    
    % Euclidean Hessian evaluation
    % Note: Manopt automatically converts it to the Riemannian counterpart.
    problem.ehess = @ehess;
    function h = ehess(X, U)
        rhoX = sum(X.^2, 2); %diag(X*X');
        rhoXdot = 2*sum(X.*U, 2); % directional derivative of rho along U
        h = L*U + bsxfun(@times, alpha*(L\rhoXdot), X) ...
                + bsxfun(@times, alpha*(L\rhoX), U);
    end
    
    
    % Check whether gradient and Hessian computations are correct.
    % checkgradient(problem);
    % pause;
    % checkhessian(problem);
    % pause;
    
    
    % Initialization as suggested in above referenced paper: take a random
    % orthonormal X, and use the eigenvectors associated to the k
    % eigenvalues of smallest magnitude of L + alpha*diag(L\rho(X)).
    X = randn(n, k);
    [U, ~, V] = svd(X, 0);
    X = U*V';
    % The diagonal term is built as a sparse matrix so that a sparse L is
    % not turned into a dense matrix (if L is full, the sum is full).
    rho0 = sum(X.^2, 2);
    H0 = L + spdiags(alpha*(L\rho0), 0, n, n);
    % Two outputs are requested on purpose: with a single output, eigs
    % returns the eigenvalues rather than the eigenvectors.
    [X0, ~] = eigs(H0, k, 'sm');
  
    % Call manoptsolve to automatically call an appropriate solver.
    % Note: it calls the trust regions solver as we have all the required
    % ingredients, namely, gradient and Hessian, information.
    Xsol = manoptsolve(problem, X0);
    
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/examples/packing_on_the_sphere.m
================================================
function [X, maxdot] = packing_on_the_sphere(d, n, epsilon, X0)
% Return a set of points spread out on the sphere.
%
% function [X, maxdot] = packing_on_the_sphere(d, n, epsilon, X0)
%
% Using optimization on the oblique manifold, that is, the product of
% spheres, this function returns a set of n points with unit norm in R^d in
% the form of a matrix X of size nxd, such that the points are spread out
% on the sphere. Ideally, we would minimize the maximum inner product
% between any two points X(i, :) and X(j, :), i~=j, but that is a nonsmooth
% cost function. Instead, we replace the max function by a classical
% log-sum-exp approximation and (attempt to) solve:
%
% min_{X in OB(d, n)} epsilon*log( .5*sum_{i~=j} exp( xi'*xj/epsilon ) ),
%
% with xi = X(i, :)' and epsilon is some "diffusion constant". As epsilon
% goes to zero, the cost function is a sharper approximation of the max
% function (under some assumptions), but the cost function becomes stiffer
% and hence harder to optimize.
%
% All inputs are optional (pass [] to use a default): d = 3, n = 24,
% epsilon = 0.0015 and X0 is a random point on the manifold.
%
% The second output, maxdot, is the maximum inner product between any two
% points in the returned X. This number is the one we truly are trying to
% minimize.
%
% Notice that this cost function is invariant under rotation of X:
% f(X) = f(XQ) for all orthogonal Q in O(d).
% This calls for optimization over the set of symmetric positive
% semidefinite matrices of size n and rank d with unit diagonal, which can
% be thought of as the quotient of the oblique manifold OB(d, n) by O(d):
% See elliptopefactory.
%
% This is known as the Thomson or, more specifically, the Tammes problem:
% http://en.wikipedia.org/wiki/Tammes_problem
% An interesting page by Neil Sloane collecting best known packings is
% available here http://neilsloane.com/packings/
%
% Side effects: figures are created to display the results, and the
% maximum inner product is printed.

% This file is part of Manopt and is copyrighted. See the license file.
%
% Main author: Nicolas Boumal, July 2, 2013
% Contributors:
%
% Change log:
%   Aug. 14, 2013 (NB) : Code now compatible to experiment with both the
%                        obliquefactory and the elliptopefactory.
%
%   Jan.  7, 2014 (NB) : Added reference to Neil Sloane's page and the
%                        maxdot output.
%
%   June 24, 2014 (NB) : Now shifting exponentials to alleviate numerical
%                        trouble when epsilon is too small.
%   
    
    if ~exist('d', 'var') || isempty(d)
        % Dimension of the embedding space: R^d
        d = 3;
    end
    if ~exist('n', 'var') || isempty(n)
        % Number n of points to place on the sphere in R^d.
        % For example, n=12 yields an icosahedron:
        % https://en.wikipedia.org/wiki/Icosahedron
        % Notice though that platonic solids are not always optimal.
        % Try for example n = 8: you don't get a cube.
        n = 24;
    end
    if ~exist('epsilon', 'var') || isempty(epsilon)
        % This value should be as close to 0 as affordable.
        % If it is too close to zero, optimization first becomes much
        % slower, then simply doesn't work anymore because of floating
        % point overflow errors (NaN's and Inf's start to appear).
        % If it is too large, then log-sum-exp is a poor approximation of
        % the max function, and the spread will be less uniform.
        % An okay value seems to be 0.01 or 0.001 for example. Note that a
        % better strategy than using a small epsilon straightaway is to
        % reduce epsilon bit by bit and to warm-start subsequent
        % optimization in that way. Trustregions will be more appropriate
        % for these fine tunings.
        epsilon = 0.0015;
    end
    
    % Pick your manifold (the elliptope factory quotients out the global
    % rotation invariance of the problem, which is more natural but
    % conceptually a bit more complicated --- for usage with the toolbox it
    % is the same though: just uncomment the appropriate line).
    manifold = obliquefactory(d, n, true);
    % manifold = elliptopefactory(n, d);
    
    % Generate a random initial guess if none was given.
    if ~exist('X0', 'var') || isempty(X0)
        X0 = manifold.rand();
    end

    % Logical mask selecting the entries (i, j) with i < j of an nxn
    % matrix, that is, one entry per pair of distinct points. It is shared
    % with the nested cost function below.
    upper_mask = triu(true(n), 1);

    % Define the cost function with caching system used: the store
    % structure we receive as input is tied to the input point X. Everytime
    % this cost function is called at this point X, we will receive the
    % same store structure back. We may modify the store structure inside
    % the function and return it: the changes will be remembered for next
    % time.
    % Note: the Gram matrix is deliberately stored in a variable (G) whose
    % name is not used in the parent function. Variables used by both a
    % nested function and its parent are shared, so reusing a parent's
    % variable name here would silently overwrite the parent's data each
    % time the cost is evaluated.
    function [f, store] = cost(X, store)
        if ~isfield(store, 'ready')
            G = X*X';
            % Shift the exponentials by the maximum inner product between
            % distinct points to reduce numerical trouble due to possible
            % overflows. The maximum is taken over the pairs i < j only:
            % including structural zeros would give a wrong shift when all
            % inner products are negative, and the exponentials could then
            % all underflow to zero (leading to log(0)).
            s = max(G(upper_mask));
            if isempty(s)
                % Degenerate case n = 1: there is no pair of points.
                s = 0;
            end
            expXXt = exp((G-s)/epsilon);
            % Zero out the diagonal
            expXXt(1:(n+1):end) = 0;
            u = sum(sum(triu(expXXt, 1)));
            store.XXt = G;
            store.s = s;
            store.expXXt = expXXt;
            store.u = u;
            store.ready = true;
        end
        u = store.u;
        s = store.s;
        f = s + epsilon*log(u);
    end

    % Define the gradient of the cost. When the gradient is called at a
    % point X for which the cost was already called, the store structure we
    % receive remembers everything that the cost function stored in it, so
    % we can reuse previously computed elements.
    function [g, store] = grad(X, store)
        if ~isfield(store, 'ready')
            [~, store] = cost(X, store);
        end
        % Compute the Euclidean gradient
        eg = store.expXXt*X / store.u;
        % Convert to the Riemannian gradient (by projection)
        g = manifold.egrad2rgrad(X, eg);
    end

    % Setup the problem structure with its manifold M and cost+grad
    % functions.
    problem.M = manifold;
    problem.cost = @cost;
    problem.grad = @grad;

    % For debugging, it's always nice to check the gradient a few times.
    % checkgradient(problem);
    % pause;
    
    % Call a solver on our problem with a few options defined. We did not
    % specify the Hessian but it is still okay to call trustregion: Manopt
    % will approximate the Hessian with finite differences of the gradient.
    opts.tolgradnorm = 1e-8;
    opts.maxtime = 1200;
    opts.maxiter = 1e5;
    % X = trustregions(problem, X0, opts);
    X = conjugategradient(problem, X0, opts);
    
    % Evaluate the maximum inner product between any two points of X.
    XXt = X*X';
    dots = XXt(upper_mask);
    maxdot = max(dots);
    
    % Similarly, even though we did not specify the Hessian, we may still
    % estimate its spectrum at the solution. It should reflect the
    % invariance of the cost function under a global rotation of the
    % sphere, which is an invariance under the group O(d) of dimension
    % d(d-1)/2 : this translates into d(d-1)/2 zero eigenvalues in the
    % spectrum of the Hessian.
    % The approximate Hessian is not a linear operator, and it is a
    % fortiori not symmetric. The result of this computation is thus not
    % reliable. It does display the zero eigenvalues as expected though.
    if manifold.dim() < 300
        evs = real(hessianspectrum(problem, X));
        figure;
        stem(1:length(evs), sort(evs), '.');
        title(['Eigenvalues of the approximate Hessian of the cost ' ...
               'function at the solution']);
    end
    
    
    % Show how the geodesic distances acos(xi'*xj) between pairs of points
    % are distributed.
    figure;
    hist(real(acos(dots)), 20);
    title('Histogram of the geodesic distances');
    
    % This is the quantity we actually want to minimize.
    fprintf('Maximum inner product between two points: %g\n', maxdot);
    
    
    % Give some visualization if the dimension allows
    if d == 2
        % For the circle, the optimal solution consists in spreading the
        % points with angles uniformly sampled in (0, 2pi). This
        % corresponds to the following value for the max inner product:
        fprintf('Optimal value for the max inner product: %g\n', cos(2*pi/n));
        figure;
        t = linspace(-pi, pi, 201);
        plot(cos(t), sin(t), '-', 'LineWidth', 3, 'Color', [152,186,220]/255);
        daspect([1 1 1]);
        box off;
        axis off;
        hold on;
        plot(X(:, 1), X(:, 2), 'r.', 'MarkerSize', 25);
        hold off;
    end
    if d == 3
        figure;
        % Plot the sphere
        [sphere_x, sphere_y, sphere_z] = sphere(50);
        handle = surf(sphere_x, sphere_y, sphere_z);
        set(handle, 'FaceColor', [152,186,220]/255);
        set(handle, 'FaceAlpha', .5);
        set(handle, 'EdgeColor', [152,186,220]/255);
        set(handle, 'EdgeAlpha', .5);
        daspect([1 1 1]);
        box off;
        axis off;
        hold on;
        % Add the chosen points (slightly lifted off the sphere so that
        % they are not hidden by its surface).
        Y = 1.02*X';
        plot3(Y(1, :), Y(2, :), Y(3, :), 'r.', 'MarkerSize', 25);
        % And connect the points which are at minimal distance,
        % within some tolerance.
        min_distance = real(acos(maxdot));
        connected = real(acos(XXt)) <= 1.20*min_distance;
        [Ic, Jc] = find(triu(connected, 1));
        for pair = 1 : length(Ic)
            ends = [Ic(pair) Jc(pair)];
            plot3(Y(1, ends), Y(2, ends), Y(3, ends), 'k-');
        end
        hold off;
    end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/examples/positive_definite_karcher_mean.m
================================================
function X = positive_definite_karcher_mean(A)
% Karcher mean of a family of symmetric positive definite matrices.
%
% function X = positive_definite_karcher_mean(A)
%
% Input:  A is an array of size nxnxm; each slice A(:, :, k) is expected to
%         be a positive definite matrix of size nxn. If A is omitted or
%         empty, random test data is generated (n = 5, m = 50).
% 
% Output: X, of size nxn, is the point returned by the solver for the
%         minimization of
%            f(X) = (1/(2m)) * sum_{k=1}^m dist^2(X, A(:, :, k)),
%         where dist is the distance function of the manifold of positive
%         definite matrices, as provided by sympositivedefinitefactory.
%         Up to the positive factor 1/m, this is the sum of the halved
%         squared distances to the data, so the minimizers are the same.
% 
% This is tutorial code, not the best way to compute Karcher means: it is
% meant as a starting point to experiment with other algorithms. With
% large noise in particular, the algorithm seems unable to reach points
% with a very small gradient norm, possibly because the gradient is not
% computed accurately enough.

% This file is part of Manopt and is copyrighted. See the license file.
% 
% Main author: Nicolas Boumal, Sept. 3, 2013
% Contributors:
% 
% Change log:
% 
    
    % Without input, build a test set: slightly perturbed copies of a
    % random positive diagonal matrix, with eigenvalues clipped from below
    % to keep each slice positive definite.
    if nargin < 1 || isempty(A)
        n = 5;
        m = 50;
        A = zeros(n, n, m);
        center = diag(max(.1, 1+.1*randn(n, 1)));
        for slice = 1 : m
            perturbation = 0.01*randn(n);
            perturbation = (perturbation + perturbation')/2;
            [evecs, evals] = eig(center + perturbation);
            A(:, :, slice) = evecs*diag(max(.01, diag(evals)))*evecs';
        end
    end
    
    % Problem dimensions: m matrices of size nxn are averaged.
    n = size(A, 1);
    m = size(A, 3);
    assert(n == size(A, 2), ...
           ['The slices of A must be square, i.e., the ' ...
            'first and second dimensions of A must be equal.']);
    
    % The manifold is specified here and nowhere else. Swapping this
    % factory for another one yields code for Karcher means on that other
    % manifold, as long as it provides the functions dist and log.
    M = sympositivedefinitefactory(n);
    
    % Problem structure: manifold, cost and Riemannian gradient.
    problem.M = M;
    problem.cost = @cost;
    problem.grad = @grad;
    
    % Explicitly select a finite-difference approximation of the Hessian,
    % for solvers which need one (such as trust-regions).
    fdoptions = struct('stepsize', 1e-4);
    problem.approxhess = approxhessianFD(problem, fdoptions);
    
    % % The slopes displayed by checkgradient should agree on part of the
    % % plot at least. For this problem, a visual inspection of the plot
    % % is sometimes needed to make the call, but the gradient is correct.
    % checkgradient(problem);
    % pause;
    
    % % Uncomment to force the use of a proper parallel transport. This is
    % % optional: the default vector transport is the identity map, which
    % % is cheaper and seems to work well in practice.
    % M.transp = M.paralleltransp;
    
    % Call a solver with default options, starting from the first data
    % point. Most solvers do well here; limited-memory BFGS is one of them.
    X = rlbfgs(problem, A(:, :, 1));


    % The two functions below are kept simple on purpose (tutorial code):
    % they do many redundant computations, which the caching system could
    % avoid.
    
    % Cost: average over the data of the halved squared distances to X.
    function total = cost(X)
        total = 0;
        for k = 1 : m
            total = total + M.dist(X, A(:, :, k))^2;
        end
        total = total/(2*m);
    end

    % Riemannian gradient: minus the average of the logarithms at X of the
    % data points, accumulated one term at a time as
    % g <- g - log_X(A_k)/m.
    function g = grad(X)
        g = M.zerovec(X);
        for k = 1 : m
            logXAk = M.log(X, A(:, :, k));
            g = M.lincomb(X, 1, g, -1/m, logXAk);
        end
    end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/examples/radio_interferometric_calibration.m
================================================
function xsol = radio_interferometric_calibration(N, K)
% Estimates the gain matrices of N stations, each with K receivers.
%
% function xsol = radio_interferometric_calibration(N, K)
%
% N >= K is always assumed. Defaults: N = 10, K = 2.
%
% The example considers calibration of an array of N stations.
% We simulate a system with N stations, each having K receivers.
% For radio astronomy, K = 2.
%
% For a detailed exposition of the problem at hand, refer to the paper:
% "Radio interferometric calibration using a Riemannian manifold",
% Sarod Yatawatta, ICASSP, 2013.
% Available at http://dx.doi.org/10.1109/ICASSP.2013.6638382.
%
% The source of the signal is unpolarized (given by the matrix C).
% The measured data is the cross correlation of the signals at each
% receiver, so there are N(N-1)/2 possible cross correlations (baselines).
% Noise with a given SNR is added to the signal.
%
% The objective is to estimate the gain of each station (a K x K matrix),
% so the total size of the solution is N x (K x K), which is stored
% as an NK x K matrix.
%
% Note: each station gain matrix (K x K) can have a K x K unitary
% ambiguity, therefore we use the quotient manifold structure. The unitary
% ambiguity is common to all stations, so the solution obtained by the
% optimization routine always has an unknown unitary matrix that makes the
% solution different from the true solution.
%

% This file is part of Manopt: www.manopt.org.
% Original author: Sarod Yatawatta, June 29, 2015.
% Contributors: Bamdev Mishra.
% Change log:
%
%   June 28, 2016 (BM):
%       Modified the egrad and ehess operations according to
%       the modified metric in the symfixedrankYYcomplexfactory file,
%       where a factor of 2 was removed from the metric. Accordingly,
%       a factor of 2 was added to egrad and ehess operations.

    % Fall back on a small default problem when inputs are missing.
    if nargin < 1 || isempty(N)
        N = 10;
    end
    if nargin < 2 || isempty(K)
        K = 2;
    end

    assert(N >= K, 'N must be larger than or equal to K.');

    % Row indices of the s-th K x K block inside a stacked matrix with K
    % columns. Shared with the nested cost/derivative functions below.
    blk = @(s) (K*(s-1)+1) : (K*s);

    % Number of baselines (pairs of stations).
    B = N*(N-1)/2;

    % Source coherence, at phase center.
    C = eye(K);

    % Random gains J for all stations, stacked vertically.
    J = 0.2*rand(K*N,K) + 1i*rand(K*N,K);

    % Visibilities (cross correlations), one K x K block per baseline,
    % enumerated in the order (1,2), (1,3), ..., (N-1,N).
    V = zeros(K*B,K);
    ck = 1;
    for ci = 1 : N-1
        for cj = ci+1 : N
            V(blk(ck), :) = J(blk(ci), :)*C*J(blk(cj), :)';
            ck = ck + 1;
        end
    end

    % Additive complex Gaussian noise, scaled to reach the requested SNR.
    SNR = 10000; % inf;
    noise = randn(K*B,K)+1i*randn(K*B,K);
    noise_var = norm(V)^2/(norm(noise)^2*SNR);
    V = V + noise*sqrt(noise_var);


    % Optimization part: build the problem structure.
    % First the manifold description, then cost, gradient and Hessian.

    % Manifold description.
    % Note that the actual dimension is KN x K.
    problem.M = symfixedrankYYcomplexfactory(K*N, K);
    problem.cost  = @cost;
    problem.egrad = @egrad;
    problem.ehess = @ehess;

    % Cost: sum over baselines of the squared Frobenius norm of the
    % residual V_pq - x_p C x_q'.
    function fval = cost(x)
        fval = 0.0;
        b = 1;
        for p = 1 : N-1
            x_p = x(blk(p), :);
            for q = p+1 : N
                res = V(blk(b), :) - x_p*C*x(blk(q), :)'; % Residual
                fval = fval + real(res(:)'*res(:));
                b = b + 1;
            end
        end
    end

    % Euclidean gradient of the cost function.
    % Manopt automatically converts it to the Riemannian counterpart.
    % The code uses for-loops for readability, but could be vectorized
    % for improved speed.
    function grad = egrad(x)
        grad = zeros(K*N, K);
        b = 1;
        for p = 1 : N-1
            rp = blk(p);
            x_p = x(rp, :);
            for q = p+1 : N
                rq = blk(q);
                x_q = x(rq, :);
                res = 2*(V(blk(b), :) - x_p*C*x_q'); % Residual (doubled)
                grad(rp, :) = grad(rp, :) - res*x_q*C';
                grad(rq, :) = grad(rq, :) - res'*x_p*C;
                b = b + 1;
            end
        end
    end

    % Euclidean Hessian of the cost function along a search direction eta.
    % Manopt automatically converts it to the Riemannian counterpart.
    function hess = ehess(x, eta)
        hess = zeros(K*N, K);
        b = 1;
        for p = 1 : N-1
            rp = blk(p);
            x_p = x(rp, :);
            eta_p = eta(rp, :);
            for q = p+1 : N
                rq = blk(q);
                x_q = x(rq, :);
                eta_q = eta(rq, :);

                res = 2*(V(blk(b), :) -x_p*C*x_q'); % Residual (doubled)
                resdot = 2*(-x_p*C*eta_q'  - eta_p*C*x_q'); % Residual derivative

                hess(rp, :) = hess(rp, :) - (res*eta_q + resdot*x_q)*C';
                hess(rq, :) = hess(rq, :) - (res'*eta_p + resdot'*x_p)*C;
                b = b + 1;
            end
        end
    end


    % Execute some checks on the derivatives for early debugging.
    % checkgradient(problem);
    % pause;
    % checkhessian(problem);
    % pause;


    % Solve.
    [xsol,  xcost,  info] = trustregions(problem);
    fprintf('Final cost: %g.\n', xcost);


    % Display some statistics.
    fs = 11;
    figure;
    semilogy([info.iter], [info.gradnorm], 'o-.','Color','blue', 'MarkerSize',6, 'LineWidth',1.1);
    ax1 = gca;
    set(ax1,'FontSize',fs);
    xlabel(ax1, 'Iteration #', 'FontSize',fs);
    ylabel(ax1, 'Gradient norm', 'FontSize',fs);
    title('Convergence of the trust-regions algorithm');

    % Make a plot of estimation error (only for K = 2).
    if K == 2
        % Find unitary ambiguity first by solving min ||J - xsol U||.
        % This has a closed-form solution.
        [u, ignore, v] = svd(xsol'*J); %#ok<ASGLU>

        % Error in position, normalized by the norm of the true gains.
        E = (J - xsol*u*v')/norm(J);

        % One panel per column of the gain matrix.
        labels = {'Real E_1', 'Imag E_1', 'Position error 1st coordinate'; ...
                  'Real E_2', 'Imag E_2', 'Position error 2nd coordinate'};
        figure;
        for c = 1 : 2
            ax = subplot(1,2,c);
            quiver(real(J(:,c)), imag(J(:,c)), real(E(:,c)), imag(E(:,c)));
            hold all;
            scatter(real(J(:,c)), imag(J(:,c)));
            set(ax,'FontSize',fs);
            xlabel(labels{c, 1});
            ylabel(labels{c, 2});
            title(labels{c, 3});
            axis equal;
        end
    end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/examples/robust_pca.m
================================================
function [U, cost] = robust_pca(X, d)
% Computes a robust version of PCA (principal component analysis) on data.
% 
% function [U, cost] = robust_pca(X, d)
%
% Call without inputs to run a small built-in 2-D demo (with a plot).
%
% Given a matrix X of size p by n, such that each column represents a
% point in R^p, this computes U: an orthonormal basis of size p by d such
% that the column space of U captures the points X as well as possible.
% More precisely, the function attempts to compute U as the minimizer
% over the Grassmann manifold (the set of linear subspaces) of:
%
%  f(U) = (1/n) Sum_{i = 1:n} dist(X(:, i), the space spanned by U)
%       = (1/n) Sum_{i = 1:n} || U*U'*X(:, i) - X(:, i) ||
%
% The output cost represents the average distance achieved with the
% returned U. Notice that norms are not squared, for robustness.
%
% In practice, because this function is nonsmooth, it is smoothed with a
% pseudo-Huber loss function of parameter epsilon (noted e for short), and
% the smoothing parameter is iteratively reduced (with warm starts):
%
%   f_e(U) = (1/n) Sum_{i = 1:n} l_e(|| U*U'*X(:, i) - X(:, i) ||)
%
%   with l_e(x) = sqrt(x^2 + e^2) - e (for e = 0, this is absolute value).
%
% The intermediate optimization of the smooth cost over the Grassmann
% manifold is performed using the Manopt toolbox.
%
% Ideally, the non-outlier data should be centered. If not, this
% pre-processing centers all the data, but bear in mind that outliers will
% shift the center of mass too.
% X = X - repmat(mean(X, 2), [1, size(X, 2)]);
%
% There are no guarantees that this code will return the optimal U.
% This code is distributed to illustrate one possible way of optimizing
% a nonsmooth cost function over a manifold, using Manopt with smoothing.
% For practical use, the constants in the code would need to be tuned.

% This file is part of Manopt and is copyrighted. See the license file.
%
% Main author: Nicolas Boumal and Teng Zhang, May 2, 2014
% Contributors:
%
% Change log:
%
%   March 4, 2015 (NB):
%       Uses a pseudo-Huber loss rather than a Huber loss: this has the
%       nice advantage of being smooth and simpler to code (no if's).
%
%   April 8, 2015 (NB):
%       Built-in test data for quick tests; added comment about centering.


    % If no inputs, generate random data for illustration purposes.
    if nargin == 0
        % Generate some data points aligned on a subspace
        X = rand(2, 1)*(1:30) + .05*randn(2, 30).*[(1:30);(1:30)];
        % And add some random outliers to the mix
        P = randperm(size(X, 2));
        outliers = 10;
        X(:, P(1:outliers)) = 30*randn(2, outliers);
        % Center the data
        % X = X - repmat(mean(X, 2), [1, size(X, 2)]);
        d = 1;
    elseif nargin ~= 2
        % Fail early with a clear message rather than later on an
        % undefined d.
        error('Please provide 2 inputs (or none for a demo).');
    end


    % Prepare a Manopt problem structure for optimization of the given
    % cost (defined below) over the Grassmann manifold.
    [p, n] = size(X);
    manifold = grassmannfactory(p, d);
    problem.M = manifold;
    problem.cost = @robustpca_cost;
    problem.egrad = @robustpca_gradient;

    % Do classical PCA for the initial guess.
    % This is just one idea: it is not necessarily useful or ideal.
    % Using a random initial guess, and starting over for a few different
    % ones is probably much better. For this example, we keep it simple.
    [U, ~, ~] = svds(X, d);


    % Iteratively reduce the smoothing constant epsilon and optimize
    % the cost function over Grassmann. epsilon is shared with the nested
    % cost and gradient functions below.
    epsilon = 1;
    n_iterations = 6;
    reduction = .5;
    options.verbosity = 2; % Change this number for more or less output

    % No Hessian is supplied, so Manopt warns that it approximates it.
    % Silence that warning for the duration of the solves only, and put
    % the caller's previous setting back afterwards (also on error),
    % rather than unconditionally switching the warning on.
    warn_state = warning('off', 'manopt:getHessian:approx');
    try
        for iter = 1 : n_iterations
            U = trustregions(problem, U, options);
            epsilon = epsilon * reduction;
        end
    catch err
        warning(warn_state);
        rethrow(err);
    end
    warning(warn_state);


    % Return the cost as the actual sum of distances, not smoothed.
    epsilon = 0;
    cost = robustpca_cost(U);


    % If working with the auto-generated input, plot the results.
    if nargin == 0
        scatter(X(1,:), X(2,:));
        hold on;
        plot(U(1)*[-1, 1]*100, U(2)*[-1 1]*100, 'r');
        hold off;
        % Compare to a standard PCA
        [Upca, ~, ~] = svds(X,1);
        hold on;
        plot(Upca(1)*[-1, 1]*100, Upca(2)*[-1 1]*100, 'k');
        hold off;
        xlim(1.1*[min(X(1,:)), max(X(1,:))]);
        ylim(1.1*[min(X(2,:)), max(X(2,:))]);
        legend('data points', 'Robust PCA fit', 'Standard PCA fit');
    end


    % Smoothed cost: mean over the data points of the pseudo-Huber loss
    % applied to the distance between each point and its projection onto
    % the column space of U. Uses the current value of epsilon.
    function value = robustpca_cost(U)

        vecs = U*(U'*X) - X;
        sqnrms = sum(vecs.^2, 1);
        vals = sqrt(sqnrms + epsilon^2) - epsilon;
        value = mean(vals);

    end

    % Euclidean gradient of the smoothed cost (it will be transformed into
    % the Riemannian gradient automatically by Manopt).
    function G = robustpca_gradient(U)

        % Note that the computation of vecs and sqnrms is redundant
        % with their computation in the cost function. To speed
        % up the code, it would be wise to use the caching capabilities
        % of Manopt (the store structure). See online documentation.
        % It is not done here to keep the code a bit simpler.
        UtX = U'*X;
        vecs = U*UtX-X;
        sqnrms = sum(vecs.^2, 1);
        % This explicit loop is a bit slow: the code below is equivalent
        % and faster to compute the gradient.
        % G = zeros(p, d);
        % for i=1:n
        %     G = G + (1/sqrt(sqnrms(i) + epsilon^2)) * vecs(:,i) * UtX(:,i)';
        % end
        % G = G/n;
        G = mean(multiscale(1./sqrt(sqnrms + epsilon^2), ...
                           multiprod(reshape(vecs, [p, 1, n]), ...
                              multitransp(reshape(UtX, [d, 1, n])))), 3);
    end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/examples/shapefit_smoothed.m
================================================
function [T_hub, T_lsq, T_cvx] = shapefit_smoothed(V, J)
% ShapeFit formulation for sensor network localization from pair directions
%
% function [T_hub, T_lsq, T_cvx] = shapefit_smoothed(V, J)
%
% This example is based on the paper http://arxiv.org/abs/1506.01437:
% ShapeFit: Exact location recovery from corrupted pairwise directions, 2015
% by Paul Hand, Choongbum Lee and Vladislav Voroninski.
%
% The problem is the following: there are n points t_1, ..., t_n in R^d
% which need to be estimated (localized). To this end, we are given
% measurements of some of the pairwise directions,
% v_ij = (t_i - t_j) / norm(t_i - t_j) + noise.
% Assume there are m such pairwise measurements, defining a graph with m
% edges over n nodes. J is the signed incidence matrix of this graph (see
% in code). To build J from lists I, J in R^m of nodes, use:
% J = sparse([I ; J], [(1:m)' ; (1:m)'], [ones(m, 1), -ones(m, 1)], n, m, 2*m);
%
% The measurements are arranged in the matrix V of size d x m. From V, we
% attempt to estimate t_1, ..., t_n, arranged in T, a matrix of size d x n.
% The estimation can only be done up to translation and scaling. The
% returned T's are centered: the columns sum to zero.
%
% Outputs:
%   T_hub  estimate from the pseudo-Huber smoothed ShapeFit cost,
%   T_lsq  estimate from the least-squares cost,
%   T_cvx  estimate from CVX on the nonsmooth cost, or NaN(d, n) when CVX
%          is not used (see use_cvx_if_available in code).
%
% ShapeFit is a formulation of this estimation problem which is robust to
% outliers. It is a nonsmooth, convex optimization problem over an affine
% space, i.e., a linear manifold. We smooth the cost using the pseudo-Huber
% loss cost and solve the problem using Manopt. This requires two
% ingredients: (1) a factory to describe the affine space, see
% shapefitfactory; (2) defining the cost and its derivative, and minimizing
% it while progressively tightening the smooth approximation (with
% warm-start).
%
% Simply run the example to see the results on random data. It compares the
% smoothed ShapeFit formulation against a least-squares formulation, when
% the measurements include outliers. See in code to vary the noise
% parameters, dimension d, number of nodes n, number of measurements m, ...
%
% Note: since the problem is convex, this returns the global optimum.
% This example also illustrates the use of Manopt for optimization under
% linear constraints: admittedly a simple subcase of optimization on
% manifolds.
%
%
% See also: shapefitfactory

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, June 18, 2015.
% Contributors: 
% Change log: 


    % Generic useful functions
    center_cols = @(A) bsxfun(@minus, A, mean(A, 2));
    normalize_cols = @(A) bsxfun(@times, A, 1./sqrt(sum(A.^2, 1)));
    sqnorm_cols = @(A) sum(A.^2, 1);

    
    % DATA GENERATION
    %
    % If no inputs are specified, generate some random data for
    % illustration purposes.
    if nargin == 0

        % We estimate n points in R^d
        d =   2;
        n = 500;

        % Those points are the columns of T : they are what we need to
        % estimate, up to scaling and translation. We center T for
        % convenience.
        T_tru = center_cols(rand(d, n));

        % We get a measurement of some pairs of relative directions.
        % Which pairs is encoded in this graph, with J being the (signed,
        % transposed) incidence matrix. J is n x m, sparse.
        % There are roughly edge_fraction * n * (n-1) / 2 measurements.
        edge_fraction = 0.10;
        % [ii, jj] = erdosrenyi(n, edge_fraction);
        % The requested number of edges is rounded: it must be an integer
        % for the random draw, whatever edge_fraction is set to.
        [ii, jj] = randomgraph(n, round(edge_fraction*nchoosek(n, 2)));
        m = length(ii);
        J = sparse([ii ; jj], [(1:m)' ; (1:m)'], ...
                   [ones(m, 1), -ones(m, 1)], n, m, 2*m);

        % The measurements give the directions from one point to another.
        % That is: we get the position difference, normalized. Here, with
        % Gaussian noise. Least-squares will be well-suited for this.
        sigma = .0;
        V = normalize_cols(T_tru*J + sigma*randn(d, m)); % d x m

        % Outliers: we replace some of the direction measurements by
        % uniformly random unit-norm vectors.
        outlier_fraction = .3;
        outliers = rand(1, m) < outlier_fraction;
        V(:, outliers) = normalize_cols(randn(d, sum(outliers)));
        
    end % done generating random data
    
    
    [d, m] = size(V);
    n = size(J, 1);
    assert(size(J, 2) == m, 'J must be n x m, with V of size d x m.');

    VJt = full(V*J');

    % This "manifold" describes the Euclidean space of matrices T of size
    % d x n such that <VJt, T> = 1 and T has centered columns: T1 = 0.
    problem.M = shapefitfactory(VJt);

    % This linear operator computes the orthogonal projection of each
    % difference ti - tj on the orthogonal space to v_ij.
    % If the alignment is compatible with the data, then this is zero.
    % A(T) is a d x m matrix.
    function AT = A(T)
        TJ = T*J;
        AT = TJ - bsxfun(@times, V, sum(V .* TJ, 1));
    end

    % Need the adjoint of A, too. Input is d x m, output is d x n.
    Astar = @(W) (W - bsxfun(@times, V, sum(V.*W, 1)))*J';

    
    % LEAST-SQUARES
    %
    % First, work with a least-squares formulation of the problem.
    % That is, we minimize a (very nice) convex cost over an affine space.
    % Since the smooth solvers in Manopt converge to critical points, this
    % means they converge to global optimizers.
    problem.cost  = @(T) 0.5*norm(A(T), 'fro')^2;
    problem.egrad = @(T) Astar(A(T));
    problem.ehess = @(T, Tdot) Astar(A(Tdot));

    T_lsq = trustregions(problem);
    

    % PSEUDO-HUBER SMOOTHED SHAPEFIT
    %
    % Now solve the same, but with a pseudo-Huber loss instead of
    % least-squares.
    % We iteratively sharpen the Huber function, i.e., reduce delta.
    % It is important to warm start in such a fashion: trying to optimize
    % with a random initial guess and a very small delta is typically slow.
    % How fast one should decrease delta, and how accurately one should
    % optimize at each intermediate stage, is open for research.
    delta = 1;
    T_hub = []; % We could use T_lsq as initial guess, too.
    problem = rmfield(problem, 'ehess');

    % Without ehess, Manopt warns that it approximates the Hessian.
    % Silence that warning only while solving, then restore whatever
    % setting the caller had (also if the solver throws), so that this
    % example does not leave the warning disabled for the whole session.
    warn_state = warning('off', 'manopt:getHessian:approx');
    try
        for iter = 1 : 12

            delta = delta / 2;

            h = @(x2) sqrt(x2 + delta^2) - delta; % pseudo-Huber loss

            % Derivative of the loss applied column-wise to a residual
            % matrix R = A(T): each column is scaled by
            % 1/sqrt(||column||^2 + delta^2). Written as a function of R
            % so that A(T) is evaluated once per gradient call.
            reweight = @(R) bsxfun(@times, R, ...
                                   1./sqrt(sqnorm_cols(R) + delta^2));

            problem.cost  = @(T) sum(h(sqnorm_cols(A(T))));
            problem.egrad = @(T) Astar(reweight(A(T)));

            % Solve, using the previous solution as initial guess.
            T_hub = trustregions(problem, T_hub);

        end
    catch err
        warning(warn_state);
        rethrow(err);
    end
    warning(warn_state);
    
    
    % CVX SHAPEFIT
    %
    % Actual ShapeFit cost (nonsmooth), with CVX.
    % You can get CVX from http://cvxr.com/.
    use_cvx_if_available = false;
    if use_cvx_if_available && exist('cvx_version', 'file')
        T_cvx = shapefit_cvx(V, J);
    else
        T_cvx = NaN(d, n);
    end
    
    
    % VISUALIZATION
    %
    % If T_tru is available (demo mode), for display, we scale the
    % estimators to match the norm of the target. The scaling factor is
    % obtained by minimizing the norm of the discrepancy :
    % norm(T_tru - scale*T_xxx, 'fro').
    % A plot is produced if d is 2 or 3.
    if exist('T_tru', 'var') && (d == 2 || d == 3)
        
        T_lsq = T_lsq * trace(T_tru'*T_lsq) / norm(T_lsq, 'fro')^2;
        T_hub = T_hub * trace(T_tru'*T_hub) / norm(T_hub, 'fro')^2;
        T_cvx = T_cvx * trace(T_tru'*T_cvx) / norm(T_cvx, 'fro')^2;

    
        switch d
            case 2
                plot(T_tru(1, :), T_tru(2, :), 'bo', ...
                     T_lsq(1, :), T_lsq(2, :), 'rx', ...
                     T_hub(1, :), T_hub(2, :), 'k.', ...
                     T_cvx(1, :), T_cvx(2, :), 'g.');
            case 3
                plot3(T_tru(1, :), T_tru(2, :), T_tru(3, :), 'bo', ...
                      T_lsq(1, :), T_lsq(2, :), T_lsq(3, :), 'rx', ...
                      T_hub(1, :), T_hub(2, :), T_hub(3, :), 'k.', ...
                      T_cvx(1, :), T_cvx(2, :), T_cvx(3, :), 'g.');
        end

        legend('ground truth', 'least squares', ...
               sprintf('pseudo-huber, \\delta = %.1e', delta), ...
               'CVX ShapeFit');
           
        title(sprintf(['ShapeFit problem : d = %d, n = %d, edge ' ...
                       'fraction = %.2g, sigma = %.2g, outlier ' ...
                       'fraction = %.2g'], d, n, edge_fraction, sigma, ...
                       outlier_fraction));
        axis equal;
    
    end

end


% If CVX is available, it can be used to solve the nonsmooth problem
% directly, very elegantly.
function T_cvx = shapefit_cvx(V, J)
% Solves the nonsmooth ShapeFit problem with CVX.
%
% function T_cvx = shapefit_cvx(V, J)
%
% V holds the measurements (its number of rows is used as d) and J is the
% matrix multiplying T on the right (its number of rows is used as n).
% Returns T_cvx, the d x n CVX variable at the solution. Requires CVX to
% be on the path.
    d = size(V, 1);
    n = size(J, 1); %#ok<NASGU>
    VJt = full(V*J');
    cvx_begin
        variable T_cvx(d, n)
        % We want to minimize this:
        % minimize sum( norms( A(T_cvx), 2, 1 ) )
        % But unfortunately, CVX doesn't handle bsxfun. Instead, we use
        % repmat, which is slower, and hence hurts the comparison in
        % disfavor of CVX.
        minimize sum( norms( T_cvx*J - V .* repmat(sum(V .* (T_cvx*J), 1), [d, 1])  , 2, 1 ) )
        % Constraint: each row of T_cvx sums to zero.
        sum(T_cvx, 2) == zeros(d, 1); %#ok<NODEF,EQEFF>
        % Constraint: the inner product of VJt and T_cvx equals 1.
        VJt(:).' * T_cvx(:) == 1; %#ok<EQEFF>
    cvx_end
end


function [I, J, A] = erdosrenyi(n, p) %#ok<DEFNU>
% Draws a random undirected Erdos-Renyi graph on n nodes.
%
% [I, J, A] = erdosrenyi(n, p)
%
% Each of the possible edges between two distinct nodes is kept
% independently with probability p. The edges are returned as the lists
% (I(k), J(k)), and A is the symmetric adjacency matrix.
%
% I(k) < J(k) for all k, i.e., all(I<J) is true.
%
% This simple implementation draws a dense n x n random matrix, so its
% memory requirements scale as O(n^2).

    % Keep the strictly upper-triangular part of a thresholded random
    % matrix: one 0/1 indicator per unordered pair of distinct nodes.
    upper = triu(double(rand(n) <= p), 1);

    % Symmetrize to obtain the adjacency matrix.
    A = upper + upper';

    % Nonzeros of the upper triangle have row index < column index.
    [I, J] = find(upper);

end


function [I, J, A] = randomgraph(n, m)
% Generates a random graph over n nodes with at most m edges.
%
% function [I, J, A] = randomgraph(n, m)
%
% Selects m (undirected) edges from a graph over n nodes, uniformly at
% random, with replacement. The self edges and repeated edges are then
% discarded. The remaining number of edges is at most m, and should be
% close to m if m is much smaller than nchoosek(n, 2).
%
% m need not be an integer (it is typically computed as a fraction of
% nchoosek(n, 2)): it is rounded down, and negative values are treated
% as 0, so that "at most m edges" always holds.
%
% The output satisfies all(I < J). A is the corresponding adjacency matrix.
%
% Uses O(m) memory (not O(n^2)), making it fit for large, sparse graphs.

    % randi requires integer sizes: sanitize the requested edge count.
    m = max(0, floor(m));

    % Generate m edges at random, with replacement, and remove repetitions.
    % Sorting each row puts the smaller node index first.
    IJ = unique(sort(randi(n, m, 2), 2), 'rows');
    
    % Remove self-edges if any.
    IJ = IJ(IJ(:, 1) ~= IJ(:, 2), :);
    
    % Actual number of selected edges
    k = size(IJ, 1);
    
    I = IJ(:, 1);
    J = IJ(:, 2);
    
    % Build the adjacency matrix of the graph.
    A = sparse([I ; J], [J ; I], ones(2*k, 1), n, n, 2*k);

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/examples/sparse_pca.m
================================================
function [Z, P, X, A] = sparse_pca(A, m, gamma)
% Sparse principal component analysis based on optimization over Stiefel.
%
% [Z, P, X] = sparse_pca(A, m, gamma)
%
% Call with no inputs for a quick demo on random data; in that case the
% generated data matrix is available as a fourth output A.
%
% We consider sparse PCA applied to a data matrix A of size pxn, where p is
% the number of samples (observations) and n is the number of variables
% (features). We attempt to extract m different components. The parameter
% gamma, which must lie between 0 and the largest 2-norm of a column of
% A, tunes the balance between best explanation of the variance of the data
% (gamma = 0, mostly corresponds to standard PCA) and best sparsity of the
% principal components Z (gamma maximal, Z is zero). The variables
% contained in the columns of A are assumed centered (zero-mean).
%
% The output Z of size nxm represents the principal components. There are m
% columns, each one of unit norm and capturing a preferred direction of the
% data, while trying to be sparse. P has the same size as Z and represents
% the sparsity pattern of Z. X is an orthonormal matrix of size pxm
% produced internally by the algorithm.
%
% With classical PCA, the variability captured by m components is
% sum(svds(A, m))
% With the outputted Z, which should be sparser than normal PCA, it is
% sum(svd(A*Z))
%
% The method is based on the maximization of a differentiable function over
% the Stiefel manifold of dimension pxm. Notice that this dimension is
% independent of n, making this method particularly suitable for problems
% with many variables but few samples (n much larger than p). The
% complexity of each iteration of the algorithm is linear in n as a result.
%
% The theory behind this code is available in the paper
% http://jmlr.org/papers/volume11/journee10a/journee10a.pdf
% Generalized Power Method for Sparse Principal Component Analysis, by
% Journee, Nesterov, Richtarik and Sepulchre, JMLR, 2010.
% This implementation is not equivalent to the one described in that paper
% (and is independent from their authors) but is close in spirit
% nonetheless. It is provided with Manopt as an example file but was not
% optimized: please do not judge the quality of the algorithm described by
% the authors of the paper based on this implementation.

% This file is part of Manopt and is copyrighted. See the license file.
% 
% Main author: Nicolas Boumal, Dec. 24, 2013
% Contributors:
% 
% Change log:
% 

    switch nargin
        case 0
            % Demo mode: 10 samples of 100 random variables, 2 components.
            m = 2;
            A = randn(10, 100);
            % Regularization parameter. This should be between 0 and the
            % largest 2-norm of a column of A.
            gamma = 1;
        case 3
            % All inputs supplied by the caller: nothing to set up.
        otherwise
            error('Please provide 3 inputs (or none for a demo).');
    end

    % Main algorithm: produces the sparsity pattern P (and the point X).
    [P, X] = sparse_pca_stiefel_l1(A, m, gamma);

    % Principal components consistent with that sparsity pattern.
    Z = postprocess(A, P, X);

end


% Sparse PCA based on the block sparse PCA algorithm with l1-penalty as
% featured in the reference paper by Journee et al. This is not the same
% algorithm but it is the same cost function optimized over the same search
% space. We force N = eye(m).
function [P, X] = sparse_pca_stiefel_l1(A, m, gamma)
% Computes a sparsity pattern P (and a Stiefel point X) for sparse PCA.
%
% Minimizes f(X) = -.5*norm(max(0, abs(A'*X) - gamma), 'fro')^2 over the
% Stiefel manifold of size p x m, with p = size(A, 1), using Manopt's
% trustregions solver from its default initial point. P marks the entries
% of A'*X whose magnitude exceeds gamma.

    % The optimization takes place over a Stiefel manifold whose dimension
    % is independent of the number of columns of A. This is especially
    % useful when there are many more variables than samples.
    p = size(A, 1);
    problem.M = stiefelfactory(p, m);

    % Cost and Euclidean gradient (egrad rather than grad: Manopt converts
    % it to the Riemannian gradient). Both are nested functions, below.
    problem.cost  = @cost;
    problem.egrad = @egrad;

    % checkgradient(problem);
    % pause;

    % The optimization happens here. To improve the method, it may be
    % interesting to investigate better-than-random initial iterates and,
    % possibly, to fine tune the parameters of the solver.
    X = trustregions(problem);

    % Compute the sparsity pattern by thresholding
    P = abs(A'*X) > gamma;


    % Fills the caching structure 'store' with the quantities shared by
    % the cost and the gradient at X, unless this was already done.
    function store = prepare(X, store)
        if isfield(store, 'ready') && store.ready
            return;
        end
        store.AtX = A'*X;
        store.absAtX = abs(store.AtX);
        store.pos = max(0, store.absAtX - gamma);
        store.ready = true;
    end

    % Cost at X, computed from the cached soft-thresholded magnitudes.
    function [f, store] = cost(X, store)
        store = prepare(X, store);
        f = -.5*norm(store.pos, 'fro')^2;
    end

    % Euclidean gradient at X; the result itself is cached in store.G.
    function [G, store] = egrad(X, store)
        if ~isfield(store, 'G')
            store = prepare(X, store);
            store.G = -A*(store.pos .* sign(store.AtX));
        end
        G = store.G;
    end

end


% This post-processing algorithm produces a matrix Z of size nxm matching
% the sparsity pattern P and representing sparse principal components for
% A. This is to be called with the output of the main algorithm. This
% algorithm is described in the reference paper by Journee et al.
function Z = postprocess(A, P, X)
% Computes sparse principal components Z matching the sparsity pattern P.
%
% function Z = postprocess(A, P, X)
%
% Starting from X, alternates between
%   Z = A'*X restricted to the pattern P, with columns scaled to unit norm,
%   X = U-factor of the polar decomposition of A*Z,
% until the relative change in Z (Frobenius norm) is at most 1e-8, or 1000
% iterations were executed. Progress is printed with fprintf.
%
% A column of Z whose pattern in P is empty (which happens when gamma is
% at its maximal value) is returned as a zero column.

    fprintf('Post-processing... ');
    counter = 0;
    maxiter = 1000;
    tolerance = 1e-8;
    while counter < maxiter
        Z = A'*X;
        Z(~P) = 0;

        % Normalize the columns of Z. Only the column norms are needed, so
        % compute them directly rather than via the full matrix Z'*Z.
        % All-zero columns are left untouched: dividing them by their
        % (zero) norm would produce NaN's, on which the svd below fails.
        colnorms = sqrt(sum(abs(Z).^2, 1));
        colnorms(colnorms == 0) = 1;
        Z = bsxfun(@times, Z, 1./colnorms);

        X = ufactor(A*Z);
        counter = counter + 1;

        % Z0 is only defined from the second pass on, hence the guard.
        % Non-strict inequality so that an identically zero Z (both sides
        % equal to zero) also stops the iteration.
        if counter > 1 && norm(Z0-Z, 'fro') <= tolerance*norm(Z0, 'fro')
            break;
        end
        Z0 = Z;
    end
    fprintf('done, in %d iterations (max = %d).\n', counter, maxiter);
end

% Returns the U-factor of the polar decomposition of X
function U = ufactor(X)
% U = ufactor(X) returns the product of the left and (transposed) right
% singular vector matrices of X, taken from the economy-size SVD
% svd(X, 0): this is the U-factor of the polar decomposition of X.
    [Wleft, sigma, Vright] = svd(X, 0); %#ok<ASGLU>
    U = Wleft*Vright';
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/examples/thomson_problem.m
================================================
function X = thomson_problem(n, d)
% Simple attempt at computing n well distributed points on a sphere in R^d.
%
% function X = thomson_problem(n, d)
%
% Defaults: n = 50 points, in dimension d = 3. The points are returned as
% the columns of X. When called without output and with d = 3, the points
% are plotted on a translucent sphere.
%
% This is an example of how Manopt can approximate the gradient and even
% the Hessian of a cost function based on finite differences, even if only
% the cost function is specified without its derivatives.
%
% This functionality is provided only as a help for prototyping, and should
% not be used to compare algorithms in terms of computation time or
% accuracy, because the underlying gradient approximation scheme is slow.
%
% See also the derivative free solvers for an alternative:
% pso and neldermead.

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Nov. 1, 2016
% Contributors:
% Change log:

% Default problem size.
if nargin < 1 || isempty(n)
    n = 50;
end
if nargin < 2 || isempty(d)
    d = 3;
end

% Define the Thomson problem with 1/r^2 potential. That is: find n points
% x_i on a sphere in R^d such that the sum over all pairs (i, j) of the
% potentials 1/||x_i - x_j||^2 is minimized. Since the points are on a
% sphere, each potential is .5/(1-x_i'*x_j).
% The points are the unit-norm columns of X (oblique manifold); only the
% strictly upper-triangular part of the Gram matrix is summed, so that
% each pair is counted once and the diagonal is ignored.
pair_energy = @(G) sum(sum(triu(1./(1-G), 1)));
problem.M = obliquefactory(d, n);
problem.cost = @(X) pair_energy(X'*X) / n^2;

% Attempt to minimize the cost. Since the gradient is not provided, Manopt
% approximates it with finite differences. This is /slow/, since for each
% gradient approximation, problem.M.dim()+1 calls to the cost function are
% necessary, on top of generating an orthonormal basis of the tangent space
% at each iterate.
%
% Note that it is difficult to reach high accuracy critical points with an
% approximate gradient, hence it may be required to set a less ambitious
% value for the gradient norm tolerance.
opts = struct('tolgradnorm', 1e-4);

% Pick a solver. Both work fairly well on this problem.
% X = conjugategradient(problem, [], opts);
X = rlbfgs(problem, [], opts);

% Plot the points on a translucent sphere
if nargout == 0 && d == 3
    [sx, sy, sz] = sphere(50);
    surf(sx, sy, sz, 'FaceAlpha', .5);
    hold all;
    plot3(X(1, :), X(2, :), X(3, :), '.', 'MarkerSize', 20);
    axis equal;
    box off;
    axis off;
end

% For much better performance, after an early prototyping phase, the
% gradient of the cost function should be specified, typically in
% problem.grad or problem.egrad. See the online document at
%
%   http://www.manopt.org
%
% for more information.

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/examples/truncated_svd.m
================================================
function [U, S, V, info] = truncated_svd(A, p)
% Returns an SVD decomposition of A truncated to rank p.
%
% function [U, S, V, info] = truncated_svd(A, p)
%
% Input: A real matrix A of size mxn and an integer p <= min(m, n).
%        If A is omitted or empty, a random 42x60 matrix is used.
%        If p is omitted or empty, p = 5 is used.
% Output: An orthonormal matrix U of size mxp, an orthonormal matrix V of
%         size nxp and a diagonal matrix S of size pxp with nonnegative and
%         decreasing diagonal entries such that USV.' is the best rank p
%         approximation of A according to the Frobenius norm. All real.
%         This function produces an output akin to svds.
%         info is the information structure returned by the trustregions
%         solver.
% 
% The decomposition is obtained by maximizing
%   f(U, V) = .5*norm(U'*A*V, 'fro')^2
% where U, V are orthonormal. Notice that f(U*Q, V*R) = f(U, V) for all
% Q, R orthogonal pxp matrices. Hence, only the column spaces of U and V
% matter and we may perform the optimization over a product of two
% Grassmannian manifolds.
%
% It is easy to show that maximizing f is equivalent to minimizing g with
%   g(U, V) = min_S norm(U*S*V' - A, 'fro')^2,
% which confirms that we are going for a best low-rank approximation of A.
% 
% The inner workings of the Grassmann manifold use the built-in svd
% function of Matlab but only for matrices of size mxp and nxp to
% re-orthonormalize them.
% 
% Notice that we are actually chasing a best fixed-rank approximation of a
% matrix, which is best obtained by working directly over a manifold of
% fixed-rank matrices. This is simply an example script to demonstrate some
% functionalities of the toolbox.
% 
% The code can be modified to accept a function handle for A(x) = A*x
% instead of a matrix A, which is often useful. This would further require
% a function handle At for the transpose of A, such that At(x) = A.'*x.
%
% Side effect: if the dimension of the search space is below 512, the
% spectrum of the Hessian at the solution is plotted with stairs().

% This file is part of Manopt and is copyrighted. See the license file.
% 
% Main author: Nicolas Boumal, July 5, 2013
% Contributors:
% 
% Change log:
% 

    
    % Generate some random data to test the function if none is given.
    if ~exist('A', 'var') || isempty(A)
        A = randn(42, 60);
    end
    if ~exist('p', 'var') || isempty(p)
        p = 5;
    end
    
    % Retrieve the size of the problem and make sure the requested
    % approximation rank is at most the maximum possible rank.
    % (Note that p == min(m, n) passes this check.)
    [m, n] = size(A);
    assert(p <= min(m, n), 'p must be smaller than the smallest dimension of A.');
    
    % Define the cost and its derivatives on the Grassmann manifold
    tuple.U = grassmannfactory(m, p);
    tuple.V = grassmannfactory(n, p);
    % All of the code will work just as well if we ignore the invariance
    % property of the cost function indicated above and thus place U and V
    % on the Stiefel manifold (orthonormal matrices) instead of the
    % Grassmann manifold. Working on Stiefel is expected to be slower
    % though, partly because the search space is higher dimensional and
    % partly because the optimizers are not isolated.
    % tuple.U = stiefelfactory(m, p);
    % tuple.V = stiefelfactory(n, p);
    M = productmanifold(tuple);
    
    % Define a problem structure, specifying the manifold M, the cost
    % function and its derivatives. Here, to demonstrate the rapid
    % prototyping capabilities of Manopt, we directly define the Euclidean
    % gradient and the Euclidean Hessian egrad and ehess instead of the
    % Riemannian gradient and Hessian grad and hess. Manopt will take care
    % of the conversion. This automatic conversion is usually not
    % computationally optimal though, because much of the computations
    % involved in obtaining the gradient could be reused to obtain the
    % Hessian. After the prototyping stage, when efficiency becomes
    % important, it makes sense to define grad and hess rather than egrad
    % and ehess, and to use the caching system (the store structure).
    problem.M = M;
    problem.cost  = @cost;
    problem.egrad = @egrad;
    problem.ehess = @ehess;
    
    % The functions below make many redundant computations. This
    % performance hit can be alleviated by using the caching system.
    %
    % Note: these are nested functions. They read A from the enclosing
    % function, and the names U and V they assign are the same variables
    % as the outputs U and V of truncated_svd (nested functions share
    % variables that also appear in the parent). This is harmless here
    % since U and V are overwritten after the solver returns, but keep it
    % in mind before renaming or reusing these variables.
    
    % Cost function: the negated objective, f = -.5*||U'*A*V||_F^2, since
    % the solver minimizes.
    function f = cost(X)
        U = X.U;
        V = X.V;
        f = -.5*norm(U'*A*V, 'fro')^2;
    end
    % Euclidean gradient of the cost function, returned as a structure
    % with fields U and V (same layout as the points X).
    function g = egrad(X)
        U = X.U;
        V = X.V;
        AV = A*V;
        AtU = A'*U;
        g.U = -AV*(AV'*U);
        g.V = -AtU*(AtU'*V);
    end
    % Euclidean Hessian of the cost function at X along the direction H
    % (H has fields U and V): this is the directional derivative of egrad.
    function h = ehess(X, H)
        U = X.U;
        V = X.V;
        Udot = H.U;
        Vdot = H.V;
        AV = A*V;
        AtU = A'*U;
        AVdot = A*Vdot;
        AtUdot = A'*Udot;
        h.U = -(AVdot*AV'*U + AV*AVdot'*U + AV*AV'*Udot);
        h.V = -(AtUdot*AtU'*V + AtU*AtUdot'*V + AtU*AtU'*Vdot);
    end
    
    
    % Execute some checks on the derivatives for early debugging.
    % These things can be commented out of course.
    % checkgradient(problem);
    % pause;
    % checkhessian(problem);
    % pause;
    
    % Issue a call to a solver. A random initial guess will be chosen and
    % default options are selected. Here, we specify a maximum trust
    % region radius (which in turn induces an initial trust region radius).
    % Note that this is not required: default values are used if we omit
    % this. The diameter of the manifold scales like sqrt(2*p), hence the
    % form of our (empirical) choice.
    options.Delta_bar = 4*sqrt(2*p);
    [X, Xcost, info] = trustregions(problem, [], options); %#ok<ASGLU>
    U = X.U;
    V = X.V;
    
    % Finish the job by rotating U and V such that the middle matrix S can
    % be diagonal with nonnegative, decreasing entries. This requires a
    % small svd of size pxp.
    Spp = U'*A*V;
    [Upp, Spp, Vpp] = svd(Spp);
    U = U*Upp;
    S = Spp;
    V = V*Vpp;
    
    % For our information, Manopt can also compute the spectrum of the
    % Riemannian Hessian on the tangent space at (any) X. Computing the
    % spectrum at the solution gives us some idea of the conditioning of
    % the problem. If we were to implement a preconditioner for the
    % Hessian, this would also inform us on its performance.
    %
    % Notice that if the optimization is performed on a product of Stiefel
    % manifolds instead of a product of Grassmannians, the double
    % invariance under the orthogonal group O(p) will appear as twice
    % p*(p-1)/2, thus p*(p-1) zero eigenvalues in the spectrum of the
    % Hessian. This means that the minimizers are not isolated, which
    % typically hinders convergence of second order algorithms.
    %
    % The spectrum is only computed for small problems (dimension < 512).
    if M.dim() < 512
        evs = hessianspectrum(problem, X);
        stairs(sort(evs));
        title(['Eigenvalues of the Hessian of the cost function ' ...
               'at the solution']);
    end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/importmanopt.m
================================================
% Add Manopt to the path to make all manopt components available.
%
% The toolbox directories are located relative to the location of this
% script, so that it can be called from any current directory. The current
% directory is never changed. If the location of the script cannot be
% determined (e.g., when the code is evaluated as a selection), the
% current directory is assumed to be the root of the Manopt distribution.

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Jan. 3, 2013.
% Contributors: 
% Change log: 
%   Aug.  7, 2013 (NB): Changed to work without the import command
%                       (new structure of the toolbox).
%   Aug.  8, 2013 (NB): Changed to use addpath_recursive, home brewed.
%   Aug. 22, 2013 (NB): Using genpath instead of home cooked
%                       addpath_recursive.

% Root of the distribution: the folder that contains this script.
% Variable names carry a trailing '__' to limit the risk of clashing with
% variables of the caller's workspace (this is a script, not a function).
manopt_import_root__ = fileparts(mfilename('fullpath'));
if isempty(manopt_import_root__)
    manopt_import_root__ = pwd;
end

% The toolbox itself lives in the 'manopt' subfolder of the root.
manopt_import_core__ = fullfile(manopt_import_root__, 'manopt');
if ~exist(manopt_import_core__, 'dir')
    error('manopt:importmanopt:notfound', ...
          'Could not find the manopt directory in %s.', ...
          manopt_import_root__);
end

addpath(manopt_import_root__);

% Recursively add Manopt directories to the Matlab path.
addpath(genpath(manopt_import_core__));

% Do not leave temporary variables behind in the caller's workspace.
clear manopt_import_root__ manopt_import_core__;


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/StoreDB.m
================================================
classdef StoreDB < handle_light
% The StoreDB class is a handle class to manage caching in Manopt.
%
% To create an object, call: storedb = StoreDB();
% Alternatively, call: storedb = StoreDB(storedepth); to instruct
% the database to keep at most storedepth store's in its history.
% (Note that clean up only happens when purge() is called).
%
% The storedb object is passed by reference: when it is passed to a
% function as an input, and that function modifies it, the original
% object is modified.
%
% Summary of public methods:
%   store = get(storedb, key)            store of a point (or empty struct)
%   store = getWithShared(storedb, key)  same, with store.shared filled in
%   key = set(storedb, store, key)       save a store (key is optional)
%   key = setWithShared(storedb, store, key)
%                                        same, also saving store.shared
%                                        (key is optional)
%   key = getNewKey(storedb)             generate a fresh, unique key
%   purge(storedb)                       drop the oldest stores

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, April 3, 2015.
% Contributors: 
% Change log: 

% TODO : protect get/setWithShared calls: limit to one, and forbid access
%        to shared memory while it has not been returned.
%        Do think of the applyStatsFun case : calls a getWithShared, does
%        not need a setWithShared. I think for statsfun there should be a
%        method "forfeitWithShared".
    
    properties(Access = public)
       
        % This memory is meant to be shared at all times. Users can modify
        % this at will. It is the same for all points x.
        shared = struct();
        
        % This memory is used by the toolbox for, e.g., automatic caching
        % and book keeping. Users should not overwrite this. It is the
        % same for all points x.
        internal = struct();
        
        % When calling purge(), only a certain number of stores will be
        % kept in 'history'. This parameter fixes that number. The most
        % recently modified stores are kept. Set to inf to keep all stores.
        storedepth = inf;
        
    end
    
    properties(Access = private)
        
        % This structure holds separate memories for individual points.
        % Use get and set to interact with this. The field name 'shared' is
        % reserved, for use with get/setWithShared.
        history = struct();
        
        % This internal counter is used to obtain unique key's for points.
        counter = uint32(0);
        
        % This internal counter is used to time calls to 'set', and hence
        % keep track of which stores in 'history' were last updated.
        timer = uint32(0);
        
    end
    
    
    methods(Access = public)
        
        % Constructor. The optional input storedepth overrides the default
        % value (inf) of the storedepth property.
        function storedb = StoreDB(storedepth)
            if nargin >= 1
                storedb.storedepth = storedepth;
            end
        end
        
        % Return the store associated to a given key.
        % If the key is unknown, returns an empty structure.
        function store = get(storedb, key)
            if isfield(storedb.history, key)
                store = storedb.history.(key);
            else
                store = struct();
            end
        end
        
        % Same as get, but adds the shared memory in store.shared.
        function store = getWithShared(storedb, key)
            store = storedb.get(key);
            store.shared = storedb.shared;
        end
        
        % Save the given store at the given key. If no key is provided, a
        % new key is generated for this store (i.e., it is assumed this
        % store pertains to a new point). The key is returned in all cases.
        % A field 'lastset__' is added/updated in the store structure,
        % keeping track of the last time that store was modified.
        function key = set(storedb, store, key)
            if nargin < 3
                key = getNewKey(storedb);
            end
            store.lastset__ = storedb.timer;
            storedb.timer = storedb.timer + 1;
            storedb.history.(key) = store;
        end
        
        % Same as set, but extracts the shared memory and saves it.
        % The stored store will still have a 'shared' field, but it will be
        % empty. As for set, the key is optional: if it is omitted, a new
        % key is generated and returned.
        function key = setWithShared(storedb, store, key)
            % Without this guard, forwarding an omitted key to set() below
            % would raise an error, contrary to the documented behavior.
            if nargin < 3
                key = getNewKey(storedb);
            end
            storedb.shared = store.shared;
            store.shared = [];
            key = storedb.set(store, key);
        end
        
        % Generate a unique key and return it. This should be called
        % everytime a new point is generated / stored. Keys are valid field
        % names for structures: they are of the form 'z<counter>'.
        function key = getNewKey(storedb)
            key = sprintf('z%d', storedb.counter);
            storedb.counter = storedb.counter + 1;
        end
        
        % Clear entries in storedb.history to limit memory usage.
        % At most storedb.storedepth stores are kept, namely, those that
        % were set most recently. Nothing happens if storedepth is inf.
        function purge(storedb)
            
            if isinf(storedb.storedepth)
                return;
            end
            
            % Nothing is to be kept: wipe the whole history.
            if storedb.storedepth <= 0
                storedb.history = struct();
                return;
            end

            % Get list of field names (keys).
            keys = fieldnames(storedb.history);
            nkeys = length(keys);

            % If we need to remove some of the elements in the database,
            if nkeys > storedb.storedepth

                % Get the last-set counter of each element:
                % a higher number means it was modified more recently.
                lastset = zeros(nkeys, 1, 'uint32');
                for i = 1 : nkeys
                    lastset(i) = storedb.history.(keys{i}).lastset__;
                end

                % Sort the counters (most recent first) and determine the
                % threshold: stores whose counter is strictly below it
                % are the ones to remove.
                sortlastset = sort(lastset, 1, 'descend');
                minlastset = sortlastset(storedb.storedepth);

                % Remove all fields that are too old.
                storedb.history = rmfield(storedb.history, ...
                                               keys(lastset < minlastset));
            end
            
        end % end of purge()
        
    end
    
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/applyStatsfun.m
================================================
function stats = applyStatsfun(problem, x, storedb, key, options, stats)
% Run the user supplied statsfun (if any) on a solver's stats structure.
%
% function stats = applyStatsfun(problem, x, storedb, key, options, stats)
%
% If options.statsfun exists, it is called to let the user augment or
% modify the stats structure, and the resulting structure is returned.
% Otherwise, stats is returned untouched.
%
% statsfun may accept three inputs (problem, x, stats) or four inputs
% (problem, x, stats, store). With any other number of inputs, a warning
% is issued and stats is returned untouched.
%
% storedb is a StoreDB object, key is the StoreDB key to point x.
%
% Note: when statsfun receives a store structure, it may read it (including
% the store.shared field) but changes made to it inside statsfun are lost,
% since statsfun does not return the store.
%
% See also: 

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, April 3, 2013.
% Contributors: 
% Change log: 
%
%   April 3, 2015 (NB):
%       Works with the new StoreDB class system.

    % Nothing to do if the user did not supply a statsfun.
    if ~isfield(options, 'statsfun')
        return;
    end

    ninputs = nargin(options.statsfun);

    if ninputs == 3
        stats = options.statsfun(problem, x, stats);
    elseif ninputs == 4
        % Fetch the store of x (with shared memory), hand it to statsfun,
        % then give it back to storedb: get/setWithShared go in pairs.
        store = storedb.getWithShared(key);
        stats = options.statsfun(problem, x, stats, store);
        storedb.setWithShared(store, key);
    else
        warning('manopt:statsfun', ...
                'statsfun unused: wrong number of inputs');
    end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/canGetApproxGradient.m
================================================
function candoit = canGetApproxGradient(problem)
% Tells whether the problem explicitly provides an approximate gradient.
%
% function candoit = canGetApproxGradient(problem)
%
% The output is true if and only if the problem structure has a field
% named approxgrad. In particular, the output is false if the problem
% defines an exact gradient but no approximate gradient.
%
% A false output does not mean getApproxGradient will fail: without the
% approxgrad field, getApproxGradient forwards the call to getGradientFD.
%
% See also: canGetGradient getGradientFD

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Nov. 1, 2016.
% Contributors: 
% Change log: 

    fieldname = 'approxgrad';
    candoit = isfield(problem, fieldname);

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/canGetApproxHessian.m
================================================
function candoit = canGetApproxHessian(problem)
% Tells whether the problem explicitly provides an approximate Hessian.
%
% function candoit = canGetApproxHessian(problem)
%
% The output is true if and only if the problem structure has a field
% named approxhess. In particular, the output is false if the problem
% defines an exact Hessian but no approximate Hessian.
%
% A false output does not mean getApproxHessian will fail: without the
% approxhess field, getApproxHessian forwards the call to getHessianFD.
%
% See also: canGetHessian getHessianFD

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, April 8, 2015.
% Contributors: 
% Change log: 

    fieldname = 'approxhess';
    candoit = isfield(problem, fieldname);

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/canGetCost.m
================================================
function candoit = canGetCost(problem)
% Tells whether the problem structure gives access to the cost function.
%
% function candoit = canGetCost(problem)
%
% The output is true if the problem structure has a field cost or a field
% costgrad, and false otherwise.
%
% See also: getCost canGetDirectionalDerivative canGetGradient canGetHessian

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Dec. 30, 2012.
% Contributors: 
% Change log: 

    if isfield(problem, 'cost')
        candoit = true;
    else
        % No cost field: the cost may still come bundled with the gradient.
        candoit = isfield(problem, 'costgrad');
    end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/canGetDirectionalDerivative.m
================================================
function candoit = canGetDirectionalDerivative(problem)
% Tells whether directional derivatives of the cost are available.
% 
% function candoit = canGetDirectionalDerivative(problem)
%
% The output is true if the problem structure has a field diff, or if
% canGetGradient(problem) is true; it is false otherwise.
%
% See also: canGetCost canGetGradient canGetHessian

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Dec. 30, 2012.
% Contributors: 
% Change log: 

    hasdiff = isfield(problem, 'diff');

    % The gradient check is only performed if there is no diff field.
    candoit = hasdiff || canGetGradient(problem);

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/canGetEuclideanGradient.m
================================================
function candoit = canGetEuclideanGradient(problem)
% Tells whether the Euclidean gradient of the cost is available.
%
% function candoit = canGetEuclideanGradient(problem)
%
% The output is true if the problem structure has a field egrad, or if
% canGetPartialEuclideanGradient(problem) is true; it is false otherwise.
%
% See also: canGetGradient getEuclideanGradient

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Dec. 30, 2012.
% Contributors: 
% Change log: 
%
%   June 28, 2016 (NB):
%       Added support for getPartialEuclideanGradient

    hasegrad = isfield(problem, 'egrad');

    % The partial gradient check is only performed if egrad is missing.
    candoit = hasegrad || canGetPartialEuclideanGradient(problem);

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/canGetGradient.m
================================================
function candoit = canGetGradient(problem)
% Tells whether the gradient of the cost function is available.
% 
% function candoit = canGetGradient(problem)
%
% The output is true if any of the following holds (checked in this order,
% stopping at the first success), and false otherwise:
%   - the problem structure has a field grad or a field costgrad;
%   - canGetEuclideanGradient(problem) is true;
%   - canGetPartialGradient(problem) is true;
%   - the problem structure has a field diff.
%
% See also: canGetCost canGetDirectionalDerivative canGetHessian

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Dec. 30, 2012.
% Contributors: 
% Change log: 
%
%   June 28, 2016 (NB):
%       Added support for getPartialGradient
%
%   Nov. 1, 2016 (NB):
%       Added support for gradient from directional derivatives

    if isfield(problem, 'grad') || isfield(problem, 'costgrad')
        candoit = true;
    elseif canGetEuclideanGradient(problem)
        candoit = true;
    elseif canGetPartialGradient(problem)
        candoit = true;
    else
        % Last resort: the gradient can be computed (expensively) from
        % directional derivatives. We cannot call
        % canGetDirectionalDerivative here, because that function may in
        % turn call canGetGradient, which would start an infinite loop.
        % Hence the redundancy: this check must be kept equivalent to the
        % one in canGetDirectionalDerivative.
        candoit = isfield(problem, 'diff');
    end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/canGetHessian.m
================================================
function candoit = canGetHessian(problem)
% Checks whether the Hessian can be computed for a problem structure.
%
% function candoit = canGetHessian(problem)
%
% Returns true if the Hessian of the cost function can be computed given
% the problem description, false otherwise. Specifically, the output is
% true if the problem structure has a field hess, or if it has a field
% ehess and canGetEuclideanGradient(problem) is true.
%
% If ehess is supplied but the Euclidean gradient is not available, a
% warning with identifier manopt:canGetHessian is issued, in anticipation
% of a common mistake.
%
% See also: canGetCost canGetDirectionalDerivative canGetGradient

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Dec. 30, 2012.
% Contributors: 
% Change log: 

    if isfield(problem, 'hess')
        
        candoit = true;
        
    elseif isfield(problem, 'ehess')
        
        % A Euclidean Hessian is only usable together with a Euclidean
        % gradient. The availability of the latter is checked only once,
        % so that warnings it may issue are not displayed twice.
        candoit = canGetEuclideanGradient(problem);
        
        % Display an extra warning message to the user in anticipation of
        % common mistakes.
        if ~candoit
            warning('manopt:canGetHessian', ...
                   ['If the Hessian is supplied as a Euclidean Hessian (ehess),\n' ...
                    'then the Euclidean gradient must also be supplied (egrad).']);
        end
        
    else
        
        candoit = false;
        
    end
    
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/canGetLinesearch.m
================================================
function candoit = canGetLinesearch(problem)
% Tells whether the problem structure provides a hint for line-searches.
%
% function candoit = canGetLinesearch(problem)
%
% The output is true if the problem structure has a field linesearch,
% that is, a mechanism to tell line-search algorithms "how far to look";
% it is false otherwise.
%
% See also: getLinesearch

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, July 17, 2014.
% Contributors: 
% Change log: 

    fieldname = 'linesearch';
    candoit = isfield(problem, fieldname);

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/canGetPartialEuclideanGradient.m
================================================
function candoit = canGetPartialEuclideanGradient(problem)
% Tells whether the partial Euclidean gradient of the cost is available.
% 
% function candoit = canGetPartialEuclideanGradient(problem)
%
% The output is true if the problem structure has both a field
% partialegrad and a field ncostterms, and false otherwise.
%
% If partialegrad is present but ncostterms is missing, a warning with
% identifier manopt:partialegrad is issued.
%
% See also: getPartialEuclideanGradient canGetPartialGradient

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, June 28, 2016.
% Contributors: 
% Change log: 

    haspartial = isfield(problem, 'partialegrad');
    hasnterms = isfield(problem, 'ncostterms');

    candoit = haspartial && hasnterms;

    % A partial gradient without the number of cost terms is unusable:
    % let the user know what is missing.
    if haspartial && ~hasnterms
        warning('manopt:partialegrad', ...
               ['If problem.partialegrad is specified, indicate the number n\n' ...
                'of terms in the cost function with problem.ncostterms = n.']);
    end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/canGetPartialGradient.m
================================================
function candoit = canGetPartialGradient(problem)
% Tells whether the partial gradient of the cost function is available.
% 
% function candoit = canGetPartialGradient(problem)
%
% The output is true if the problem structure has both a field partialgrad
% and a field ncostterms, or if canGetPartialEuclideanGradient(problem) is
% true; it is false otherwise.
%
% If partialgrad is present but ncostterms is missing, a warning with
% identifier manopt:partialgrad is issued.
%
% See also: getPartialGradient canGetPartialEuclideanGradient

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, June 28, 2016.
% Contributors: 
% Change log: 

    haspartial = isfield(problem, 'partialgrad');
    hasnterms = isfield(problem, 'ncostterms');

    % The Euclidean check is only performed if the first test fails.
    candoit = (haspartial && hasnterms) || ...
              canGetPartialEuclideanGradient(problem);

    % A partial gradient without the number of cost terms is unusable:
    % let the user know what is missing.
    if haspartial && ~hasnterms
        warning('manopt:partialgrad', ...
               ['If problem.partialgrad is specified, indicate the number n\n' ...
                'of terms in the cost function with problem.ncostterms = n.']);
    end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/canGetPrecon.m
================================================
function candoit = canGetPrecon(problem)
% Tells whether the problem structure specifies a preconditioner.
%
% function candoit = canGetPrecon(problem)
%
% The output is true if the problem structure has a field precon, or if
% canGetSqrtPrecon(problem) is true; it is false otherwise.
%
% Even when the output is false, getPrecon can be called: the default
% preconditioner is simply the identity operator. This function is mostly
% useful to find out whether that default is in use or not.
%
% See also: getPrecon getSqrtPrecon canGetSqrtPrecon getHessian

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, July 3, 2013.
% Contributors: 
% Change log: 

    hasprecon = isfield(problem, 'precon');

    % The square root check is only performed if precon is missing.
    candoit = hasprecon || canGetSqrtPrecon(problem);

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/canGetSqrtPrecon.m
================================================
function candoit = canGetSqrtPrecon(problem)
% Tells whether the problem specifies a square root of a preconditioner.
%
% function candoit = canGetSqrtPrecon(problem)
%
% The output is true if the problem structure has a field sqrtprecon, and
% false otherwise. That field is meant to apply the square root of a
% preconditioner to tangent vectors at a given point: the square root of
% the preconditioner at x must be a symmetric, positive definite operator
% Q such that applying Q twice (Q o Q) amounts to applying the
% preconditioner once. If both a preconditioner and a square root of
% preconditioner are provided, it is the user's responsibility to ensure
% their compatibility.
%
% As for getPrecon, when the output is false, calls to getSqrtPrecon still
% work: they act as the identity. Note that this may be incompatible with
% the preconditioner if one is given. Thus, always check by calling
% canGetSqrtPrecon first.
%
% See also: canGetPrecon getSqrtPrecon getPrecon

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, April 3, 2015.
% Contributors: 
% Change log: 

    fieldname = 'sqrtprecon';
    candoit = isfield(problem, fieldname);

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/canGetSubgradient.m
================================================
function candoit = canGetSubgradient(problem)
% Tells whether a subgradient of the cost function is available.
% 
% function candoit = canGetSubgradient(problem)
%
% The output is true if the problem structure has a field subgrad, or if
% canGetGradient(problem) is true; it is false otherwise.
%
% See also: canGetGradient

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, July 20, 2017.
% Contributors: 
% Change log: 

    hassubgrad = isfield(problem, 'subgrad');

    % The gradient check is only performed if subgrad is missing.
    candoit = hassubgrad || canGetGradient(problem);

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/getApproxGradient.m
================================================
function approxgrad = getApproxGradient(problem, x, storedb, key)
% Returns an approximation of the gradient of the cost function at x.
%
% function approxgrad = getApproxGradient(problem, x)
% function approxgrad = getApproxGradient(problem, x, storedb)
% function approxgrad = getApproxGradient(problem, x, storedb, key)
%
% The approximation is obtained from problem.approxgrad if that field
% exists. That function may accept one input (x), two inputs (x, store),
% or three inputs (x, storedb, key); an exception with identifier
% manopt:getApproxGradient:badapproxgrad is thrown for any other number
% of inputs.
%
% If the problem structure has no field approxgrad, the call is forwarded
% to getGradientFD.
%
% storedb is a StoreDB object, key is the StoreDB key to point x. If key
% is omitted, a new one is requested from storedb; if storedb is omitted
% as well, a new StoreDB object is created.
% 
% See also: getGradientFD canGetApproxGradient

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Nov. 1, 2016.
% Contributors: 
% Change log: 

    % The last two inputs are optional: fill them in if need be.
    if nargin < 4
        if nargin < 3
            storedb = StoreDB();
        end
        key = storedb.getNewKey();
    end

    % Without a user supplied approximation, fall back to a standard
    % finite difference approximation.
    if ~isfield(problem, 'approxgrad')
        approxgrad = getGradientFD(problem, x, storedb, key);
        return;
    end

    % The number of inputs of approxgrad tells us whether (and how) that
    % function wants to deal with the caching system.
    ninputs = nargin(problem.approxgrad);

    if ninputs == 1
        
        approxgrad = problem.approxgrad(x);
        
    elseif ninputs == 2
        
        % Obtain, pass along, and save the store for x.
        store = storedb.getWithShared(key);
        [approxgrad, store] = problem.approxgrad(x, store);
        storedb.setWithShared(store, key);
        
    elseif ninputs == 3
        
        % Pass along the whole storedb (by reference), with key.
        approxgrad = problem.approxgrad(x, storedb, key);
        
    else
        
        up = MException('manopt:getApproxGradient:badapproxgrad', ...
            'approxgrad should accept 1, 2 or 3 inputs.');
        throw(up);
        
    end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/getApproxHessian.m
================================================
function approxhess = getApproxHessian(problem, x, d, storedb, key)
% Approximates the Hessian of the cost function at x applied to d.
%
% function approxhess = getApproxHessian(problem, x, d)
% function approxhess = getApproxHessian(problem, x, d, storedb)
% function approxhess = getApproxHessian(problem, x, d, storedb, key)
%
% If the problem structure has an approxhess field, that function is called
% to produce the result. Otherwise, the request is forwarded to
% getHessianFD.
%
% storedb is a StoreDB object, key is the StoreDB key to point x. Both are
% optional: a new StoreDB and/or a new key are created when omitted.
%
% Throws manopt:getApproxHessian:badapproxhess if problem.approxhess does
% not declare 2, 3 or 4 inputs.
%
% See also: getHessianFD canGetApproxHessian

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Dec. 30, 2012.
% Contributors: 
% Change log: 
%
%   April 3, 2015 (NB):
%       Works with the new StoreDB class system.

    % storedb and key are optional trailing inputs: create what is missing.
    if nargin < 5
        if nargin < 4
            storedb = StoreDB();
        end
        key = storedb.getNewKey();
    end

    % No user-supplied approximation: fall back to finite differences.
    if ~isfield(problem, 'approxhess')
        approxhess = getHessianFD(problem, x, d, storedb, key);
        return;
    end

    % The number of inputs declared by approxhess determines whether it
    % ignores the cache, works with a store, or works with the storedb.
    nin = nargin(problem.approxhess);
    if nin == 2
        approxhess = problem.approxhess(x, d);
    elseif nin == 3
        % Fetch the store for x, let the user function update it, save it.
        cache = storedb.getWithShared(key);
        [approxhess, cache] = problem.approxhess(x, d, cache);
        storedb.setWithShared(cache, key);
    elseif nin == 4
        % Hand over the storedb object itself, together with the key.
        approxhess = problem.approxhess(x, d, storedb, key);
    else
        throw(MException('manopt:getApproxHessian:badapproxhess', ...
            'approxhess should accept 2, 3 or 4 inputs.'));
    end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/getCost.m
================================================
function cost = getCost(problem, x, storedb, key)
% Evaluates the cost function of the problem at the point x.
%
% function cost = getCost(problem, x)
% function cost = getCost(problem, x, storedb)
% function cost = getCost(problem, x, storedb, key)
%
% The value is obtained from problem.cost if that field exists, and
% otherwise from the first output of problem.costgrad. If neither field is
% present, the exception manopt:getCost:fail is thrown.
%
% storedb is a StoreDB object, key is the StoreDB key to point x. Both are
% optional: a new StoreDB and/or a new key are created when omitted.
%
% See also: canGetCost

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Dec. 30, 2012.
% Contributors: 
% Change log: 
%
%   April 3, 2015 (NB):
%       Works with the new StoreDB class system.

    % storedb and key are optional trailing inputs: create what is missing.
    if nargin < 4
        if nargin < 3
            storedb = StoreDB();
        end
        key = storedb.getNewKey();
    end

    if isfield(problem, 'cost')
    %% The cost is given directly.

        % The declared input count tells how cost interacts with the cache.
        nin = nargin(problem.cost);
        if nin == 1
            cost = problem.cost(x);
        elseif nin == 2
            % Fetch the store for x, let cost update it, then save it.
            cache = storedb.getWithShared(key);
            [cost, cache] = problem.cost(x, cache);
            storedb.setWithShared(cache, key);
        elseif nin == 3
            % Hand over the storedb object itself, together with the key.
            cost = problem.cost(x, storedb, key);
        else
            throw(MException('manopt:getCost:badcost', ...
                'cost should accept 1, 2 or 3 inputs.'));
        end

    elseif isfield(problem, 'costgrad')
    %% The cost is the first output of costgrad.

        nin = nargin(problem.costgrad);
        if nin == 1
            cost = problem.costgrad(x);
        elseif nin == 2
            % The gradient output must be requested to reach the store
            % output, but it is not used here.
            cache = storedb.getWithShared(key);
            [cost, ignoredgrad, cache] = problem.costgrad(x, cache); %#ok
            storedb.setWithShared(cache, key);
        elseif nin == 3
            cost = problem.costgrad(x, storedb, key);
        else
            throw(MException('manopt:getCost:badcostgrad', ...
                'costgrad should accept 1, 2 or 3 inputs.'));
        end

    else
    %% Nothing in the problem structure gives access to the cost.

        throw(MException('manopt:getCost:fail', ...
            ['The problem description is not explicit enough to ' ...
             'compute the cost.']));

    end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/getCostGrad.m
================================================
function [cost, grad] = getCostGrad(problem, x, storedb, key)
% Evaluates both the cost and the gradient at x, in one call if possible.
%
% function [cost, grad] = getCostGrad(problem, x)
% function [cost, grad] = getCostGrad(problem, x, storedb)
% function [cost, grad] = getCostGrad(problem, x, storedb, key)
%
% If the problem structure has a costgrad field, that function is called
% once to obtain both outputs. Otherwise, getCost and getGradient are
% called one after the other, with the same storedb and key.
%
% storedb is a StoreDB object, key is the StoreDB key to point x. Both are
% optional: a new StoreDB and/or a new key are created when omitted.
%
% See also: canGetCost canGetGradient getCost getGradient

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Dec. 30, 2012.
% Contributors: 
% Change log: 
%
%   April 3, 2015 (NB):
%       Works with the new StoreDB class system.

    % storedb and key are optional trailing inputs: create what is missing.
    if nargin < 4
        if nargin < 3
            storedb = StoreDB();
        end
        key = storedb.getNewKey();
    end

    % Without costgrad, obtain the two quantities separately.
    if ~isfield(problem, 'costgrad')
        cost = getCost(problem, x, storedb, key);
        grad = getGradient(problem, x, storedb, key);
        return;
    end

    % The declared input count tells how costgrad interacts with the cache.
    nin = nargin(problem.costgrad);
    if nin == 1
        [cost, grad] = problem.costgrad(x);
    elseif nin == 2
        % Fetch the store for x, let costgrad update it, then save it.
        cache = storedb.getWithShared(key);
        [cost, grad, cache] = problem.costgrad(x, cache);
        storedb.setWithShared(cache, key);
    elseif nin == 3
        % Hand over the storedb object itself, together with the key.
        [cost, grad] = problem.costgrad(x, storedb, key);
    else
        throw(MException('manopt:getCostGrad:badcostgrad', ...
            'costgrad should accept 1, 2 or 3 inputs.'));
    end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/getDirectionalDerivative.m
================================================
function diff = getDirectionalDerivative(problem, x, d, storedb, key)
% Evaluates the derivative of the cost function at x in the direction d.
%
% function diff = getDirectionalDerivative(problem, x, d)
% function diff = getDirectionalDerivative(problem, x, d, storedb)
% function diff = getDirectionalDerivative(problem, x, d, storedb, key)
%
% If the problem structure has a diff field, that function is called.
% Otherwise, if canGetGradient(problem) holds, the result is the inner
% product (problem.M.inner) at x of the gradient with d. If neither is
% possible, the exception manopt:getDirectionalDerivative:fail is thrown.
%
% storedb is a StoreDB object, key is the StoreDB key to point x. Both are
% optional: a new StoreDB and/or a new key are created when omitted.
%
% See also: getGradient canGetDirectionalDerivative

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Dec. 30, 2012.
% Contributors: 
% Change log: 
%
%   April 3, 2015 (NB):
%       Works with the new StoreDB class system.

    % storedb and key are optional trailing inputs: create what is missing.
    if nargin < 5
        if nargin < 4
            storedb = StoreDB();
        end
        key = storedb.getNewKey();
    end

    if isfield(problem, 'diff')
    %% The directional derivative is given directly.

        % The declared input count tells how diff interacts with the cache.
        nin = nargin(problem.diff);
        if nin == 2
            diff = problem.diff(x, d);
        elseif nin == 3
            % Fetch the store for x, let diff update it, then save it.
            cache = storedb.getWithShared(key);
            [diff, cache] = problem.diff(x, d, cache);
            storedb.setWithShared(cache, key);
        elseif nin == 4
            % Hand over the storedb object itself, together with the key.
            diff = problem.diff(x, d, storedb, key);
        else
            throw(MException('manopt:getDirectionalDerivative:baddiff', ...
                'diff should accept 2, 3 or 4 inputs.'));
        end

    elseif canGetGradient(problem)
    %% Derive it from the gradient: <grad f(x), d> at x.

        g = getGradient(problem, x, storedb, key);
        diff = problem.M.inner(x, g, d);

    else
    %% Nothing in the problem structure gives access to the derivative.

        throw(MException('manopt:getDirectionalDerivative:fail', ...
            ['The problem description is not explicit enough to ' ...
             'compute the directional derivatives of f.']));

    end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/getEuclideanGradient.m
================================================
function egrad = getEuclideanGradient(problem, x, storedb, key)
% Evaluates the Euclidean gradient of the cost function at x.
%
% function egrad = getEuclideanGradient(problem, x)
% function egrad = getEuclideanGradient(problem, x, storedb)
% function egrad = getEuclideanGradient(problem, x, storedb, key)
%
% If the problem structure has an egrad field, that function is used.
% Otherwise, if canGetPartialEuclideanGradient(problem) holds, the partial
% Euclidean gradient over all terms 1:problem.ncostterms is returned. If
% neither is possible, manopt:getEuclideanGradient:fail is thrown.
%
% storedb is a StoreDB object, key is the StoreDB key to point x. Both are
% optional: a new StoreDB and/or a new key are created when omitted.
%
% Automatic caching: computing the Hessian from the Euclidean Hessian
% requires the Euclidean gradient every time. To limit redundant
% computations, when egrad takes only x as input (that is, when it does
% not use the store caching capabilities), the result is cached in the
% store for x under the field egrad__ and reused on later calls. Writing
% egrad so that it accepts a store or storedb input disables automatic
% caching, and leaves caching under user control.
%
% See also: getGradient canGetGradient canGetEuclideanGradient

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, July 9, 2013.
% Contributors: 
% Change log: 
%
%   April 3, 2015 (NB):
%       Works with the new StoreDB class system.
%
%   June 28, 2016 (NB):
%       Added support for getPartialEuclideanGradient

    % storedb and key are optional trailing inputs: create what is missing.
    if nargin < 4
        if nargin < 3
            storedb = StoreDB();
        end
        key = storedb.getNewKey();
    end

    if isfield(problem, 'egrad')
    %% The Euclidean gradient is given directly.

        % The declared input count tells how egrad interacts with the cache.
        nin = nargin(problem.egrad);
        if nin == 1
            % egrad ignores the store: cache its output on its behalf.
            % This costs a little in some cases, but is expected to be the
            % preferred behavior most of the time.
            cache = storedb.get(key);
            if ~isfield(cache, 'egrad__')
                cache.egrad__ = problem.egrad(x);
                storedb.set(cache, key);
            end
            egrad = cache.egrad__;
        elseif nin == 2
            % egrad manages the store itself: no automatic caching.
            % Fetch the store for x, let egrad update it, then save it.
            cache = storedb.getWithShared(key);
            [egrad, cache] = problem.egrad(x, cache);
            storedb.setWithShared(cache, key);
        elseif nin == 3
            % Hand over the storedb object itself, together with the key.
            % No automatic caching here either.
            egrad = problem.egrad(x, storedb, key);
        else
            throw(MException('manopt:getEuclideanGradient:badegrad', ...
                'egrad should accept 1, 2 or 3 inputs.'));
        end

    elseif canGetPartialEuclideanGradient(problem)
    %% Sum up all the terms through the partial Euclidean gradient.

        nterms = problem.ncostterms;
        egrad = getPartialEuclideanGradient(problem, x, 1:nterms, ...
                                            storedb, key);

    else
    %% Nothing in the problem structure gives access to this gradient.

        throw(MException('manopt:getEuclideanGradient:fail', ...
            ['The problem description is not explicit enough to ' ...
             'compute the Euclidean gradient of the cost.']));

    end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/getGlobalDefaults.m
================================================
function opts = getGlobalDefaults()
% Builds the structure of global default option values for Manopt.
%
% function opts = getGlobalDefaults()
%
% The returned structure opts holds the global default options (verbosity
% level and so on). Typically, solver defaults take precedence over these
% global defaults, and user-specified options take precedence over both.
% See the online Manopt documentation for details on options.
%
% Fields of opts:
%   verbosity  (3)     0 means no output at all. The higher the value,
%                      the more information is printed / displayed during
%                      solver execution.
%   debug      (false) If true, additional computations may be performed
%                      and debugging information is output during solver
%                      execution.
%   storedepth (20)    Maximum number of store structures to store. With
%                      0, caching capabilities are not disabled, but the
%                      cache is emptied at each iteration of iterative
%                      solvers (more precisely: every time the solver
%                      calls to purge the storedb).
%   maxtime    (inf)   Maximum amount of time a solver may execute, in
%                      seconds.
%
% See also: mergeOptions

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Dec. 30, 2012.
% Contributors: 
% Change log: 


    % There should be no reason to modify this file.
    % For better compatibility with future Manopt versions,
    % use the options structure of solvers.
    %
    % Really: don't modify it.

    opts = struct('verbosity',  3, ...
                  'debug',      false, ...
                  'storedepth', 20, ...
                  'maxtime',    inf);

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/getGradient.m
================================================
function grad = getGradient(problem, x, storedb, key)
% Evaluates the gradient of the cost function at x.
%
% function grad = getGradient(problem, x)
% function grad = getGradient(problem, x, storedb)
% function grad = getGradient(problem, x, storedb, key)
%
% The first applicable source in the following list is used:
%   1. problem.grad;
%   2. the second output of problem.costgrad;
%   3. the Euclidean gradient, converted with problem.M.egrad2rgrad;
%   4. the partial gradient over all terms 1:problem.ncostterms;
%   5. directional derivatives along each vector of a basis returned by
%      tangentorthobasis (expensive);
%   6. getApproxGradient, as a last resort.
%
% storedb is a StoreDB object, key is the StoreDB key to point x. Both are
% optional: a new StoreDB and/or a new key are created when omitted.
%
% See also: getDirectionalDerivative canGetGradient

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Dec. 30, 2012.
% Contributors: 
% Change log: 
%
%   April 3, 2015 (NB):
%       Works with the new StoreDB class system.
%
%  June 28, 2016 (NB):
%       Works with getPartialGradient.
%
%   Nov. 1, 2016 (NB):
%       Added support for gradient from directional derivatives.
%       Last resort is call to getApproxGradient instead of an exception.

    % storedb and key are optional trailing inputs: create what is missing.
    if nargin < 4
        if nargin < 3
            storedb = StoreDB();
        end
        key = storedb.getNewKey();
    end

    if isfield(problem, 'grad')
    %% The gradient is given directly.

        % The declared input count tells how grad interacts with the cache.
        nin = nargin(problem.grad);
        if nin == 1
            grad = problem.grad(x);
        elseif nin == 2
            % Fetch the store for x, let grad update it, then save it.
            cache = storedb.getWithShared(key);
            [grad, cache] = problem.grad(x, cache);
            storedb.setWithShared(cache, key);
        elseif nin == 3
            % Hand over the storedb object itself, together with the key.
            grad = problem.grad(x, storedb, key);
        else
            throw(MException('manopt:getGradient:badgrad', ...
                'grad should accept 1, 2 or 3 inputs.'));
        end

    elseif isfield(problem, 'costgrad')
    %% The gradient is the second output of costgrad.

        % The cost output has to be requested, but it is discarded.
        nin = nargin(problem.costgrad);
        if nin == 1
            [ignoredcost, grad] = problem.costgrad(x); %#ok
        elseif nin == 2
            cache = storedb.getWithShared(key);
            [ignoredcost, grad, cache] = problem.costgrad(x, cache); %#ok
            storedb.setWithShared(cache, key);
        elseif nin == 3
            [ignoredcost, grad] = problem.costgrad(x, storedb, key); %#ok
        else
            throw(MException('manopt:getGradient:badcostgrad', ...
                'costgrad should accept 1, 2 or 3 inputs.'));
        end

    elseif canGetEuclideanGradient(problem)
    %% Convert the Euclidean gradient to the Riemannian gradient.

        grad = problem.M.egrad2rgrad(x, ...
                        getEuclideanGradient(problem, x, storedb, key));

    elseif canGetPartialGradient(problem)
    %% Sum up all the terms through the partial gradient.

        nterms = problem.ncostterms;
        grad = getPartialGradient(problem, x, 1:nterms, storedb, key);

    elseif canGetDirectionalDerivative(problem)
    %% Assemble the gradient from directional derivatives; expensive!

        % One directional derivative per basis vector gives the
        % coefficients of the gradient in that basis.
        basis = tangentorthobasis(problem.M, x);
        coeffs = zeros(size(basis));
        for idx = 1 : numel(basis)
            coeffs(idx) = getDirectionalDerivative(problem, x, ...
                                            basis{idx}, storedb, key);
        end
        grad = lincomb(problem.M, x, basis, coeffs);

    else
    %% Last resort: an approximation of the gradient.

        grad = getApproxGradient(problem, x, storedb, key);

    end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/getGradientFD.m
================================================
function gradfd = getGradientFD(problem, x, storedb, key)
% Approximates the gradient of the cost at x with finite differences.
%
% function gradfd = getGradientFD(problem, x)
% function gradfd = getGradientFD(problem, x, storedb)
% function gradfd = getGradientFD(problem, x, storedb, key)
%
% The cost is evaluated once at x, and once at a nearby point (obtained by
% retraction) for each vector of a basis returned by tangentorthobasis.
% The resulting finite differences are used as coefficients to combine
% the basis vectors into the returned tangent vector.
%
% storedb is a StoreDB object, key is the StoreDB key to point x. Both are
% optional: a new StoreDB and/or a new key are created when omitted.
%
% Throws manopt:getGradientFD:nocost if canGetCost(problem) is false.
%
% See also: approxgradientFD

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Nov. 1, 2016.
% Contributors: 
% Change log: 

    % storedb and key are optional trailing inputs: create what is missing.
    if nargin < 4
        if nargin < 3
            storedb = StoreDB();
        end
        key = storedb.getNewKey();
    end

    % Everything below relies on evaluating the cost.
    if ~canGetCost(problem)
        throw(MException('manopt:getGradientFD:nocost', ...
            'getGradientFD requires the cost to be computable.'));
    end

    % Fixed parameters. approxgradientFD gives explicit user access to
    % these parameters.
    h = 2^-23;          % length of the step taken along each direction
    subdim = [];        % empty: default choice made by tangentorthobasis

    % Reference value of the cost, at x itself.
    f0 = getCost(problem, x, storedb, key);

    % Orthonormal basis of the tangent space at x, or of a subspace of it.
    % With a strict subspace, the result approximates the orthogonal
    % projection of the gradient to that subspace.
    basis = tangentorthobasis(problem.M, x, subdim);

    % Forward difference of the cost along each basis vector. No key is
    % given for the perturbed point, so getCost picks a new one.
    slopes = zeros(size(basis));
    for idx = 1 : numel(basis)
        xstep = problem.M.retr(x, basis{idx}, h);
        fstep = getCost(problem, xstep, storedb);
        slopes(idx) = (fstep - f0)/h;
    end

    % Combine the basis vectors with the estimated slopes.
    gradfd = lincomb(problem.M, x, basis, slopes);

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/getHessian.m
================================================
function hess = getHessian(problem, x, d, storedb, key)
% Evaluates the Hessian of the cost function at x applied to d.
%
% function hess = getHessian(problem, x, d)
% function hess = getHessian(problem, x, d, storedb)
% function hess = getHessian(problem, x, d, storedb, key)
%
% The first applicable source in the following list is used:
%   1. problem.hess;
%   2. problem.ehess, provided canGetEuclideanGradient(problem) holds: the
%      Euclidean gradient and Euclidean Hessian are converted with
%      problem.M.ehess2rhess;
%   3. getApproxHessian.
%
% Note that in the last case an approximate Hessian is returned without
% any warning. To check whether an exact Hessian is available (typically
% to issue a warning if not), use canGetHessian.
%
% storedb is a StoreDB object, key is the StoreDB key to point x. Both are
% optional: a new StoreDB and/or a new key are created when omitted.
%
% See also: getPrecon getApproxHessian canGetHessian

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Dec. 30, 2012.
% Contributors: 
% Change log: 
%
%   April 3, 2015 (NB):
%       Works with the new StoreDB class system.

    % storedb and key are optional trailing inputs: create what is missing.
    if nargin < 5
        if nargin < 4
            storedb = StoreDB();
        end
        key = storedb.getNewKey();
    end

    if isfield(problem, 'hess')
    %% The Hessian is given directly.

        % The declared input count tells how hess interacts with the cache.
        nin = nargin(problem.hess);
        if nin == 2
            hess = problem.hess(x, d);
        elseif nin == 3
            % Fetch the store for x, let hess update it, then save it.
            cache = storedb.getWithShared(key);
            [hess, cache] = problem.hess(x, d, cache);
            storedb.setWithShared(cache, key);
        elseif nin == 4
            % Hand over the storedb object itself, together with the key.
            hess = problem.hess(x, d, storedb, key);
        else
            throw(MException('manopt:getHessian:badhess', ...
                'hess should accept 2, 3 or 4 inputs.'));
        end

    elseif isfield(problem, 'ehess') && canGetEuclideanGradient(problem)
    %% Convert the Euclidean Hessian to the Riemannian Hessian.

        % The conversion needs the Euclidean gradient as well. It is
        % obtained first, before ehess is called.
        eg = getEuclideanGradient(problem, x, storedb, key);

        nin = nargin(problem.ehess);
        if nin == 2
            eh = problem.ehess(x, d);
        elseif nin == 3
            cache = storedb.getWithShared(key);
            [eh, cache] = problem.ehess(x, d, cache);
            storedb.setWithShared(cache, key);
        elseif nin == 4
            eh = problem.ehess(x, d, storedb, key);
        else
            throw(MException('manopt:getHessian:badehess', ...
                'ehess should accept 2, 3 or 4 inputs.'));
        end

        hess = problem.M.ehess2rhess(x, eg, eh, d);

    else
    %% Neither is available: use an approximation of the Hessian.

        hess = getApproxHessian(problem, x, d, storedb, key);

    end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/getHessianFD.m
================================================
function hessfd = getHessianFD(problem, x, d, storedb, key)
% Approximates the Hessian at x along d by finite differences of gradients.
%
% function hessfd = getHessianFD(problem, x, d)
% function hessfd = getHessianFD(problem, x, d, storedb)
% function hessfd = getHessianFD(problem, x, d, storedb, key)
%
% The gradient is computed at x and at a nearby point obtained by
% retracting along d. The latter is transported back to x, and the scaled
% difference of the two is returned. If the norm of d at x is below eps,
% the zero vector at x (problem.M.zerovec) is returned instead.
%
% storedb is a StoreDB object, key is the StoreDB key to point x. Both are
% optional: a new StoreDB and/or a new key are created when omitted.
%
% Gradients are obtained through getGradient.
%
% See also: approxhessianFD

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Dec. 30, 2012.
% Contributors: 
% Change log: 
%
%   Feb. 19, 2015 (NB):
%       It is sufficient to ensure positive radial linearity to guarantee
%       (together with other assumptions) that this approximation of the
%       Hessian will confer global convergence to the trust-regions method.
%       Formerly, in-code comments referred to the necessity of having
%       complete radial linearity, and that this was harder to achieve.
%       This appears not to be necessary after all, which simplifies the
%       code.
%
%   April 3, 2015 (NB):
%       Works with the new StoreDB class system.
%
%   Nov. 1, 2016 (NB):
%       Removed exception in case of unavailable gradient, as getGradient
%       now knows to fall back to an approximate gradient if need be.

    % storedb and key are optional trailing inputs: create what is missing.
    if nargin < 5
        if nargin < 4
            storedb = StoreDB();
        end
        key = storedb.getNewKey();
    end

    % Length of the direction d, measured at x.
    dnorm = problem.M.norm(x, d);

    % A direction that is too short gives the zero vector right away.
    if dnorm < eps
        hessfd = problem.M.zerovec(x);
        return;
    end

    % How far to look along d. To change this parameter, use
    % approxhessianFD explicitly. A power of 2 is used so that scaling by
    % epsilon does not incur any round-off error in IEEE arithmetic.
    epsilon = 2^-14;
    c = epsilon/dnorm;

    % Gradient at the base point.
    g0 = getGradient(problem, x, storedb, key);

    % Gradient at a point slightly further along d. That point is new, so
    % it gets its own key in the storedb.
    xnear = problem.M.retr(x, d, c);
    nearkey = storedb.getNewKey();
    gnear = getGradient(problem, xnear, storedb, nearkey);

    % Bring the second gradient back to the tangent space at x.
    gnear_at_x = problem.M.transp(xnear, x, gnear);

    % Finite difference: (gnear_at_x - g0) / c.
    hessfd = problem.M.lincomb(x, 1/c, gnear_at_x, -1/c, g0);

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/getLinesearch.m
================================================
function t = getLinesearch(problem, x, d, storedb, key)
% Obtains a step size hint for line-search algorithms.
%
% function t = getLinesearch(problem, x, d)
% function t = getLinesearch(problem, x, d, storedb)
% function t = getLinesearch(problem, x, d, storedb, key)
%
% For a line-search problem at x along the tangent direction d, the output
% t is such that retracting t*d at x yields a good point around where to
% look for a line-search solution. In other words, t hints at "how far to
% look" along the line. The hint is produced by problem.linesearch; if
% that field is missing, manopt:getLinesearch:fail is thrown.
%
% storedb is a StoreDB object, key is the StoreDB key to point x. Both are
% optional: a new StoreDB and/or a new key are created when omitted.
%
% See also: canGetLinesearch

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, July 17, 2014.
% Contributors: 
% Change log: 
%
%   April 3, 2015 (NB):
%       Works with the new StoreDB class system.

    % storedb and key are optional trailing inputs: create what is missing.
    if nargin < 5
        if nargin < 4
            storedb = StoreDB();
        end
        key = storedb.getNewKey();
    end

    % Nothing can be done without a user-supplied linesearch function.
    if ~isfield(problem, 'linesearch')
        throw(MException('manopt:getLinesearch:fail', ...
            ['The problem description is not explicit enough to ' ...
             'compute a line-search hint.']));
    end

    % The declared input count tells how linesearch interacts with the
    % cache.
    nin = nargin(problem.linesearch);
    if nin == 2
        t = problem.linesearch(x, d);
    elseif nin == 3
        % Fetch the store for x, let linesearch update it, then save it.
        cache = storedb.getWithShared(key);
        [t, cache] = problem.linesearch(x, d, cache);
        storedb.setWithShared(cache, key);
    elseif nin == 4
        % Hand over the storedb object itself, together with the key.
        t = problem.linesearch(x, d, storedb, key);
    else
        throw(MException('manopt:getLinesearch:badfun', ...
            'linesearch should accept 2, 3 or 4 inputs.'));
    end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/getPartialEuclideanGradient.m
================================================
function egrad = getPartialEuclideanGradient(problem, x, I, storedb, key)
% Euclidean gradient of a selected subset of the terms of the cost at x.
%
% function egrad = getPartialEuclideanGradient(problem, x, I)
% function egrad = getPartialEuclideanGradient(problem, x, I, storedb)
% function egrad = getPartialEuclideanGradient(problem, x, I, storedb, key)
%
% The cost function described in the problem structure is assumed to be a
% sum of d terms, with d specified as d = problem.ncostterms:
%
%    f(x) = sum_i f_i(x) for i = 1:d.
%
% Given a subset I of 1:d, this function obtains the Euclidean gradient of
% the partial cost function
%
%    f_I(x) = sum_i f_i(x) for i = I.
%
% I is turned into a row vector before being handed to
% problem.partialegrad, so that looping with " for i = I " is natural.
%
% The number of inputs declared by problem.partialegrad selects the calling
% convention: (x, I), (x, I, store) or (x, I, storedb, key). Any other
% count raises 'manopt:getPartialEuclideanGradient:badpartialegrad'. If the
% problem has no partialegrad field,
% 'manopt:getPartialEuclideanGradient:fail' is raised.
%
% storedb is a StoreDB object, key is the StoreDB key to point x. Both may
% be omitted.
%
% See also: getGradient canGetPartialEuclideanGradient getPartialGradient

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, June 28, 2016
% Contributors: 
% Change log: 

    % A missing key requires a new one; a missing storedb requires a new
    % database to generate that key from.
    if ~exist('key', 'var')
        if ~exist('storedb', 'var')
            storedb = StoreDB();
        end
        key = storedb.getNewKey();
    end

    % Row-vector form of the index set.
    I = (I(:)).';

    % Nothing to fall back on when partialegrad is not supplied.
    if ~isfield(problem, 'partialegrad')
        throw(MException('manopt:getPartialEuclideanGradient:fail', ...
            ['The problem description is not explicit enough to ' ...
             'compute the partial Euclidean gradient of the cost.']));
    end

    % Dispatch on how many inputs the user's function declares.
    userfun = problem.partialegrad;
    numinputs = nargin(userfun);

    if numinputs == 2
        % The function does not want to deal with any caching.
        egrad = userfun(x, I);

    elseif numinputs == 3
        % Fetch the store for x, let the function update it, save it back.
        store = storedb.getWithShared(key);
        [egrad, store] = userfun(x, I, store);
        storedb.setWithShared(store, key);

    elseif numinputs == 4
        % Hand over the whole storedb (by reference) together with the key.
        egrad = userfun(x, I, storedb, key);

    else
        throw(MException('manopt:getPartialEuclideanGradient:badpartialegrad', ...
            'partialegrad should accept 2, 3 or 4 inputs.'));
    end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/getPartialGradient.m
================================================
function grad = getPartialGradient(problem, x, I, storedb, key)
% Gradient at x of a selected subset of the terms of the cost function.
%
% function grad = getPartialGradient(problem, x, I)
% function grad = getPartialGradient(problem, x, I, storedb)
% function grad = getPartialGradient(problem, x, I, storedb, key)
%
% The cost function described in the problem structure is assumed to be a
% sum of d terms, with d specified as d = problem.ncostterms:
%
%    f(x) = sum_i f_i(x) for i = 1:d.
%
% Given a subset I of 1:d, this function obtains the gradient of the
% partial cost function
%
%    f_I(x) = sum_i f_i(x) for i = I.
%
% I is turned into a row vector first, so that looping with " for i = I "
% is natural.
%
% If problem.partialgrad exists, it is called with (x, I), (x, I, store)
% or (x, I, storedb, key) depending on how many inputs it declares.
% Otherwise, if canGetPartialEuclideanGradient(problem) holds, the partial
% Euclidean gradient is converted with problem.M.egrad2rgrad. Otherwise,
% 'manopt:getPartialGradient:fail' is raised.
%
% storedb is a StoreDB object, key is the StoreDB key to point x. Both may
% be omitted.
%
% See also: getGradient canGetPartialGradient getPartialEuclideanGradient

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, June 28, 2016
% Contributors: 
% Change log: 

    % A missing key requires a new one; a missing storedb requires a new
    % database to generate that key from.
    if ~exist('key', 'var')
        if ~exist('storedb', 'var')
            storedb = StoreDB();
        end
        key = storedb.getNewKey();
    end

    % Row-vector form of the index set.
    I = (I(:)).';

    if isfield(problem, 'partialgrad')
    %% Direct route: the user supplied partialgrad.

        userfun = problem.partialgrad;
        numinputs = nargin(userfun);

        if numinputs == 2
            % The function does not want to deal with any caching.
            grad = userfun(x, I);

        elseif numinputs == 3
            % Fetch the store for x, let the function update it, save it.
            store = storedb.getWithShared(key);
            [grad, store] = userfun(x, I, store);
            storedb.setWithShared(store, key);

        elseif numinputs == 4
            % Hand over the whole storedb (by reference) with the key.
            grad = userfun(x, I, storedb, key);

        else
            throw(MException('manopt:getPartialGradient:badpartialgrad', ...
                'partialgrad should accept 2, 3 or 4 inputs.'));
        end

    elseif canGetPartialEuclideanGradient(problem)
    %% Indirect route: convert the partial Euclidean gradient.

        partial_egrad = getPartialEuclideanGradient(problem, x, I, storedb, key);
        grad = problem.M.egrad2rgrad(x, partial_egrad);

    else
    %% No route available.

        throw(MException('manopt:getPartialGradient:fail', ...
            ['The problem description is not explicit enough to ' ...
             'compute the partial gradient of the cost.']));

    end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/getPrecon.m
================================================
function Pd = getPrecon(problem, x, d, storedb, key)
% Applies the preconditioner for the Hessian of the cost at x along d.
%
% function Pd = getPrecon(problem, x, d)
% function Pd = getPrecon(problem, x, d, storedb)
% function Pd = getPrecon(problem, x, d, storedb, key)
%
% Pd is the result of applying the Hessian preconditioner to the tangent
% vector d at point x. The preconditioner is supposed to be a symmetric,
% positive definite approximation of the inverse of the Hessian.
%
% Resolution order:
%  1) problem.precon, called with (x, d), (x, d, store) or
%     (x, d, storedb, key) depending on how many inputs it declares;
%  2) if canGetSqrtPrecon(problem) holds, getSqrtPrecon applied twice;
%  3) otherwise Pd = d (identity).
%
% storedb is a StoreDB object, key is the StoreDB key to point x. Both may
% be omitted.
%
% See also: getHessian

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Dec. 30, 2012.
% Contributors: 
% Change log: 
%
%   April 3, 2015 (NB):
%       Works with the new StoreDB class system.

    % A missing key requires a new one; a missing storedb requires a new
    % database to generate that key from.
    if ~exist('key', 'var')
        if ~exist('storedb', 'var')
            storedb = StoreDB();
        end
        key = storedb.getNewKey();
    end

    if isfield(problem, 'precon')
    %% The user supplied the preconditioner itself.

        userfun = problem.precon;
        numinputs = nargin(userfun);

        if numinputs == 2
            % The function does not want to deal with any caching.
            Pd = userfun(x, d);

        elseif numinputs == 3
            % Fetch the store for x, let the function update it, save it.
            store = storedb.getWithShared(key);
            [Pd, store] = userfun(x, d, store);
            storedb.setWithShared(store, key);

        elseif numinputs == 4
            % Hand over the whole storedb (by reference) with the key.
            Pd = userfun(x, d, storedb, key);

        else
            throw(MException('manopt:getPrecon:badprecon', ...
                'precon should accept 2, 3 or 4 inputs.'));
        end

    elseif canGetSqrtPrecon(problem)
    %% Only the square root is available: apply it two times in a row.

        halfway = getSqrtPrecon(problem, x, d, storedb, key);
        Pd = getSqrtPrecon(problem, x, halfway, storedb, key);

    else
    %% Nothing was provided: the preconditioner is the identity.

        Pd = d;

    end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/getSqrtPrecon.m
================================================
function sqrtPd = getSqrtPrecon(problem, x, d, storedb, key)
% Applies the square root of the Hessian preconditioner at x along d.
%
% function sqrtPd = getSqrtPrecon(problem, x, d)
% function sqrtPd = getSqrtPrecon(problem, x, d, storedb)
% function sqrtPd = getSqrtPrecon(problem, x, d, storedb, key)
%
% sqrtPd is the result of applying the square root of the Hessian
% preconditioner to the tangent vector d at point x. The preconditioner is
% supposed to be a symmetric, positive definite approximation of the
% inverse of the Hessian, so its square root must be symmetric and positive
% definite as well.
%
% problem.sqrtprecon is called with (x, d), (x, d, store) or
% (x, d, storedb, key) depending on how many inputs it declares.
%
% If the problem has no sqrtprecon field, sqrtPd = d (identity). Note that
% this may be incompatible with the preconditioner, if that one is supplied
% in the problem description. Always check with canGetPrecon and
% canGetSqrtPrecon.
%
% storedb is a StoreDB object, key is the StoreDB key to point x. Both may
% be omitted.
%
% See also: getPrecon canGetPrecon canGetSqrtPrecon getHessian

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, April 3, 2015.
% Contributors: 
% Change log: 

    % A missing key requires a new one; a missing storedb requires a new
    % database to generate that key from.
    if ~exist('key', 'var')
        if ~exist('storedb', 'var')
            storedb = StoreDB();
        end
        key = storedb.getNewKey();
    end

    if ~isfield(problem, 'sqrtprecon')
        % No square root of the preconditioner provided: use the identity.
        sqrtPd = d;
        return;
    end

    % Dispatch on how many inputs the user's function declares.
    userfun = problem.sqrtprecon;
    numinputs = nargin(userfun);

    if numinputs == 2
        % The function does not want to deal with any caching.
        sqrtPd = userfun(x, d);

    elseif numinputs == 3
        % Fetch the store for x, let the function update it, save it back.
        store = storedb.getWithShared(key);
        [sqrtPd, store] = userfun(x, d, store);
        storedb.setWithShared(store, key);

    elseif numinputs == 4
        % Hand over the whole storedb (by reference) together with the key.
        sqrtPd = userfun(x, d, storedb, key);

    else
        throw(MException('manopt:getSqrtPrecon:badsqrtprecon', ...
            'sqrtprecon should accept 2, 3 or 4 inputs.'));
    end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/getStore.m
================================================
function store = getStore(problem, x, storedb) %#ok<STOUT,INUSD>
% Placeholder for a removed function: calling it always raises an error.
%
% function store = getStore(problem, x, storedb)
%
% None of the inputs is used and the output is never assigned. The error
% message directs the caller to the StoreDB class.

    removal_notice = 'This file was removed from Manopt. Please use the StoreDB class.';
    error(removal_notice);

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/getSubgradient.m
================================================
function subgrad = getSubgradient(problem, x, tol, storedb, key)
% Computes a subgradient of the cost function at x, up to a tolerance
%
% function subgrad = getSubgradient(problem, x)
% function subgrad = getSubgradient(problem, x, tol)
% function subgrad = getSubgradient(problem, x, tol, storedb)
% function subgrad = getSubgradient(problem, x, tol, storedb, key)
%
% Returns a subgradient at x of the cost function described in the problem
% structure. A tolerance tol ( >= 0 ) can also be specified; if tol is
% omitted or empty, tol = 0 is used.
%
% Resolution order:
%  1) problem.subgrad, called with (x), (x, tol), (x, tol, store) or
%     (x, tol, storedb, key) depending on how many inputs it declares; the
%     one-input form triggers a warning since tol cannot be passed along;
%  2) if canGetGradient(problem) holds, the gradient is returned;
%  3) otherwise 'manopt:getSubgradient:fail' is raised.
%
% storedb is a StoreDB object, key is the StoreDB key to point x. Both may
% be omitted.
%
% See also: getDirectionalDerivative canGetGradient

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, July 20, 2017.
% Contributors: 
% Change log: 

    % Tolerance defaults to 0 when absent or empty.
    if ~exist('tol', 'var') || isempty(tol)
        tol = 0;
    end

    % A missing key requires a new one; a missing storedb requires a new
    % database to generate that key from.
    if ~exist('key', 'var')
        if ~exist('storedb', 'var')
            storedb = StoreDB();
        end
        key = storedb.getNewKey();
    end

    if isfield(problem, 'subgrad')
    %% The user supplied subgrad.

        userfun = problem.subgrad;
        numinputs = nargin(userfun);

        if numinputs == 1
            warning('manopt:subgradient', ...
                   ['problem.subgrad normally admits a second\n' ...
                    'parameter, tol >= 0, as a tolerance.\n']);
            % The tolerance cannot be forwarded to a one-input function.
            subgrad = userfun(x);

        elseif numinputs == 2
            % The function does not want to deal with any caching.
            subgrad = userfun(x, tol);

        elseif numinputs == 3
            % Fetch the store for x, let the function update it, save it.
            store = storedb.getWithShared(key);
            [subgrad, store] = userfun(x, tol, store);
            storedb.setWithShared(store, key);

        elseif numinputs == 4
            % Hand over the whole storedb (by reference) with the key.
            subgrad = userfun(x, tol, storedb, key);

        else
            throw(MException('manopt:getSubgradient:badsubgrad', ...
                'subgrad should accept 1, 2, 3 or 4 inputs.'));
        end

    elseif canGetGradient(problem)
    %% A gradient is in particular a subgradient.

        subgrad = getGradient(problem, x, storedb, key);

    else
    %% No route available.

        throw(MException('manopt:getSubgradient:fail', ...
            ['The problem description is not explicit enough to ' ...
             'compute a subgradient.']));

    end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/handle_light.m
================================================
classdef handle_light < handle
% Handle base class whose inherited handle methods are marked as hidden.
%
% Every method below simply forwards all of its arguments to the method of
% the same name in the handle superclass. Redeclaring them in a Hidden
% methods block is a trick to keep them out of the listing produced by
% methods(myclass) for classes deriving from handle_light.
%
% Source:
% http://stackoverflow.com/questions/6621850/is-it-possible-to-hide-the-methods-inherited-from-the-handle-class-in-matlab
% Posted by sclarke81 on StackOverflow on Oct. 24, 2012.

% This file is part of Manopt: www.manopt.org.
% Original author: sclarke81, added April 3, 2013.
% Contributors: 
% Change log: 

    methods (Hidden = true)

        % --- Relational operators -------------------------------------

        function tf = eq(varargin)
            tf = eq@handle(varargin{:});
        end

        function tf = ne(varargin)
            tf = ne@handle(varargin{:});
        end

        function tf = lt(varargin)
            tf = lt@handle(varargin{:});
        end

        function tf = le(varargin)
            tf = le@handle(varargin{:});
        end

        function tf = gt(varargin)
            tf = gt@handle(varargin{:});
        end

        function tf = ge(varargin)
            tf = ge@handle(varargin{:});
        end

        % --- Events, lookup and destruction ---------------------------

        function listener = addlistener(varargin)
            listener = addlistener@handle(varargin{:});
        end

        function notify(varargin)
            notify@handle(varargin{:});
        end

        function matches = findobj(varargin)
            matches = findobj@handle(varargin{:});
        end

        function prop = findprop(varargin)
            prop = findprop@handle(varargin{:});
        end

        function delete(varargin)
            delete@handle(varargin{:});
        end

    end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/mergeOptions.m
================================================
function opts = mergeOptions(opts1, opts2)
% Merges two options structures with one having precedence over the other.
%
% function opts = mergeOptions(opts1, opts2)
%
% input: opts1 and opts2 are two structures. Either one may be empty (for
%        example, []), in which case it is treated as a structure without
%        fields.
% output: opts is a structure containing all fields of opts1 and opts2.
%         Whenever a field is present in both opts1 and opts2, it is the
%         value in opts2 that is kept.
%
% The typical usage is to have opts1 contain default options and opts2
% contain user-specified options that overwrite the defaults.
%
% See also: getGlobalDefaults

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Dec. 30, 2012.
% Contributors: 
% Change log: 

    % Empty inputs stand for "no options".
    if isempty(opts2)
        opts2 = struct();
    end
    if isempty(opts1)
        opts1 = struct();
    end

    % Start from the first structure, then copy over every field of the
    % second one, overwriting where both define the same field.
    opts = opts1;
    names = fieldnames(opts2);
    for k = 1 : numel(names)
        name = names{k};
        opts.(name) = opts2.(name);
    end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/purgeStoredb.m
================================================
function storedb = purgeStoredb(storedb, storedepth) %#ok<INUSD>
% Placeholder for a removed function: calling it always raises an error.
%
% function storedb = purgeStoredb(storedb, storedepth)
%
% The inputs are not used: the function raises an error right away, whose
% message directs the caller to the StoreDB class.

    removal_notice = 'This file was removed from Manopt. Please use the StoreDB class.';
    error(removal_notice);

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/setStore.m
================================================
function storedb = setStore(problem, x, storedb, store) %#ok<INUSD>
% Placeholder for a removed function: calling it always raises an error.
%
% function storedb = setStore(problem, x, storedb, store)
%
% The inputs are not used: the function raises an error right away, whose
% message directs the caller to the StoreDB class.

    removal_notice = 'This file was removed from Manopt. Please use the StoreDB class.';
    error(removal_notice);

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/core/stoppingcriterion.m
================================================
function [stop, reason] = stoppingcriterion(problem, x, options, info, last)
% Checks for standard stopping criteria, as a helper to solvers.
%
% function [stop, reason] = stoppingcriterion(problem, x, options, info, last)
%
% Executes standard stopping criterion checks, based on what is defined in
% the info(last) stats structure and in the options structure. A criterion
% is only checked if both the relevant field of info(last) and the relevant
% field of options exist.
%
% The returned number 'stop' is 0 if none of the stopping criteria
% triggered, and a (strictly) positive integer otherwise. The integer
% identifies which criterion triggered:
%  0 : Nothing triggered;
%  1 : Cost tolerance reached (cost <= options.tolcost);
%  2 : Gradient norm tolerance reached (gradnorm < options.tolgradnorm);
%  3 : Max time exceeded (time >= options.maxtime);
%  4 : Max iteration count reached (iter >= options.maxiter);
%  5 : Maximum number of cost evaluations reached
%      (costevals >= options.maxcostevals);
%  6 : User defined stopfun criterion triggered.
%
% The criteria are checked in the order listed and the first one to
% trigger is the one reported: later criteria (including options.stopfun)
% are not evaluated once a criterion has triggered.
%
% The output 'reason' is a string describing the triggered event (empty if
% nothing triggered).
%
% problem and x are only used to be passed along to options.stopfun, which
% is called as options.stopfun(problem, x, info, last).

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Dec. 30, 2012.
% Contributors: 
% Change log: 
%
%   April 2, 2015 (NB):
%       'reason' now contains the option (name and value) that triggered.


    stop = 0;
    reason = '';
    
    stats = info(last);

    % Target cost attained
    if isfield(stats, 'cost') && isfield(options, 'tolcost') && ...
       stats.cost <= options.tolcost
        reason = sprintf('Cost tolerance reached; options.tolcost = %g.', options.tolcost);
        stop = 1;
        return;
    end

    % Target gradient norm attained
    if isfield(stats, 'gradnorm') && isfield(options, 'tolgradnorm') && ...
       stats.gradnorm < options.tolgradnorm
        reason = sprintf('Gradient norm tolerance reached; options.tolgradnorm = %g.', options.tolgradnorm);
        stop = 2;
        return;
    end

    % Allotted time exceeded
    if isfield(stats, 'time') && isfield(options, 'maxtime') && ...
       stats.time >= options.maxtime
        reason = sprintf('Max time exceeded; options.maxtime = %g.', options.maxtime);
        stop = 3;
        return;
    end

    % Allotted iteration count exceeded
    if isfield(stats, 'iter') && isfield(options, 'maxiter') && ...
       stats.iter >= options.maxiter
        reason = sprintf('Max iteration count reached; options.maxiter = %g.', options.maxiter);
        stop = 4;
        return;
    end
    
    % Allotted function evaluation count exceeded
    if isfield(stats, 'costevals') && isfield(options, 'maxcostevals') && ...
       stats.costevals >= options.maxcostevals
        reason = sprintf('Maximum number of cost evaluations reached; options.maxcostevals = %g.', options.maxcostevals);
        stop = 5;
        % Return right away, like the criteria above: otherwise stopfun
        % below would still run and could overwrite stop and reason.
        return;
    end

    % Check whether the possibly user defined stopping criterion
    % triggers or not.
    if isfield(options, 'stopfun')
        userstop = options.stopfun(problem, x, info, last);
        if userstop
            reason = 'User defined stopfun criterion triggered; see options.stopfun.';
            stop = 6;
            return;
        end
    end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/complexcircle/complexcirclefactory.m
================================================
function M = complexcirclefactory(n)
% Returns a manifold struct to optimize over unit-modulus complex numbers.
%
% function M = complexcirclefactory()
% function M = complexcirclefactory(n)
%
% Description of vectors z in C^n (complex) such that each component z(i)
% has unit modulus. The manifold structure is the Riemannian submanifold
% structure from the embedding space R^2 x ... x R^2, i.e., the complex
% circle is identified with the unit circle in the real plane.
%
% Points and tangent vectors are complex column vectors of length n (the
% functions below create them with zeros(n, 1) and randn(n, 1)).
%
% By default, n = 1.
%
% See also spherecomplexfactory

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Dec. 30, 2012.
% Contributors: 
% Change log: 
%
%   July 7, 2014 (NB): Added ehess2rhess function.
%
%   Sep. 3, 2014 (NB): Correction to the dist function (extract real part).
%
%   April 13, 2015 (NB): Fixed logarithm.
%
%   Oct. 8, 2016 (NB)
%       Code for exponential was simplified to only treat the zero vector
%       as a particular case.
%
%   July 20, 2017 (NB)
%       The distance function is now even more accurate. Improved logarithm
%       accordingly.
    
    if ~exist('n', 'var')
        n = 1;
    end

    M.name = @() sprintf('Complex circle (S^1)^%d', n);
    
    % One real degree of freedom (the phase) per entry.
    M.dim = @() n;
    
    % Real part of the complex inner product; the base point z is unused.
    M.inner = @(z, v, w) real(v'*w);
    
    M.norm = @(x, v) norm(v);
    
    % Entrywise, 2*asin(|x-y|/2) is the angle subtended by a chord of
    % length |x-y| on the unit circle. The real part is extracted (see the
    % change log); presumably this guards against round-off pushing
    % .5*abs(x - y) slightly above 1, where asin returns a complex value.
    M.dist = @(x, y) norm(real(2*asin(.5*abs(x - y))));
    
    M.typicaldist = @() pi*sqrt(n);
    
    % Entrywise, real(conj(u).*z) is the real inner product of u(k) and
    % z(k) seen as vectors in R^2: the component of u along z is removed.
    M.proj = @(z, u) u - real( conj(u) .* z ) .* z;	
    
    M.tangent = M.proj;
    
    % For Riemannian submanifolds, converting a Euclidean gradient into a
    % Riemannian gradient amounts to an orthogonal projection.
	M.egrad2rgrad = M.proj;
    
    % Converts the Euclidean gradient egrad and Euclidean Hessian ehess
    % (along the tangent vector zdot) at z into the Riemannian Hessian
    % along zdot: a correction term involving egrad is subtracted from
    % ehess, then the result is projected to the tangent space at z.
    M.ehess2rhess = @ehess2rhess;
    function rhess = ehess2rhess(z, egrad, ehess, zdot)
        rhess = M.proj(z, ehess - real(z.*conj(egrad)).*zdot);
    end
    
    % Exponential map: y = Exp_z(t*v), computed entrywise. The step size t
    % is optional and defaults to 1.
    M.exp = @exponential;
    function y = exponential(z, v, t)
        
        if nargin == 2
            % t = 1;
            tv = v;
        else
            tv = t*v;
        end

        y = zeros(n, 1);

        nrm_tv = abs(tv);
        
        % We need to be careful for zero steps.
        % Entries with a zero step are copied from z, which avoids the
        % division 0/0 in sin(nrm_tv)./nrm_tv.
        mask = (nrm_tv > 0);
        y(mask) = z(mask).*cos(nrm_tv(mask)) + ...
                  tv(mask).*(sin(nrm_tv(mask))./nrm_tv(mask));
        y(~mask) = z(~mask);
        
    end
    
    % Retraction: take the step in the embedding space, then apply sign
    % entrywise. The step size t is optional and defaults to 1.
    M.retr = @retraction;
    function y = retraction(z, v, t)
        if nargin == 2
            % t = 1;
            tv = v;
        else
            tv = t*v;
        end
        y = sign(z+tv);
    end

    % Logarithmic map: tangent vector at x1 pointing toward x2.
    M.log = @logarithm;
    function v = logarithm(x1, x2)
        % Direction: projection of the chord x2 - x1 to the tangent space.
        v = M.proj(x1, x2 - x1);
        % Target length of each entry: same formula as in M.dist.
        di = real(2*asin(.5*abs(x1 - x2)));
        nv = abs(v);
        factors = di ./ nv;
        % Entries of x1 and x2 that are (almost) equal are not rescaled.
        factors(di <= 1e-10) = 1;
		v = v .* factors;
    end
    
    % hashmd5 is defined elsewhere; it is fed the real and imaginary parts
    % of z stacked in one column.
    M.hash = @(z) ['z' hashmd5( [real(z(:)) ; imag(z(:))] ) ];
    
    % Random point: sign applied entrywise to a random complex vector with
    % randn real and imaginary parts.
    M.rand = @random;
    function z = random()
        z = sign(randn(n, 1) + 1i*randn(n, 1));
    end
    
    % Random tangent vector at z, scaled to have unit norm.
    M.randvec = @randomvec;
    function v = randomvec(z)
        % i*z(k) is a basis vector of the tangent vector to the k-th circle
        v = randn(n, 1) .* (1i*z);
        v = v / norm(v);
    end
    
    % matrixlincomb is defined elsewhere.
    M.lincomb = @matrixlincomb;
    
    M.zerovec = @(x) zeros(n, 1);
    
    % Vector transport from x1 to x2: projection to the tangent space at
    % x2 (x1 is unused).
    M.transp = @(x1, x2, d) M.proj(x2, d);
    
    M.pairmean = @pairmean;
    function z = pairmean(z1, z2)
        z = sign(z1+z2);
    end

    % vec stacks the real and imaginary parts of a tangent vector in a
    % real vector of length 2n; mat undoes this.
    M.vec = @(x, u_mat) [real(u_mat) ; imag(u_mat)];
    M.mat = @(x, u_vec) u_vec(1:n) + 1i*u_vec((n+1):end);
    M.vecmatareisometries = @() true;

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/complexcircle/realphasefactory.m
================================================
function M = realphasefactory(n, z0, zmax)
% Returns a manifold struct to optimize over phases of fft's of real signals
%
% function M = realphasefactory(n)
% function M = realphasefactory(n, z0)
% function M = realphasefactory(n, z0, zmax)
%
% If x is a real vector of length n, then y = fft(x) is a complex vector
% which obeys certain symmetries. Specifically, for any integer k,
%
%  y(1+mod(k, n)) = conj(y(1+mod(n-k, n)))
%
% The same holds for the phases of the Fourier transform z = sign(y).
%
% This factory returns a Manopt manifold structure which represents the set
% of complex vectors z of length n which could be the phases of the Fourier
% transform of a real signal of length n:
%
%   abs(z) = 1   and   z(1+mod(k, n)) = conj(z(1+mod(n-k, n))) for each k.
%
% For k = 0, this readily implies that z(1) is +1 or -1, so that the set of
% possible z's is disconnected. To choose which connected component to work
% with, set the second input z0 to +1 or -1 (this is the sign of the mean
% of x). By default, z0 = 1.
%
% Furthermore, if n is even, then k = n/2 implies z(1+n/2) is +1 or -1 as
% well, thus further disconnecting the set of acceptable z's. To choose
% which component to work with, set the third input zmax to +1 or -1. By
% default, it is +1.
%
% The Riemannian manifold structure is the Riemannian submanifold
% structure from the embedding space R^2 x ... x R^2, i.e., the complex
% circles are identified with the unit circle in the real plane.
% Concretely, this means the inner product is <u, v>_z = real(u'*v).
% Tangent vectors u at z are complex vectors of length n which notably
% satisfy u(1+0) = 0 and, if n is even, u(1+n/2) = 0.
%
% n must be integer and n >= 3 (for n = 1:2 the manifold has dimension 0).
%
% Extra functions available in M include M.up, M.down and M.downup. They
% allow to capture the symmetries concisely, as:
%
%    M.up(z) == conj(M.down(z)).
%
% See in code for more details.
%
% See also complexcirclefactory

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Feb. 2, 2017.
% Contributors: joint work with Tamir Bendory, Zhizhen Zhao and Amit Singer
% Change log: 
%
%   July 20, 2017 (NB)
%       The distance function is now more accurate. Improved logarithm
%       accordingly.

    assert(n == round(n) && n >= 3, 'n must be an integer >= 3.');
    
    even_n = (round(n/2) == n/2);
    
    % z0 and zmax default to +1 when omitted or empty.
    if ~exist('z0', 'var') || isempty(z0)
        z0 = 1;
    end
    if ~exist('zmax', 'var') || isempty(zmax)
        zmax = 1;
    end
    
    assert(z0 == 1 || z0 == -1, 'z0 must be +1 or -1.');
    assert(zmax == 1 || zmax == -1, 'zmax must be +1 or -1.');

    % zmax is only reported in the name when n is even.
    if even_n
        M.name = @() sprintf('Phases of fft''s of real signals of length %d (z0 = %d, zmax = %d)', n, z0, zmax);
    else
        M.name = @() sprintf('Phases of fft''s of real signals of length %d (z0 = %d)', n, z0);
    end
    
    M.dim = @() floor((n-1)/2);
    
    % Real part of the complex inner product; the base point z is unused.
    M.inner = @(z, v, w) real(v'*w);
    
    M.norm = @(z, u) norm(u);
    
    % Entrywise, 2*asin(|z1-z2|/2) is the angle subtended by a chord of
    % length |z1-z2| on the unit circle. The real part is extracted;
    % presumably this guards against round-off pushing .5*abs(z1 - z2)
    % slightly above 1, where asin returns a complex value.
    M.dist = @(z1, z2) norm(real(2*asin(.5*abs(z1 - z2))));
    
    M.typicaldist = @() pi*sqrt(n/2);
    
    % Special functions to ease working with the symmetries.
    % down is the identity. up keeps the first entry in place and reverses
    % the order of entries 2 to n, so that up(u) at index 1+k is u at
    % index 1+mod(n-k, n). downup averages u with conj(up(u)): its output
    % w satisfies the symmetry w == conj(up(w)).
    down = @(u) u;
    up = @(u) u([1 ; (n:-1:2)']);
    downup = @(u) (down(u) + conj(up(u)))/2;
    M.down = down;
    M.up = up;
    M.downup = downup;
    
    % Projection of u to the tangent space at z: symmetrize with downup,
    % then remove, entrywise, the component along z.
    M.proj = @proj;
    function pu = proj(z, u)
        duu = downup(u);
        pu = duu - real(duu .* conj(z)).*z;
        % Note that there is no need to enforce pu(1) = 0 or (if n is even)
        % pu(1+n/2) = 0 manually, since the IEEE standard ensures that the
        % above operation will be exact for those entries provided z(1)
        % (and possibly z(1+n/2)) is +1 or -1, as should be the case.
    end
    
    M.tangent = M.proj;
    
    % For Riemannian submanifolds, converting a Euclidean gradient into a
    % Riemannian gradient amounts to an orthogonal projection.
	M.egrad2rgrad = M.proj;
    
    % Converts the Euclidean gradient egrad and Euclidean Hessian ehess
    % (along the tangent vector zdot) at z into the Riemannian Hessian
    % along zdot: a correction term involving the symmetrized egrad is
    % subtracted from ehess, then the result is projected.
    M.ehess2rhess = @ehess2rhess;
    function rhess = ehess2rhess(z, egrad, ehess, zdot)
        rhess = M.proj(z, ehess - real(downup(egrad) .* conj(z)).*zdot);
    end
    
    % Exponential map: y = Exp_z(t*v), computed entrywise. The step size t
    % is optional and defaults to 1.
    M.exp = @exponential;
    function y = exponential(z, v, t)
        
        if nargin == 2
            % t = 1;
            tv = v;
        else
            tv = t*v;
        end

        y = zeros(n, 1);

        nrm_tv = abs(tv);
        
        % We need to be careful for zero steps.
        % Entries with a zero step are copied from z, which avoids the
        % division 0/0 in sin(nrm_tv)./nrm_tv.
        mask = (nrm_tv > 0);
        y(mask) = z(mask).*cos(nrm_tv(mask)) + ...
                  tv(mask).*(sin(nrm_tv(mask))./nrm_tv(mask));
        y(~mask) = z(~mask);
        
    end
    
    % Retraction: take the step in the embedding space, then apply sign
    % entrywise. The step size t is optional and defaults to 1.
    M.retr = @retraction;
    function y = retraction(z, v, t)
        if nargin == 2
            % t = 1;
            tv = v;
        else
            tv = t*v;
        end
        y = sign(z+tv);
    end

    % Logarithmic map: tangent vector at x1 pointing toward x2.
    M.log = @logarithm;
    function v = logarithm(x1, x2)
        % Direction: projection of the chord x2 - x1 to the tangent space.
        v = M.proj(x1, x2 - x1);
        % Target length of each entry: same formula as in M.dist.
        di = real(2*asin(.5*abs(x1 - x2)));
        nv = abs(v);
        factors = di ./ nv;
        % Entries of x1 and x2 that are (almost) equal are not rescaled.
        factors(di <= 1e-6) = 1;
		v = v .* factors;
    end
    
    % hashmd5 is defined elsewhere; it is fed the real and imaginary parts
    % of z stacked in one column.
    M.hash = @(z) ['z' hashmd5( [real(z(:)) ; imag(z(:))] ) ];
    
    % Random point: symmetrize a random complex vector, apply sign
    % entrywise, then overwrite the entries whose value is fixed by the
    % chosen component (z(1), and z(1+n/2) if n is even).
    M.rand = @random;
    function z = random()
        z = sign(downup(randn(n, 1) + 1i*randn(n, 1)));
        z(1) = z0;
        if even_n
            z(1 + n/2) = zmax;
        end
    end
    
    % Random tangent vector at z: projection of a random complex vector,
    % scaled to have unit norm.
    M.randvec = @randomvec;
    function v = randomvec(z)
        v = M.proj(z, randn(n, 1) + 1i*randn(n, 1));
        v = v / norm(v);
    end
    
    % matrixlincomb is defined elsewhere.
    M.lincomb = @matrixlincomb;
    
    M.zerovec = @(z) zeros(n, 1);
    
    % Vector transport from z1 to z2: projection to the tangent space at
    % z2 (z1 is unused).
    M.transp = @(z1, z2, u) M.proj(z2, u);
    
    M.pairmean = @pairmean;
    function z = pairmean(z1, z2)
        z = sign(z1+z2);
    end

    % This vec/mat pair is an isometry which allows to switch between the
    % classical representation of tangent vectors---as complex vectors of
    % length n---to real vectors of length M.dim() whose entries are the
    % coordinates of the tangent vector in the basis 1i*z, for the first
    % half. A scaling of sqrt(2) is applied to ensure isometry, since
    % tangent vectors are represented with only half of their entries.
    % I lists the indices of that first half: entry 1 is excluded, and so
    % is entry 1+n/2 when n is even.
    I = 2 : floor((n+1)/2);
    % For even n, M.mat inserts a zero for the middle entry 1+n/2; for odd
    % n there is no middle entry.
    if even_n
        middle = 0;
    else
        middle = [];
    end
    M.vec = @(z, u_mat) sqrt(2)*real(u_mat(I) .* conj(1i*z(I)));
    M.mat = @(z, u_vec) [0 ; u_vec.*(1i*z(I)) ; middle ; ...
                             flipud(conj(u_vec.*(1i*z(I))))]/sqrt(2);
    M.vecmatareisometries = @() true;
    
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/essential/README_Essential.txt
================================================
# A Riemannian quotient representation for the essential manifold
Contributed by Roberto Tron.

The essential matrix is a 3x3 matrix that encodes the epipolar
constraint between the homogeneous coordinates of the projection of a
common 3-D point in two cameras. Not all the 3x3 matrices are essential matrices, as
these need to encode the relative pose of the two cameras (up to a
global scaling). The space of valid essential matrices can be endowed
with a Riemannian structure by following the derivations presented in:

  R. Tron, K. Daniilidis,
  "The Space of Essential Matrices as a Riemannian Quotient Manifold -
  Geometric Interpretation and Optimization Algorithms" 
  International Journal of Computer Vision, (submitted).

This work shows that the essential manifold can be seen as a quotient
manifold of $SO(3) \times SO(3)$, where $SO(3)$ is the manifold of 3-D
rotations. In Matlab, we represent k points on the essential
manifold as an array of dimension $[3 \times 6 \times k]$, where each $[3 \times 3]$
sub matrix is a 3-D rotation.

The implementation provides both the "signed" and "unsigned" version
of the manifold presented in the paper. The only difference between
the two is in how the logarithm, and hence the distance, are
computed. In the signed version, the points related by the twisted pair
ambiguity are considered as distinct; in practice, this is the case when the
cheirality constraint is used to remove the ambiguity. In the unsigned
version, points related by the twisted pair ambiguity belong to the
same class; in practice, it means that they all produce equivalent
epipolar constraints. See the paper for details.

Factory call:
M=essentialfactory(k,signature).
By default, k equals 1. The string signature should be set to "signed"
(resp. "unsigned") to use the signed (resp. unsigned) version of the
manifold. By default, signature equals "signed".

See the paper for the definition of the set and tangent spaces.

Note: following the representation of tangent vectors for SO(3) in
MANOPT, tangent vectors for the essential manifold are represented as
$[3 \times 6 \times k]$ matrices, where each $[3 \times 3]$
sub matrix is skew-symmetric. The real tangent vector in the ambient
space is obtained by multiplying on the left each $[3 \times 3]$
skew-symmetric matrix with the corresponding rotation from the base
point.

## Toolset
The following list contains some of the nontrivial available functions
in the structure M.

- Dimension
M.dim()
$\dim M=5k$

- Metric
M.inner(X,S,T)
$\langle S, T \rangle = \sum_{i=1}^k \mathrm{trace}(S_i^T T_i)$, where S and T
are representations of two tangent vectors at X.

- Norm
M.norm(X,S)
$\|S\|=\sqrt{\langle S, S \rangle}$

- Distance
M.dist(X,Y)
$\mathrm{dist}(X,Y)=\sqrt{\sum_{i=1}^k \|\log(X_i,Y_i)\|^2}$, see M.log(X,Y)
below

- Typical distance
M.typicaldist()
$\pi\sqrt{2k}$ (the factory returns pi*sqrt(2*k))

- Vertical tangent space projector
M.vertproj(X,H)
Projects a point in the ambient space onto the vertical space at
X. See the paper for details. Note that this operation returns an
array containing skew-symmetric matrices.

- Tangent space projector
M.proj(X,H)
Projects a point in the ambient space onto the horizontal space at
X. See the paper for details. Note that this operation returns an
array containing skew-symmetric matrices.

- Tangent space to ambient space
M.tangent2ambient(X,S)
Computes a matrix H where H(1:3,:,i)=X(1:3,:,i)*S(1:3,:,i) and
H(4:6,:,i)=X(4:6,:,i)*S(4:6,:,i). This function is necessary because
the proj operator takes as input an ambient vector and returns a
tangent vector. To apply the proj again to the result (which should
change nothing), it is necessary to first represent the tangent vector
obtained as an ambient vector. This function is here because of formal
peculiarities and is likely to disappear at some point. 

- Essential matrix
M.E(X)
Returns the $3 \times 3$ essential matrix corresponding to the point X on
the manifold.

- Tangent of the essential matrix
M.dE(X,S)
Returns the matrix $\dot{E}$ obtained from a point X moving on a curve
with tangent S. Mathematically, this is the push-forward of S through
the mapping M.E(X)

- Double tangent of the essential matrix
M.ddE(X,S)
Returns the matrix $\ddot{E}$ obtained from a point X moving on a
*geodesic* curve (i.e., with zero acceleration) with tangent S. 
Mathematically, this is the push-forward of S through the mapping M.dE(X,S)

- Euclidean to Riemannian function
M.ef2rf(X,ef)
Returns the value of ef evaluated at M.E(X). ef must be a function handle

- Euclidean gradient of a function of E to Euclidean gradient of a function of X
M.egradE2egrad(X,egradE)
Returns the Euclidean gradient (matrix of partial derivatives) in the
entries of X (taken as a $3 \times 6$ matrix) given the Euclidean gradient
of a function of E (which is a $3 \times 3$ matrix). egradE must be
a function handle for which egradE(E) returns the $3 \times 3$
Euclidean gradient of a function evaluated at the essential matrix E=M.E(X).
Note: this function uses a different convention than egrad2rgrad for
other manifolds. In this case egradE is a function handle, while in the
other cases egrad is a matrix.

- Euclidean to Riemannian gradient
M.egrad2rgrad(X,egrad)
Returns the Riemannian gradient (a tangent vector at X) corresponding
to the Euclidean gradient of a function of X taken as a matrix. egrad must be
a function handle for which egrad(X) returns the $3 \times 6$
Euclidean gradient of a function evaluated at the point X.
Note: this function uses a different convention than egrad2rgrad for
other manifolds. In this case egrad is a function handle, while in the
other cases egrad is a matrix.

- Euclidean gradient of a function of E to Riemannian gradient
M.egradE2rgrad(X,egradE)
This function is the combination of M.egradE2egrad and
M.egrad2rgrad. See the respective comments for more information.

- Euclidean Hessian of a function of E to Euclidean Hessian of a function of X
M.ehessE2ehess(X,egradE, ehessE, V)
Returns the Euclidean Hessian (operator given by second order partial
derivatives) in the entries of X (taken as a $3 \times 6$ matrix)
evaluated in the direction V (which represents a direction in the
ambient space) given the Euclidean Hessian operator of a function of E
(which is a $3 \times 3$ matrix). ehessE must be a function handle for which
ehessE(E,dE) returns the $3 \times 3$ Euclidean Hessian of a function
evaluated at the essential matrix E for the tangent vector dE. See
also M.egradE2egrad.

- Euclidean to Riemannian Hessian 
M.ehessE2rhess(X,egrad, ehess, V)
This function is the combination of M.ehessE2ehess and
M.ehess2rhess. See the respective comments for more information.

- Exponential map
M.exp(X,S,t)
Returns the point obtained by following the normal geodesic starting from X
with tangent S for a length t. This function does not check that S is
horizontal: it simply applies the exponential map on each copy of
SO(3)

- Logarithm map
M.log(X,Y)
The inverse of the exponential map. It is guaranteed to correspond to
the horizontal vector pointing in the direction of the shortest
geodesic from X to Y.

- Transport
M.transp(X1,X2,S1)
Transport a vector from the tangent space of X1 to the tangent space
of X2, using left translations in SO(3)^2. This transport preserves
the length of the vectors.

- Distance
M.dist(X,Y)
$\dist(X,Y)=\|\log(X,Y)\|$
Compute the shortest geodesic distance between X and Y. 

- Pair mean
M.pairmean(X,Y)
Mid-point of the shortest geodesic between X and Y.


## Example

The file essential_svd.m contains an example of the use of the
essential manifold in MANOPT. It first builds random essential
matrices A_i, i=1,..,k. It then tries to find matrices E_i, i=1,...,k
which minimize

\sum_{i=1}^k \frac{1}{2}\|E_i-A_i\|^2.

The i-th component of the Euclidean gradient is simply E_i-A_i and the
Hessian operator is the identity.

This problem is trivial, as the cost function is separable in each i
and the solution is simply E_i=A_i. However, this example shows
how to define the gradient and hessian of the cost function with k>1
and shows that indeed the optimization procedure converges
to the expected minimizer.

## Files
With respect to a vanilla installation of MANOPT, the implementation
of the essential manifold adds the following files and directories

manopt/manifolds/essential
examples/essential_svd.m


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/essential/essential_costE2cost.m
================================================
function val = essential_costE2cost(X, costE)
% Evaluate, at the point X, a cost that is defined on essential matrices.
%
% function val = essential_costE2cost(X, costE)
%
% The two [3x3] blocks RA = X(:,1:3,:) and RB = X(:,4:6,:) are combined,
% slice by slice, into E = RA' * e3hat * RB, and costE(E) is returned.
%
% costE is the function handle for the cost function in E.
%
% See also: essential_egradE2egrad essential_ehessE2ehess

% This file is part of Manopt: www.manopt.org.
% Original author: Roberto Tron, Aug. 8, 2014
% Contributors: Bamdev Mishra, May 22, 2015.

    % Skew-symmetric matrix of the cross product with [0; 0; 1].
    crossE3 = [0 -1 0; 1 0 0; 0 0 0];

    leftRot = X(:, 1:3, :);
    rightRot = X(:, 4:6, :);

    % E = RA' * e3hat * RB, evaluated on every slice at once.
    leftFactor = multiprod(multitransp(leftRot), crossE3);
    E = multiprod(leftFactor, rightRot);

    val = costE(E);
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/essential/essential_egradE2egrad.m
================================================
function egrad = essential_egradE2egrad(X, egradE)
% Map a Euclidean gradient with respect to E to one with respect to X.
%
% function egrad = essential_egradE2egrad(X, egradE)
%
% egradE is the function handle for the gradient in E; it is evaluated at
% E = RA' * e3hat * RB with RA = X(:,1:3,:) and RB = X(:,4:6,:).
%
% The output has the same layout as X: the first three columns hold the
% gradient with respect to RA, the last three the one with respect to RB.
%
% See also: essential_costE2cost essential_ehessE2ehess


% This file is part of Manopt: www.manopt.org.
% Original author: Roberto Tron, Aug. 8, 2014
% Contributors: Bamdev Mishra, May 22, 2015.

    % Skew-symmetric matrix of the cross product with [0; 0; 1].
    crossE3 = [0 -1 0; 1 0 0; 0 0 0];

    leftRot = X(:, 1:3, :);
    rightRot = X(:, 4:6, :);

    E = multiprod(multiprod(multitransp(leftRot), crossE3), rightRot);
    gradE = egradE(E);

    % Slice-wise version of egrad = e3hat * [RB*G', -RA*G].
    blockA = multiprod(rightRot, multitransp(gradE));
    blockB = -multiprod(leftRot, gradE);
    egrad = multiprod(crossE3, cat(2, blockA, blockB));
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/essential/essential_ehessE2ehess.m
================================================
function ehess = essential_ehessE2ehess(X, egradE, ehessE, S)
% Map a Euclidean Hessian with respect to E to one with respect to X.
%
% function ehess = essential_ehessE2ehess(X, egradE, ehessE, S)
%
% egradE is the function handle for the gradient in E.
% ehessE is the function handle for the Hessian in E; it is called as
%        ehessE(E, dE), with dE the variation of E induced by S.
% S is the search direction in the space of X. It is multiplied on the
%   left, block by block, by the blocks of X before being used.
%
% The output is a matrix in the space of X.
%
% See also: essential_costE2cost essential_egradE2egrad


% This file is part of Manopt: www.manopt.org.
% Original author: Roberto Tron, Aug. 8, 2014
% Contributors: Bamdev Mishra, May 22, 2015.

    % Skew-symmetric matrix of the cross product with [0; 0; 1].
    crossE3 = [0 -1 0; 1 0 0; 0 0 0];

    leftRot = X(:, 1:3, :);
    rightRot = X(:, 4:6, :);

    % Essential matrix and the gradient of the cost evaluated there.
    E = multiprod(multiprod(multitransp(leftRot), crossE3), rightRot);
    gradE = egradE(E);

    % Direction of motion X*S, computed on the flattened [3x3x2k] arrays.
    ambientDir = essential_sharp(multiprod(essential_flat(X), essential_flat(S)));
    dirA = ambientDir(:, 1:3, :);
    dirB = ambientDir(:, 4:6, :);

    % Variation of E along that direction, and the matching variation of G.
    dE = multiprod(multiprod(multitransp(leftRot), crossE3), dirB)...
        + multiprod(multiprod(multitransp(dirA), crossE3), rightRot);
    dGradE = ehessE(E, dE);

    % Slice-wise version of
    %   ehess = e3hat * [(VB*G' + RB*dG'), -(VA*G + RA*dG)].
    blockA = multiprod(dirB, multitransp(gradE)) + multiprod(rightRot, multitransp(dGradE));
    blockB = -multiprod(dirA, gradE) - multiprod(leftRot, dGradE);
    ehess = multiprod(crossE3, cat(2, blockA, blockB));

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/essential/essential_flat.m
================================================
function Hp = essential_flat(H)
% Reshape a [3x6xk] array into a [3x3x2k] array.
%
% function Hp = essential_flat(H)
%
% Only the shape changes (column-major order is kept), so slices 2i-1 and
% 2i of Hp are the left and right [3x3] blocks of H(:,:,i).
    blockRows = 3;
    blockCols = 3;
    Hp = reshape(H, blockRows, blockCols, []);
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/essential/essential_hat3.m
================================================
%Build the skew-symmetric (cross-product) matrices of a set of 3-vectors
%function [V,vShift] = essential_hat3(v)
%v is a [3xN] array whose columns are the vectors.
%V is a [3x3xN] array; V(:,:,i) is the skew-symmetric matrix such that
%V(:,:,i)*w equals the cross product of v(:,i) and w.
%vShift is equal to permute(v,[1 3 2]), i.e. the columns of v laid out
%along the third dimension.
function [V, vShift] = essential_hat3(v)
    vShift = permute(v, [1 3 2]);

    % Components of every vector, each of size [1x1xN].
    xComp = vShift(1,:,:);
    yComp = vShift(2,:,:);
    zComp = vShift(3,:,:);

    V = zeros(3, 3, size(v, 2));
    V(3,2,:) = xComp;
    V(2,3,:) = -xComp;
    V(1,3,:) = yComp;
    V(3,1,:) = -yComp;
    V(2,1,:) = zComp;
    V(1,2,:) = -zComp;
end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/essential/essential_sharp.m
================================================
function H = essential_sharp(Hp)
% Reshape a [3x3x2k] array into a [3x6xk] array.
%
% function H = essential_sharp(Hp)
%
% Only the shape changes (column-major order is kept), so consecutive
% pairs of [3x3] slices of Hp sit side by side in each slice of H.
    nRows = 3;
    nCols = 6;
    H = reshape(Hp, nRows, nCols, []);
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/essential/essentialfactory.m
================================================
function M = essentialfactory(k, strSigned)
% Manifold structure to optimize over the space of essential matrices.
%
% function M = essentialfactory(k)
% function M = essentialfactory(k, 'signed')
% function M = essentialfactory(k, 'unsigned')
%
%
% Quotient representation of the essential manifold: deals with the
% representation of the space of essential matrices M_rE. These are used in
% computer vision to represent the epipolar constraint between projected
% points in two perspective views.
%
% The space is represented as the quotient (SO(3)^2/SO(2)).
% See the following references for details:
%
%   R. Tron, K. Daniilidis,
%   "On the quotient representation of the essential manifold"
%   IEEE Conference on Computer Vision and Pattern Recognition, 2014
%
% For computational purposes, each essential matrix is represented as a
% [3x6] matrix where each [3x3] block is a rotation.
%
% The metric used is the one induced by the submersion of M_rE in SO(3)^2.
%
% Tangent vectors are represented in the Lie algebra of SO(3)^2, i.e., as
% [3x6] matrices where each [3x3] block is a skew-symmetric matrix.
% Use the function tangent2ambient(X, H) to switch from the Lie algebra
% representation to the embedding space representation in R^(3x6).
% NOTE(review): in this file tangent2ambient is a handle local to the
% factory (defined near the end); it is not stored as a field of M.
%
% By default, k = 1, and the geometry is 'signed'.
%
% Optional arguments:
%   "signed"    selects the signed version of the manifold
%   "unsigned"  selects the unsigned version of the manifold
%
% The choice only affects M.log (and therefore M.dist and M.pairmean),
% through the 'flagSigned' option passed to essential_closestRepresentative.
%
% See also rotationsfactory

% Please cite the Manopt paper as well as the research paper:
%     @InProceedings{tron2014essential,
%       Title        = {On the quotient representation of the essential manifold},
%       Author       = {Tron, R. and Daniilidis, K.},
%       Booktitle    = {IEEE Conference on Computer Vision and Pattern Recognition},
%       Year         = {2014},
%       Organization = {{IEEE CVPR}}
%     }


% This file is part of Manopt: www.manopt.org.
% Original author: Roberto Tron, Aug. 8, 2014
% Contributors: Bamdev Mishra, May 15, 2015.
%
%
% RT: General implementation note: to streamline component-wise
% computations, in tangentProjection and exponential,
% we flatten out the arguments into [3 x 3 x 2K] arrays, compute the
% components all together, and then sharp the result again into [3 x 6 x K]
% arrays.


    % Optional parameters to switch between the signed and unsigned
    % versions of the manifold.
    if ~exist('strSigned', 'var') || isempty(strSigned)
        strSigned = 'signed';
    end
    switch(strSigned)
        case 'signed'
            flagSigned = true;
        case 'unsigned'
            flagSigned = false;
        otherwise
            error('Second argument can be either empty, ''signed'', or ''unsigned''.');
    end

    
    % Number of essential matrices handled at once (product manifold).
    if ~exist('k', 'var') || isempty(k)
        k = 1;
    end
    
    % The name doubles as validation of k: anything that is not a positive
    % integer falls through to the error branch.
    if k == 1
        M.name = @() sprintf('Quotient representation of the essential manifold, %s', strSigned);
    elseif k > 1 && k == round(k)
        M.name = @() sprintf('Product of %d quotient representations of the essential manifold, %s', k, strSigned);
    else
        error('k must be an integer no less than 1.');
    end
    
    % Five degrees of freedom per essential matrix.
    M.dim = @() k*5;
    
    % Inner product and norm: plain Euclidean ones on the vectorized
    % [3x6xk] representation; the base point x is not used.
    M.inner = @(x, d1, d2) d1(:).'*d2(:);
    
    M.norm = @(x, d) norm(d(:));
    
    M.typicaldist = @() pi*sqrt(2*k);
    
    M.proj = @tangentProjection;
    function HProjHoriz=tangentProjection(X,H)
        % Takes an ambient vector H at X and returns its horizontal
        % component in the Lie algebra (skew-symmetric blocks) representation.
        
        % Project H on the tangent space of SO(3)^2
        HProj = essential_sharp(multiskew(multiprod(multitransp(essential_flat(X)), essential_flat(H))));
        
        % Compute projection on vertical component
        p = vertproj(X, HProj);
        
        % Remove the vertical part: p/2 times the hat matrices of the third
        % rows of the two rotation blocks of X.
        HProjHoriz = HProj - multiprod(p/2,[essential_hat3(permute(X(3,1:3,:),[2 3 1])) essential_hat3(permute(X(3,4:6,:),[2 3 1]))]);% BM: okay
    end
    
    
    % Skew-symmetrizes each [3x3] block of H; the base point X is not used.
    M.tangent = @(X, H) essential_sharp(multiskew(essential_flat(H)));
    
    M.egrad2rgrad=@egrad2rgrad;
    function rgrad = egrad2rgrad(X, egrad)
        % The Riemannian gradient is the projection of the Euclidean
        % gradient (here a [3x6xk] array, passed directly to M.proj).
        rgrad = M.proj(X, egrad);
    end
    
    M.ehess2rhess = @ehess2rhess;
    function rhess = ehess2rhess(X, egrad, ehess, S)
        % Reminder: S contains skew-symmeric matrices. The actual
        % direction that the point X is moved along is X*S.
        RA = p1(X);
        RB = p2(X);
        SA = p1(S);
        SB = p2(S);
        
        G = egrad; 
        GA = p1(G);
        GB = p2(G);
        
        H = ehess; 
        
        % RT: We now compute the connection, i.e. the part of the derivative
        % given by the curvature of the space (as opposed to a simple
        % Euclidean derivative).
        
        % The following is the vectorized version of connection=-[multisym(GA'*RA)*SA multisym(GB'*RB)*SB];
        connection = tangent2ambient(X,-cat(2,...
            multiprod(multisym(multiprod(multitransp(GA), RA)), SA),...
            multiprod(multisym(multiprod(multitransp(GB), RB)), SB)));
        % Project the corrected ambient Hessian back to the horizontal space.
        rhess = M.proj(X,H + connection);
    end
    
    
    M.exp = @exponential;
    function Y = exponential(X, U, t)
        % Y = X * exp(t*U), applied to each [3x3] block; t is optional and
        % the tangent vector U is used as is when it is omitted.
        if nargin == 3
            U = t*U;
        end
        
        UFlat = essential_flat(U);
        exptUFlat = rot3_exp(UFlat);
        Y = essential_sharp(multiprod(essential_flat(X), exptUFlat));
    end
    
    % The exponential map is also used as the retraction.
    M.retr = @exponential;
    
    M.log = @logarithm; 
    function U = logarithm(X, Y)
        % Tangent vector at X built from the representative of Y returned by
        % essential_closestRepresentative (signed or unsigned, per flagSigned).
        
        % essential_closestRepresentative works on [6x3xk] arrays with the
        % two rotation blocks stacked vertically.
        QX = [X(:,1:3,:);X(:,4:6,:)];
        QY = [Y(:,1:3,:);Y(:,4:6,:)];
        QYr = essential_closestRepresentative(QX,QY,'flagSigned',flagSigned);
        % Back to the [3x6xk] layout used by this factory.
        Yr = [QYr(1:3,:,:) QYr(4:6,:,:)];
        % Block-wise logarithm in SO(3): log(X_block' * Yr_block).
        U = zeros(size(X));
        U(:,1:3,:) = rot3_log(multiprod(multitransp(X(:,1:3,:)),Yr(:,1:3,:)));
        U(:,4:6,:) = rot3_log(multiprod(multitransp(X(:,4:6,:)),Yr(:,4:6,:)));
    end
    
    M.hash = @(X) ['z' hashmd5(X(:))];
    
    M.rand = @() randessential(k);
    function Q = randessential(N)
        % Generates random essential matrices.
        %
        % function Q = randessential(N)
        %
        % Q is a [3x6xN] array where each [3x3] block is produced by
        % randrot(3, N) (presumably a uniformly distributed rotation;
        % confirm against randrot).
        
        % This file is part of Manopt: www.manopt.org.
        % Original author: Roberto Tron, Aug. 8, 2014
        % Contributors:
        % Change log:
        
        if nargin < 1
            N = 1;
        end
        
        Q = [randrot(3,N) randrot(3,N)];
    end
    
    M.randvec = @randomvec;
    function U = randomvec(X)
        % Random skew-symmetric blocks pushed through the tangent projection
        % at X, then scaled to unit norm.
        U = tangentProjection(X, essential_sharp(randskew(3, 2*k)));
        U = U / sqrt(M.inner([],U,U));
    end
    
    M.lincomb = @matrixlincomb;
    
    M.zerovec = @(x) zeros(3, 6, k);
    
    M.transp = @transport;
    function S2 = transport(X1, X2, S1)
        % Transport a vector from the tangent space at X1 to the tangent
        % space at X2. This transport uses the left translation of the
        % ambient group and preserves the norm of S1. The left translation
        % aligns the vertical spaces at the two elements.
        
        % Group operation in the ambient group, X12=X2'*X1
        X12 = essential_sharp(multiprod(multitransp(essential_flat(X2)),essential_flat(X1)));
        X12Flat = essential_flat(X12);
        
        % Left translation, S2=X12*S*X12'
        S2 = essential_sharp(multiprod(X12Flat,multiprod(essential_flat(S1),multitransp(X12Flat))));
    end
    
    M.pairmean = @pairmean;
    function Y = pairmean(X1, X2)
        % Point reached from X1 by following half of log(X1, X2).
        V = M.log(X1, X2);
        Y = M.exp(X1, .5*V);
    end
    
    % Distance is the norm of the logarithm.
    M.dist = @(x, y) M.norm(x, M.log(x, y)); 
    
    % Vectorization is a plain reshape, which is why vec/mat are reported
    % as isometries for the Euclidean inner product above.
    M.vec = @(x, u_mat) u_mat(:);
    M.mat = @(x, u_vec) reshape(u_vec, [3, 6, k]);
    M.vecmatareisometries = @() true;
    
    
    % Helper handles used by the nested functions above. They are assigned
    % here, before the factory returns, and the nested functions read them
    % from the shared parent workspace when they are eventually called.
    
    % Left and right [3x3] blocks of a [3x6xk] array.
    p1 = @(X) X(:,1:3,:);
    p2 = @(X) X(:,4:6,:);
    
    
    % Sum, over the two blocks, of the third row of the rotation applied to
    % the vee3 vector of the corresponding block of H.
    vertproj = @(X,H) multiprod(X(3,1:3,:),permute(vee3(H(:,1:3,:)),[1 3 2]))+multiprod(X(3,4:6,:),permute(vee3(H(:,4:6,:)),[1 3 2]));
    
    % Lie algebra representation -> ambient representation: X*H block by block.
    tangent2ambient = @(X, H) essential_sharp(multiprod(essential_flat(X), essential_flat(H)));
    
    
end


%% Some functions used by the essential factory

function v = vee3(V)
    % Extract, from each [3x3] slice of V, the 3-vector of its
    % skew-symmetric part (the inverse of the hat operation for
    % skew-symmetric input). The result has one column per slice.
    row1 = V(3,2,:) - V(2,3,:);
    row2 = V(1,3,:) - V(3,1,:);
    row3 = V(2,1,:) - V(1,2,:);
    v = squeeze(cat(1, row1, row2, row3)) / 2;
end


% Exponential map in SO(3), evaluated with Rodrigues' formula
%  function R = rot3_exp(V)
% V must be a [3x3xN] array of [3x3] skew-symmetric matrices.
% R is the [3x3xN] array of the corresponding matrix exponentials.
function R = rot3_exp(V)
    rotVec = vee3(V);
    angle = cnorm(rotVec);

    % Normalize the axes; angles below 1e-15 are divided by 1 instead, so
    % that (near-)zero rotation vectors do not produce a division by zero.
    safeAngle = angle;
    safeAngle(angle < 1e-15) = 1;
    unitAxis = rotVec ./ ([1;1;1]*safeAngle);

    % Rodrigues' formula: R = cos*I + sin*hat(u) + (1-cos)*u*u'
    angle = shiftdim(angle, -1);
    cosAngle = cos(angle);
    sinAngle = sin(angle);
    [axisHat, axisShift] = essential_hat3(unitAxis);
    axisOuter = multiprod(axisShift, multitransp(axisShift));
    R = multiprod(eye(3), cosAngle) + multiprod(axisHat, sinAngle) + multiprod(axisOuter, 1 - cosAngle);
end


% Logarithm map in SO(3)
%  function V = rot3_log(R)
% R is a [3x3xN] array of rotations.
% V is a [3x3xN] array of [3x3] skew-symmetric matrices
function V = rot3_log(R)
    skewPart = multiskew(R);

    % Rotation angle from its cosine (via the trace) and its sine (via the
    % norm of the skew-symmetric part).
    cosAngle = (multitrace(R)' - 1)/2;
    sinAngle = cnorm(vee3(skewPart));
    angle = atan2(sinAngle, cosAngle);

    % log(R) = skew(R) / sinc(angle), slice by slice.
    V = skewPart;
    nSlices = size(R, 3);
    for iSlice = 1:nSlices
        V(:,:,iSlice) = V(:,:,iSlice) / sincN(angle(iSlice));
    end
end


function sx = sincN(x)
    % Unnormalized sinc, sin(x)/x, with the value at x == 0 set to 1.
    atZero = (x == 0);
    sx = sin(x) ./ x;
    sx(atZero) = 1;
end

function nv = cnorm(v)
    % Euclidean norm of each column of v (sum along the default dimension).
    squared = v.^2;
    nv = sqrt(sum(squared));
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/essential/privateessential/essential_closestRepresentative.m
================================================
function Q2r=essential_closestRepresentative(Q1,Q2,varargin)
%Rotate Q2 by the minimizing angle found by essential_distMinAngle
%function Q2r=essential_closestRepresentative(Q1,Q2,varargin)
%Q1 and Q2 are [6x3xN] arrays with two [3x3] blocks stacked vertically.
%varargin is forwarded unchanged to essential_distMinAngle, which returns
%the angle tMin and the (possibly sign-flipped) version of Q2 to use.
%Q2r contains, for every slice, both blocks of that Q2 multiplied on the
%left by the rotation about the z axis of angle tMin.
[tOpt,~,Q2]=essential_distMinAngle(Q1,Q2,varargin{:});
nFirst=size(Q1,3);
nSecond=size(Q2,3);

%replicate a single Q2 so that it can be paired with every slice of Q1
if nFirst>1 && nSecond==1
    Q2=repmat(Q2,[1 1 nFirst]);
end
nPairs=max(nFirst,nSecond);

topRows=1:3;
bottomRows=4:6;
Q2r=zeros(size(Q2));
for iPair=1:nPairs
    angle=tOpt(iPair);
    %rotation about the z axis
    Rz=[cos(angle) -sin(angle) 0; sin(angle) cos(angle) 0; 0 0 1];
    Q2r(topRows,1:3,iPair)=Rz*Q2(topRows,1:3,iPair);
    Q2r(bottomRows,1:3,iPair)=Rz*Q2(bottomRows,1:3,iPair);
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/essential/privateessential/essential_distMinAngle.m
================================================
function [tMin,fMin,Q2Flip,output]=essential_distMinAngle(Q1,Q2,varargin)
%Find the angle (and sign flip) of Q2 that minimizes the cost with respect to Q1
%function [tMin,fMin,Q2Flip,output]=essential_distMinAngle(Q1,Q2,varargin)
%Inputs
%   Q1,Q2   [6x3xN] arrays with two [3x3] blocks stacked vertically. If
%           one of them has a single slice, it is replicated to match the
%           other.
%Optional arguments
%   'flagModTMin',value     if true, tMin is passed through modAngle
%   'signed'                same as 'flagSigned',true
%   'flagSigned',value      if true only the pair with kFlip=1 is evaluated;
%                           if false (default) the four values of kFlip
%                           accepted by essential_distMinAnglePair are
%                           evaluated and the one with lowest cost is kept
%Outputs
%   tMin    minimizing angle ([Nx1] when N>1)
%   fMin    cost at tMin ([Nx1] when N>1); in the unsigned case it is
%           floored at zero
%   Q2Flip  version of Q2 (with the selected flip) for which tMin was
%           computed ([6x3xN])
%   output  struct (struct array [Nx1] when N>1) with fields tMin, fMin,
%           tBreak1, tBreak2 for all the evaluated flips, and idxMin, the
%           index of the selected flip (always 1 in the signed case)
%
%Note: the field idxMin is present in both the signed and unsigned cases,
%and in the preallocated struct array, so that the per-slice results can
%always be stored in output(iQ). Previously the unsigned case added idxMin
%to a 4-field struct, which could not be assigned into the preallocated
%array.
NQ1=size(Q1,3);
NQ2=size(Q2,3);

if NQ1==1 && NQ2>1
    Q1=repmat(Q1,[1 1 NQ2]);
    NQ1=NQ2;
end
if NQ1>1 && NQ2==1
    Q2=repmat(Q2,[1 1 NQ1]);
end

if NQ1>1
    %batch case: process each slice with a recursive call
    tMin=zeros(NQ1,1);
    fMin=zeros(NQ1,1);
    Q2Flip=zeros(6,3,NQ1);
    if nargout>3
        output=repmat(struct('tMin',[],'fMin',[],'tBreak1',[],'tBreak2',[],'idxMin',[]),NQ1,1);
    end
    for iQ=1:NQ1
        if nargout>3
            [tMin(iQ),fMin(iQ),Q2Flip(:,:,iQ),output(iQ)]=...
                essential_distMinAngle(Q1(:,:,iQ),Q2(:,:,iQ),varargin{:});
        else
            [tMin(iQ),fMin(iQ),Q2Flip(:,:,iQ)]=...
                essential_distMinAngle(Q1(:,:,iQ),Q2(:,:,iQ),varargin{:});
        end
    end
else
    flagModTMin=false;
    flagSigned=false;

    %optional parameters
    ivarargin=1;
    while(ivarargin<=length(varargin))
        switch(lower(varargin{ivarargin}))
            case 'flagmodtmin'
                ivarargin=ivarargin+1;
                flagModTMin=varargin{ivarargin};
            case 'signed'
                flagSigned=true;
            case 'flagsigned'
                ivarargin=ivarargin+1;
                flagSigned=varargin{ivarargin};
            otherwise
                    error(['Argument ' varargin{ivarargin} ' not valid!'])
        end
        ivarargin=ivarargin+1;
    end

    tMin=zeros(4,1);
    fMin=zeros(4,1);
    tBreak1=zeros(4,1);
    tBreak2=zeros(4,1);
    Q2Flip=zeros(6,3,4);
    if ~flagSigned
        %evaluate all four flips
        for k=1:4
            [tMin(k),fMin(k),tBreak1(k),tBreak2(k),Q2Flip(:,:,k)]=...
                essential_distMinAnglePair(Q1,Q2,k);
        end
    else
        %only the unflipped pair; this overwrites the preallocated arrays
        [tMin,fMin,tBreak1,tBreak2,Q2Flip]=...
            essential_distMinAnglePair(Q1,Q2,1);
    end    

    if flagModTMin
        tMin=modAngle(tMin);
    end

    if nargout>3
        %same fields, in the same order, as the preallocated struct array
        output.tMin=tMin;
        output.fMin=fMin;
        output.tBreak1=tBreak1;
        output.tBreak2=tBreak2;
        %only kFlip=1 is evaluated in the signed case; overwritten below
        %in the unsigned case
        output.idxMin=1;
    end

    if ~flagSigned
        %keep the flip with lowest cost
        [fMin,idxMin]=min(fMin);
        fMin=max(fMin,0);
        tMin=tMin(idxMin);
        Q2Flip=Q2Flip(:,:,idxMin);
        if nargout>3
            output.idxMin=idxMin;
        end
    end
end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/essential/privateessential/essential_distMinAnglePair.m
================================================
function [tMin,fMin,tBreak1,tBreak2,Q2,tMinAll]=essential_distMinAnglePair(Q1,Q2,kFlip)
%Minimize the cost over the angle for one sign flip of Q2
%function [tMin,fMin,tBreak1,tBreak2,Q2,tMinAll]=essential_distMinAnglePair(Q1,Q2,kFlip)
%Q1 and Q2 are [6x3] matrices with two [3x3] blocks stacked vertically.
%kFlip (1 to 4) selects which rows of Q2 are negated before the search:
%   1: none, 2: rows [2 3 4 6], 3: rows [4 5], 4: rows [2 3 5 6]
%The flipped Q2 is returned together with the outputs of
%essential_distMinAnglePair_base evaluated on the two products
%Q2block*Q1block'.

%rows of Q2 whose sign is changed for the requested flip
switch kFlip
    case 1
        negatedRows=[];
    case 2
        negatedRows=[2 3 4 6];
    case 3
        negatedRows=[4 5];
    case 4
        negatedRows=[2 3 5 6];
    otherwise
        error('Value of kFlip invalid')
end
Q2(negatedRows,:)=-Q2(negatedRows,:);

%relative rotations between corresponding blocks
relTop=Q2(1:3,:)*Q1(1:3,:)';
relBottom=Q2(4:6,:)*Q1(4:6,:)';
[tMin,fMin,tBreak1,tBreak2,tMinAll]=essential_distMinAnglePair_base(relTop,relBottom);


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/essential/privateessential/essential_distMinAnglePair_base.m
================================================
function [tMin,fMin,tBreak1,tBreak2,tMinAll]=essential_distMinAnglePair_base(Q211,Q212)
%Minimize over t the cost
%   f(t)=acos((m1*sin(t+p1)+c1-1)/2)^2+acos((m2*sin(t+p2)+c2-1)/2)^2
%where (m1,p1,c1) and (m2,p2,c2) are obtained from Q211 and Q212 with
%essential_distMinAnglePair_discontinuityDistance.
%function [tMin,fMin,tBreak1,tBreak2,tMinAll]=essential_distMinAnglePair_base(Q211,Q212)
%Outputs
%   tMin, fMin          minimizer found and corresponding cost
%   tBreak1, tBreak2    points returned by
%                       essential_distMinAnglePair_discontinuityDistance for
%                       each term; they delimit the two search intervals
%   tMinAll             all the local minimizers found along the way
%
%The search is driven by two internal flags. With the current settings
%(flagCheckFirstDer and flagUseNewton both true), an interval is searched
%only if the one-sided derivatives at its ends have different signs, and
%the search uses essential_distMinAnglePair_dfNewton. The alternative
%paths use fminbnd on a handle to essential_distMinAnglePair_ft bound to
%the coefficients above (previously the function was called with no
%arguments instead of being passed as a handle, which would have raised
%an error as soon as one of those paths was enabled).
flagCheckFirstDer=true;
flagUseNewton=true;     %Note: requires flagCheckFirstDer=true
tolMZero=1e-15;
tMinAll=[];

[tBreak1,~,~,c1,m1,p1]=essential_distMinAnglePair_discontinuityDistance(Q211);
[tBreak2,~,~,c2,m2,p2]=essential_distMinAnglePair_discontinuityDistance(Q212);

%check for the degenerate case where the cost is constant
if abs(m1)<tolMZero && abs(m2)<tolMZero
    tMin=0;
    fMin=2*pi^2;
    tMinAll=0;
else
    %cost as a function of t only, used by the fminbnd-based searches
    ft=@(t) essential_distMinAnglePair_ft(t,m1,p1,c1,m2,p2,c2);

    if abs(modAngle(tBreak1-tBreak2))<1e-8
        %the two break points coincide (modulo 2*pi)
        tMin=tBreak1+pi;
        fMin=0;
    else
        %first search interval: [tSearch1,tSearch2], with tSearch1 shifted
        %by a full turn if necessary so that it precedes tSearch2
        tSearch1=tBreak1;
        tSearch2=tBreak2;
        if tSearch1>tSearch2
            tSearch1=tSearch1-2*pi;
        end

        if flagCheckFirstDer
            %compute derivatives of each term at discontinuity points
            df1Break1=essential_distMinAnglePair_computeDfBreak(tBreak1,Q211);
            df2Break2=essential_distMinAnglePair_computeDfBreak(tBreak2,Q212);
            %compute derivative of each term at other's discontinuity
            %(unroll two calls to dfi)
            theta1Break2=acos(clip((m1*sin(tBreak2+p1)+c1-1)/2));
            df1Break2=-theta1Break2*(m1*cos(tBreak2+p1))/(2*sin(theta1Break2));
            theta2Break1=acos(clip((m2*sin(tBreak1+p2)+c2-1)/2));
            df2Break1=-theta2Break1*(m2*cos(tBreak1+p2))/(2*sin(theta2Break1));

            %compute left and right derivatives of sum of the two terms
            dfBreak1n=+df1Break1+df2Break1;
            dfBreak1p=-df1Break1+df2Break1;
            dfBreak2n=+df2Break2+df1Break2;
            dfBreak2p=-df2Break2+df1Break2;

            %search the interval from tBreak1 to tBreak2 only if the
            %derivative changes sign between its two ends
            flagSearch1=false;
            if sign(dfBreak1p)~=sign(dfBreak2n)
                if flagUseNewton
                    %parabolic prediction of min
                    tMin0=tSearch1-dfBreak1p*(tSearch2-tSearch1)/(dfBreak2n-dfBreak1p);
                    [tMin,fMin]=essential_distMinAnglePair_dfNewton(m1,p1,c1,m2,p2,c2,tMin0,tSearch1,tSearch2);
                else
                    [tMin,fMin]=fminbnd(ft,tSearch1,tSearch2);
                end
                tMinAll=[tMinAll tMin];
                flagSearch1=true;
            end
            %second search interval: [tSearch2,tSearch1+2*pi]
            tSearch1=tSearch1+2*pi;
            if sign(dfBreak2p)~=sign(dfBreak1n)
                if flagUseNewton
                    %parabolic prediction of min
                    tMin0=tSearch2-dfBreak2p*(tSearch1-tSearch2)/(dfBreak1n-dfBreak2p);
                    [tMin2,fMin2]=essential_distMinAnglePair_dfNewton(m1,p1,c1,m2,p2,c2,tMin0,tSearch2,tSearch1);
                else
                    [tMin2,fMin2]=fminbnd(ft,tSearch2,tSearch1);
                end
                %keep the better of the two local minimizers
                if ~flagSearch1 || (flagSearch1 && fMin2<fMin)
                    tMin=tMin2;
                    fMin=fMin2;
                end
                tMinAll=[tMinAll tMin2];
            end
        else
            %no derivative check: search both intervals and keep the best
            [tMin1,fMin1]=fminbnd(ft,tSearch1,tSearch2);
            tSearch1=tSearch1+2*pi;
            [tMin2,fMin2]=fminbnd(ft,tSearch2,tSearch1);
            if fMin1<fMin2
                tMin=tMin1;
                fMin=fMin1;
            else
                tMin=tMin2;
                fMin=fMin2;
            end
        end
    end
end

function v=clip(v)
%Saturate v to the interval [-1,1] (lower bound applied first)
atLeastMinusOne=max(-1,v);
v=min(1,atLeastMinusOne);


% function f=fi(m,p,c,t)
% f=acos((m*sin(t+p)+c-1)/2);
% 
% function d=dfi2(m,p,theta,t)
% dtheta= -(m*cos(t+p))/(2*sin(theta));
% d=theta*dtheta;
% 
% function dd=ddfi2(m,p,theta,t)
% eztuSq=(m*cos(t+p)/(2*sin(theta)))^2;
% dd=eztuSq+theta/2*cot(theta/2)*(1-eztuSq);
% 
% function d=dfi(m,p,c,t)
% theta=acos((m*sin(t+p)+c-1)/2);
% dtheta= -(m*cos(t+p))/(2*sin(theta));
% d=theta*dtheta;
% 
% function dd=ddfi(m,p,c,t)
% theta=acos((m*sin(t+p)+c-1)/2);
% eztuSq=(m*cos(t+p)/(2*sin(theta)))^2;
% dd=eztuSq+theta/2*cot(theta/2)*(1-eztuSq);


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/essential/privateessential/essential_distMinAnglePair_computeDfBreak.m
================================================
function dfBreak=essential_distMinAnglePair_computeDfBreak(tBreak,Q21)
%Magnitude of the derivative of one term of the cost at its break point
%function dfBreak=essential_distMinAnglePair_computeDfBreak(tBreak,Q21)
%tBreak is the angle at which the term is evaluated and Q21 is the [3x3]
%matrix associated with that term.

% The code below is an optimization exploiting the structure of RBreak to
% substitute the following code
%     RBreak=Q1'*[c -s 0; s c 0; 0 0 1]*Q2;
% 
%     %compute v0 such that RBreak=rot(pi*v0)
%     [U,~,~]=svd(RBreak+eye(3));
%     v0=U(:,1);
% 
%     dfBreak=pi*abs(Q1(3,:)*v0);

cosT=cos(tBreak);
sinT=sin(tBreak);

%rotation about the z axis of angle tBreak, applied to Q21
RzBreak=[cosT -sinT 0; sinT cosT 0; 0 0 1];
rotated=RzBreak*Q21;

%first left singular vector of rotated+I; only its third entry is needed
[U,~,~]=svd(rotated+eye(3));
thirdEntry=U(3,1);
dfBreak=pi*abs(thirdEntry);


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/essential/privateessential/essential_distMinAnglePair_dfNewton.m
================================================
%Support function for essential_distMinAnglePair implementing Newton's search
%function [tMin,fMin]=essential_distMinAnglePair_dfNewton(m1,p1,c1,m2,p2,c2,tMin,tLow,tHigh)
%Starting from the initial guess tMin, iterates t <- t - d/dd, where d and
%dd are the sums over the two terms of the quantities computed below from
%theta_i=acos((m_i*sin(t+p_i)+c_i-1)/2). Every iterate is kept inside
%[tLow+1e-8, tHigh-1e-8]. The iteration stops when two consecutive
%iterates differ by less than 1e-8, or after 100 iterations.
%fMin is theta_1^2+theta_2^2 as evaluated during the last iteration (i.e.,
%at the iterate preceding the final update of tMin).
function [tMin,fMin]=essential_distMinAnglePair_dfNewton(m1,p1,c1,m2,p2,c2,tMin,tLow,tHigh)
tolDist=1e-8;
maxIter=100;
for iter=1:maxIter
    %The code below unrolls the following calls
    %     f1=fi(m1,p1,c1,tMin);
    %     f2=fi(m2,p2,c2,tMin);
    %     d=dfi2(m1,p1,f1,tMin)+dfi2(m2,p2,f2,tMin);
    %     dd=ddfi2(m1,p1,f1,tMin)+ddfi2(m2,p2,f2,tMin);

    %angles of the two terms at the current iterate
    theta1=acos(clip((m1*sin(tMin+p1)+c1-1)/2));
    theta2=acos(clip((m2*sin(tMin+p2)+c2-1)/2));

    %shared sub-expressions
    mCos1=m1*cos(tMin+p1);
    mCos2=m2*cos(tMin+p2);
    twoSin1=2*sin(theta1);
    twoSin2=2*sin(theta2);

    %first-order term
    d=(-theta1*mCos1/twoSin1)+(-theta2*mCos2/twoSin2);

    %second-order term
    ratioSq1=(mCos1/twoSin1)^2;
    ratioSq2=(mCos2/twoSin2)^2;
    dd1=ratioSq1+theta1/2*cot(theta1/2)*(1-ratioSq1);
    dd2=ratioSq2+theta2/2*cot(theta2/2)*(1-ratioSq2);
    dd=dd1+dd2;

    %Newton update, clamped to the interior of the search interval
    tPrev=tMin;
    tMin=max(tLow+tolDist,min(tHigh-tolDist,tPrev-d/dd));
    if abs(tMin-tPrev)<tolDist
        break
    end
end
fMin=theta1^2+theta2^2;

function v=clip(v)
v=min(1,max(-1,v));


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/essential/privateessential/essential_distMinAnglePair_discontinuityDistance.m
================================================
function [tBreak,a,b,c,m,p]=essential_distMinAnglePair_discontinuityDistance(Q21)
%Coefficients of trace(Rz(t)*Q21) and location of the associated breakpoint
%function [tBreak,a,b,c,m,p]=essential_distMinAnglePair_discontinuityDistance(Q21)
%With Rz(t)=[cos(t) -sin(t) 0; sin(t) cos(t) 0; 0 0 1] we have
%   trace(Rz(t)*Q21) = a*cos(t)+b*sin(t)+c = m*sin(t+p)+c
%Input
%   Q21     3x3 matrix
%Outputs
%   tBreak  -pi/2-p, i.e., the angle t at which sin(t+p)=-1
%   a,b,c   a=Q21(1,1)+Q21(2,2), b=Q21(1,2)-Q21(2,1), c=Q21(3,3)
%   m       amplitude norm([a;b])
%   p       phase such that m*cos(p)=b and m*sin(p)=a
a=Q21(1,1)+Q21(2,2);
b=Q21(1,2)-Q21(2,1);
c=Q21(3,3);

m=norm([a;b]);
p=sign(a)*acos(clip(b/m));
if a==0 && b<0
    %sign(0)=0 would give p=0 here, but cos(p)=b/m=-1 requires p=pi
    p=pi;
end

%tBreak=modAngle(3/2*pi-p);
tBreak=-0.5*pi-p;

%Saturate v to the interval [-1,1]
function v=clip(v)
v=min(1,max(-1,v));


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/essential/privateessential/essential_distMinAnglePair_ft.m
================================================
%Evaluate cost function for closest representative search given coefficients
%function ft=essential_distMinAnglePair_ft(t,m1,p1,c1,m2,p2,c2)
%Evaluates the cost function used by essential_distMinAnglePair to find the
%closest representative in the equivalence class of a QREM
%   ft = acos((m1*sin(t+p1)+c1-1)/2)^2 + acos((m2*sin(t+p2)+c2-1)/2)^2
%If m2,p2,c2 are omitted or empty, get value of a single term
%The argument of acos is saturated to [-1,1] (as done in
%essential_distMinAnglePair_dfNewton), so that rounding errors cannot
%produce a complex result. t may be an array; ft is then evaluated
%element-wise.
function ft=essential_distMinAnglePair_ft(t,m1,p1,c1,m2,p2,c2)
ft=termCost(t,m1,p1,c1);
if exist('m2','var') && ~isempty(m2)
    ft=ft+termCost(t,m2,p2,c2);
end

%Squared angle for a single term
function f=termCost(t,m,p,c)
f=acos(clip((m*sin(t+p)+c-1)/2)).^2;

%Saturate v to the interval [-1,1]
function v=clip(v)
v=min(1,max(-1,v));

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/essential/privateessential/essential_distMinAnglePair_ftFromQ.m
================================================
function [ft,tBreak]=essential_distMinAnglePair_ftFromQ(t,Q1,Q2,varargin)
%Evaluate the essential_distMinAnglePair cost at t directly from Q1 and Q2
%function [ft,tBreak]=essential_distMinAnglePair_ftFromQ(t,Q1,Q2,varargin)
%Optional name/value arguments (names are case insensitive)
%   'kFlip',k   index passed to essential_flipAmbiguity for Q2 (default 1)
%   'term',s    'first', 'second' or 'both' (default): which terms of the
%               cost are accumulated
%Any other option name is displayed and raises an error.
%Outputs
%   ft      sum of the selected terms, each evaluated with
%           essential_distMinAnglePair_ft
%   tBreak  row with the breakpoints of the selected terms, as returned by
%           essential_distMinAnglePair_discontinuityDistance
kFlip=1;
term='both';

%parse the optional arguments
idx=1;
nbArgs=length(varargin);
while idx<=nbArgs
    optName=lower(varargin{idx});
    idx=idx+1;
    switch optName
        case 'kflip'
            kFlip=varargin{idx};
        case 'term'
            term=lower(varargin{idx});
        otherwise
            disp(varargin{idx-1})
            error('Argument not valid!')
    end
    idx=idx+1;
end

Q2=essential_flipAmbiguity(Q2,kFlip);

useFirst=any(strcmp(term,{'first','both'}));
useSecond=any(strcmp(term,{'second','both'}));

tBreak=[];
ft=0;
if useFirst
    %relative matrix built from the essential_getR1 parts of Q2 and Q1
    R1ofQ1=essential_getR1(Q1);
    R1ofQ2=essential_getR1(Q2);
    [tBreakFirst,~,~,cFirst,mFirst,pFirst]=essential_distMinAnglePair_discontinuityDistance(R1ofQ2*R1ofQ1');
    tBreak=[tBreak tBreakFirst];
    ft=ft+essential_distMinAnglePair_ft(t,mFirst,pFirst,cFirst);
end

if useSecond
    %relative matrix built from the essential_getR2 parts of Q2 and Q1
    R2ofQ2=essential_getR2(Q2);
    R2ofQ1=essential_getR2(Q1);
    [tBreakSecond,~,~,cSecond,mSecond,pSecond]=essential_distMinAnglePair_discontinuityDistance(R2ofQ2*R2ofQ1');
    tBreak=[tBreak tBreakSecond];
    ft=ft+essential_distMinAnglePair_ft(t,mSecond,pSecond,cSecond);
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/essential/privateessential/essential_distMinAnglePair_test.m
================================================
function essential_distMinAnglePair_test
%Visual test for essential_distMinAnglePair
%Builds two pairs of rotations Q1, Q2, runs essential_distMinAnglePair on
%them, and plots (through check_der) the cost ft and its derivative dft as
%functions of the angle t, together with the returned breakpoints and the
%returned minimizer.
resetRands(3) %presumably seeds the random generators for repeatability (confirm)
flagDegenerateCase=true; %if true, Q2 is generated from Q1 with essential_randomVerticalMotion
k=2; %flip index passed to the essential_flipAmbiguity_* functions and to essential_distMinAnglePair

e3=[0;0;1];
Q1=rot_randn([],[],2);
if flagDegenerateCase
    %stack the two 3x3 slices into a 6x3 matrix, generate Q2, and unstack
    Q1b=[Q1(:,:,1);Q1(:,:,2)];
    Q2b=essential_randomVerticalMotion(Q1b);
    Q2=cat(3,Q2b(1:3,:),Q2b(4:6,:));
else
    Q2=rot_randn([],[],2);
end
%rot(t*e3): presumably the rotation by angle t about the z axis (confirm)
Rzt=@(t) rot(t*e3);

%the two slices of Q2, flipped with index k and left-multiplied by Rzt(t)
Q21tk=@(t,k) Rzt(t)*essential_flipAmbiguity_R1(Q2(:,:,1),k);
Q22tk=@(t,k) Rzt(t)*essential_flipAmbiguity_R2(Q2(:,:,2),k);

figure(1)
[tMin,fMin,tBreak1,tBreak2,Q2Flip]=essential_distMinAnglePair([Q1(:,:,1);Q1(:,:,2)],[Q2(:,:,1);Q2(:,:,2)],k);
tMin=modAngle(tMin);
%cost: sum of the squared rot_dist between the slices of Q1 and the transformed slices of Q2
ft=@(t) (rot_dist(Q1(:,:,1),Q21tk(t,k))^2+rot_dist(Q1(:,:,2),Q22tk(t,k))^2);
%candidate derivative of ft with respect to t, checked against ft by check_der
dft=@(t) 2*e3'*(Q1(:,:,1)*logrot(Q1(:,:,1)'*Q21tk(t,k))+Q1(:,:,2)*logrot(Q1(:,:,2)'*Q22tk(t,k)));
check_der(ft,dft,'angle')
hold on
%mark the cost at the two breakpoints (red and green crosses)
plot(tBreak1,ft(tBreak1),'r+')
plot(tBreak2,ft(tBreak2),'g+')

%mark the minimizer and minimum value returned by essential_distMinAnglePair
plot(tMin,fMin,'kx','MarkerSize',20)

hold off


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/essential/privateessential/modAngle.m
================================================
%Wrap an angle (in radians) to the equivalent angle between -pi and pi
%function a=modAngle(a)
%The angle is shifted by pi, reduced modulo 2*pi, and shifted back.
function a=modAngle(a)
fullTurn=2*pi;
shifted=a+pi;
a=mod(shifted,fullTurn)-pi;


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/euclidean/centeredmatrixfactory.m
================================================
function M = centeredmatrixfactory(m, n, rows_or_cols)
% Manopt structure for the linear space of m-by-n centered matrices
%
% function M = centeredmatrixfactory(m, n)
% function M = centeredmatrixfactory(m, n, 'cols')
% function M = centeredmatrixfactory(m, n, 'rows')
%
% Returns M, a structure for Manopt describing the Euclidean space of
% m-by-n matrices X such that X*ones(n, 1) = 0 ('cols', the default) or
% such that ones(1, m)*X = 0 ('rows').
%
% The metric is the standard Frobenius distance with its associated trace
% inner product. Projections (M.proj, M.egrad2rgrad, M.ehess2rhess,
% M.tangent) all reduce to applying the centering operator, which is also
% exposed as the non-standard field M.center.
%
% See also: euclideanfactory

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, July 3, 2015.
% Contributors: 
% Change log: 
%
%   Jan. 6, 2017 (NB):
%       M.tangent = M.proj now, instead of being identity. This is notably
%       necessary so that checkgradient will pick up on gradients that do
%       not lie in the appropriate tangent space.

    if nargin < 3 || isempty(rows_or_cols)
        rows_or_cols = 'cols';
    end
    centering_mode = lower(rows_or_cols);
    
    % Select along which dimension means are taken, and how many linear
    % constraints the centering imposes.
    switch centering_mode
        case 'cols'
            averaged_dim = 2;
            nb_constraints = m;
        case 'rows'
            averaged_dim = 1;
            nb_constraints = n;
        otherwise
            error('The third input must be either ''rows'' or ''cols''.');
    end
    
    % Centering operator: subtracts the mean taken along averaged_dim.
    center = @(X) bsxfun(@minus, X, mean(X, averaged_dim));
    
    M.dim = @() m*n - nb_constraints;
    
    % Non-standard field for a Manopt manifold, kept for convenience.
    M.center = center;

    M.name = @() sprintf('Space of size %d x %d matrices with centered %s', ...
                         m, n, centering_mode);
    
    M.inner = @(x, d1, d2) d1(:).'*d2(:);
    
    M.norm = @(x, d) norm(d, 'fro');
    
    M.dist = @(x, y) norm(x-y, 'fro');
    
    M.typicaldist = @() sqrt(m*n - nb_constraints);
    
    M.proj = @(x, d) center(d);
    
    M.egrad2rgrad = M.proj;
    
    M.ehess2rhess = @(x, eg, eh, d) center(eh);
    
    M.tangent = M.proj;
    
    M.exp = @exponential;
    
    M.retr = M.exp;
    
    M.log = @(x, y) y-x;

    M.hash = @(x) ['z' hashmd5(x(:))];
    
    M.randvec = @(X) random_tangent();
    
    M.rand = @() center(randn(m, n));
    
    M.lincomb = @matrixlincomb;
    
    M.zerovec = @(x) zeros(m, n);
    
    M.transp = @(x1, x2, d) d;
    
    M.pairmean = @(x1, x2) .5*(x1+x2);
    
    M.vec = @(x, u_mat) u_mat(:);
    M.mat = @(x, u_vec) reshape(u_vec, [m, n]);
    M.vecmatareisometries = @() true;

    % Exponential map of a linear space: a straight line from x along d,
    % with unit step if t is not provided.
    function y = exponential(x, d, t)
        if nargin ~= 3
            y = x + d;
            return;
        end
        y = x + t*d;
    end

    % Random centered matrix with unit Frobenius norm.
    function U = random_tangent()
        U = center(randn(m, n));
        U = U / norm(U, 'fro');
    end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/euclidean/euclideancomplexfactory.m
================================================
function M = euclideancomplexfactory(m, n)
% Manopt structure for the vector space of complex matrices or arrays.
%
% function M = euclideancomplexfactory(m)
% function M = euclideancomplexfactory(m, n)
% function M = euclideancomplexfactory([n1, n2, ...])
%
% Returns M, a structure describing the vector space of complex matrices,
% as a manifold for Manopt.
%
% The complex plane is identified with R^2: the inner product between two
% arrays A and B of the same size is real(A(:)'*B(:)), which for matrices
% is real(trace(A'*B)). This is a real inner product, not the classical
% complex Euclidean one; it determines what gradients and Hessians are for
% this geometry.
%
% See also: euclideanfactory

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, April 7, 2015.
% Contributors: 
% Change log: 
%
%   Jan. 25, 2017 (NB):
%       Added functionality to handle multidimensional arrays.

    % As with built-in functions such as zeros(...): a scalar m alone means
    % size m-by-1, while a vector m alone is the full size vector.
    if nargin < 2 || isempty(n)
        if numel(m) == 1
            n = 1;
        else
            n = [];
        end
    end
    
    % Row vector with the size of the arrays.
    dimensions_vec = [m(:)', n(:)'];
    nb_entries = prod(dimensions_vec);
    
    M.size = @() dimensions_vec;

    M.name = @() sprintf('Euclidean space C^(%s)', num2str(dimensions_vec));
    
    % Each complex entry counts for two real dimensions.
    M.dim = @() 2*nb_entries;
    
    M.inner = @(x, d1, d2) real(d1(:)'*d2(:));
    
    M.norm = @(x, d) norm(d(:), 'fro');
    
    M.dist = @(x, y) norm(x(:)-y(:), 'fro');
    
    M.typicaldist = @() sqrt(nb_entries);
    
    M.proj = @(x, d) d;
    
    M.egrad2rgrad = @(x, g) g;
    
    M.ehess2rhess = @(x, eg, eh, d) eh;
    
    M.tangent = M.proj;
    
    M.exp = @exponential;
    
    M.retr = M.exp;
    
    M.log = @(x, y) y-x;

    M.hash = @(x) ['z' hashmd5([real(x(:)) ; imag(x(:))])];
    
    M.rand = @() (randn(dimensions_vec) + 1i*randn(dimensions_vec))/sqrt(2);
    
    M.randvec = @random_tangent;
    
    M.lincomb = @matrixlincomb;
    
    M.zerovec = @(x) zeros(dimensions_vec);
    
    M.transp = @(x1, x2, d) d;
    
    M.pairmean = @(x1, x2) .5*(x1+x2);
    
    % Vectorization stacks all real parts on top of all imaginary parts.
    M.vec = @(x, u_mat) [real(u_mat(:)) ; imag(u_mat(:))];
    M.mat = @(x, u_vec) reshape(u_vec(1:nb_entries), dimensions_vec) ...
                        + 1i*reshape(u_vec((nb_entries+1):end), dimensions_vec);
    M.vecmatareisometries = @() true;

    % Exponential map of a linear space: a straight line from x along d,
    % with unit step if t is not provided.
    function y = exponential(x, d, t)
        if nargin ~= 3
            y = x + d;
            return;
        end
        y = x + t*d;
    end

    % Random complex array with unit Frobenius norm (the point is unused).
    function u = random_tangent(x) %#ok<INUSD>
        u = randn(dimensions_vec) + 1i*randn(dimensions_vec);
        u = u / norm(u(:), 'fro');
    end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/euclidean/euclideanfactory.m
================================================
function M = euclideanfactory(m, n)
% Manopt structure for the Euclidean space of real matrices or arrays.
%
% function M = euclideanfactory(m)
% function M = euclideanfactory(m, n)
% function M = euclideanfactory([n1, n2, ...])
%
% Returns M, a structure describing the Euclidean space of real matrices,
% equipped with the standard Frobenius distance and associated trace inner
% product, as a manifold for Manopt.
%
% m and n can be vectors in order to handle multidimensional arrays: the
% size of the arrays is the concatenation [m, n].
%
% With this simple linear manifold, Manopt can be used to solve standard
% unconstrained optimization problems, for example in replacement of
% Matlab's fminunc.
%
% See also: euclideancomplexfactory

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Dec. 30, 2012.
% Contributors: Bamdev Mishra, May 4, 2015.
% Change log: 
%
%   July 5, 2013 (NB):
%       Added egrad2rgrad, ehess2rhess, mat, vec, tangent.
%   May 4, 2015 (BM):
%       Added functionality to handle multidimensional arrays.


    % As with built-in functions such as zeros(...): a scalar m alone means
    % size m-by-1, while a vector m alone is the full size vector.
    if nargin < 2 || isempty(n)
        if numel(m) == 1
            n = 1;
        else
            n = [];
        end
    end
    
    % Row vector with the size of the arrays.
    dimensions_vec = [m(:)', n(:)'];
    nb_entries = prod(dimensions_vec);
    
    M.size = @() dimensions_vec;
    
    M.name = @() sprintf('Euclidean space R^(%s)', num2str(dimensions_vec));
    
    M.dim = @() nb_entries;
    
    M.inner = @(x, d1, d2) d1(:).'*d2(:);
    
    M.norm = @(x, d) norm(d(:), 'fro');
    
    M.dist = @(x, y) norm(x(:) - y(:), 'fro');
    
    M.typicaldist = @() sqrt(nb_entries);
    
    M.proj = @(x, d) d;
    
    M.egrad2rgrad = @(x, g) g;
    
    M.ehess2rhess = @(x, eg, eh, d) eh;
    
    M.tangent = M.proj;
    
    M.exp = @exponential;
    
    M.retr = M.exp;
    
    M.log = @(x, y) y-x;

    M.hash = @(x) ['z' hashmd5(x(:))];
    
    M.rand = @() randn(dimensions_vec);
    
    M.randvec = @random_tangent;
    
    M.lincomb = @matrixlincomb;
    
    M.zerovec = @(x) zeros(dimensions_vec);
    
    M.transp = @(x1, x2, d) d;
    
    M.pairmean = @(x1, x2) .5*(x1+x2);
    
    M.vec = @(x, u_mat) u_mat(:);
    M.mat = @(x, u_vec) reshape(u_vec, dimensions_vec);
    M.vecmatareisometries = @() true;

    % Exponential map of a linear space: a straight line from x along d,
    % with unit step if t is not provided.
    function y = exponential(x, d, t)
        if nargin ~= 3
            y = x + d;
            return;
        end
        y = x + t*d;
    end

    % Random array with unit Frobenius norm (the point is unused).
    function u = random_tangent(x) %#ok<INUSD>
        u = randn(dimensions_vec);
        u = u / norm(u(:), 'fro');
    end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/euclidean/shapefitfactory.m
================================================
function M = shapefitfactory(VJt)
% Manopt structure for the linear search space of the ShapeFit problem
%
% function M = shapefitfactory(VJt)
%
% Input: VJt is a matrix of size dxn, such that VJt * ones(n, 1) = 0.
%
% Returns M, a structure describing the Euclidean space of d-by-n matrices
% equipped with the standard Frobenius distance and associated trace inner
% product, as a manifold for Manopt. Matrices on M, denoted by T, have size
% dxn and obey T*ones(n, 1) = 0 (centered columns) and <VJt, T> = 1, where
% <A, B> = Trace(A' * B).
%
% See this paper: http://arxiv.org/abs/1506.01437
% ShapeFit: Exact location recovery from corrupted pairwise directions, 2015
% Paul Hand, Choongbum Lee, Vladislav Voroninski
%
% See also: shapefit_smoothed

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, June 18, 2015.
% Contributors: 
% Change log: 
%
%   Jan. 25, 2017 (NB):
%       M.tangent = M.proj now, instead of being identity. This is notably
%       necessary so that checkgradient will pick up on gradients that do
%       not lie in the appropriate tangent space.
    
    [d, n] = size(VJt);
    
    % Unit-norm direction along which tangent vectors have no component.
    VJt_normed = VJt / norm(VJt, 'fro');

    M.name = @() sprintf('ShapeFit space of size %d x %d', d, n);
    
    M.dim = @() d*n - d - 1;
    
    M.inner = @(x, d1, d2) d1(:).'*d2(:);
    
    M.norm = @(x, d) norm(d, 'fro');
    
    M.dist = @(x, y) norm(x-y, 'fro');
    
    M.typicaldist = @() sqrt(d*n);
    
    M.proj = @(T, U) projection(U);
    
    M.egrad2rgrad = M.proj;
    
    M.ehess2rhess = @(x, eg, eh, d) projection(eh);
    
    M.tangent = M.proj;
    
    M.exp = @exponential;
    
    M.retr = M.exp;
    
    M.log = @(x, y) y-x;

    M.hash = @(x) ['z' hashmd5(x(:))];
    
    M.randvec = @(x) random_tangent();
    
    % We exploit the fact that VJt_normed belongs to the manifold.
    % NOTE(review): <VJt, VJt_normed> equals norm(VJt, 'fro'), which is 1
    % only if VJt has unit Frobenius norm -- confirm that callers pass a
    % normalized VJt.
    M.rand = @() VJt_normed + randn(1) * random_tangent();
    
    M.lincomb = @matrixlincomb;
    
    M.zerovec = @(x) zeros(d, n);
    
    M.transp = @(x1, x2, d) d;
    
    M.pairmean = @(x1, x2) .5*(x1+x2);
    
    M.vec = @(x, u_mat) u_mat(:);
    M.mat = @(x, u_vec) reshape(u_vec, [d, n]);
    M.vecmatareisometries = @() true;

    % Orthogonal projection of U to the tangent space.
    function PU = projection(U)
        % Subtract the mean taken along the second dimension.
        centered = bsxfun(@minus, U, mean(U, 2));
        % Remove the component along VJt. The two steps can be executed
        % separately, without interference, owing to VJt being centered
        % itself; this is why the coefficient can be computed from U.
        coeff = VJt_normed(:)'*U(:);
        PU = centered - coeff*VJt_normed;
    end

    % Exponential map of a linear space: a straight line from x along d,
    % with unit step if t is not provided.
    function y = exponential(x, d, t)
        if nargin ~= 3
            y = x + d;
            return;
        end
        y = x + t*d;
    end

    % Random tangent vector with unit Frobenius norm.
    function u = random_tangent()
        u = projection(randn(d, n));
        u = u / norm(u, 'fro');
    end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/euclidean/skewsymmetricfactory.m
================================================
function M = skewsymmetricfactory(n, k)
% Returns a manifold struct to optimize over k skew-symmetric matrices of size n
%
% function M = skewsymmetricfactory(n)
% function M = skewsymmetricfactory(n, k)
%
% Returns M, a structure describing the Euclidean space of n-by-n
% skew-symmetric matrices equipped with the standard Frobenius distance and
% associated trace inner product, as a manifold for Manopt.
%
% By default, k = 1. If k > 1, points and vectors are stored in 3D matrices
% X of size nxnxk such that each slice X(:, :, i), for i = 1:k, is
% skew-symmetric.

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, June 28, 2016.
% Contributors: 
% Change log: 
%
%   Jan. 25, 2017 (NB):
%       M.tangent = M.proj now, instead of being identity. This is notably
%       necessary so that checkgradient will pick up on gradients that do
%       not lie in the appropriate tangent space.
    
    if ~exist('k', 'var') || isempty(k)
        k = 1;
    end

    M.name = @() sprintf('(Skew-symmetric matrices of size %d)^%d', n, k);
    
    M.dim = @() k*n*(n-1)/2;
    
    M.inner = @(x, d1, d2) d1(:).'*d2(:);
    
    M.norm = @(x, d) norm(d(:), 'fro');
    
    M.dist = @(x, y) norm(x(:)-y(:), 'fro');
    
    M.typicaldist = @() sqrt(k)*n;
    
    M.proj = @(x, d) multiskew(d);
    
    M.egrad2rgrad = M.proj;
    
    M.ehess2rhess = @(x, eg, eh, d) M.proj(x, eh);
    
    M.tangent = M.proj;
    
    M.exp = @exp;
    % Exponential map of a linear space: a straight line from x along d,
    % with unit step if t is not provided.
    function y = exp(x, d, t)
        if nargin == 3
            y = x + t*d;
        else
            y = x + d;
        end
    end
    
    M.retr = M.exp;
	
	M.log = @(x, y) y-x;

    M.hash = @(x) ['z' hashmd5(x(:))];
    
    M.rand = @() multiskew(randn(n, n, k));
    
    M.randvec = @randvec;
    % Random tangent vector with unit Frobenius norm (the point is unused).
    function u = randvec(x) %#ok<INUSD>
        u = multiskew(randn(n, n, k));
        u = u / norm(u(:), 'fro');
    end
    
    M.lincomb = @matrixlincomb;
    
    M.zerovec = @(x) zeros(n, n, k);
    
    M.transp = @(x1, x2, d) d;
    
    M.pairmean = @(x1, x2) .5*(x1+x2);
    
    
    % Elaborate list of indices of strictly upper-triangular entries.
    single_upper_triangle = find(triu(ones(n), 1));
    all_upper_triangle = bsxfun(@plus, single_upper_triangle, n^2*(0:(k-1)));
    all_upper_triangle = all_upper_triangle(:);
    
    % To vectorize a matrix, we extract all upper-triangular entries and
    % scale by sqrt(2) to ensure isometry, that is: given two tangent
    % vectors U and V at a point X, M.inner(X, U, V) is equal to u'*v,
    % where u = M.vec(X, U) and likewise for v. This construction has the
    % advantage of providing a vectorized representation of matrices that
    % has the same length as the intrinsic dimension of the space they live
    % in.
    M.vec = @(x, u_mat) sqrt(2)*u_mat(all_upper_triangle);
    M.mat = @matricize;
    % Inverse of M.vec: u_vec contains only the (scaled) strictly
    % upper-triangular entries, so the whole vector is consumed. There is
    % no leading block of k*n diagonal entries to skip here, contrary to
    % the vectorization used for symmetric matrices.
    function u_mat = matricize(X, u_vec) %#ok<INUSL>
        u_mat = zeros(n, n, k);
        u_mat(all_upper_triangle) = u_vec / sqrt(2);
        % Fill in the lower triangles by skew-symmetry.
        u_mat = u_mat - multitransp(u_mat);
    end
    M.vecmatareisometries = @() true;

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/euclidean/symmetricfactory.m
================================================
function M = symmetricfactory(n, k)
% Manopt structure for the linear space of k symmetric matrices of size n
%
% function M = symmetricfactory(n)
% function M = symmetricfactory(n, k)
%
% Returns M, a structure describing the Euclidean space of n-by-n symmetric
% matrices equipped with the standard Frobenius distance and associated
% trace inner product, as a manifold for Manopt.
% 
% By default, k = 1. If k > 1, points and vectors are stored in 3D matrices
% X of size nxnxk such that each slice X(:, :, i), for i = 1:k, is
% symmetric.

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Jan. 22, 2014.
% Contributors: 
% Change log: 
%
%   Jan. 25, 2017 (NB):
%       M.tangent = M.proj now, instead of being identity. This is notably
%       necessary so that checkgradient will pick up on gradients that do
%       not lie in the appropriate tangent space.
    
    if nargin < 2 || isempty(k)
        k = 1;
    end
    
    % Linear indices of the first entry of each slice, minus one.
    slice_offsets = n^2*(0:(k-1));
    
    % Linear indices of all diagonal entries of an nxnxk array.
    diag_one_slice = (1:(n+1):n^2)';
    diag_idx = bsxfun(@plus, diag_one_slice, slice_offsets);
    diag_idx = diag_idx(:);
    
    % Linear indices of all strictly upper-triangular entries.
    upper_one_slice = find(triu(ones(n), 1));
    upper_idx = bsxfun(@plus, upper_one_slice, slice_offsets);
    upper_idx = upper_idx(:);

    M.name = @() sprintf('(Symmetric matrices of size %d)^%d', n, k);
    
    M.dim = @() k*n*(n+1)/2;
    
    M.inner = @(x, d1, d2) d1(:).'*d2(:);
    
    M.norm = @(x, d) norm(d(:), 'fro');
    
    M.dist = @(x, y) norm(x(:)-y(:), 'fro');
    
    M.typicaldist = @() sqrt(k)*n;
    
    M.proj = @(x, d) multisym(d);
    
    M.egrad2rgrad = M.proj;
    
    M.ehess2rhess = @(x, eg, eh, d) M.proj(x, eh);
    
    M.tangent = M.proj;
    
    M.exp = @exponential;
    
    M.retr = M.exp;
    
    M.log = @(x, y) y-x;

    M.hash = @(x) ['z' hashmd5(x(:))];
    
    M.rand = @() multisym(randn(n, n, k));
    
    M.randvec = @random_tangent;
    
    M.lincomb = @matrixlincomb;
    
    M.zerovec = @(x) zeros(n, n, k);
    
    M.transp = @(x1, x2, d) d;
    
    M.pairmean = @(x1, x2) .5*(x1+x2);
    
    % To vectorize, stack all diagonal entries first, then all strictly
    % upper-triangular entries scaled by sqrt(2). The scaling ensures
    % isometry: given two tangent vectors U and V at a point X,
    % M.inner(X, U, V) is equal to u'*v, where u = M.vec(X, U) and likewise
    % for v. The vectorized representation has the same length as the
    % intrinsic dimension of the space.
    M.vec = @(x, u_mat) [u_mat(diag_idx) ; ...
                         sqrt(2)*u_mat(upper_idx)];
    M.mat = @matricize;
    M.vecmatareisometries = @() true;

    % Exponential map of a linear space: a straight line from x along d,
    % with unit step if t is not provided.
    function y = exponential(x, d, t)
        if nargin ~= 3
            y = x + d;
            return;
        end
        y = x + t*d;
    end

    % Random tangent vector with unit Frobenius norm (the point is unused).
    function u = random_tangent(x) %#ok<INUSD>
        u = multisym(randn(n, n, k));
        u = u / norm(u(:), 'fro');
    end

    % Inverse of M.vec: the first k*n entries of u_vec are the diagonals,
    % the remaining ones are the scaled strictly upper-triangular entries.
    function u_mat = matricize(X, u_vec) %#ok<INUSL>
        nb_diag = k*n;
        u_mat = zeros(n, n, k);
        u_mat(upper_idx) = u_vec((nb_diag+1):end) / sqrt(2);
        % Mirror the upper triangles, then write the diagonals.
        u_mat = u_mat + multitransp(u_mat);
        u_mat(diag_idx) = u_vec(1:nb_diag);
    end

end

% Former, easier versions for vec / mat. They had the disadvantage of
% giving vector representations of length k*n^2, instead of k*n*(n+1)/2.
% M.vec = @(x, u_mat) u_mat(:);
% M.mat = @(x, u_vec) reshape(u_vec, [n, n, k]);
% M.vecmatareisometries = @() true;


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/fixedrank/fixedrankMNquotientfactory.m
================================================
function M = fixedrankMNquotientfactory(m, n, k)
% Manopt structure for rank-k, m-by-n matrices with a two-factor quotient geometry.
%
% function M = fixedrankMNquotientfactory(m, n, k)
%
% This follows the quotient geometry described in the following paper:
% P.-A. Absil, L. Amodei and G. Meyer,
% "Two Newton methods on the manifold of fixed-rank matrices endowed
%  with Riemannian quotient geometries", arXiv, 2012.
%
% Paper link: http://arxiv.org/abs/1209.0068
%
% A point X on the manifold is represented as a structure with two
% fields: M and N. The matrix M (mxk) is orthonormal, while the matrix N
% (nxk) is full-rank such that X = M*N';
%
% Tangent vectors are represented as a structure with two fields (M, N).
%
% Please cite the Manopt paper as well as the research paper:
%     @Article{absil2014fixedrank,
%       Title   = {Two Newton methods on the manifold of fixed-rank matrices endowed with Riemannian quotient geometries},
%       Author  = {Absil, P.-A. and Amodei, L. and Meyer, G.},
%       Journal = {Computational Statistics},
%       Year    = {2014},
%       Number  = {3-4},
%       Pages   = {569--590},
%       Volume  = {29},
%       Doi     = {10.1007/s00180-013-0441-6}
%     }

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Dec. 30, 2012.
% Contributors:
% Change log:
    
    
    % Symmetric part of a square matrix.
    symm = @(Z) .5*(Z+Z');
    % Projection of H to the tangent space of the Stiefel manifold at Y.
    stiefel_proj = @(Y, H) H - Y*symm(Y'*H);
    
    % Factor M lives on the Stiefel manifold: its random generator is
    % reused to produce random points.
    stiefelm = stiefelfactory(m, k);
    
    M.name = @() sprintf('MN'' quotient manifold of %dx%d matrices of rank %d', m, n, k);
    
    M.dim = @() (m+n-k)*k;
    
    % Choice of the metric is motivated by the symmetry present in the
    % space.
    M.inner = @(X, eta, zeta) eta.M(:).'*zeta.M(:) + eta.N(:).'*zeta.N(:);
    
    M.norm = @(X, eta) sqrt(M.inner(X, eta, eta));
    
    M.dist = @(x, y) error('fixedrankMNquotientfactory.dist not implemented yet.');
    
    M.typicaldist = @() 10*k;
    
    M.egrad2rgrad = @egrad2rgrad;
    
    M.ehess2rhess = @ehess2rhess;
    
    M.proj = @projection;
    
    M.exp = @exponential;
    
    M.retr = @retraction;
    
    M.hash = @(X) ['z' hashmd5([X.M(:) ; X.N(:)])];
    
    M.rand = @random;
    
    M.randvec = @randomvec;
    
    M.lincomb = @lincomb;
    
    M.zerovec = @(X) struct('M', zeros(m, k), 'N', zeros(n, k));
    
    M.transp = @(x1, x2, d) projection(x2, d);
    
    
    % Only the M factor of the Euclidean gradient needs to be projected.
    function eta = egrad2rgrad(X, eta)
        eta.M = stiefel_proj(X.M, eta.M);
    end
    
    function Hess = ehess2rhess(X, egrad, ehess, eta)
        
        % Directional derivative of the Riemannian gradient.
        corrected = ehess.M - eta.M*symm(X.M'*egrad.M);
        total.M = stiefel_proj(X.M, corrected);
        total.N = ehess.N;
        
        % Projection onto the horizontal space.
        Hess = projection(X, total);
    end
    
    function etaproj = projection(X, eta)
        
        % Start by projecting the vector from Rmp x Rnp to the tangent
        % space to the total space, that is, the M part should be in the
        % tangent space to Stiefel at X.M and the N part is arbitrary.
        tangent_M = stiefel_proj(X.M, eta.M);
        
        % Now project from the tangent space to the horizontal space, that
        % is, take care of the quotient.
        
        % First solve a Sylvester equation (A symm., B skew-symm.)
        A = X.N'*X.N + eye(k);
        cross_terms = tangent_M'*X.M + eta.N'*X.N;
        B = cross_terms-cross_terms';
        omega = lyap(A, -B);
        
        % And project along the vertical space to the horizontal space.
        etaproj.M = tangent_M + X.M*omega;
        etaproj.N = eta.N + X.N*omega;
        
    end
    
    % Unit step if t is not provided.
    function Y = exponential(X, eta, t)
        if nargin <= 2
            t = 1.0;
        end
        
        A = t*X.M'*eta.M;
        S = t^2*eta.M'*eta.M;
        Y.M = [X.M t*eta.M]*expm([A -S ; eye(k) A])*eye(2*k, k)*expm(-A);
        
        % re-orthonormalize (seems necessary from time to time).
        [Q, R] = qr(Y.M, 0);
        Y.M = Q * diag(sign(diag(R)));
        
        Y.N = X.N + t*eta.N;
        
    end
    
    % Unit step if t is not provided.
    function Y = retraction(X, eta, t)
        if nargin <= 2
            t = 1.0;
        end
        
        Y.M = uf(X.M + t*eta.M); % This is a valid retraction
        Y.N = X.N + t*eta.N;
    end
    
    function X = random()
        X.M = stiefelm.rand();
        X.N = randn(n, k);
    end
    
    % Random horizontal vector at X, with unit norm.
    function eta = randomvec(X)
        raw.M = randn(m, k);
        raw.N = randn(n, k);
        eta = projection(X, raw);
        nrm = M.norm(X, eta);
        eta.M = eta.M / nrm;
        eta.N = eta.N / nrm;
    end
    
end


% Linear combination of tangent vectors: a1*d1, or a1*d1 + a2*d2.
function d = lincomb(x, a1, d1, a2, d2) %#ok<INMSL>
    
    switch nargin
        case 3
            d.M = a1*d1.M;
            d.N = a1*d1.N;
        case 5
            d.M = a1*d1.M + a2*d2.M;
            d.N = a1*d1.N + a2*d2.N;
        otherwise
            error('Bad use of fixedrankMNquotientfactory.lincomb.');
    end
    
end


% Product of the left and (transposed) right singular factors of the
% economy-size SVD of A.
function A = uf(A)
    [left, unused, right] = svd(A, 0); %#ok<ASGLU>
    A = left*right';
end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/fixedrank/fixedrankembeddedfactory.m
================================================
function M = fixedrankembeddedfactory(m, n, k)
% Manifold struct to optimize fixed-rank matrices w/ an embedded geometry.
%
% function M = fixedrankembeddedfactory(m, n, k)
%
% Manifold of m-by-n real matrices of fixed rank k. This follows the
% embedded geometry described in Bart Vandereycken's 2013 paper:
% "Low-rank matrix completion by Riemannian optimization".
% 
% Paper link: http://arxiv.org/pdf/1209.3834.pdf
%
% A point X on the manifold is represented as a structure with three
% fields: U, S and V. The matrices U (mxk) and V (nxk) are orthonormal,
% while the matrix S (kxk) is any /diagonal/, full rank matrix.
% Following the SVD formalism, X = U*S*V'. Note that the diagonal entries
% of S are not constrained to be nonnegative.
%
% Tangent vectors are represented as a structure with three fields: Up, M
% and Vp. The matrices Up (mxk) and Vp (nxk) obey Up'*U = 0 and Vp'*V = 0.
% The matrix M (kxk) is arbitrary. Such a structure corresponds to the
% following tangent vector in the ambient space of mxn matrices:
%   Z = U*M*V' + Up*V' + U*Vp'
% where (U, S, V) is the current point and (Up, M, Vp) is the tangent
% vector at that point.
%
% Vectors in the ambient space are best represented as mxn matrices. If
% these are low-rank, they may also be represented as structures with
% U, S, V fields, such that Z = U*S*V'. There are no restrictions on what
% U, S and V are, as long as their product as indicated yields a real, mxn
% matrix.
%
% The chosen geometry yields a Riemannian submanifold of the embedding
% space R^(mxn) equipped with the usual trace (Frobenius) inner product.
%
%
% Please cite the Manopt paper as well as the research paper:
%     @Article{vandereycken2013lowrank,
%       Title   = {Low-rank matrix completion by {Riemannian} optimization},
%       Author  = {Vandereycken, B.},
%       Journal = {SIAM Journal on Optimization},
%       Year    = {2013},
%       Number  = {2},
%       Pages   = {1214--1236},
%       Volume  = {23},
%       Doi     = {10.1137/110845768}
%     }
%
% See also: fixedrankfactory_2factors fixedrankfactory_3factors

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Dec. 30, 2012.
% Contributors: 
% Change log: 
%
%	Feb. 20, 2014 (NB):
%       Added function tangent to work with checkgradient.
%
%   June 24, 2014 (NB):
%       A couple modifications following
%       Bart Vandereycken's feedback:
%       - The checksum (hash) was replaced for a faster alternative: it's a
%         bit less "safe" in that collisions could arise with higher
%         probability, but they're still very unlikely.
%       - The vector transport was changed.
%       The typical distance was also modified, hopefully giving the
%       trustregions method a better initial guess for the trust region
%       radius, but that should be tested for different cost functions too.
%
%    July 11, 2014 (NB):
%       Added ehess2rhess and tangent2ambient, supplied by Bart.
%
%    July 14, 2014 (NB):
%       Added vec, mat and vecmatareisometries so that hessianspectrum now
%       works with this geometry. Implemented the tangent function.
%       Made it clearer in the code and in the documentation in what format
%       ambient vectors may be supplied, and generalized some functions so
%       that they should now work with both accepted formats.
%       It is now clearly stated that for a point X represented as a
%       triplet (U, S, V), the matrix S needs to be diagonal.

    % Human-readable description of the manifold.
    M.name = @() sprintf('Manifold of %dx%d matrices of rank %d', m, n, k);
    
    % Dimension of the manifold.
    M.dim = @() (m+n-k)*k;
    
    % Inner product of two tangent vectors d1, d2 (structures with fields
    % M, Up, Vp): sum of the entrywise dot products of the three
    % components. The point x is not used.
    M.inner = @(x, d1, d2) d1.M(:).'*d2.M(:) + d1.Up(:).'*d2.Up(:) ...
                                             + d1.Vp(:).'*d2.Vp(:);
    
    % Norm induced by M.inner.
    M.norm = @(x, d) sqrt(M.inner(x, d, d));
    
    % The distance is not available: calling it raises an error.
    M.dist = @(x, y) error('fixedrankembeddedfactory.dist not implemented yet.');
    
    % Typical distance, set equal to the manifold dimension.
    M.typicaldist = @() M.dim();
    
    % Re-imposes the tangent space constraints on a vector Z that is
    % already in tangent vector format (fields Up, M, Vp): the part of Up
    % lying in the range of X.U and the part of Vp lying in the range of
    % X.V are subtracted. The field M is left untouched. For a genuine
    % tangent vector at X this only removes numerical round-off.
    M.tangent = @tangent;
    function Z = tangent(X, Z)
        UtUp = X.U'*Z.Up;
        Z.Up = Z.Up - X.U*UtUp;
        
        VtVp = X.V'*Z.Vp;
        Z.Vp = Z.Vp - X.V*VtVp;
    end

    % Computes the product Z*W for an ambient vector Z.
    % Z is either a plain matrix, or a structure with fields U, S, V
    % standing for U*S*V'; in the latter case the product is evaluated
    % right-to-left so that the full matrix is never formed.
    function ZW = apply_ambient(Z, W)
        if isstruct(Z)
            ZW = Z.U*(Z.S*(Z.V'*W));
            return;
        end
        ZW = Z*W;
    end

    % Computes the product Z'*W for an ambient vector Z given either as a
    % plain matrix or as a structure with fields U, S, V (Z = U*S*V').
    function ZtW = apply_ambient_transpose(Z, W)
        if isstruct(Z)
            ZtW = Z.V*(Z.S'*(Z.U'*W));
            return;
        end
        ZtW = Z'*W;
    end
    
    % Orthogonal projection to the tangent space at X of an ambient vector
    % Z, supplied either as an mxn matrix or as a structure with fields
    % U, S, V. The result is in tangent vector format (fields M, Up, Vp).
    M.proj = @projection;
    function Zproj = projection(X, Z)
        
        % Products of Z and Z' with the factors of X.
        ZV  = apply_ambient(Z, X.V);
        ZtU = apply_ambient_transpose(Z, X.U);
        
        % k-by-k component U'*Z*V, reused by all three fields.
        core = X.U'*ZV;
        
        Zproj = struct('M',  core, ...
                       'Up', ZV  - X.U*core, ...
                       'Vp', ZtU - X.V*core');
        
    end

    % The Riemannian gradient is the projection of the Euclidean one.
    M.egrad2rgrad = @projection;
    
    % Code supplied by Bart.
    % Converts Euclidean derivatives into the Riemannian Hessian.
    %   egrad, ehess : Euclidean gradient at X and Euclidean Hessian at X
    %                  along H, both vectors in the ambient space.
    %   H            : tangent vector at X.
    %   rhess        : Riemannian Hessian at X along H (tangent vector).
    M.ehess2rhess = @ehess2rhess;
    function rhess = ehess2rhess(X, egrad, ehess, H)
        
        % Euclidean part
        rhess = projection(X, ehess);
        
        % Curvature part: egrad applied to H.Vp (resp. egrad' applied to
        % H.Up), right-divided by X.S, with the component in the range of
        % X.U (resp. X.V) removed.
        curvU = apply_ambient(egrad, H.Vp)/X.S;
        curvV = apply_ambient_transpose(egrad, H.Up)/X.S;
        
        rhess.Up = rhess.Up + (curvU - X.U*(X.U'*curvU));
        rhess.Vp = rhess.Vp + (curvV - X.V*(X.V'*curvV));
        
    end

    % Converts a tangent vector Z at X, given as a structure (Up, M, Vp),
    % into a low-rank representation (U, S, V) of the same vector in the
    % ambient space of mxn matrices, i.e. such that
    %   U*S*V' = X.U*Z.M*X.V' + Z.Up*X.V' + X.U*Z.Vp'.
    % The right-hand side is mxn and may be too large to form explicitly,
    % hence the factored output. Nothing is guaranteed about U, S and V
    % beyond their product: in general U and V are not orthonormal and S
    % is not diagonal.
    % (In this implementation, S is identity, but this might change.)
    M.tangent2ambient = @tangent2ambient;
    function Zambient = tangent2ambient(X, Z)
        Zambient = struct('U', [X.U*Z.M + Z.Up, X.U], ...
                          'S', eye(2*k), ...
                          'V', [X.V, Z.Vp]);
    end
    
    % This retraction is second order, following general results from
    % Absil, Malick, "Projection-like retractions on matrix manifolds",
    % SIAM J. Optim., 22 (2012), pp. 135-158.
    %
    %   X : point (fields U, S, V).
    %   Z : tangent vector at X (fields Up, M, Vp).
    %   t : step length (optional, defaults to 1).
    %   Y : new point (fields U, V, S).
    M.retr = @retraction;
    function Y = retraction(X, Z, t)
        if nargin < 3
            t = 1.0;
        end

        % See personal notes June 28, 2012 (NB)
        % Thin QR factorizations of the two components of Z.
        [Qu, Ru] = qr(Z.Up, 0);
        [Qv, Rv] = qr(Z.Vp, 0);
        
        % Calling svds or svd should yield the same result, but BV
        % advocated svd is more robust, and it doesn't change the
        % asymptotic complexity to call svd then trim rather than call
        % svds. Also, apparently Matlab calls ARPACK in a suboptimal way
        % for svds in this scenario.
        % [Ut St Vt] = svds([X.S+t*Z.M , t*Rv' ; t*Ru , zeros(k)], k);
        [Ut, St, Vt] = svd([X.S+t*Z.M , t*Rv' ; t*Ru , zeros(k)]);
        
        % Keep the first k singular triplets of the small block matrix and
        % map them back through the bases [X.U Qu] and [X.V Qv].
        Y.U = [X.U Qu]*Ut(:, 1:k);
        Y.V = [X.V Qv]*Vt(:, 1:k);
        % eps is added to the diagonal of S.
        % NOTE(review): presumably so that S stays invertible when a
        % retained singular value is zero -- confirm.
        Y.S = St(1:k, 1:k) + eps*eye(k);
        
        % equivalent but very slow code
        % [U S V] = svds(X.U*X.S*X.V' + t*(X.U*Z.M*X.V' + Z.Up*X.V' + X.U*Z.Vp'), k);
        % Y.U = U; Y.V = V; Y.S = S;
        
    end


    % Orthographic retraction provided by Teng Zhang. One interest of the
    % orthographic retraction is that if matrices are represented in full
    % size, it can be computed without any SVDs. If for an application it
    % makes sense to represent the matrices in full size, this may be a
    % good idea, but it won't shine in the present implementation of the
    % manifold.
    %
    %   X : point (fields U, S, V).
    %   Z : tangent vector at X (fields Up, M, Vp).
    %   t : step length (optional, defaults to 1).
    %   Y : new point (fields U, S, V).
    M.retr_ortho = @retraction_orthographic;
    function Y = retraction_orthographic(X, Z, t)
        if nargin < 3
            t = 1.0;
        end
        
        % First, write Y (the output) as U1*S0*V1', where U1 and V1 are
        % orthogonal matrices and S0 is of size r by r.
        % NOTE(review): r here presumably denotes the rank k -- confirm.
        [U1, ~] = qr(t*(X.U*Z.M  + Z.Up) + X.U*X.S, 0);
        [V1, ~] = qr(t*(X.V*Z.M' + Z.Vp) + X.V*X.S, 0);
        S0 = (U1'*X.U)*(X.S + t*Z.M)*(X.V'*V1) + ...
             t*((U1'*Z.Up)*(X.V'*V1) + (U1'*X.U)*(Z.Vp'*V1));
        
        % Then, obtain the singular value decomposition of Y.
        % Only the small matrix S0 is decomposed; its singular vectors are
        % combined with U1 and V1.
        [U2, S2, V2] = svd(S0);
        Y.U = U1*U2;
        Y.S = S2;
        Y.V = V1*V2;
        
    end

    
    M.exp = @exponential;
    function Y = exponential(X, Z, t)
        % Stand-in for the exponential map: the true exponential is not
        % implemented, so the retraction is evaluated instead and a
        % warning is issued on every call.
        %   t : step length (optional; the retraction's default applies
        %       when omitted, namely 1).
        if nargin < 3
            Y = retraction(X, Z);
        else
            Y = retraction(X, Z, t);
        end
        warning('manopt:fixedrankembeddedfactory:exp', ...
                'Exponential for fixed rank manifold not implemented yet. Used retraction instead.');
    end

    % Less safe but much faster checksum, June 24, 2014.
    % Older version right below.
    % Only the three sums of the entries of U, S and V are hashed.
    M.hash = @(X) ['z' hashmd5([sum(X.U(:)) ; sum(X.S(:)); sum(X.V(:)) ])];
    %M.hash = @(X) ['z' hashmd5([X.U(:) ; X.S(:) ; X.V(:)])];
    
    M.rand = @random;
    % Factors U and V live on Stiefel manifolds, hence we will reuse
    % their random generator.
    stiefelm = stiefelfactory(m, k);
    stiefeln = stiefelfactory(n, k);
    function X = random()
        % Random point: U and V from the Stiefel generators, S diagonal
        % with k uniform random entries sorted in descending order.
        X.U = stiefelm.rand();
        X.V = stiefeln.rand();
        X.S = diag(sort(rand(k, 1), 1, 'descend'));
    end
    
    % Generate a random tangent vector at X.
    % The three components are drawn with standard normal entries, passed
    % through tangent() and then all divided by the norm from M.norm, so
    % that the output has unit norm.
    % TODO: consider a possible imbalance between the three components Up,
    % Vp and M, when m, n and k are widely different (which is typical).
    M.randvec = @randomvec;
    function Z = randomvec(X)
        Z.Up = randn(m, k);
        Z.Vp = randn(n, k);
        Z.M  = randn(k);
        Z = tangent(X, Z);
        nrm = M.norm(X, Z);
        Z.Up = Z.Up / nrm;
        Z.Vp = Z.Vp / nrm;
        Z.M  = Z.M  / nrm;
    end
    
    % Linear combinations are delegated to the local function lincomb.
    M.lincomb = @lincomb;
    
    % Zero tangent vector: all three components are all-zero matrices
    % (Up is m-by-k, M is k-by-k, Vp is n-by-k).
    M.zerovec = @(X) struct('Up', zeros(m, k), 'M', zeros(k, k), ...
                                                        'Vp', zeros(n, k));
    
    % New vector transport on June 24, 2014 (as indicated by Bart)
    % Reference: Absil, Mahony, Sepulchre 2008 section 8.1.3:
    % For Riemannian submanifolds of a Euclidean space, it is acceptable to
    % transport simply by orthogonal projection of the tangent vector
    % translated in the ambient space.
    % Z1 (tangent at X1) is first rewritten as an ambient vector, which is
    % then projected to the tangent space at X2.
    M.transp = @project_tangent;
    function Z2 = project_tangent(X1, X2, Z1)
        ambient = tangent2ambient(X1, Z1);
        Z2 = projection(X2, ambient);
    end


    % vec: column vector holding the entries of the full mxn ambient
    % matrix that represents the tangent vector Z at X.
    M.vec = @vec;
    function Zvec = vec(X, Z)
        Zamb = tangent2ambient(X, Z);
        Zvec = reshape(Zamb.U*Zamb.S*Zamb.V', [], 1);
    end
    % mat: reshapes a column vector to mxn and projects it at X.
    M.mat = @(X, Zvec) projection(X, reshape(Zvec, [m, n]));
    M.vecmatareisometries = @() true;

end

% Linear combination of tangent vectors
function d = lincomb(x, a1, d1, a2, d2) %#ok<INUSL>
% d = lincomb(x, a1, d1)          returns a1*d1
% d = lincomb(x, a1, d1, a2, d2)  returns a1*d1 + a2*d2
%
% d1, d2 and d are structures with fields Up, Vp and M; a1, a2 are
% scalars; x is unused. Any other number of inputs raises an error.

    switch nargin
        case 3
            d = struct('Up', a1*d1.Up, ...
                       'Vp', a1*d1.Vp, ...
                       'M',  a1*d1.M);
        case 5
            d = struct('Up', a1*d1.Up + a2*d2.Up, ...
                       'Vp', a1*d1.Vp + a2*d2.Vp, ...
                       'M',  a1*d1.M  + a2*d2.M);
        otherwise
            error('fixedrank.lincomb takes either 3 or 5 inputs.');
    end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/fixedrank/fixedrankfactory_2factors.m
================================================
function M = fixedrankfactory_2factors(m, n, k)
% Manifold of m-by-n matrices of rank k with balanced quotient geometry.
%
% function M = fixedrankfactory_2factors(m, n, k)
%
% The first-order geometry follows the balanced quotient geometry described 
% in the paper, 
% "Linear regression under fixed-rank constraints: a Riemannian approach",
% G. Meyer, S. Bonnabel and R. Sepulchre, ICML 2011.
%
% Paper link: http://www.icml-2011.org/papers/350_icmlpaper.pdf.
%
% The second-order geometry follows from the paper
% "Fixed-rank matrix factorizations and Riemannian low-rank optimization",
% B. Mishra, G. Meyer, S. Bonnabel and R. Sepulchre,
% Computational Statistics, 29(3 - 4), pp. 591 - 621, 2014.
%
% A point X on the manifold is represented as a structure with two
% fields: L and R. The matrices L (mxk) and R (nxk) are full column-rank
% matrices such that X = L*R'.
%
% Tangent vectors are represented as a structure with two fields: L, R.
% 
% For first-order geometry, please cite the Manopt paper as well as the research paper:
%     @InProceedings{meyer2011linear,
%       Title        = {Linear regression under fixed-rank constraints: a {R}iemannian approach},
%       Author       = {Meyer, G. and Bonnabel, S. and Sepulchre, R.},
%       Booktitle    = {{28th International Conference on Machine Learning}},
%       Year         = {2011},
%       Organization = {{ICML}}
%     }
%
% For second-order geometry, please cite the Manopt paper as well as the research paper:
%     @Article{mishra2014fixedrank,
%       Title   = {Fixed-rank matrix factorizations and {Riemannian} low-rank optimization},
%       Author  = {Mishra, B. and Meyer, G. and Bonnabel, S. and Sepulchre, R.},
%       Journal = {Computational Statistics},
%       Year    = {2014},
%       Number  = {3-4},
%       Pages   = {591--621},
%       Volume  = {29},
%       Doi     = {10.1007/s00180-013-0464-z}
%     }
%
%
% See also fixedrankembeddedfactory fixedrankfactory_3factors fixedrankfactory_2factors_preconditioned

% This file is part of Manopt: www.manopt.org.
% Original author: Bamdev Mishra, Dec. 30, 2012.
% Contributors:
% Change log:
%
%   July 10, 2013 (NB):
%       Added vec, mat, tangent, tangent2ambient.
%
%	July 03, 2015 (BM):
%      Cosmetic changes including avoiding storing the inverse of a
%       k-by-k matrix.
    
    
    % Human-readable description of the manifold.
    M.name = @() sprintf('LR'' quotient manifold of %dx%d matrices of rank %d', m, n, k);
    
    % Dimension of the manifold.
    M.dim = @() (m+n-k)*k;
    
    % Caches, as fields of the point X, the k-by-k products L'*L (field
    % LtL) and R'*R (field RtR) that the inner product and most other
    % functions need. Nothing is recomputed when both fields are present.
    function X = prepare(X)
        if all(isfield(X, {'LtL', 'RtR'}))
            return;
        end
        L = X.L;
        R = X.R;
        X.LtL = L'*L;
        X.RtR = R'*R;
    end
    
    % Choice of the metric is motivated by the symmetry present in the
    % space. The metric is the natural Grassmannian metric on L and R:
    % each factor contributes the trace of its cross product eta'*zeta,
    % left-divided by the corresponding cached product (LtL or RtR).
    M.inner = @iproduct;
    function ip = iproduct(X, eta, zeta)
        X = prepare(X);
        ipL = trace(X.LtL\(eta.L'*zeta.L));
        ipR = trace(X.RtR\(eta.R'*zeta.R));
        ip = ipL + ipR;
    end
    
    % Norm induced by M.inner.
    M.norm = @(X, eta) sqrt(M.inner(X, eta, eta));
    
    % The distance is not available: calling it raises an error.
    M.dist = @(x, y) error('fixedrankfactory_2factors.dist not implemented yet.');
    
    % Typical distance, set to ten times the rank.
    M.typicaldist = @() 10*k;
    
    % Symmetric part of a square matrix (used by ehess2rhess below). The
    % argument name M is local to this anonymous function.
    symm = @(M) .5*(M+M');
    
    M.egrad2rgrad = @egrad2rgrad;
    function rgrad = egrad2rgrad(X, egrad)
        % Riemannian gradient from the Euclidean gradient (fields L, R):
        % each factor is right-multiplied by the cached product of the
        % same factor of X.
        X = prepare(X);
        rgrad = struct('L', egrad.L*X.LtL, ...
                       'R', egrad.R*X.RtR);
    end
    
    M.ehess2rhess = @ehess2rhess;
    function Hess = ehess2rhess(X, egrad, ehess, eta)
        % Riemannian Hessian at X along eta.
        %   egrad, ehess : Euclidean gradient at X and Euclidean Hessian
        %                  at X along eta (structures with fields L, R).
        %   eta          : direction (structure with fields L, R).
        %   Hess         : structure with fields L, R, projected onto the
        %                  horizontal space through M.proj.
        X = prepare(X);
        
        % Riemannian gradient computation.
        rgrad = egrad2rgrad(X, egrad);
        
        % Directional derivative of the Riemannian gradient.
        Hess.L = ehess.L*X.LtL + 2*egrad.L*symm(eta.L'*X.L);
        Hess.R = ehess.R*X.RtR + 2*egrad.R*symm(eta.R'*X.R);
        
        % We need a correction term for the non-constant metric.
        Hess.L = Hess.L - rgrad.L*(X.LtL\(symm(X.L'*eta.L))) - eta.L*(X.LtL\(symm(X.L'*rgrad.L))) + X.L*(X.LtL\(symm(eta.L'*rgrad.L)));
        Hess.R = Hess.R - rgrad.R*(X.RtR\(symm(X.R'*eta.R))) - eta.R*(X.RtR\(symm(X.R'*rgrad.R))) + X.R*(X.RtR\(symm(eta.R'*rgrad.R)));
        
        % Projection onto the horizontal space.
        Hess = M.proj(X, Hess);
    end
    
    M.proj = @projection;
    % Maps a vector eta of the ambient space (fields L, R) onto the
    % horizontal space at X. The k-by-k matrix Omega is obtained from
    % lyap, called with three arguments.
    function etaproj = projection(X, eta)
        X = prepare(X);
        
        gramProd = (X.LtL)*(X.RtR);
        rhs = (X.LtL)*(X.R'*eta.R) - (eta.L'*X.L)*(X.RtR);
        Omega = lyap(gramProd, gramProd, -rhs);
        
        etaproj = struct('L', eta.L + X.L*Omega', ...
                         'R', eta.R - X.R*Omega);
    end
    
    % Making a vector tangent is the same projection; tangent vectors are
    % already stored in ambient format, so that conversion is the identity.
    M.tangent = M.proj;
    M.tangent2ambient = @(X, eta) eta;
    
    M.retr = @retraction;
    function Y = retraction(X, eta, t)
        % Retraction: straight-line step in both factors followed by a
        % rebalancing of their scales.
        %   t : step length (optional, defaults to 1).
        if nargin <= 2
            t = 1.0;
        end
        
        % Numerical conditioning step: A simpler version.
        % L and R must not end up with very skewed relative norms. The
        % scaling is the square root of the ratio of the Frobenius norms
        % of the factors of the starting point X; the new L is divided by
        % it and the new R multiplied by it.
        scaling = sqrt(norm(X.L, 'fro')/norm(X.R, 'fro'));
        Y.L = (X.L + t*eta.L) / scaling;
        Y.R = (X.R + t*eta.R) * scaling;
        
        % These are reused in the computation of the gradient and Hessian.
        Y = prepare(Y);
    end
    
    M.exp = @exponential;
    function Y = exponential(X, eta, t)
        % Stand-in for the exponential map: the retraction is evaluated
        % instead and a warning is issued on every call.
        %   t : step length (optional; the retraction's default of 1
        %       applies when omitted).
        if nargin < 3
            Y = retraction(X, eta);
        else
            Y = retraction(X, eta, t);
        end
        warning('manopt:fixedrankfactory_2factors:exp', ...
            'Exponential for fixed rank manifold not implemented yet. Used retraction instead.');
    end
    
    % Identifier string for the point X: the letter 'z' followed by the
    % output of hashmd5 applied to all entries of both factors.
    M.hash = @(X) ['z' hashmd5([X.L(:) ; X.R(:)])];
    
    M.rand = @random;
    function X = random()
        % A random point on the total space.
        % Both factors have standard normal entries; the cached products
        % are attached by prepare().
        X.L = randn(m, k);
        X.R = randn(n, k);
        X = prepare(X);
    end
    
    M.randvec = @randomvec;
    function eta = randomvec(X)
        % A random vector in the horizontal space.
        % Gaussian factors are projected at X and then divided by the norm
        % from M.norm, so that the output has unit norm.
        eta.L = randn(m, k);
        eta.R = randn(n, k);
        eta = projection(X, eta);
        nrm = M.norm(X, eta);
        eta.L = eta.L / nrm;
        eta.R = eta.R / nrm;
    end
    
    % Linear combinations are delegated to the local function lincomb.
    M.lincomb = @lincomb;
    
    % Zero tangent vector: both factors are all-zero matrices.
    M.zerovec = @(X) struct('L', zeros(m, k),'R', zeros(n, k));
    
    % Vector transport from x1 to x2: d is re-projected at the destination
    % x2 (the origin x1 is not used).
    M.transp = @(x1, x2, d) projection(x2, d);
    
    % vec and mat are not isometries, because of the unusual inner metric.
    % vec stacks the entries of L then R in one column; mat splits such a
    % column back into an m-by-k field L and an n-by-k field R.
    M.vec = @(X, U) [U.L(:) ; U.R(:)];
    M.mat = @(X, u) struct('L', reshape(u(1:(m*k)), m, k), ...
        'R', reshape(u((m*k+1):end), n, k));
    M.vecmatareisometries = @() false;
    
end

% Linear combination of tangent vectors.
function d = lincomb(x, a1, d1, a2, d2) %#ok<INUSL>
% d = lincomb(x, a1, d1)          returns a1*d1
% d = lincomb(x, a1, d1, a2, d2)  returns a1*d1 + a2*d2
%
% d1, d2 and d are structures with fields L and R; a1, a2 are scalars;
% x is unused. Any other number of inputs raises an error.
    
    switch nargin
        case 3
            d = struct('L', a1*d1.L, ...
                       'R', a1*d1.R);
        case 5
            d = struct('L', a1*d1.L + a2*d2.L, ...
                       'R', a1*d1.R + a2*d2.R);
        otherwise
            error('Bad use of fixedrankfactory_2factors.lincomb.');
    end
    
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/fixedrank/fixedrankfactory_2factors_preconditioned.m
================================================
function M = fixedrankfactory_2factors_preconditioned(m, n, k)
% Manifold of m-by-n matrices of rank k with two factor quotient geometry.
%
% function M = fixedrankfactory_2factors_preconditioned(m, n, k)
%
% This geometry is tuned to least-squares problems such as low-rank matrix
% completion with ell-2 loss.
%
% A point X on the manifold is represented as a structure with two
% fields: L and R. The matrices L (m-by-k) and R (n-by-k) are 
% full column-rank matrices such that X = L*R'.
%
% Tangent vectors are represented as a structure with two fields: L, R.
% 
% Please cite the Manopt paper as well as the research paper:
%     @Techreport{mishra2012optimized,
%       Title   = {A {R}iemannian geometry for low-rank matrix completion},
%       Author  = {Mishra, B. and Adithya Apuroop, K. and Sepulchre, R.},
%       Journal = {Arxiv preprint arXiv:1211.1550},
%       Year    = {2012}
%     }
%
%
% See also: fixedrankembeddedfactory fixedrankfactory_2factors fixedrankfactory_3factors_preconditioned

% This file is part of Manopt: www.manopt.org.
% Original author: Bamdev Mishra, Dec. 30, 2012.
% Contributors:
% Change log:
%
%	April 04, 2015 (BM):
%      Cosmetic changes including avoiding storing the inverse of a
%       k-by-k matrix.
  
    
    % Human-readable description of the manifold.
    M.name = @() sprintf('LR''(tuned to least square problems) quotient manifold of %dx%d matrices of rank %d', m, n, k);
    
    % Dimension of the manifold.
    M.dim = @() (m+n-k)*k;
    
    
    % Caches, as fields of the point X, the k-by-k products L'*L (field
    % LtL) and R'*R (field RtR) that the inner product and most other
    % functions need. Nothing is recomputed when both fields are present.
    function X = prepare(X)
        if all(isfield(X, {'LtL', 'RtR'}))
            return;
        end
        L = X.L;
        R = X.R;
        X.LtL = L'*L;
        X.RtR = R'*R;
    end
    
    
    % The choice of metric is motivated by symmetry and
    % tuned to least-squares cost function. Scaled metric: the L part is
    % weighted by the cached product R'*R and the R part by L'*L.
    M.inner = @iproduct;
    function ip = iproduct(X, eta, zeta)
        X = prepare(X);
        ipL = trace(X.RtR*(eta.L'*zeta.L));
        ipR = trace(X.LtL*(eta.R'*zeta.R));
        ip = ipL + ipR;
    end
    
    % Norm induced by M.inner.
    M.norm = @(X, eta) sqrt(M.inner(X, eta, eta));
    
    % The distance is not available: calling it raises an error.
    M.dist = @(x, y) error('fixedrankfactory_2factors_preconditioned.dist not implemented yet.');
    
    % Typical distance, set to ten times the rank.
    M.typicaldist = @() 10*k;
    
    M.egrad2rgrad = @egrad2rgrad;
    function rgrad = egrad2rgrad(X, egrad)
        % Riemannian gradient from the Euclidean gradient (fields L, R):
        % each factor is right-divided by the cached product of the
        % opposite factor of X.
        X = prepare(X);
        rgrad = struct('L', egrad.L/X.RtR, ...
                       'R', egrad.R/X.LtL);
    end
    
    M.ehess2rhess = @ehess2rhess;
    function Hess = ehess2rhess(X, egrad, ehess, eta)
        % Riemannian Hessian at X along eta.
        %   egrad, ehess : Euclidean gradient at X and Euclidean Hessian
        %                  at X along eta (structures with fields L, R).
        %   eta          : direction (structure with fields L, R).
        %   Hess         : structure with fields L, R, projected onto the
        %                  horizontal space through M.proj.
        % symm is the anonymous helper assigned at the end of the
        % enclosing factory function.
        X = prepare(X);
        
        % Riemannian gradient.
        rgrad = egrad2rgrad(X, egrad);
        
        % Directional derivative of the Riemannian gradient.
        Hess.L = ehess.L/X.RtR - 2*egrad.L*(X.RtR \ (symm(eta.R'*X.R) / X.RtR));
        Hess.R = ehess.R/X.LtL - 2*egrad.R*(X.LtL \ (symm(eta.L'*X.L) / X.LtL));
        
        % We still need a correction factor for the non-constant metric.
        Hess.L = Hess.L + rgrad.L*(symm(eta.R'*X.R)/X.RtR) + eta.L*(symm(rgrad.R'*X.R)/X.RtR) - X.L*(symm(eta.R'*rgrad.R)/X.RtR);
        Hess.R = Hess.R + rgrad.R*(symm(eta.L'*X.L)/X.LtL) + eta.R*(symm(rgrad.L'*X.L)/X.LtL) - X.R*(symm(eta.L'*rgrad.L)/X.LtL);
        
        % Project on the horizontal space.
        Hess = M.proj(X, Hess);
    end
    
    M.proj = @projection;
    function etaproj = projection(X, eta)
        % Projection of eta (fields L, R) onto the horizontal space at X.
        % Lambda is half the difference of two k-by-k terms, one per
        % factor.
        X = prepare(X);
        
        termR = (eta.R'*X.R)/X.RtR;
        termL = X.LtL\(X.L'*eta.L);
        Lambda = 0.5*(termR - termL);
        
        etaproj = struct('L', eta.L + X.L*Lambda, ...
                         'R', eta.R - X.R*Lambda');
    end
    
    % Making a vector tangent is the same projection.
    M.tangent = M.proj;
    
    % Tangent vectors are already stored in ambient format.
    M.tangent2ambient = @(X, eta) eta;
    
    M.retr = @retraction;
    function Y = retraction(X, eta, t)
        % Retraction: straight-line step in both factors followed by a
        % rebalancing of their scales.
        %   t : step length (optional, defaults to 1).
        if nargin <= 2
            t = 1.0;
        end
        
        % Numerical conditioning step: a simpler version.
        % We need to ensure that L and R do not have very skewed relative
        % norms. The scaling is the square root of the ratio of the
        % Frobenius norms of the factors of the starting point X.
        scaling = sqrt(norm(X.L, 'fro')/norm(X.R, 'fro'));
        Y.L = (X.L + t*eta.L) / scaling;
        Y.R = (X.R + t*eta.R) * scaling;
        
        % These are reused in the computations of gradient and Hessian.
        Y = prepare(Y);
    end
    
    
    M.exp = @exponential;
    function Y = exponential(X, eta, t)
        % Stand-in for the exponential map: the retraction is evaluated
        % instead and a warning is issued on every call.
        %   t : step length (optional; the retraction's default of 1
        %       applies when omitted).
        if nargin < 3
            Y = retraction(X, eta);
        else
            Y = retraction(X, eta, t);
        end
        warning('manopt:fixedrankfactory_2factors_preconditioned:exp', ...
            'Exponential for fixed rank manifold not implemented yet. Used retraction instead.');
    end
    
    % Identifier string for the point X: the letter 'z' followed by the
    % output of hashmd5 applied to all entries of both factors.
    M.hash = @(X) ['z' hashmd5([X.L(:) ; X.R(:)])];
    
    M.rand = @random;
    
    function X = random()
        % Random point: both factors have standard normal entries. The
        % cached products are not attached here; prepare() adds them on
        % first use.
        X.L = randn(m, k);
        X.R = randn(n, k);
    end
    
    M.randvec = @randomvec;
    function eta = randomvec(X)
        % Random horizontal vector at X with unit norm: Gaussian factors
        % are projected at X and then divided by the norm from M.norm.
        eta.L = randn(m, k);
        eta.R = randn(n, k);
        eta = projection(X, eta);
        nrm = M.norm(X, eta);
        eta.L = eta.L / nrm;
        eta.R = eta.R / nrm;
    end
    
    % Linear combinations are delegated to the local function lincomb.
    M.lincomb = @lincomb;
    
    % Zero tangent vector: both factors are all-zero matrices.
    M.zerovec = @(X) struct('L', zeros(m, k),'R', zeros(n, k));
    
    % Vector transport from x1 to x2: d is re-projected at the destination
    % x2 (the origin x1 is not used).
    M.transp = @(x1, x2, d) projection(x2, d);
    
    % vec and mat are not isometries, because of the scaled inner metric.
    % vec stacks the entries of L then R in one column.
    M.vec = @(X, U) [U.L(:) ; U.R(:)];
    
    % mat splits such a column back into an m-by-k field L and an n-by-k
    % field R.
    M.mat = @(X, u) struct('L', reshape(u(1:(m*k)), m, k), ...
        'R', reshape(u((m*k+1):end), n, k));
    
    M.vecmatareisometries = @() false;
    
    % Auxiliary functions
    % Symmetric part of a square matrix, used by the nested function
    % ehess2rhess. The argument name M is local to this anonymous function.
    symm = @(M) .5*(M+M');
end

% Linear combination of tangent vectors.
function d = lincomb(x, a1, d1, a2, d2) %#ok<INUSL>
% d = lincomb(x, a1, d1)          returns a1*d1
% d = lincomb(x, a1, d1, a2, d2)  returns a1*d1 + a2*d2
%
% d1, d2 and d are structures with fields L and R; a1, a2 are scalars;
% x is unused. Any other number of inputs raises an error.
    
    twoTerms = (nargin == 5);
    if ~twoTerms && nargin ~= 3
        error('Bad use of fixedrankfactory_2factors_preconditioned.lincomb.');
    end
    
    if twoTerms
        d.L = a1*d1.L + a2*d2.L;
        d.R = a1*d1.R + a2*d2.R;
    else
        d.L = a1*d1.L;
        d.R = a1*d1.R;
    end
    
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/fixedrank/fixedrankfactory_2factors_subspace_projection.m
================================================
function M = fixedrankfactory_2factors_subspace_projection(m, n, k)
% Manifold of m-by-n matrices of rank k with two factor quotient geometry.
%
% function M = fixedrankfactory_2factors_subspace_projection(m, n, k)
%
% A point X on the manifold is represented as a structure with two
% fields: L and R. The matrix L (mxk) is orthonormal,
% while the matrix R (nxk) is a full column-rank
% matrix such that X = L*R'.
%
% Tangent vectors are represented as a structure with two fields: L, R.
%
% Note: L is orthonormal, i.e., columns are orthogonal to each other.
% Such a geometry might be of interest where the left factor has a
% subspace interpretation. A motivation is in Sections 3.3 and 6.4 of the
% paper below.
%
% Please cite the Manopt paper as well as the research paper:
%     @Article{mishra2014fixedrank,
%       Title   = {Fixed-rank matrix factorizations and {Riemannian} low-rank optimization},
%       Author  = {Mishra, B. and Meyer, G. and Bonnabel, S. and Sepulchre, R.},
%       Journal = {Computational Statistics},
%       Year    = {2014},
%       Number  = {3-4},
%       Pages   = {591--621},
%       Volume  = {29},
%       Doi     = {10.1007/s00180-013-0464-z}
%     }
%
% See also: fixedrankfactory_2factors fixedrankembeddedfactory fixedrankfactory_2factors_preconditioned


% This file is part of Manopt: www.manopt.org.
% Original author: Bamdev Mishra, Dec. 30, 2012.
% Contributors:
% Change log:
    
    % Human-readable description of the manifold.
    M.name = @() sprintf('LR'' quotient manifold of %dx%d matrices of rank %d', m, n, k);
    
    % Dimension of the manifold.
    M.dim = @() (m+n-k)*k;
    
    % Caches, as the field RtR of the point X, the k-by-k product R'*R
    % that the inner product and most other functions need. Nothing is
    % recomputed when the field is already present.
    function X = prepare(X)
        if isfield(X, 'RtR')
            return;
        end
        X.RtR = X.R'*X.R;
    end
    
    % The choice of the metric is motivated by symmetry and scale
    % invariance in the total space: the L part is the plain entrywise
    % dot product, the R part is the trace of eta.R'*zeta.R left-divided
    % by the cached product R'*R.
    M.inner = @iproduct;
    function ip = iproduct(X, eta, zeta)
        X = prepare(X);
        
        ipL = eta.L(:).'*zeta.L(:);
        ipR = trace(X.RtR\(eta.R'*zeta.R));
        ip = ipL + ipR;
    end
    
    % Norm induced by M.inner.
    M.norm = @(X, eta) sqrt(M.inner(X, eta, eta));
    
    % The distance is not available: calling it raises an error.
    M.dist = @(x, y) error('fixedrankfactory_2factors_subspace_projection.dist not implemented yet.');
    
    % Typical distance, set to ten times the rank.
    M.typicaldist = @() 10*k;
    
    % Helpers shared with the nested functions below: skew-symmetric and
    % symmetric parts of a square matrix, and the map H -> H - L*symm(L'*H)
    % applied to the L factor.
    skew = @(X) .5*(X-X');
    symm = @(X) .5*(X+X');
    stiefel_proj = @(L, H) H - L*symm(L'*H);
    
    M.egrad2rgrad = @egrad2rgrad;
    function rgrad = egrad2rgrad(X, egrad)
        % Riemannian gradient from the Euclidean gradient (fields L, R):
        % the L part goes through stiefel_proj at X.L, the R part is
        % right-multiplied by the cached product R'*R.
        X = prepare(X);
        
        rgrad = struct('L', stiefel_proj(X.L, egrad.L), ...
                       'R', egrad.R*X.RtR);
    end
    
    
    M.ehess2rhess = @ehess2rhess;
    function Hess = ehess2rhess(X, egrad, ehess, eta)
        % Riemannian Hessian at X along eta.
        %   egrad, ehess : Euclidean gradient at X and Euclidean Hessian
        %                  at X along eta (structures with fields L, R).
        %   eta          : direction (structure with fields L, R).
        %   Hess         : structure with fields L, R, projected onto the
        %                  horizontal space through M.proj.
        X = prepare(X);
        
        % Riemannian gradient.
        rgrad = egrad2rgrad(X, egrad);
        
        % Directional derivative of the Riemannian gradient.
        % L factor: correction term, then stiefel_proj at X.L.
        Hess.L = ehess.L - eta.L*symm(X.L'*egrad.L);
        Hess.L = stiefel_proj(X.L, Hess.L);
        
        Hess.R = ehess.R*X.RtR + 2*egrad.R*symm(eta.R'*X.R);
        
        % Correction factor for the non-constant metric on the factor R.
        Hess.R = Hess.R - rgrad.R*(X.RtR\(symm(X.R'*eta.R))) - eta.R*(X.RtR\(symm(X.R'*rgrad.R))) + X.R*(X.RtR\(symm(eta.R'*rgrad.R)));
        
        % Projection onto the horizontal space.
        Hess = M.proj(X, Hess);
    end
    
    
    M.proj = @projection;
    function etaproj = projection(X, eta)
        % Maps a vector eta (fields L, R) to the horizontal space at X.
        % The L part is first passed through stiefel_proj; a k-by-k matrix
        % Omega is then obtained from nested_sylvester, and X.L*Omega and
        % X.R*Omega are subtracted from the respective factors.
        X = prepare(X);
        
        eta.L = stiefel_proj(X.L, eta.L); % On the tangent space.
        SS = X.RtR;
        % Skew-symmetric right-hand side assembled from both factors.
        AS1 = 2*X.RtR*skew(X.L'*eta.L)*X.RtR;
        AS2 = 2*skew(X.RtR*(X.R'*eta.R));
        AS  = skew(AS1 + AS2);
        
        Omega = nested_sylvester(SS,AS);
        etaproj.L = eta.L - X.L*Omega;
        etaproj.R = eta.R - X.R*Omega;
    end
    
    % Making a vector tangent is the same projection; tangent vectors are
    % already stored in ambient format, so that conversion is the identity.
    M.tangent = M.proj;
    M.tangent2ambient = @(X, eta) eta;
    
    M.retr = @retraction;
    function Y = retraction(X, eta, t)
        % Retraction: the L factor of the straight-line step is mapped
        % back through uf(); the R factor is a plain straight-line step.
        %   t : step length (optional, defaults to 1).
        if nargin <= 2
            t = 1.0;
        end
        
        % prepare() attaches the product that is reused in the
        % computation of the gradient and Hessian.
        Y = prepare(struct('L', uf(X.L + t*eta.L), ...
                           'R', X.R + t*eta.R));
    end
    
    M.exp = @exponential;
    function Y = exponential(X, eta, t)
        % Stand-in for the exponential map: the true exponential is not
        % implemented, so the retraction is evaluated instead and a
        % warning is issued on every call.
        %
        %   X, eta : point and tangent vector (structures with fields L, R).
        %   t      : step length (optional, defaults to 1).
        %   Y      : point returned by retraction(X, eta, t).
        if nargin < 3
            t = 1.0;
        end
        
        Y = retraction(X, eta, t);
        % The message text reads "Used retraction instead", the same
        % wording as the other fixed-rank factories.
        warning('manopt:fixedrankfactory_2factors_subspace_projection:exp', ...
            ['Exponential for fixed rank ' ...
            'manifold not implemented yet. Used retraction instead.']);
    end
    
    % Identifier string for the point X: the letter 'z' followed by the
    % output of hashmd5 applied to all entries of both factors.
    M.hash = @(X) ['z' hashmd5([X.L(:) ; X.R(:)])];
    
    M.rand = @random;
    % The factor L lives on the Stiefel manifold, hence we will reuse
    % its random generator.
    stiefelm = stiefelfactory(m, k);
    function X = random()
        % Random point: L from the Stiefel generator, R with standard
        % normal entries.
        X.L = stiefelm.rand();
        X.R = randn(n, k);
    end
    
    M.randvec = @randomvec;
    function eta = randomvec(X)
        % Random horizontal vector at X with unit norm: Gaussian factors
        % are projected at X and then divided by the norm from M.norm.
        eta.L = randn(m, k);
        eta.R = randn(n, k);
        eta = projection(X, eta);
        nrm = M.norm(X, eta);
        eta.L = eta.L / nrm;
        eta.R = eta.R / nrm;
    end
    
    % Linear combinations are delegated to the local function lincomb.
    M.lincomb = @lincomb;
    
    % Zero tangent vector: both factors are all-zero matrices.
    M.zerovec = @(X) struct('L', zeros(m, k),...
        'R', zeros(n, k));
    
    % Vector transport from x1 to x2: d is re-projected at the destination
    % x2 (the origin x1 is not used).
    M.transp = @(x1, x2, d) projection(x2, d);
    
    % vec and mat are not isometries, because of the scaled inner metric.
    % vec stacks the entries of L then R in one column; mat splits such a
    % column back into an m-by-k field L and an n-by-k field R.
    M.vec = @(X, U) [U.L(:) ; U.R(:)];
    M.mat = @(X, u) struct('L', reshape(u(1:(m*k)), m, k), ...
        'R', reshape(u((m*k+1):end), n, k));
    M.vecmatareisometries = @() false;
    
    
end

% Linear combination of tangent vectors.
function d = lincomb(x, a1, d1, a2, d2) %#ok<INUSL>
% d = lincomb(x, a1, d1)          returns a1*d1
% d = lincomb(x, a1, d1, a2, d2)  returns a1*d1 + a2*d2
%
% x      : point on the manifold (unused; kept for the common interface).
% a1, a2 : scalars.
% d1, d2 : tangent vectors, structures with fields L and R.
% d      : structure with fields L and R.
%
% Any other number of inputs raises an error.
%
% The Code Analyzer suppression on the signature uses the message ID
% INUSL (unused leading input), matching the sibling lincomb functions.
    
    if nargin == 3
        d.L = a1*d1.L;
        d.R = a1*d1.R;
    elseif nargin == 5
        d.L = a1*d1.L + a2*d2.L;
        d.R = a1*d1.R + a2*d2.R;
    else
        error('Bad use of fixedrankfactory_2factors_subspace_projection.lincomb.');
    end
    
end

function A = uf(A)
% Replaces the singular values of A by ones: with the SVD A = L*S*R'
% (computed by svd(A, 0)), the output is L*R'.
    [leftFactor, singVals, rightFactor] = svd(A, 0); %#ok
    A = leftFactor*rightFactor';
end

function omega = nested_sylvester(sym_mat, asym_mat)
    % omega = nested_sylvester(sym_mat, asym_mat)
    %
    % Solves the two chained Sylvester equations
    %
    %     X*sym_mat + sym_mat*X         = asym_mat
    %     Omega*sym_mat + sym_mat*Omega = X
    %
    % and returns Omega.
    %
    % Reference: Mishra, Meyer, Bonnabel and Sepulchre, 'Fixed-rank matrix
    % factorizations and Riemannian low-rank optimization'.
    %
    % Both solves go through the built-in lyap function. The fact that the
    % same sym_mat appears in both equations is not exploited.
    
    inner_solution = lyap(sym_mat, -asym_mat);
    omega = lyap(sym_mat, -inner_solution);
    
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/fixedrank/fixedrankfactory_3factors.m
================================================
function M = fixedrankfactory_3factors(m, n, k)
% Manifold of m-by-n matrices of rank k with polar quotient geometry.
%
% function M = fixedrankfactory_3factors(m, n, k)
%
% The first-order geometry follows the balanced quotient geometry described 
% in the paper, 
% "Linear regression under fixed-rank constraints: a Riemannian approach",
% G. Meyer, S. Bonnabel and R. Sepulchre, ICML 2011.
%
% Paper link: http://www.icml-2011.org/papers/350_icmlpaper.pdf.
%
% The second-order geometry follows from the paper
% "Fixed-rank matrix factorizations and Riemannian low-rank optimization",
% B. Mishra, G. Meyer, S. Bonnabel and R. Sepulchre,
% Computational Statistics, 29(3 - 4), pp. 591 - 621, 2014.
%
% A point X on the manifold is represented as a structure with three
% fields: L, S and R. The matrices L (mxk) and R (nxk) are orthonormal,
% while the matrix S (kxk) is a symmetric positive definite full rank
% matrix.
%
% Tangent vectors are represented as a structure with three fields: L, S
% and R.
%
% 
% For first-order geometry, please cite the Manopt paper as well as the research paper:
%     @InProceedings{meyer2011linear,
%       Title        = {Linear regression under fixed-rank constraints: a {R}iemannian approach},
%       Author       = {Meyer, G. and Bonnabel, S. and Sepulchre, R.},
%       Booktitle    = {{28th International Conference on Machine Learning}},
%       Year         = {2011},
%       Organization = {{ICML}}
%     }
% For second-order geometry, please cite the Manopt paper as well as the research paper:
%     @Article{mishra2014fixedrank,
%       Title   = {Fixed-rank matrix factorizations and {Riemannian} low-rank optimization},
%       Author  = {Mishra, B. and Meyer, G. and Bonnabel, S. and Sepulchre, R.},
%       Journal = {Computational Statistics},
%       Year    = {2014},
%       Number  = {3-4},
%       Pages   = {591--621},
%       Volume  = {29},
%       Doi     = {10.1007/s00180-013-0464-z}
%     }
%
%
% See also fixedrankembeddedfactory fixedrankfactory_2factors fixedrankfactory_3factors_preconditioned

% This file is part of Manopt: www.manopt.org.
% Original author: Bamdev Mishra, Dec. 30, 2012.
% Contributors:
% Change log:
    
    % Human-readable description of the manifold, including m, n and k.
    M.name = @() sprintf('LSR'' quotient manifold of %dx%d matrices of rank %d', m, n, k);
    
    % Dimension reported for the manifold.
    M.dim = @() (m+n-k)*k;
    
    % Choice of the metric on the orthonormal space is motivated by the symmetry present in the
    % space. The metric on the positive definite space is its natural metric.
    % Concretely: Euclidean inner products of the L and R components, plus
    % trace((S\eta.S)*(S\zeta.S)) for the S components.
    M.inner = @(X, eta, zeta) eta.L(:).'*zeta.L(:) + eta.R(:).'*zeta.R(:) ...
        + trace( (X.S\eta.S) * (X.S\zeta.S) );
    
    % Norm induced by M.inner.
    M.norm = @(X, eta) sqrt(M.inner(X, eta, eta));
    
    % No distance is available: calling M.dist always raises an error.
    M.dist = @(x, y) error('fixedrankfactory_3factors.dist not implemented yet.');
    
    % NOTE(review): 10*k looks like a heuristic scale; no derivation is given here.
    M.typicaldist = @() 10*k;
    
    % Small helpers shared by the nested functions below:
    % skew/symm return the skew-symmetric/symmetric part of a square matrix,
    % stiefel_proj removes L*symm(L'*H) from H.
    skew = @(X) .5*(X-X');
    symm = @(X) .5*(X+X');
    stiefel_proj = @(L, H) H - L*symm(L'*H);
    
    M.egrad2rgrad = @egrad2rgrad;
    function rgrad = egrad2rgrad(X, egrad)
        % Converts a Euclidean gradient (fields L, S, R) at X into the
        % Riemannian gradient: L and R go through stiefel_proj, while S
        % becomes X.S*symm(egrad.S)*X.S.
        rgrad = struct( ...
            'L', stiefel_proj(X.L, egrad.L), ...
            'S', X.S*symm(egrad.S)*X.S, ...
            'R', stiefel_proj(X.R, egrad.R));
    end
    
    
    M.ehess2rhess = @ehess2rhess;
    function Hess = ehess2rhess(X, egrad, ehess, eta)
        % Builds the Riemannian Hessian along eta from Euclidean quantities.
        %
        % X     : point (fields L, S, R).
        % egrad : Euclidean gradient at X (fields L, S, R).
        % ehess : Euclidean Hessian term (fields L, S, R); presumably the
        %         Euclidean Hessian at X applied to eta -- confirm against
        %         the Manopt calling convention.
        % eta   : tangent vector at X (fields L, S, R).
        % Hess  : result, after a final call to M.proj(X, .).
        
        % Riemannian gradient for the factor S.
        rgrad.S = X.S*symm(egrad.S)*X.S;
        
        % Directional derivatives of the Riemannian gradient.
        Hess.L = ehess.L - eta.L*symm(X.L'*egrad.L);
        Hess.L = stiefel_proj(X.L, Hess.L);
        
        Hess.R = ehess.R - eta.R*symm(X.R'*egrad.R);
        Hess.R = stiefel_proj(X.R, Hess.R);
        
        Hess.S = X.S*symm(ehess.S)*X.S +  2*symm(eta.S*symm(egrad.S)*X.S);
        
        % Correction factor for the non-constant metric on the factor S.
        Hess.S = Hess.S - symm(eta.S*(X.S\rgrad.S));
        
        % Projection onto the horizontal space.
        Hess = M.proj(X, Hess);
    end
    
    
    M.proj = @projection;
    function etaproj = projection(X, eta)
        % Two-stage projection of eta (fields L, S, R) at the point X.
        
        % Stage 1: tangent space of the total space.
        tanL = stiefel_proj(X.L, eta.L);
        tanR = stiefel_proj(X.R, eta.R);
        tanS = symm(eta.S);
        
        % Stage 2: horizontal space. omega is obtained from lyap with
        % coefficient matrix X.S*X.S and the right-hand side built below.
        rhs = X.S*(skew(X.L'*tanL) + skew(X.R'*tanR) - 2*skew(X.S\tanS))*X.S;
        omega = lyap(X.S*X.S, -rhs);
        
        etaproj.L = tanL - X.L*omega;
        etaproj.S = tanS - (X.S*omega - omega*X.S);
        etaproj.R = tanR - X.R*omega;
    end
    
    M.tangent = M.proj;
    M.tangent2ambient = @(X, eta) eta;
    
    M.retr = @retraction;
    function Y = retraction(X, eta, t)
        % Retraction of t*eta at X (t defaults to 1).
        % The S factor is updated through a matrix exponential built on the
        % Cholesky factor of X.S; L and R are updated with uf.
        if nargin < 3
            t = 1.0;
        end
        
        cholS = chol(X.S);
        whitened = cholS'\(t*eta.S)/cholS;
        Y.S = cholS'*expm(whitened)*cholS;
        
        Y.L = uf(X.L + t*eta.L);
        Y.R = uf(X.R + t*eta.R);
    end
    
    M.exp = @exponential;
    function Y = exponential(X, eta, t)
        % Stand-in for the exponential map: delegates to the retraction.
        %
        % X   : point on the manifold (fields L, S, R).
        % eta : tangent vector at X.
        % t   : optional step length; defaults to 1 when omitted.
        % Y   : the point returned by retraction(X, eta, t).
        %
        % A warning is raised on every call so that callers know the result
        % comes from the retraction and not from a true exponential.
        if nargin < 3
            t = 1.0;
        end
        Y = retraction(X, eta, t);
        warning('manopt:fixedrankfactory_3factors:exp', ...
            ['Exponential for fixed rank ' ...
            'manifold not implemented yet. Used retraction instead.']);
    end
    
    M.hash = @(X) ['z' hashmd5([X.L(:) ; X.S(:) ; X.R(:)])];
    
    M.rand = @random;
    % Factors L and R are on Stiefel manifolds, hence we reuse
    % their random generators.
    stiefelm = stiefelfactory(m, k);
    stiefeln = stiefelfactory(n, k);
    function X = random()
        % Random point: L and R from the two Stiefel generators, S a
        % diagonal matrix with entries drawn as 1 + rand.
        X = struct( ...
            'L', stiefelm.rand(), ...
            'R', stiefeln.rand(), ...
            'S', diag(1+rand(k, 1)));
    end
    
    M.randvec = @randomvec;
    function eta = randomvec(X)
        % Random vector at X with unit M.norm: Gaussian draw for each
        % factor, passed through projection(X, .), then rescaled.
        draw.L = randn(m, k);
        draw.R = randn(n, k);
        draw.S = randn(k, k);
        eta = projection(X, draw);
        
        scale = M.norm(X, eta);
        eta.L = eta.L / scale;
        eta.R = eta.R / scale;
        eta.S = eta.S / scale;
    end
    
    % Linear combinations are handled by the local function lincomb.
    M.lincomb = @lincomb;
    
    % Zero vector: all-zero L (m-by-k), S (k-by-k) and R (n-by-k). X is ignored.
    M.zerovec = @(X) struct('L', zeros(m, k), 'S', zeros(k, k), ...
        'R', zeros(n, k));
    
    % Transport of d to x2 is done by projecting d at x2; x1 is ignored.
    M.transp = @(x1, x2, d) projection(x2, d);
    
    % vec and mat are not isometries, because of the scaled inner metric.
    % vec stacks the entries in the order L, S, R; mat undoes that stacking
    % using the sizes m*k, k*k and (the remainder) n*k.
    M.vec = @(X, U) [U.L(:) ; U.S(:); U.R(:)];
    M.mat = @(X, u) struct('L', reshape(u(1:(m*k)), m, k), ...
        'S', reshape(u((m*k+1): m*k + k*k), k, k), ...
        'R', reshape(u((m*k+ k*k + 1):end), n, k));
    M.vecmatareisometries = @() false;
    
end

% Linear combination of tangent vectors.
function d = lincomb(x, a1, d1, a2, d2) %#ok<INUSL>
    % Linear combination of tangent vectors with fields L, R and S.
    %
    %   d = lincomb(x, a1, d1)          returns a1*d1
    %   d = lincomb(x, a1, d1, a2, d2)  returns a1*d1 + a2*d2
    %
    % The point x is part of the calling convention but is not used.
    % Any other number of inputs raises an error.
    
    if nargin == 3
        d.L = a1*d1.L;
        d.R = a1*d1.R;
        d.S = a1*d1.S;
    elseif nargin == 5
        d.L = a1*d1.L + a2*d2.L;
        d.R = a1*d1.R + a2*d2.R;
        d.S = a1*d1.S + a2*d2.S;
    else
        error('Bad use of fixedrankfactory_3factors.lincomb.');
    end
    
end

function A = uf(A)
    % Replaces A by P*Q', where A = P*S*Q' is the SVD returned by
    % svd(A, 0). The singular values themselves are discarded.
    [P, ignored, Q] = svd(A, 0); %#ok
    A = P*Q';
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/fixedrank/fixedrankfactory_3factors_preconditioned.m
================================================
function M = fixedrankfactory_3factors_preconditioned(m, n, k)
% Manifold of m-by-n matrices of rank k with three factor quotient geometry.
%
% function M = fixedrankfactory_3factors_preconditioned(m, n, k)
%
% This geometry is tuned to least squares problems such as low-rank matrix
% completion with ell-2 loss.
%
% A point X on the manifold is represented as a structure with three
% fields: L, S and R. The matrices L (mxk) and R (nxk) are orthonormal,
% while the matrix S (kxk) is a full rank matrix such that X = L*S*R'.
%
% Tangent vectors are represented as a structure with three fields: L, S
% and R.
%
% Please cite the Manopt paper as well as the research paper:
%     @InProceedings{mishra2014r3mc,
%       Title        = {{R3MC}: A {R}iemannian three-factor algorithm for low-rank matrix completion},
%       Author       = {Mishra, B. and Sepulchre, R.},
%       Booktitle    = {{53rd IEEE Conference on Decision and Control}},
%       Year         = {2014},
%       Organization = {{IEEE CDC}}
%     }
%
%
% See also: fixedrankfactory_3factors fixedrankfactory_2factors_preconditioned

% This file is part of Manopt: www.manopt.org.
% Original author: Bamdev Mishra, Dec. 30, 2012.
% Contributors:
% Change log:
%
%	April 04, 2015 (BM):
%       Cosmetic changes including avoiding storing the inverse of a kxk matrix.

    
    M.name = @() sprintf('LSR'' (tuned for least square problems) quotient manifold of %dx%d matrices of rank %d', m, n, k);
    
    M.dim = @() (m+n-k)*k;
    
    % Some precomputations at the point X that are to be used in the inner product (and
    % pretty much everywhere else).
    function X = prepare(X)
        % Caches X.SSt = S*S' and X.StS = S'*S on the point, unless both
        % fields are already present.
        already_cached = isfield(X, 'StS') && isfield(X, 'SSt');
        if ~already_cached
            X.SSt = X.S*X.S';
            X.StS = X.S'*X.S;
        end
    end
    
    % The choice of metric is motivated by symmetry and tuned to least square
    % objective function.
    M.inner = @iproduct;
    function ip = iproduct(X, eta, zeta)
        % Inner product of eta and zeta at X: the L part is weighted by
        % S*S', the R part by S'*S, and the S part is unweighted.
        X = prepare(X);
        
        ipL = trace(X.SSt*(eta.L'*zeta.L));
        ipR = trace(X.StS*(eta.R'*zeta.R));
        ipS = trace(eta.S'*zeta.S);
        
        ip = ipL + ipR + ipS;
    end
    
    M.norm = @(X, eta) sqrt(M.inner(X, eta, eta));
    
    M.dist = @(x, y) error('fixedrankfactory_3factors_preconditioned.dist not implemented yet.');
    
    M.typicaldist = @() 10*k;
    
    skew = @(X) .5*(X-X');
    symm = @(X) .5*(X+X');
    
    M.egrad2rgrad = @egrad2rgrad;
    function rgrad = egrad2rgrad(X, egrad)
        % Converts a Euclidean gradient (fields L, S, R) at X into the
        % Riemannian gradient for the scaled metric.
        X = prepare(X);
        
        % Right-hand sides of the two Lyapunov equations.
        rhsL = 2*symm(X.SSt*(egrad.S*X.S'));
        rhsR = 2*symm(X.StS*(egrad.S'*X.S));
        
        % Solved with the local SVD-based helper instead of Matlab's lyap.
        [BL, BR] = tangent_space_lyap(X.S, rhsL, rhsR);
        
        rgrad.L = (egrad.L - X.L*BL)/X.SSt;
        rgrad.R = (egrad.R - X.R*BR)/X.StS;
        rgrad.S = egrad.S;
        
        % Debug: the following quantities are expected to vanish.
        %         norm(skew(X.SSt*(rgrad.L'*X.L) + rgrad.S*X.S'), 'fro')
        %         norm(skew(X.StS*(rgrad.R'*X.R) - X.S'*rgrad.S), 'fro')
    end
    
    
    M.ehess2rhess = @ehess2rhess;
    function Hess = ehess2rhess(X, egrad, ehess, eta)
        % Builds the Riemannian Hessian along eta from Euclidean quantities.
        %
        % X     : point (fields L, S, R).
        % egrad : Euclidean gradient at X (fields L, S, R).
        % ehess : Euclidean Hessian term (fields L, S, R); presumably the
        %         Euclidean Hessian at X applied to eta -- confirm against
        %         the Manopt calling convention.
        % eta   : tangent vector at X (fields L, S, R).
        % Hess  : result, after a final call to M.proj(X, .).
        X = prepare(X);
        
        % Riemannian gradient (same computation as in egrad2rgrad).
        SSL = X.SSt;
        ASL = 2*symm(SSL*(egrad.S*X.S'));
        SSR = X.StS;
        ASR = 2*symm(SSR*(egrad.S'*X.S));
        [BL, BR] = tangent_space_lyap(X.S, ASL, ASR);
        
        rgrad.L = (egrad.L - X.L*BL)/X.SSt;
        rgrad.R = (egrad.R - X.R*BR)/X.StS;
        rgrad.S = egrad.S;
        
        % Directional derivative of the Riemannian gradient.
        ASLdot = 2*symm((2*symm(X.S*eta.S')*(egrad.S*X.S')) + X.SSt*(ehess.S*X.S' + egrad.S*eta.S')) - 4*symm(symm(eta.S*X.S')*BL);
        ASRdot = 2*symm((2*symm(X.S'*eta.S)*(egrad.S'*X.S)) + X.StS*(ehess.S'*X.S + egrad.S'*eta.S)) - 4*symm(symm(eta.S'*X.S)*BR);
        
        % Alternative (kept for reference) using Matlab's lyap:
        %         SSLdot = X.SSt;
        %         SSRdot = X.StS;
        %         BLdot = lyap(SSLdot, -ASLdot);
        %         BRdot = lyap(SSRdot, -ASRdot);
        
        [BLdot, BRdot] = tangent_space_lyap(X.S, ASLdot, ASRdot);
        
        Hess.L = (ehess.L - eta.L*BL - X.L*BLdot - 2*rgrad.L*symm(eta.S*X.S'))/X.SSt;
        Hess.R = (ehess.R - eta.R*BR - X.R*BRdot - 2*rgrad.R*symm(eta.S'*X.S))/X.StS;
        Hess.S = ehess.S;
        
        
        % BM: Till this, everything seems correct.
        % We still need a correction factor for the non-constant metric
        % that is imposed.
        % The computation of the correction factor owes itself to the Koszul formula.
        % This corresponds to the Riemannian connection in the Euclidean space with the
        % scaled metric.
        Hess.L = Hess.L + (eta.L*symm(rgrad.S*X.S') + rgrad.L*symm(eta.S*X.S'))/X.SSt;
        Hess.R = Hess.R + (eta.R*symm(rgrad.S'*X.S) + rgrad.R*symm(eta.S'*X.S))/X.StS;
        Hess.S = Hess.S - symm(rgrad.L'*eta.L)*X.S - X.S*symm(rgrad.R'*eta.R);
        
        % The Riemannian connection on the quotient space is the
        % projection of the Riemannian connection in the ambient space onto the tangent space of the total space and
        % then onto the horizontal space. 
        % This is accomplished by the following operation.
        Hess = M.proj(X, Hess);
        
        % Debug
        %         norm(skew(X.SSt*(Hess.L'*X.L) + Hess.S*X.S'))
        %         norm(skew(X.StS*(Hess.R'*X.R) - X.S'*Hess.S))
        
    end
    
    
    M.proj = @projection;
    function etaproj = projection(X, eta)
        % Projects eta (fields L, S, R) at the point X, first onto the
        % tangent space of the total space and then onto the horizontal
        % space.
        %
        % X       : point (fields L, S, R); SSt and StS are cached by prepare.
        % eta     : vector to project (fields L, S, R).
        % etaproj : projected vector (fields L, S, R).
        X = prepare(X);
        
        % First, projection onto the tangent space of the total space.
        % BL and BR solve
        %     SSt*BL + BL*SSt = ASL   and   StS*BR + BR*StS = ASR.
        % Both equations are handled by tangent_space_lyap (one SVD of X.S),
        % as in egrad2rgrad and ehess2rhess, so that this file does not
        % need Matlab's lyap.
        ASL = 2*symm(X.SSt*(X.L'*eta.L)*X.SSt);
        ASR = 2*symm(X.StS*(X.R'*eta.R)*X.StS);
        [BL, BR] = tangent_space_lyap(X.S, ASL, ASR);
        eta.L = eta.L - X.L*(BL/X.SSt);
        eta.R = eta.R - X.R*(BR/X.StS);
        
        % Project onto the horizontal space: Omega1 and Omega2 solve a
        % coupled pair of Lyapunov equations with skew right-hand sides.
        PU = skew((X.L'*eta.L)*X.SSt) + skew(X.S*eta.S');
        PV = skew((X.R'*eta.R)*X.StS)  + skew(X.S'*eta.S);
        [Omega1, Omega2] = coupled_lyap(X.S, PU, PV);
        %         norm(2*skew(Omega1*X.SSt) - PU -(X.S*Omega2*X.S'),'fro' )
        %         norm(2*skew(Omega2*X.StS) - PV -(X.S'*Omega1*X.S),'fro' )
        
        etaproj.L = eta.L - (X.L*Omega1);
        etaproj.S = eta.S - (X.S*Omega2 - Omega1*X.S) ;
        etaproj.R = eta.R - (X.R*Omega2);
        
        
        % Debug
        %         norm(skew(X.SSt*(etaproj.L'*X.L) + etaproj.S*X.S'))
        %         norm(skew(X.StS*(etaproj.R'*X.R) - X.S'*etaproj.S))
        %
        %         norm(skew(X.SSt*(etaproj.L'*X.L) - X.S*etaproj.S'))
        %         norm(skew(X.StS*(etaproj.R'*X.R) + etaproj.S'*X.S))
        
    end
    
    
    M.tangent = M.proj;
    M.tangent2ambient = @(X, eta) eta;
    
    M.retr = @retraction;
    function Y = retraction(X, eta, t)
        % Retraction of t*eta at X (t defaults to 1): S moves along a
        % straight line, L and R are updated with uf, and the products
        % cached by prepare are attached to the result.
        if nargin < 3
            t = 1.0;
        end
        
        Y.S = X.S + t*eta.S;
        orth_factors = {'L', 'R'};
        for idx = 1:numel(orth_factors)
            name = orth_factors{idx};
            Y.(name) = uf(X.(name) + t*eta.(name));
        end
        
        Y = prepare(Y);
    end
    
    M.exp = @exponential;
    function Y = exponential(X, eta, t)
        % Stand-in for the exponential map: returns retraction(X, eta, t)
        % (t defaults to 1) and warns that the retraction was used.
        if nargin < 3
            t = 1.0;
        end
        Y = retraction(X, eta, t);
        
        msg = ['Exponential for fixed rank ' ...
            'manifold not implemented yet. Used retraction instead.'];
        warning('manopt:fixedrankfactory_3factors_preconditioned:exp', msg);
    end
    
    M.hash = @(X) ['z' hashmd5([X.L(:) ; X.S(:) ; X.R(:)])];
    
    M.rand = @random;
    % Factors L and R live on Stiefel manifolds, hence we will reuse
    % their random generator.
    stiefelm = stiefelfactory(m, k);
    stiefeln = stiefelfactory(n, k);
    function X = random()
        % Random point: L and R from the two Stiefel generators, S a
        % diagonal matrix with entries drawn as 1 + rand. The products
        % cached by prepare are attached before returning.
        X = prepare(struct( ...
            'L', stiefelm.rand(), ...
            'R', stiefeln.rand(), ...
            'S', diag(1+rand(k, 1))));
    end
    
    M.randvec = @randomvec;
    function eta = randomvec(X)
        % Random vector at X with unit M.norm: Gaussian draw for each
        % factor, passed through projection(X, .), then rescaled.
        draw.L = randn(m, k);
        draw.R = randn(n, k);
        draw.S = randn(k, k);
        eta = projection(X, draw);
        
        scale = M.norm(X, eta);
        eta.L = eta.L / scale;
        eta.R = eta.R / scale;
        eta.S = eta.S / scale;
    end
    
    % Linear combinations are handled by the local function lincomb.
    M.lincomb = @lincomb;
    
    % Zero vector: all-zero L (m-by-k), S (k-by-k) and R (n-by-k). X is ignored.
    M.zerovec = @(X) struct('L', zeros(m, k), 'S', zeros(k, k), ...
        'R', zeros(n, k));
    
    % Transport of d to x2 is done by projecting d at x2; x1 is ignored.
    M.transp = @(x1, x2, d) projection(x2, d);
    
    % vec and mat are not isometries, because of the unusual inner metric.
    % vec stacks the entries in the order L, S, R; mat undoes that stacking
    % using the sizes m*k, k*k and (the remainder) n*k.
    M.vec = @(X, U) [U.L(:) ; U.S(:); U.R(:)];
    M.mat = @(X, u) struct('L', reshape(u(1:(m*k)), m, k), ...
        'S', reshape(u((m*k+1): m*k + k*k), k, k), ...
        'R', reshape(u((m*k+ k*k + 1):end), n, k));
    M.vecmatareisometries = @() false;
    
end

% Linear combination of tangent vectors
function d = lincomb(x, a1, d1, a2, d2) %#ok<INUSL>
    % Linear combination of tangent vectors with fields L, R and S:
    % a1*d1 for three inputs, a1*d1 + a2*d2 for five inputs.
    % The point x is not used.
    
    switch nargin
        case 3
            d.L = a1*d1.L;
            d.R = a1*d1.R;
            d.S = a1*d1.S;
        case 5
            d.L = a1*d1.L + a2*d2.L;
            d.R = a1*d1.R + a2*d2.R;
            d.S = a1*d1.S + a2*d2.S;
        otherwise
            error('Bad use of fixedrankfactory_3factors_preconditioned.lincomb.');
    end
    
end

function A = uf(A)
    % Replaces A by P*Q', where A = P*S*Q' is the SVD returned by
    % svd(A, 0). The singular values themselves are discarded.
    [P, ignored, Q] = svd(A, 0); %#ok
    A = P*Q';
end

function[BU, BV] = tangent_space_lyap(R, E, F)
    % Solves the two linear systems
    %
    %     R*R' * BU + BU * R*R' = E
    %     R'*R * BV + BV * R'*R = F
    %
    % for BU and BV, with R a square matrix.
    %
    % Two calls to Matlab's lyap would also do the job. Instead, a single
    % SVD of R diagonalizes both systems, which then reduce to an
    % entrywise division: no need for the full functionality of lyap.
    
    [U, Sigma, V] = svd(R);
    r = size(Sigma, 1);
    
    % denom(i, j) = sigma_i^2 + sigma_j^2, sigma being the singular values.
    sig_rows = diag(Sigma)*ones(1, r); % entry (i, j) is sigma_i
    denom = sig_rows.^2 + (sig_rows').^2;
    
    % Solve in the singular bases, then map back.
    BU = U*((U'*E*U)./denom)*U';
    BV = V*((V'*F*V)./denom)*V';
    
    % Debug: residuals that should be (numerically) zero.
    %     norm(R*R'*BU + BU*R*R' - E, 'fro')
    %     norm(R'*R*BV + BV*R'*R - F, 'fro')
    % Reference solutions with lyap, for comparison:
    %     BU1 = lyap(R*R', -E);   norm(BU - BU1, 'fro')
    %     BV1 = lyap(R'*R, -F);   norm(BV - BV1, 'fro')
end


function[Omega1, Omega2] = coupled_lyap(R, E, F)
    % Solves the coupled system of Lyapunov equations
    %
    %     R*R' * Omega1 + Omega1 * R*R' - R  * Omega2 * R' = E
    %     R'*R * Omega2 + Omega2 * R'*R - R' * Omega1 * R  = F
    %
    % for Omega1 and Omega2, with R a square matrix. The callers in this
    % file pass skew-symmetric E and F.
    %
    % A single SVD of R decouples the system into independent 2-by-2
    % problems, one per matrix entry, which are solved in closed form.
    
    [U, Sigma, V] = svd(R);
    r = size(Sigma, 1);
    
    % Right-hand sides expressed in the singular bases.
    rhs1 = U'*E*U;
    rhs2 = V'*F*V;
    
    % Entrywise coefficients:
    %   a(i, j) = sigma_i^2 + sigma_j^2,   c(i, j) = sigma_i*sigma_j.
    sig_rows = diag(Sigma)*ones(1, r); % entry (i, j) is sigma_i
    a = sig_rows.^2 + (sig_rows').^2;
    c = sig_rows.*sig_rows';
    
    % Per entry: a*w1 - c*w2 = rhs1 and a*w2 - c*w1 = rhs2.
    % Eliminating w2 gives the formula for w1; w2 follows by substitution.
    W1 = (rhs2 + (a./c).*rhs1) ./ ((a.^2)./c - c);
    W2 = (rhs2 + c.*W1)./a;
    
    % Back to the original bases.
    Omega1 = U*W1*U';
    Omega2 = V*W2*V';
    
    % Debug: residuals that should be (numerically) zero.
    %     norm(R*R'*Omega1 + Omega1*R*R'  - R*Omega2*R' - E, 'fro')
    %     norm(R'*R*Omega2 + Omega2*R'*R  - R'*Omega1*R - F, 'fro')
end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/fixedranktensors/fixedrankfactory_tucker_preconditioned.m
================================================
function M = fixedrankfactory_tucker_preconditioned(tensor_size, tensor_rank)
% Manifold of fixed multilinear rank tensors in Tucker format.
%
% function M = fixedrankfactory_tucker_preconditioned(tensor_size, tensor_rank)
%
% n1 = tensor_size(1);
% n2 = tensor_size(2);
% n3 = tensor_size(3);
% r1 = tensor_rank(1);
% r2 = tensor_rank(2);
% r3 = tensor_rank(3);
%
% A point X on the manifold is represented as a structure with four
% fields: U1, U2, U3 and G. The matrices U1 (n1-by-r1), U2 (n2-by-r2),
% and U3 (n3-by-r3) are orthogonal matrices. G (r1-by-r2-by-r3) is a 
% multidimensional array.
%
% Tangent vectors are represented as a structure with four fields: 
% U1, U2, U3, and G.
%
% We exploit the quotient nature of Tucker decompositions to impose a
% scaled inner product on the manifold. This suits least-squares problems.
% For details, refer to the technical report:
% "{R}iemannian preconditioning for tensor completion",
% H. Kasai and B. Mishra, Arxiv preprint arXiv:1506.02159, 2015.
%
% Paper link: http://arxiv.org/abs/1506.02159.
%
% Please cite the Manopt paper as well as the research paper:
%     @TechReport{kasai2015precon,
%       Title   = {{R}iemannian preconditioning for tensor completion},
%       Author  = {Kasai, H. and Mishra, B.},
%       Journal = {Arxiv preprint arXiv:1506.02159},
%       Year    = {2015}
%     }

% Original authors: Hiroyuki Kasai and Bamdev Mishra, June 5, 2015.
% Contributors: 
% Change log:

    if length(tensor_rank) > 3
        error('Bad usage of fixedrankfactory_tucker_preconditioned. Currently, only handles 3-order tensors.');
    end
    
    % Tensor size
    n1 = tensor_size(1);
    n2 = tensor_size(2);
    n3 = tensor_size(3);
    
    % Core size or multilinear rank
    r1 = tensor_rank(1);
    r2 = tensor_rank(2);
    r3 = tensor_rank(3);
    
    
    speyer1 = speye(r1); % Sparse version of identity that is used in M.proj
    speyer2 = speye(r2);
    speyer3 = speye(r3);
    

    M.name = @() sprintf('G x U1 x U2 x U3 quotient Tucker manifold of %d-by-%d-by-%d tensor of rank %d-by-%d-by-%d.', n1, n2, n3, r1, r2, r3);
    
    M.dim = @() n1*r1-r1^2 + n2*r2-r2^2 + n3*r3-r3^2 + r1*r2*r3;
    
    % Some precomputations at point X to be used in the inner product (and
    % pretty much everywhere else)
    function X = prepare(X)
        % Caches the three unfoldings of the core X.G (fields G1, G2, G3)
        % and their Gram matrices (G1G1t, G2G2t, G3G3t) on the point,
        % unless all six fields are already present.
        cached_fields = {'G1G1t', 'G1', 'G2G2t', 'G2', 'G3G3t', 'G3'};
        if all(isfield(X, cached_fields))
            return;
        end
        
        % Mode-1 unfolding and its Gram matrix.
        X.G1 = reshape(X.G, r1, r2*r3);
        X.G1G1t = X.G1*X.G1';
        
        % Mode-2 unfolding and its Gram matrix.
        X.G2 = reshape(permute(X.G, [2 1 3]), r2, r1*r3);
        X.G2G2t = X.G2*X.G2';
        
        % Mode-3 unfolding and its Gram matrix.
        X.G3 = reshape(permute(X.G, [3 1 2]), r3, r1*r2);
        X.G3G3t = X.G3*X.G3';
    end
    
    % Choice of metric is motivated by symmetry and tuned to least-squares
    % cost function
    M.inner = @iproduct;
    function ip = iproduct(X, eta, zeta)
        % Inner product of eta and zeta at X: each U-factor part is
        % weighted by the Gram matrix of the matching core unfolding,
        % while the core part is unweighted.
        X = prepare(X);
        
        ipU1 = trace(X.G1G1t*(eta.U1'*zeta.U1));
        ipU2 = trace(X.G2G2t*(eta.U2'*zeta.U2));
        ipU3 = trace(X.G3G3t*(eta.U3'*zeta.U3));
        ipG = eta.G(:)'*zeta.G(:);
        
        ip = ipU1 + ipU2 + ipU3 + ipG;
    end
    M.norm = @(X, eta) sqrt(M.inner(X, eta, eta));
    
    M.dist = @(x, y) error('fixedrankfactory_tucker_preconditioned.dist not implemented yet.');
    
    M.typicaldist = @() 10*n1*r1; % BM: To do  
    
    skew = @(X) .5*(X-X');
    symm = @(X) .5*(X+X');
    
    M.egrad2rgrad = @egrad2rgrad;
    function rgrad = egrad2rgrad(X, egrad)
        % Converts a Euclidean gradient (fields U1, U2, U3, G) at X into
        % the Riemannian gradient for the scaled metric.
        X = prepare(X); % Reuse already computed terms
        
        % Right-hand sides of the three Lyapunov equations.
        rhs1 = 2*symm(X.G1G1t*(X.U1' * egrad.U1));
        rhs2 = 2*symm(X.G2G2t*(X.U2' * egrad.U2));
        rhs3 = 2*symm(X.G3G3t*(X.U3' * egrad.U3));
        
        BU1 = lyap(X.G1G1t, -rhs1);
        BU2 = lyap(X.G2G2t, -rhs2);
        BU3 = lyap(X.G3G3t, -rhs3);
        
        % With these lyap solutions, the Riemannian gradient lies on the
        % tangent space. By Riemannian submersion theory it also belongs
        % to the horizontal space, so no further projection is applied.
        rgrad.U1 = (egrad.U1 - X.U1*BU1)/X.G1G1t;
        rgrad.U2 = (egrad.U2 - X.U2*BU2)/X.G2G2t;
        rgrad.U3 = (egrad.U3 - X.U3*BU3)/X.G3G3t;
        rgrad.G = egrad.G;
    end
    
    
    M.ehess2rhess = @ehess2rhess;
    function Hess = ehess2rhess(X, egrad, ehess, eta) 
        % Builds the Riemannian Hessian along eta from Euclidean quantities.
        %
        % X     : point (fields U1, U2, U3, G).
        % egrad : Euclidean gradient at X (fields U1, U2, U3, G).
        % ehess : Euclidean Hessian term (fields U1, U2, U3, G); presumably
        %         the Euclidean Hessian at X applied to eta -- confirm
        %         against the Manopt calling convention.
        % eta   : tangent vector at X (fields U1, U2, U3, G).
        % Hess  : result, after a final call to M.proj(X, .).
        X = prepare(X); % Reuse already computed terms
        
        % Riemannian gradient (same computation as in egrad2rgrad)
        SSU1 = X.G1G1t;
        ASU1 = 2*symm(SSU1*(X.U1' * egrad.U1));
        SSU2 = X.G2G2t;
        ASU2 = 2*symm(SSU2*(X.U2' * egrad.U2));
        SSU3 = X.G3G3t;
        ASU3 = 2*symm(SSU3*(X.U3' * egrad.U3));
        
        BU1 = lyap(SSU1, -ASU1);
        BU2 = lyap(SSU2, -ASU2);
        BU3 = lyap(SSU3, -ASU3);
        
        rgrad.U1 = (egrad.U1 - X.U1*BU1)/X.G1G1t;
        rgrad.U2 = (egrad.U2 - X.U2*BU2)/X.G2G2t;
        rgrad.U3 = (egrad.U3 - X.U3*BU3)/X.G3G3t;
        rgrad.G = egrad.G;
        
        % Directional derivative of Riemannian gradient
        
        % Unfoldings of the core components of eta, egrad, ehess and rgrad,
        % built with the same reshape/permute patterns as in prepare.
        eta_G1 = reshape(eta.G, r1, r2*r3); % double(tenmat(eta.G,1));
        eta_G2 = reshape(permute(eta.G, [2 1 3]), r2, r1*r3); % double(tenmat(eta.G,2));
        eta_G3 = reshape(permute(eta.G, [3 1 2]), r3, r1*r2); % double(tenmat(eta.G,3));
        egrad_G1 = reshape(egrad.G, r1, r2*r3); % double(tenmat(egrad.G,1));
        egrad_G2 = reshape(permute(egrad.G, [2 1 3]), r2, r1*r3); % double(tenmat(egrad.G,2));
        egrad_G3 = reshape(permute(egrad.G, [3 1 2]), r3, r1*r2); % double(tenmat(egrad.G,3));
        ehess_G1 = reshape(ehess.G, r1, r2*r3); % double(tenmat(ehess.G,1));
        ehess_G2 = reshape(permute(ehess.G, [2 1 3]), r2, r1*r3); % double(tenmat(ehess.G,2));
        ehess_G3 = reshape(permute(ehess.G, [3 1 2]), r3, r1*r2); % double(tenmat(ehess.G,3));
        rgrad_G1 = reshape(rgrad.G, r1, r2*r3); % double(tenmat(rgrad.G,1));
        rgrad_G2 = reshape(permute(rgrad.G, [2 1 3]), r2, r1*r3); % double(tenmat(rgrad.G,2));
        rgrad_G3 = reshape(permute(rgrad.G, [3 1 2]), r3, r1*r2); % double(tenmat(rgrad.G,3));
        
        % Right-hand sides for the derivatives of BU1, BU2 and BU3.
        ASU1dot = 2*symm((2*symm(X.G1*eta_G1')*(egrad_G1*X.G1')) + X.G1G1t*(ehess_G1*X.G1' + egrad_G1*eta_G1')) - 4*symm(symm(eta_G1*X.G1')*BU1);
        ASU2dot = 2*symm((2*symm(X.G2*eta_G2')*(egrad_G2*X.G2')) + X.G2G2t*(ehess_G2*X.G2' + egrad_G2*eta_G2')) - 4*symm(symm(eta_G2*X.G2')*BU2);
        ASU3dot = 2*symm((2*symm(X.G3*eta_G3')*(egrad_G3*X.G3')) + X.G3G3t*(ehess_G3*X.G3' + egrad_G3*eta_G3')) - 4*symm(symm(eta_G3*X.G3')*BU3);
        
        
        % Same coefficient matrices as for BU1, BU2 and BU3 above.
        SSU1dot = X.G1G1t;
        SSU2dot = X.G2G2t;
        SSU3dot = X.G3G3t;
        BU1dot = lyap(SSU1dot, -ASU1dot);
        BU2dot = lyap(SSU2dot, -ASU2dot);
        BU3dot = lyap(SSU3dot, -ASU3dot);
        
        
        Hess.U1 = (ehess.U1 - eta.U1*BU1 - X.U1*BU1dot - 2*rgrad.U1*symm(eta_G1*X.G1'))/X.G1G1t;
        Hess.U2 = (ehess.U2 - eta.U2*BU2 - X.U2*BU2dot - 2*rgrad.U2*symm(eta_G2*X.G2'))/X.G2G2t;
        Hess.U3 = (ehess.U3 - eta.U3*BU3 - X.U3*BU3dot - 2*rgrad.U3*symm(eta_G3*X.G3'))/X.G3G3t;
        Hess.G = ehess.G;
        
        
        % BM: we need a correction factor for the non-constant metric
        % The correction factor owes itself to the Koszul formula.
        % This is the Riemannian connection in the Euclidean space with the
        % scaled metric.
        
        
        Hess.U1 = Hess.U1 + (eta.U1*symm(rgrad_G1*X.G1') + rgrad.U1*symm(eta_G1*X.G1'))/X.G1G1t;
        Hess.U2 = Hess.U2 + (eta.U2*symm(rgrad_G2*X.G2') + rgrad.U2*symm(eta_G2*X.G2'))/X.G2G2t;
        Hess.U3 = Hess.U3 + (eta.U3*symm(rgrad_G3*X.G3') + rgrad.U3*symm(eta_G3*X.G3'))/X.G3G3t;
        % Each term below is folded back into an r1-by-r2-by-r3 array; the
        % first permute, with order [1 2 3], leaves its argument unchanged.
        Hess.G = Hess.G  - permute(reshape(symm(rgrad.U1'*eta.U1)*X.G1,r1,r2,r3), [1 2 3]) ...
            - permute(reshape(symm(rgrad.U2'*eta.U2)*X.G2,r2,r1,r3), [2 1 3]) ...
            - permute(reshape(symm(rgrad.U3'*eta.U3)*X.G3,r3,r1,r2), [2 3 1]);
        
        % The Riemannian connection on the quotient space is the
        % projection on the tangent space of the total space and then onto the horizontal
        % space. This is accomplished with the following operation.
        
        Hess = M.proj(X, Hess);
        
        
    end
    
    
    M.proj = @projection;
    function etaproj = projection(X, eta)
        X = prepare(X); % Reuse already computed terms
        
        % First, projection onto tangent space of total space
        SSU1 = X.G1G1t;
        ASU1 = 2*symm(X.G1G1t*(X.U1'*eta.U1)*X.G1G1t);
        BU1 = lyap(SSU1, -ASU1);
        eta.U1 = eta.U1 - X.U1*(BU1/X.G1G1t);
        
        SSU2 = X.G2G2t;
        ASU2 = 2*symm(X.G2G2t*(X.U2'*eta.U2)*X.G2G2t);
        BU2 = lyap(SSU2, -ASU2);
        eta.U2 = eta.U2 - X.U2*(BU2/X.G2G2t);
        
        SSU3 = X.G3G3t;
        ASU3 = 2*symm(X.G3G3t*(X.U3'*eta.U3)*X.G3G3t);
        BU3 = lyap(SSU3, -ASU3);
        eta.U3 = eta.U3 - X.U3*(BU3/X.G3G3t);
        

        eta_G1 = reshape(eta.G, r1, r2*r3); 
        eta_G2 = reshape(permute(eta.G, [2 1 3]), r2, r1*r3); 
        eta_G3 = reshape(permute(eta.G, [3 1 2]), r3, r1*r2);
        
        
        % Project onto the horizontal space.
        PU1 = skew((X.U1'*eta.U1)*X.G1G1t) + skew(X.G1*eta_G1');
        PU2 = skew((X.U2'*eta.U2)*X.G2G2t) + skew(X.G2*eta_G2');
        PU3 = skew((X.U3'*eta.U3)*X.G3G3t) + skew(X.G3*eta_G3');
        
        % Calculate Omega1, Omega2, Omega3 that are required in finding the
        % horizontal component. 
        % We use the Matlab's pcg function to solve the system efficiently.
        % We exploit the structure by designing a good preconditioner as well.
        % The preconditioner takes the block positive definite part of the
        % linear system.
        
        % Options for PCG
        tol_omegax_pcg = 1e-6; % BM: standard tolerance as suggested in PCG.
        max_iterations_pcg = 15;% BM: fix this to 15 for simulations. In practice, it requires 7 to 10 iteraions.
        
        % Preconditioner for PCG
        M1 = kron(speyer1,SSU1) + kron(SSU1, speyer1);
        M2 = kron(speyer2,SSU2) + kron(SSU2, speyer2);
        M3 = kron(speyer3,SSU3) + kron(SSU3, speyer3);
        
        Mprecon_pcg = sparse(zeros(r1^2 + r2^2 + r3^2));
        Mprecon_pcg(1 : r1^2, 1 : r1^2 ) = M1;
        Mprecon_pcg(1 + r1^2 : r1^2 + r2^2, 1 + r1^2 : r1^2 + r2^2) = M2;
        Mprecon_pcg(1 + r1^2 + r2^2 : end, 1 + r1^2 + r2^2 : end) = M3;
        
        % Call PCG
        [Omegaxsol, unused] = pcg(@compute_residual, [PU1(:); PU2(:); PU3(:)],  tol_omegax_pcg, max_iterations_pcg, Mprecon_pcg);
        
        Omega1 = reshape(Omegaxsol(1:r1^2), r1, r1);
        Omega2 = reshape(Omegaxsol(1 + r1^2 : r1^2 + r2^2), r2, r2);
        Omega3 = reshape(Omegaxsol(1 + r1^2 + r2^2 : end), r3, r3);
            
        function AOmegax = compute_residual(Omegax)
            % Linear operator handed to pcg (see the pcg call above).
            %
            % Despite its name, this function does not subtract a right-hand
            % side: pcg expects a handle returning the system matrix applied
            % to its argument, and that is what is computed here.
            %
            % Input:
            %   Omegax  - column vector of length r1^2 + r2^2 + r3^2 holding
            %             the three square unknowns stacked as
            %             [Omegax1(:); Omegax2(:); Omegax3(:)].
            % Output:
            %   AOmegax - column vector with the same stacking, containing
            %             the operator applied to (Omegax1, Omegax2, Omegax3).
            %
            % r1, r2, r3, X, SSU1..SSU3 and speyer1..speyer3 come from the
            % enclosing scope. NOTE(review): speyer1..3 are presumably
            % identity matrices of size r1, r2, r3 -- they are defined
            % outside this view, confirm.

            % Unstack the vector into the three square blocks.
            Omegax1 = reshape(Omegax(1:r1^2), r1, r1);
            Omegax2 = reshape(Omegax(1 + r1^2 : r1^2 + r2^2), r2, r2);
            Omegax3 = reshape(Omegax(1 + r1^2 + r2^2 : end), r3, r3);
            
            % Coupling terms: each block equation involves the other two
            % unknowns through Kronecker products sandwiched between the
            % matrices X.G1, X.G2, X.G3 and their transposes.
            OffsetU1 = X.G1*((kron(speyer3,Omegax2) + kron(Omegax3, speyer2))*X.G1');
            OffsetU2 = X.G2*((kron(speyer3,Omegax1) + kron(Omegax3, speyer1))*X.G2');
            OffsetU3 = X.G3*((kron(speyer2,Omegax1) + kron(Omegax2, speyer1))*X.G3');
            
            % Block-diagonal (Sylvester-like) part minus the coupling part.
            % The block-diagonal part is what the preconditioner M1, M2, M3
            % above is built from.
            residual1 = Omegax1*SSU1 + SSU1*Omegax1 - OffsetU1;
            residual2 = Omegax2*SSU2 + SSU2*Omegax2 - OffsetU2;
            residual3 = Omegax3*SSU3 + SSU3*Omegax3 - OffsetU3;
            
            % Restack in the same order as the input.
            AOmegax = [residual1(:); residual2(:); residual3(:)];
        end
        
        
        % Calculate projection along U1, U2, and U3
        etaproj.U1 = eta.U1 - (X.U1*Omega1);
        etaproj.U2 = eta.U2 - (X.U2*Omega2);
        etaproj.U3 = eta.U3 - (X.U3*Omega3);
        
        % Calculate projection along G
        GOmega1 = reshape(Omega1*X.G1, r1, r2, r3);
        GOmega2 = permute(reshape(Omega2*X.G2, r2, r1, r3), [2 1 3]);
        GOmega3 = permute(reshape(Omega3*X.G3, r3, r1, r2), [2 3 1]); 
        etaproj.G = eta.G -(-(GOmega1+GOmega2+GOmega3));
        
    end
    
    
    % Mapping a vector to the tangent (horizontal) space reuses the
    % projection registered in M.proj.
    M.tangent = M.proj;
    % Tangent vectors are already stored in ambient coordinates, so the
    % conversion to the ambient space is the identity on eta.
    M.tangent2ambient = @(X, eta) eta;
    
    M.retr = @retraction;
    function Y = retraction(X, eta, t)
        % Retraction: moves away from X along the direction t*eta.
        %
        %   X   - current point, struct with fields U1, U2, U3 and G.
        %   eta - direction, struct with the same four fields.
        %   t   - optional step size; a full step (t = 1) is taken if omitted.
        %
        % Returns the new point Y, passed through prepare() like every
        % other point produced in this file.
        if nargin <= 2
            t = 1.0;
        end
        
        % The core G is updated linearly. Each factor is updated linearly
        % too and then replaced by the U factor of its polar factorization
        % (see uf at the bottom of the file).
        Y = struct('G',  X.G + t*eta.G, ...
                   'U1', uf(X.U1 + t*eta.U1), ...
                   'U2', uf(X.U2 + t*eta.U2), ...
                   'U3', uf(X.U3 + t*eta.U3));
        
        Y = prepare(Y);
    end
    
    M.exp = @exponential;
    function Y = exponential(X, eta, t)
        % Stand-in for the exponential map.
        %
        % The true exponential is not implemented for this geometry: the
        % retraction is evaluated instead (with t = 1 when no step size is
        % given) and a warning is issued on every call to say so.
        if nargin >= 3
            Y = retraction(X, eta, t);
        else
            Y = retraction(X, eta, 1.0);
        end
        warning('manopt:fixedrankfactory_tucker_preconditioned:exp', ...
            ['Exponential for fixed rank ' ...
            'Tucker manifold not implemented yet. Used retraction instead.']);
    end
    
    % Hash of a point: 'z' followed by the MD5 hash of only four numbers,
    % namely the sums of the entries of U1, U2, U3 and G. This is cheap, but
    % two different points whose four sums coincide get the same hash. The
    % commented-out alternative below hashes every entry instead.
    M.hash = @(X) ['z' hashmd5([sum(X.U1(:)) ; sum(X.U2(:)); sum(X.U3(:)); sum(X.G(:)) ])]; % Efficient, suggested by Bart Vandereycken.
    % M.hash = @(X) ['z' hashmd5([X.U1(:); X.U2(:); X.U3(:); X.G(:)])];
    
    M.rand = @random;
    function X = random()
        % Random point, generated from entries uniform on [0, 1].
        %
        % Three tall uniform random matrices are QR-factorized. The
        % orthonormal Q factors become U1, U2 and U3, and the triangular
        % R factors are multiplied into a uniform random core along modes
        % 1, 2 and 3 respectively to form G.
        %
        % An alternative that was left disabled in the original code draws
        % U1, U2, U3 with the random generators of stiefelfactory(n_i, r_i)
        % and takes G = randn(r1, r2, r3).
        
        % Draw order matters for reproducibility: factors first, core last.
        [orth1, tri1] = qr(rand(n1, r1), 0);
        [orth2, tri2] = qr(rand(n2, r2), 0);
        [orth3, tri3] = qr(rand(n3, r3), 0);
        coreG = rand(r1, r2, r3);
        
        % Mode-1 multiplication by tri1: unfold along mode 1, multiply,
        % fold back.
        coreG = reshape(tri1*reshape(coreG, r1, r2*r3), r1, r2, r3);
        
        % Mode-2 multiplication by tri2: bring mode 2 to the front,
        % multiply, then undo the permutation.
        coreG = permute(coreG, [2 1 3]);
        coreG = reshape(tri2*reshape(coreG, r2, r1*r3), r2, r1, r3);
        coreG = permute(coreG, [2 1 3]);
        
        % Mode-3 multiplication by tri3: same idea with mode 3 in front.
        coreG = permute(coreG, [3 1 2]);
        coreG = reshape(tri3*reshape(coreG, r3, r1*r2), r3, r1, r2);
        coreG = permute(coreG, [2 3 1]);
        
        % Assemble the point and compute the terms that are used
        % subsequently.
        X = prepare(struct('U1', orth1, 'U2', orth2, 'U3', orth3, ...
                           'G', coreG));
        
    end
    
    M.randvec = @randomvec;
    function eta = randomvec(X)
        % Random horizontal vector at X with unit norm in the sense of
        % M.norm.
        
        % Gaussian ambient vector; the draw order U1, U2, U3, G is kept so
        % that results are reproducible for a given random seed.
        eta.U1 = randn(n1, r1);
        eta.U2 = randn(n2, r2);
        eta.U3 = randn(n3, r3);
        eta.G  = randn(r1, r2, r3);
        
        % Project, then scale each of the four components by the norm
        % measured in the manifold's own metric.
        eta = projection(X, eta);
        nrm = M.norm(X, eta);
        for fieldCell = {'U1', 'U2', 'U3', 'G'}
            fieldName = fieldCell{1};
            eta.(fieldName) = eta.(fieldName) / nrm;
        end
    end
    
    % Linear combinations are computed field by field by the local function
    % lincomb defined at the end of this file.
    M.lincomb = @lincomb;
    
    % Zero vector: all four components are zero arrays of the right sizes
    % (the point X is not used).
    M.zerovec = @(X) struct('U1', zeros(n1, r1), 'U2', zeros(n2, r2), ...
        'U3', zeros(n3, r3), 'G', zeros(r1, r2, r3));
    
    % Transport of d to x2 is done by projecting d at x2; the origin point
    % x1 is not used.
    M.transp = @(x1, x2, d) projection(x2, d);
    
    % vec and mat are not isometries, because of the scaled metric.
    % vec stacks the components in the order U1, U2, U3, G into one column
    % vector (its second argument, named U1, is the whole tangent struct).
    % mat inverts vec: it slices the vector at the offsets n1*r1,
    % n1*r1 + n2*r2 and n1*r1 + n2*r2 + n3*r3 and reshapes each piece.
    M.vec = @(X, U1) [U1.U1(:); U1.U2(:); U1.U3(:); U1.G(:)];
    M.mat = @(X, u) struct ...
        ('U1', reshape(u(1  : n1*r1), n1, r1), ...
        'U2', reshape(u(n1*r1 + 1 : n1*r1 + n2*r2), n2, r2), ...
        'U3', reshape(u(n1*r1 + n2*r2 + 1 : n1*r1 + n2*r2 + n3*r3), n3, r3), ...
        'G', reshape(u(n1*r1 + n2*r2 + n3*r3 + 1 : end), r1, r2, r3));
    M.vecmatareisometries = @() false;
    
end

% Linear combination of tangent vectors
function d = lincomb(X, a1, d1, a2, d2) %#ok<INUSL>
% Field-wise linear combination of tangent vectors in Tucker format.
%
%   d = lincomb(X, a1, d1)          returns a1*d1
%   d = lincomb(X, a1, d1, a2, d2)  returns a1*d1 + a2*d2
%
% d1 and d2 are structs with fields U1, U2, U3 and G; a1 and a2 are
% scalars. X is accepted for interface compatibility but not used. Any
% other number of input arguments raises an error.
    
    switch nargin
        case 3
            d = struct('U1', a1*d1.U1, ...
                       'U2', a1*d1.U2, ...
                       'U3', a1*d1.U3, ...
                       'G',  a1*d1.G);
        case 5
            d = struct('U1', a1*d1.U1 + a2*d2.U1, ...
                       'U2', a1*d1.U2 + a2*d2.U2, ...
                       'U3', a1*d1.U3 + a2*d2.U3, ...
                       'G',  a1*d1.G + a2*d2.G);
        otherwise
            error('Bad use of fixedrankfactory_tucker_preconditioned.lincomb.');
    end
    
end

function U = uf(A)
% U factor of the polar factorization of a tall matrix A.
%
% With the economy-size SVD A = P*S*Q', the factor is P*Q': the singular
% values are simply dropped.
    [P, ignored, Q] = svd(A, 0); %#ok
    U = P*Q';
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/fixedranktensors/tucker2multiarray.m
================================================
function Xtensor = tucker2multiarray(X)
% Expands a 3-way tensor stored in Tucker format into a full multiarray.
%
% function Xtensor = tucker2multiarray(X)
%
% X is a structure with fields U1, U2, U3, and G.
%
% The factors U1 (n1-by-r1), U2 (n2-by-r2) and U3 (n3-by-r3) are
% orthogonal matrices, and the core G (r1-by-r2-by-r3) is a
% multidimensional array. The output Xtensor is the n1-by-n2-by-n3 array
% obtained by multiplying G by U1 along mode 1, by U2 along mode 2 and by
% U3 along mode 3.
%
% See also: fixedrankfactory_tucker_preconditioned

% This file is part of Manopt: www.manopt.org.
% Original authors: Hiroyuki Kasai and Bamdev Mishra, June 05, 2015.
% Contributors:
% Change log:
    
    % Size of the full tensor (rows of the factors) and of the core.
    n1 = size(X.U1, 1);
    n2 = size(X.U2, 1);
    n3 = size(X.U3, 1);
    [r1, r2, r3] = size(X.G);
    
    % Mode-1 product: unfold the core along mode 1, multiply, fold back.
    T = reshape(X.U1 * reshape(X.G, r1, r2*r3), n1, r2, r3);
    
    % Mode-2 product: bring mode 2 to the front, multiply, then undo the
    % permutation.
    T = permute(T, [2 1 3]);
    T = reshape(X.U2 * reshape(T, r2, n1*r3), n2, n1, r3);
    T = permute(T, [2 1 3]);
    
    % Mode-3 product: same procedure with mode 3 in front.
    T = permute(T, [3 1 2]);
    T = reshape(X.U3 * reshape(T, r3, n1*n2), n3, n1, n2);
    
    % Restore the (mode 1, mode 2, mode 3) ordering: this is the full tensor.
    Xtensor = permute(T, [2 3 1]);
    
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/grassmann/grassmanncomplexfactory.m
================================================
function M = grassmanncomplexfactory(n, p, k)
% Returns a manifold struct to optimize over the set of subspaces in C^n.
%
% function M = grassmanncomplexfactory(n, p)
% function M = grassmanncomplexfactory(n, p, k)
%
% Complex Grassmann manifold: each point on this manifold is a collection
% of k vector subspaces of dimension p embedded in C^n.
%
% The metric is obtained by making the Grassmannian a Riemannian quotient
% manifold of the complex Stiefel manifold, i.e., the manifold of
% orthonormal matrices, itself endowed with a metric by making it a
% Riemannian submanifold of the Euclidean space, endowed with the usual
% real-trace inner product, that is, it is the usual metric for the complex
% plane identified with R^2.
% 
% This structure deals with complex matrices X of size n x p x k
% (or n x p if k = 1, which is the default) such that each n x p matrix is
% orthonormal, i.e., X'*X = eye(p) if k = 1, or X(:, :, i)' * X(:, :, i) =
% eye(p) for i = 1 : k if k > 1. Each n x p matrix is a numerical
% representation of the vector subspace its columns span.
%
% By default, k = 1. k must be a positive integer; anything else is
% rejected with an error when the factory is called.
%
% See also: grassmannfactory, stiefelcomplexfactory, grassmanngeneralizedfactory

% This file is part of Manopt: www.manopt.org.
% Original author: Hiroyuki Sato, May 21, 2015.
% Contributors: 
% Change log: 

% Comments of the form "real version: ..." record how a line differs from
% its counterpart in grassmannfactory.

    assert(n >= p, ...
           ['The dimension n of the ambient space must be larger ' ...
            'than the dimension p of the subspaces.']);
    
    if ~exist('k', 'var') || isempty(k)
        k = 1;
    end
    
    % Validate k up front. Without the integrality test, a value such as
    % k = 2.5 would be accepted here and only fail later, inside rand or
    % zerovec, with an unrelated error about array sizes.
    if ~(isscalar(k) && isreal(k) && isfinite(k) && k >= 1 && k == round(k))
        error('k must be an integer no less than 1.');
    end
    
    if k == 1
        M.name = @() sprintf('Complex Grassmann manifold Gr(%d, %d)', n, p);
    else
        M.name = @() sprintf(['Multi complex Grassmann manifold ' ...
            'Gr(%d, %d)^%d'], n, p, k);
    end
    
    % Real dimension (real version: k*p*(n-p)).
    M.dim = @() 2*k*p*(n-p);
    
    % Real-trace inner product (real version: plain trace).
    M.inner = @(x, d1, d2) real(d1(:)'*d2(:));
    
    M.norm = @(x, d) norm(d(:));
    
    M.dist = @distance;
    function d = distance(x, y)
        % Distance computed from the principal angles between the
        % subspaces: the singular values of x'*y are their cosines.
        principal_angles = zeros(p, k);
        XHY = multiprod(multihconj(x), y); % real version: multitransp
        for i = 1 : k
            cos_princ_angle = svd(XHY(:, :, i));
            principal_angles(:, i) = acos(cos_princ_angle);
        end
        % acos returns a complex number if a cosine exceeds 1 because of
        % round-off; only the real part is kept.
        d = norm(real(principal_angles), 'fro');
    end
    
    M.typicaldist = @() sqrt(p*k);
    
    % Orthogonal projection of an ambient vector U to the horizontal space
    % at X.
    M.proj = @projection;
    function Up = projection(X, U)
        
        XHU = multiprod(multihconj(X), U); % real version: multitransp
        Up = U - multiprod(X, XHU);

    end
    
    M.tangent = M.proj;
    
    M.egrad2rgrad = M.proj;
    
    M.ehess2rhess = @ehess2rhess;
    function rhess = ehess2rhess(X, egrad, ehess, H)
        % Riemannian Hessian along H from the Euclidean gradient egrad and
        % the Euclidean Hessian ehess along H: projected ehess minus the
        % correction term H*(X'*egrad).
        PXehess = projection(X, ehess);
        XHG = multiprod(multihconj(X), egrad); % real version: multitransp
        HXHG = multiprod(H, XHG);
        rhess = PXehess - HXHG;
    end
    
    M.retr = @retraction;
    function Y = retraction(X, U, t)
        % Polar retraction of X + t*U, slice by slice; t defaults to 1.
        if nargin < 3
            t = 1.0;
        end
        Y = X + t*U;
        for i = 1 : k 
        
            % Compute the polar factorization of Y = X+tU
            [u, s, v] = svd(Y(:, :, i), 'econ'); %#ok
            Y(:, :, i) = u*v';
            
            % Another popular retraction uses QR instead of SVD.
            % As compared with the Stiefel factory, we do not need to
            % worry about flipping signs of columns here, since only
            % the column space is important, not the actual columns.
            % [Q, unused] = qr(Y(:, :, i), 0); %#ok
            % Y(:, :, i) = Q;
            
        end
    end
    
    M.exp = @exponential;
    function Y = exponential(X, U, t)
        % Exponential map at X along U (or t*U if t is given), computed
        % slice by slice from the thin SVD of the tangent vector.
        if nargin == 3
            tU = t*U;
        else
            tU = U;
        end
        Y = zeros(size(X));
        for i = 1 : k
            [u, s, v] = svd(tU(:, :, i), 0);
            cos_s = diag(cos(diag(s)));
            sin_s = diag(sin(diag(s)));
            Y(:, :, i) = X(:, :, i)*v*cos_s*v' + u*sin_s*v';
            % From numerical experiments, it seems necessary to
            % re-orthonormalize. This is overall quite expensive.
            [q, unused] = qr(Y(:, :, i), 0); %#ok
            Y(:, :, i) = q;
        end
    end

    % Test code for the logarithm:
    % Gr = grassmanncomplexfactory(5, 2, 3);
    % x = Gr.rand()
    % y = Gr.rand()
    % u = Gr.log(x, y)
    % Gr.dist(x, y) % These two numbers should
    % Gr.norm(x, u) % be the same.
    % z = Gr.exp(x, u) % z needs not be the same matrix as y, but it should
    % v = Gr.log(x, z) % be the same point as y on Grassmann: dist almost 0.
    M.log = @logarithm;
    function U = logarithm(X, Y)
        % Logarithmic map: tangent vector U at X pointing towards Y.
        % Conjugate transposes are used throughout (real version: plain
        % transposes).
        U = zeros(n, p, k);
        for i = 1 : k
            x = X(:, :, i);
            y = Y(:, :, i);
            yHx = y'*x;
            AH = y'-yHx*x';
            BH = yHx\AH;
            [u, s, v] = svd(BH', 'econ');

            u = u(:, 1:p);
            s = diag(s);
            s = s(1:p);
            v = v(:, 1:p);

            U(:, :, i) = u*diag(atan(s))*v';
        end
    end

    % Real and imaginary parts are stacked before hashing
    % (real version: X(:)).
    M.hash = @(X) ['z' hashmd5([real(X(:)); imag(X(:))])];
    
    M.rand = @random;
    function X = random()
        % Random point: Q factor of a complex Gaussian matrix, per slice.
        X = zeros(n, p, k);
        for j = 1 : k
            [Q, unused] = qr(randn(n, p) + 1i*randn(n, p), 0); %#ok<NASGU>
            X(:, :, j) = Q;
        end
    end
    
    M.randvec = @randomvec;
    function U = randomvec(X)
        % Random horizontal vector at X, normalized globally (over all k
        % slices together).
        U = projection(X, randn(n, p, k) + 1i*randn(n, p, k));
        U = U / norm(U(:));
    end
    
    M.lincomb = @matrixlincomb;
    
    M.zerovec = @(x) zeros(n, p, k);
    
    % This transport is compatible with the polar retraction.
    M.transp = @(x1, x2, d) projection(x2, d);
    
    % vec stacks the real parts on top of the imaginary parts, giving a
    % real vector of length 2*n*p*k; mat undoes this.
    M.vec = @(x, u_mat) [real(u_mat(:)) ; imag(u_mat(:))];
    M.mat = @(x, u_vec) reshape(u_vec(1:(n*p*k)) + 1i*u_vec((n*p*k+1):end), [n, p, k]);
    M.vecmatareisometries = @() true;

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/grassmann/grassmannfactory.m
================================================
function M = grassmannfactory(n, p, k)
% Returns a manifold struct to optimize over the space of vector subspaces.
%
% function M = grassmannfactory(n, p)
% function M = grassmannfactory(n, p, k)
%
% Grassmann manifold: each point on this manifold is a collection of k
% vector subspaces of dimension p embedded in R^n.
%
% The metric is obtained by making the Grassmannian a Riemannian quotient
% manifold of the Stiefel manifold, i.e., the manifold of orthonormal
% matrices, itself endowed with a metric by making it a Riemannian
% submanifold of the Euclidean space, endowed with the usual inner product.
% In short: it is the usual metric used in most cases.
% 
% This structure deals with matrices X of size n x p x k (or n x p if
% k = 1, which is the default) such that each n x p matrix is orthonormal,
% i.e., X'*X = eye(p) if k = 1, or X(:, :, i)' * X(:, :, i) = eye(p) for
% i = 1 : k if k > 1. Each n x p matrix is a numerical representation of
% the vector subspace its columns span.
%
% By default, k = 1. k must be a positive integer; anything else is
% rejected with an error when the factory is called.
%
% See also: stiefelfactory grassmanncomplexfactory grassmanngeneralizedfactory

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Dec. 30, 2012.
% Contributors: 
% Change log: 
%   March 22, 2013 (NB) :
%       Implemented geodesic distance.
% 
%   April 17, 2013 (NB) :
%       Retraction changed to the polar decomposition, so that the vector
%       transport is now correct, in the sense that it is compatible with
%       the retraction, i.e., transporting a tangent vector G from U to V
%       where V = Retr(U, H) will give Z, and transporting GQ from UQ to VQ
%       will give ZQ: there is no dependence on the representation, which
%       is as it should be. Notice that the polar factorization requires an
%       SVD whereas the qfactor retraction requires a QR decomposition,
%       which is cheaper. Hence, if the retraction happens to be a
%       bottleneck in your application and you are not using vector
%       transports, you may want to replace the retraction with a qfactor.
% 
%   July  4, 2013 (NB) :
%       Added support for the logarithmic map 'log'.
%
%   July  5, 2013 (NB) :
%       Added support for ehess2rhess.
%
%   June 24, 2014 (NB) :
%       Small bug fix in the retraction, and added final
%       re-orthonormalization at the end of the exponential map. This
%       follows discussions on the forum where it appeared there is a
%       significant loss in orthonormality without that extra step. Also
%       changed the randvec function so that it now returns a globally
%       normalized vector, not a vector where each component is normalized
%       (this only matters if k>1).

    assert(n >= p, ...
           ['The dimension n of the ambient space must be larger ' ...
            'than the dimension p of the subspaces.']);
    
    if ~exist('k', 'var') || isempty(k)
        k = 1;
    end
    
    % Validate k up front. Without the integrality test, a value such as
    % k = 2.5 would be accepted here and only fail later, inside rand or
    % zerovec, with an unrelated error about array sizes.
    if ~(isscalar(k) && isreal(k) && isfinite(k) && k >= 1 && k == round(k))
        error('k must be an integer no less than 1.');
    end
    
    if k == 1
        M.name = @() sprintf('Grassmann manifold Gr(%d, %d)', n, p);
    else
        M.name = @() sprintf('Multi Grassmann manifold Gr(%d, %d)^%d', ...
                             n, p, k);
    end
    
    M.dim = @() k*p*(n-p);
    
    % Usual Euclidean inner product on the stacked entries.
    M.inner = @(x, d1, d2) d1(:).'*d2(:);
    
    M.norm = @(x, d) norm(d(:));
    
    M.dist = @distance;
    function d = distance(x, y)
        % Distance computed from the principal angles between the
        % subspaces: the singular values of x'*y are their cosines.
        square_d = 0;
        XtY = multiprod(multitransp(x), y);
        for i = 1 : k
            cos_princ_angle = svd(XtY(:, :, i));
            % acos returns a complex number if a cosine exceeds 1 because
            % of round-off; only the real part is kept.
            square_d = square_d + sum(real(acos(cos_princ_angle)).^2);
        end
        d = sqrt(square_d);
    end
    
    M.typicaldist = @() sqrt(p*k);
    
    % Orthogonal projection of an ambient vector U to the horizontal space
    % at X.
    M.proj = @projection;
    function Up = projection(X, U)
        
        XtU = multiprod(multitransp(X), U);
        Up = U - multiprod(X, XtU);

    end
    
    M.tangent = M.proj;
    
    M.egrad2rgrad = M.proj;
    
    M.ehess2rhess = @ehess2rhess;
    function rhess = ehess2rhess(X, egrad, ehess, H)
        % Riemannian Hessian along H from the Euclidean gradient egrad and
        % the Euclidean Hessian ehess along H: projected ehess minus the
        % correction term H*(X'*egrad).
        PXehess = projection(X, ehess);
        XtG = multiprod(multitransp(X), egrad);
        HXtG = multiprod(H, XtG);
        rhess = PXehess - HXtG;
    end
    
    M.retr = @retraction;
    function Y = retraction(X, U, t)
        % Polar retraction of X + t*U, slice by slice; t defaults to 1.
        if nargin < 3
            t = 1.0;
        end
        Y = X + t*U;
        for i = 1 : k
        
            % Compute the polar factorization of Y = X+tU
            [u, s, v] = svd(Y(:, :, i), 'econ'); %#ok
            Y(:, :, i) = u*v';
            
            % Another popular retraction uses QR instead of SVD.
            % As compared with the Stiefel factory, we do not need to
            % worry about flipping signs of columns here, since only
            % the column space is important, not the actual columns.
            % [Q, unused] = qr(Y(:, :, i), 0); %#ok
            % Y(:, :, i) = Q;
            
        end
    end
    
    M.exp = @exponential;
    function Y = exponential(X, U, t)
        % Exponential map at X along U (or t*U if t is given), computed
        % slice by slice from the thin SVD of the tangent vector.
        if nargin == 3
            tU = t*U;
        else
            tU = U;
        end
        Y = zeros(size(X));
        for i = 1 : k
            [u, s, v] = svd(tU(:, :, i), 0);
            cos_s = diag(cos(diag(s)));
            sin_s = diag(sin(diag(s)));
            Y(:, :, i) = X(:, :, i)*v*cos_s*v' + u*sin_s*v';
            % From numerical experiments, it seems necessary to
            % re-orthonormalize. This is overall quite expensive.
            [q, unused] = qr(Y(:, :, i), 0); %#ok
            Y(:, :, i) = q;
        end
    end

    % Test code for the logarithm:
    % Gr = grassmannfactory(5, 2, 3);
    % x = Gr.rand()
    % y = Gr.rand()
    % u = Gr.log(x, y)
    % Gr.dist(x, y) % These two numbers should
    % Gr.norm(x, u) % be the same.
    % z = Gr.exp(x, u) % z needs not be the same matrix as y, but it should
    % v = Gr.log(x, z) % be the same point as y on Grassmann: dist almost 0.
    M.log = @logarithm;
    function U = logarithm(X, Y)
        % Logarithmic map: tangent vector U at X pointing towards Y.
        U = zeros(n, p, k);
        for i = 1 : k
            x = X(:, :, i);
            y = Y(:, :, i);
            ytx = y.'*x;
            At = y.'-ytx*x.';
            Bt = ytx\At;
            [u, s, v] = svd(Bt.', 'econ');

            u = u(:, 1:p);
            s = diag(s);
            s = s(1:p);
            v = v(:, 1:p);

            U(:, :, i) = u*diag(atan(s))*v.';
        end
    end

    M.hash = @(X) ['z' hashmd5(X(:))];
    
    M.rand = @random;
    function X = random()
        % Random point: Q factor of a Gaussian matrix, for each slice.
        X = zeros(n, p, k);
        for i = 1 : k
            [Q, unused] = qr(randn(n, p), 0); %#ok<NASGU>
            X(:, :, i) = Q;
        end
    end
    
    M.randvec = @randomvec;
    function U = randomvec(X)
        % Random horizontal vector at X, normalized globally (over all k
        % slices together).
        U = projection(X, randn(n, p, k));
        U = U / norm(U(:));
    end
    
    M.lincomb = @matrixlincomb;
    
    M.zerovec = @(x) zeros(n, p, k);
    
    % This transport is compatible with the polar retraction.
    M.transp = @(x1, x2, d) projection(x2, d);
    
    M.vec = @(x, u_mat) u_mat(:);
    M.mat = @(x, u_vec) reshape(u_vec, [n, p, k]);
    M.vecmatareisometries = @() true;

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/grassmann/grassmanngeneralizedfactory.m
================================================
function M = grassmanngeneralizedfactory(n, p, B)
% Returns a manifold struct of "scaled" vector subspaces.
%
% function M = grassmanngeneralizedfactory(n, p)
% function M = grassmanngeneralizedfactory(n, p, B)
%
% Generalized Grassmann manifold: each point on this manifold is a
% collection of "scaled" vector subspaces of dimension p embedded in R^n.
% The scaling is due to the symmetric positive definite matrix B.
%
% When B is identity, the manifold is the standard Grassmann manifold.
%
% The metric is obtained by viewing the generalized Grassmannian
% a Riemannian quotient manifold of the generalized Stiefel manifold, 
% which is the manifold of "scaled" orthonormal matrices. Specifically, 
% the scaled Stiefel manifold is the set {X : X'*B*X = I}. 
% The generalized Grassmann manifold is the Grassmannian of the 
% generalized Stiefel manifold.
%
% The generalized Stiefel manifold is endowed with a scaled metric
% by viewing it as a Riemannian submanifold of the Euclidean space, which
% is again endowed with the scaled inner product.
%
% Some notions (not all) are from Section 4.5 of the paper
% "The geometry of algorithms with orthogonality constraints",
% A. Edelman, T. A. Arias, S. T. Smith, SIMAX, 1998.
%
% Paper link: http://arxiv.org/abs/physics/9806030.
%
% 
% Note: some computations such as restricted_svd, distance, logarithm, and 
% exponential are new and we believe them to be correct.
% Also, we hope that the computations are numerically stable.
% In case some things do not work out as expected or there is some trouble,
% please contact us at http://www.manopt.org.
%
% Note: egrad2rgrad and ehess2rhess involve solving linear systems in B. If
% this is a bottleneck for a specific application, then a way forward is to
% create a modified version of this file which preprocesses B to speed this
% up (typically, by computing a Cholesky factorization of it, then calling
% an appropriate solver).
%
% See also: stiefelgeneralizedfactory  stiefelfactory  grassmannfactory


% This file is part of Manopt: www.manopt.org.
% Original author: Bamdev Mishra, June 30, 2015.
% Contributors:
%
% Change log:
%   
    
    assert(n >= p, ...
        ['The dimension n of the ambient space must be larger ' ...
        'than the dimension p of the subspaces.']);
    
    if ~exist('B', 'var') || isempty(B)
        B = speye(n); % Standard Grassmann manifold.
    end
    
    M.name = @() sprintf('Generalized Grassmann manifold Gr(%d, %d)', n, p);
    
    M.dim = @() p*(n - p);   
    
    M.inner = @(X, eta, zeta) trace(eta'*(B*zeta)); % Scaled metric, but horizontally invariant.
    
    M.norm = @(X, eta) sqrt(M.inner(X, eta, eta));
    
    M.dist = @distance; 
    function d = distance(X, Y)
        % Distance computed from the principal angles, whose cosines are
        % the singular values of X'*B*Y.
        XtBY = X'*(B*Y); % XtY ---> XtBY
        cos_princ_angle = svd(XtBY); % svd(XtY) ---> svd(XtBY)
        % Two next instructions not necessary: the imaginary parts that
        % would appear if the cosines are not between -1 and 1, when
        % passed to the acos function, would be very small, and would
        % thus vanish when the norm is taken.
        % cos_princ_angle = min(cos_princ_angle,  1);
        % cos_princ_angle = max(cos_princ_angle, -1);
        square_d = norm(acos(cos_princ_angle))^2;
        
        d = sqrt(square_d);
    end
    
    M.typicaldist = @() sqrt(p);
    
    
    % Orthogonal projection of an ambient vector U onto the 
    % horizontal space at X.
    M.proj = @projection;
    function Up = projection(X, U)
        BX = B*X;
        
        % Projection onto the tangent space
        % U = U - X*symm(BX'*U);
        % Projection onto the horizontal space
        % Up = U - X*skew(BX'*U);
        
        % The two steps above combine into a single subtraction, since
        % symm(A) + skew(A) = A.
        Up = U - X*(BX'*U);
    end
    
    M.tangent = M.proj;
    
    M.egrad2rgrad = @egrad2rgrad;
    function rgrad = egrad2rgrad(X, egrad)
        
        % First, scale egrad according to the scaled metric in the
        % Euclidean space.
        egrad_scaled = B\egrad;
        
        % Second, project onto the tangent space.
        % No need to project onto the horizontal space as
        % by the Riemannian submersion theory, this quantity automatically
        % belongs to the horizontal space.
        %
        %
        % rgrad = egrad_scaled - X*symm((B*X)'*egrad_scaled);
        %
        % Since (B*X)'*(B\egrad) = X'*egrad for symmetric B,
        % symm(BX'*egrad_scaled) = symm(X'*egrad), which saves a product.
        
        rgrad = egrad_scaled - X*symm(X'*egrad);
    end
    
    
    M.ehess2rhess = @ehess2rhess;
    function rhess = ehess2rhess(X, egrad, ehess, H)
        % Riemannian Hessian along H, obtained by differentiating the
        % expression of the Riemannian gradient in egrad2rgrad along H and
        % projecting the result.
        egraddot = ehess;
        Xdot = H;
        
        % Directional derivative of the Riemannian gradient.
        egrad_scaleddot = B\egraddot;
        rgraddot = egrad_scaleddot - Xdot*symm(X'*egrad)...
            - X*symm(Xdot'*egrad)...
            - X*symm(X'*egraddot);
        
        % Project onto the horizontal space.
        rhess = M.proj(X, rgraddot);
    end
    
    
    M.retr = @retraction;
    function Y = retraction(X, U, t)
        % Generalized polar retraction of X + t*U; t defaults to 1.
        if nargin < 3
            t = 1.0;
        end
        Y = guf(X + t*U); % Ensure that Y'*B*Y is identity.
    end
    
    
    M.exp = @exponential;
    function Y = exponential(X, U, t)
        % Exponential map at X along U (or t*U if t is given).
        if nargin == 3
            tU = t*U;
        else
            tU = U;
        end
        
        % restricted_svd is defined later in the file.
        [u, s, v] = restricted_svd(tU);% svd(tU, 0) ---> restricted_svd(tU).
        cos_s = diag(cos(diag(s)));
        sin_s = diag(sin(diag(s)));
        Y = X*v*cos_s*v' + u*sin_s*v';% Verify that Y'*B*Y is identity
        
        % From numerical experiments, it seems necessary to
        % re-orthonormalize.
        Y = guf(Y);% Ensure that Y'*B*Y is identity.
    end
    
    
    % Test code for the logarithm:
    % gGr = grassmanngeneralizedfactory(5, 2, diag(rand(5,1)));
    % x = gGr.rand()
    % y = gGr.rand()
    % u = gGr.log(x, y)
    % gGr.dist(x, y) % These two numbers should
    % gGr.norm(x, u) % be the same.
    % z = gGr.exp(x, u) % z needs not be the same matrix as y, but it should
    % v = gGr.log(x, z) % be the same point as y on Grassmann: dist almost 0.
    % gGr.dist(z, y)
    M.log = @logarithm;
    function U = logarithm(X, Y)
        % Logarithmic map: tangent vector U at X pointing towards Y.
        YtBX = Y'*(B*X); % YtX ---> YtBX.
        At = (Y' - YtBX*X');
        Bt = YtBX\At;
        [u, s, v] = restricted_svd(Bt');% svd(Bt', 'econ') ---> restricted_svd(Bt').
        
        u = u(:, 1:p);
        s = diag(s);
        s = s(1:p);
        v = v(:, 1:p);
        U = u*diag(atan(s))*v'; % A horizontal vector, i.e., U'*(B*X) is zero.
    end
    
    
    M.hash = @(X) ['z' hashmd5(X(:))];
    
    M.rand = @random;
    function X = random()
        X = guf(randn(n, p)); % Ensure that X'*B*X is identity;
    end
    
    M.randvec = @randomvec;
    function U = randomvec(X)
        % Random horizontal vector at X with unit norm.
        U = projection(X, randn(n, p));
        % Normalize in the metric of the manifold, which is scaled by B.
        % Dividing by the Euclidean norm, norm(U(:)), would give a unit
        % vector only when B is the identity.
        U = U / M.norm(X, U);
    end
    
    M.lincomb = @matrixlincomb;
    
    M.zerovec = @(X) zeros(n, p);
    
    % This transport is compatible with the generalized polar retraction.
    M.transp = @(X1, X2, d) projection(X2, d);
    
    % vec and mat are not isometries: M.inner is scaled by B, whereas vec
    % merely stacks the entries.
    M.vec = @(X, u_mat) u_mat(:);
    M.mat = @(X, u_vec) reshape(u_vec, [n, p]);
    M.vecmatareisometries = @() false;
    
    % Some auxiliary functions
    symm = @(D) (D + D')/2;
    
    function X = guf(Y)
        % Generalized polar decomposition of an n-by-p matrix Y.
        % X'*B*X is identity.
        
        % Method 1
        [u, ~, v] = svd(Y, 0);
  
        % Instead of the following three steps, an equivalent, but an 
        % expensive, way is to do X = u*(sqrtm(u'*(B*u))\(v')).
        [q, ssquare] = eig(u'*(B*u));
        qsinv = q/sparse(diag(sqrt(diag(ssquare))));
        X = u*((qsinv*q')*v'); % X'*B*X is identity.
        
        
        % Another computation using restricted_svd
        % [u, ~, v] = restricted_svd(Y);
        % X = u*v'; % X'*B*X is identity.
        
    end
    
    function [u, s, v] = restricted_svd(Y)
        % We compute a thin svd-like decomposition of an n-by-p matrix Y 
        % into matrices u, s, and v such that u is an n-by-p matrix
        % with u'*B*u being identity, s is a p-by-p diagonal matrix 
        % with positive entries, and v is a p-by-p orthogonal matrix.
        % Y = u*s*v'.
        
        [v, ssquare] = eig(symm(Y'*(B*Y))); % Y'*B*Y is positive definite
        ssquarevec = diag(ssquare);
        
        s = sparse(diag(abs(sqrt(ssquarevec))));
        u = Y*(v/s); % u'*B*u is identity.
    end
    
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/multinomial/multinomialfactory.m
================================================
function M = multinomialfactory(n, m)
% Manifold of n-by-m column-stochastic matrices with positive entries.
%
% function M = multinomialfactory(n)
% function M = multinomialfactory(n, m)
%
% The returned structure M is a Manopt manifold structure to optimize over
% the set of n-by-m matrices with (strictly) positive entries and such that
% the entries of each column sum to one. By default, m = 1.
%
% The metric imposed on the manifold is the Fisher metric such that 
% the set of n-by-m column-stochastic matrices (aka the multinomial manifold)
% is a Riemannian submanifold of the space of n-by-m matrices. Also it
% should be noted that the retraction operation that we define 
% is first order and as such the checkhessian tool cannot verify 
% the slope correctly.
%
% M.exp is not implemented: it falls back on the retraction and issues a
% warning with identifier 'manopt:multinomialfactory:exp'.
% M.dist is not implemented: calling it raises an error.
%             
% The file is based on developments in the research paper
% Y. Sun, J. Gao, X. Hong, B. Mishra, and B. Yin,
% "Heterogeneous tensor decomposition for clustering via manifold
% optimization", arXiv:1504.01777, 2015.
%
% Link to the paper: http://arxiv.org/abs/1504.01777.
%
% Please cite the Manopt paper as well as the research paper:
% @Article{sun2015multinomial,
%   author  = {Y. Sun and J. Gao and X. Hong and B. Mishra and B. Yin},
%   title   = {Heterogeneous Tensor Decomposition for Clustering via Manifold Optimization},
%   journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
%   year    = {2016},
%   volume  = {38},
%   number  = {3},
%   pages   = {476--489},
%   doi     = {10.1109/TPAMI.2015.2465901}
% }

% This file is part of Manopt: www.manopt.org.
% Original author: Bamdev Mishra, April 06, 2015.
% Contributors:
% Change log:
    
    % The number of columns is optional and defaults to 1.
    if ~exist('m', 'var') || isempty(m)
        m = 1;
    end

    M.name = @() sprintf('%dx%d column-stochastic matrices with positive entries', n, m);
    
    % Each column has n entries and one linear constraint (sum equal to 1).
    M.dim = @() (n-1)*m;
    
    % We impose the Fisher metric.
    M.inner = @iproduct;
    function ip = iproduct(X, eta, zeta)
        % Entry-wise product of the two tangent vectors, weighted by 1./X.
        ip = sum((eta(:).*zeta(:))./X(:));
    end
    
    M.norm = @(X, eta) sqrt(M.inner(X, eta, eta));
    
    M.dist = @(X, Y) error('multinomialfactory.dist not implemented yet.');
    
    M.typicaldist = @() m*pi/2; % This is an approximation.
    
    % Column vector of ones of length n. 
    e = ones(n, 1);
    
    M.egrad2rgrad = @egrad2rgrad;
    function rgrad = egrad2rgrad(X, egrad)
        % Converts the Euclidean gradient egrad at X into the Riemannian
        % gradient for the Fisher metric.
        lambda = -sum(X.*egrad, 1); % Row vector of length m.
        rgrad = X.*egrad + (e*lambda).*X; % This is in the tangent space.
    end
    
    M.ehess2rhess = @ehess2rhess;
    function rhess = ehess2rhess(X, egrad, ehess, eta)
        % Converts the Euclidean gradient and Hessian (along eta) at X into
        % the Riemannian Hessian along eta.
        
        % Riemannian gradient computation.
        % lambda is a row vector of length m.
        lambda = - sum(X.*egrad, 1);
        rgrad =  X.*egrad + (e*lambda).*X;
        
        % Directional derivative of the Riemannian gradient.
        % lambdadot is a row vector of length m.
        lambdadot = -sum(eta.*egrad, 1) - sum(X.*ehess, 1); 
        rgraddot = eta.*egrad + X.*ehess + (e*lambdadot).*X + (e*lambda).*eta;
        
        % Correction term because of the non-constant metric that we
        % impose. The computation of the correction term follows the use of
        % Koszul formula.
        correction_term = - 0.5*(eta.*rgrad)./X;
        rhess = rgraddot + correction_term;
        
        % Finally, projection onto the tangent space.
        rhess = M.proj(X, rhess);
    end
    
    % Projection of the vector eta in the ambient space onto the tangent
    % space.
    M.proj = @projection;
    function etaproj = projection(X, eta)
        % After projection, each column of etaproj sums to zero (provided
        % each column of X sums to one).
        alpha = sum(eta, 1); % Row vector of length m.
        etaproj = eta - (e*alpha).*X;
    end
    
    M.tangent = M.proj;
    M.tangent2ambient = @(X, eta) eta;
    
    M.retr = @retraction;
    function Y = retraction(X, eta, t)
        % Moves from X along t*eta (t = 1 by default) and maps the result
        % back to the set of column-stochastic matrices.
        if nargin < 3
            t = 1.0;
        end
        % A first-order retraction.
        Y = X.*exp(t*(eta./X)); % Based on mapping for positive scalars.
        Y = Y./(e*(sum(Y, 1))); % Projection onto the constraint set.
        % For numerical reasons, so that we avoid entries going to zero:
        Y = max(Y, eps);
    end
    
    M.exp = @exponential;
    function Y = exponential(X, eta, t)
        % Placeholder for the exponential map: delegates to the retraction
        % and warns the caller about it.
        if nargin < 3
            t = 1.0;
        end
        Y = retraction(X, eta, t);
        % The two pieces are concatenated without any separator, so the
        % space must be part of the first literal.
        warning('manopt:multinomialfactory:exp', ...
            ['Exponential for the Multinomial manifold ' ...
            'not implemented yet. Used retraction instead.']);
    end
    
    M.hash = @(X) ['z' hashmd5(X(:))];
    
    M.rand = @random;
    function X = random()
        % A random point in the ambient space, with columns rescaled to
        % sum to one.
        X = rand(n, m); %
        X = X./(e*(sum(X, 1)));
    end
    
    M.randvec = @randomvec;
    function eta = randomvec(X)
        % A random vector in the tangent space, of unit norm for the
        % metric M.norm.
        eta = randn(n, m);
        eta = M.proj(X, eta); % Projection onto the tangent space.
        nrm = M.norm(X, eta);
        eta = eta / nrm;
    end
    
    M.lincomb = @matrixlincomb;
    
    M.zerovec = @(X) zeros(n, m);
    
    % Transport by projection onto the tangent space at the destination.
    M.transp = @(X1, X2, d) projection(X2, d);
    
    % vec and mat are not isometries, because of the scaled metric.
    M.vec = @(X, U) U(:);
    M.mat = @(X, u) reshape(u, n, m);
    M.vecmatareisometries = @() false;
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/oblique/obliquecomplexfactory.m
================================================
function M = obliquecomplexfactory(n, m, transposed)
% Returns a manifold struct defining complex matrices w/ unit-norm columns.
%
% function M = obliquecomplexfactory(n, m)
% function M = obliquecomplexfactory(n, m, transposed)
%
% Oblique manifold: deals with complex matrices of size n x m such that
% each column has unit 2-norm, i.e., is a point on the unit sphere in C^n.
% The geometry is a product geometry of m unit spheres in C^n. For the
% metric, C^n is treated as R^(2n), so that the real part and imaginary
% parts are treated separately as 2n real coordinates. As such, the complex
% oblique manifold is a Riemannian submanifold of (R^2)^(n x m), with the
% usual metric <u, v> = real(u'*v).
% 
% If transposed is set to true (it is false by default), then the matrices
% are transposed: a point Y on the manifold is a matrix of size m x n and
% each row has unit 2-norm. It is the same geometry, just a different
% representation.
%
% In transposed form, a point Y is such that Y*Y' is a Hermitian, positive
% semidefinite matrix of size m and of rank at most n, such that all the
% diagonal entries are equal to 1.
%
% Note: obliquecomplexfactory(1, n, true) is equivalent to (but potentially
% slower than) complexcirclefactory(n).
%
% See also: spherecomplexfactory complexcirclefactory obliquefactory

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Sep. 3, 2014.
% Contributors: 
% Change log: 
%
%   Oct. 21, 2016 (NB)
%       Formatted for inclusion in Manopt release.
%
%   July 20, 2017 (NB)
%       Distance function is now accurate for close-by points. See notes
%       inside the spherefactory file for details. Also improves distances
%       computation as part of the log function.

    
    if ~exist('transposed', 'var') || isempty(transposed)
        transposed = false;
    end
    
    % trnsp maps between the user-facing layout and the internal n-by-m
    % (unit-norm columns) layout. It is a plain transpose (no conjugation)
    % in transposed mode and the identity otherwise.
    if transposed
        trnsp = @(X) X.';
    else
        trnsp = @(X) X;
    end

    M.name = @() sprintf('Complex oblique manifold COB(%d, %d)', n, m);
    
    M.dim = @() (2*n-1)*m;
    
    M.inner = @(x, d1, d2) real(d1(:)'*d2(:));
    
    M.norm = @(x, d) norm(d(:));
    
    % 2-norm of the vector of per-column distances; each of them is
    % computed from the chordal distance via 2*asin(chord/2).
    M.dist = @(x, y) norm(real(2*asin(.5*sqrt(sum(trnsp(abs(x - y).^2), 1)))));
    
    M.typicaldist = @() pi*sqrt(m);
    
    M.proj = @(X, U) trnsp(projection(trnsp(X), trnsp(U)));
    
    M.tangent = M.proj;
    
    % For Riemannian submanifolds, converting a Euclidean gradient into a
    % Riemannian gradient amounts to an orthogonal projection.
    M.egrad2rgrad = M.proj;
    
    M.ehess2rhess = @ehess2rhess;
    function rhess = ehess2rhess(X, egrad, ehess, U)
        % Riemannian Hessian along U from the Euclidean gradient and
        % Hessian at X.
        X = trnsp(X);
        egrad = trnsp(egrad);
        ehess = trnsp(ehess);
        U = trnsp(U);
        
        PXehess = projection(X, ehess);
        inners = sum(real(conj(X).*egrad), 1);
        rhess = PXehess - bsxfun(@times, U, inners);
        
        rhess = trnsp(rhess);
    end
    
    M.exp = @exponential;
    % Exponential on the complex oblique manifold
    function y = exponential(x, d, t)
        x = trnsp(x);
        d = trnsp(d);
        
        if nargin == 2
            % t = 1;
            td = d;
        else
            td = t*d;
        end

        nrm_td = sqrt(sum(real(td).^2 + imag(td).^2, 1));

        y = bsxfun(@times, x, cos(nrm_td)) + ...
            bsxfun(@times, td, sin(nrm_td) ./ nrm_td);
        
        % For those columns where the step is 0, replace y by x
        % (the division above produced NaN's there).
        exclude = (nrm_td == 0);
        y(:, exclude) = x(:, exclude);

        y = trnsp(y);
    end

    M.log = @logarithm;
    function v = logarithm(x1, x2)
        % Tangent vector at x1 pointing toward x2, with per-column norm
        % equal to the per-column distance between x1 and x2.
        x1 = trnsp(x1);
        x2 = trnsp(x2);
        
        v = projection(x1, x2 - x1);
        % x1 and x2 are already in column layout at this point, hence no
        % further call to trnsp: sum over rows gives one value per column.
        dists = real(2*asin(.5*sqrt(sum(abs(x1 - x2).^2, 1))));
        norms = sqrt(sum(real(v).^2 + imag(v).^2, 1));
        factors = dists./norms;
        % For very close points, dists is almost equal to norms, but
        % because they are both almost zero, the division above can return
        % NaN's. To avoid that, we force those ratios to 1.
        factors(dists <= 1e-10) = 1;
        v = bsxfun(@times, v, factors);
        
        v = trnsp(v);
    end

    M.retr = @retraction;
    % Retraction on the oblique manifold
    function y = retraction(x, d, t)
        x = trnsp(x);
        d = trnsp(d);
        
        if nargin < 3
            td = d;
        else
            td = t*d;
        end

        y = normalize_columns(x + td);
        
        y = trnsp(y);
    end

    % Real and imaginary parts are stacked so that the hash input is real.
    M.hash = @(x) ['z' hashmd5([real(x(:)) ; imag(x(:))])];
    
    M.rand = @() trnsp(random(n, m));
    
    M.randvec = @(x) trnsp(randomvec(n, m, trnsp(x)));
    
    M.lincomb = @matrixlincomb;
    
    M.zerovec = @(x) trnsp(zeros(n, m));
    
    M.transp = @(x1, x2, d) M.proj(x2, d);
    
    M.pairmean = @pairmean;
    function y = pairmean(x1, x2)
        % Column-wise normalized sum of the two points.
        y = trnsp(x1+x2);
        y = normalize_columns(y);
        y = trnsp(y);
    end

    % vec returns a vector representation of an input tangent vector which
    % is represented as a matrix. mat returns the original matrix
    % representation of the input vector representation of a tangent
    % vector. vec and mat are thus inverse of each other. They are
    % furthermore isometries between a subspace of R^2nm and the tangent
    % space at x.
    vect = @(X) X(:);
    M.vec = @(x, u_mat) [vect(real(trnsp(u_mat))) ; ...
                         vect(imag(trnsp(u_mat)))];
    M.mat = @(x, u_vec)    trnsp(reshape(u_vec(1:(n*m)),     [n, m])) + ...
                        1i*trnsp(reshape(u_vec((n*m+1):end), [n, m]));
    M.vecmatareisometries = @() true;

end

% Rescales every column of the (possibly complex) matrix X to unit 2-norm
% and returns the result.
function X = normalize_columns(X)
    % Squared modulus of each entry, then one norm per column (row vector).
    sq_moduli = real(X).^2 + imag(X).^2;
    col_norms = sqrt(sum(sq_moduli, 1));
    X = bsxfun(@times, X, 1./col_norms);
end

% Projects the ambient vector H orthogonally onto the tangent space at X,
% column by column.
function PXH = projection(X, H)

    % Row vector whose i-th entry is real(X(:, i)' * H(:, i)), i.e., the
    % real inner product of column i of H with its root point X(:, i).
    coeffs = real(sum(conj(X).*H, 1));
    
    % Component of each H(:, i) that is parallel to X(:, i) ...
    parallel_part = bsxfun(@times, X, coeffs);
    
    % ... which is removed from H.
    PXH = H - parallel_part;

end

% Random n-by-m complex matrix with unit-norm columns: each column is a
% uniform sample on the sphere, obtained by normalizing Gaussian entries.
function x = random(n, m)

    re_part = randn(n, m);
    im_part = randn(n, m);
    x = normalize_columns(re_part + 1i*im_part);

end

% Random tangent vector at x (an n-by-m point in column layout), scaled so
% that the 2-norm of its vectorized form is 1.
function d = randomvec(n, m, x)

    re_part = randn(n, m);
    im_part = randn(n, m);
    tangent = projection(x, re_part + 1i*im_part);
    d = tangent / norm(tangent(:));

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/oblique/obliquefactory.m
================================================
function M = obliquefactory(n, m, transposed)
% Returns a manifold struct to optimize over matrices w/ unit-norm columns.
%
% function M = obliquefactory(n, m)
% function M = obliquefactory(n, m, transposed)
%
% Oblique manifold: the set of real n x m matrices whose columns each have
% unit 2-norm, that is, each column is a point on the unit sphere in R^n.
% It is treated as a Riemannian submanifold of the space of n x m matrices
% equipped with the usual trace inner product.
%
% Setting transposed to true (default: false) switches the representation:
% points are then m x n matrices whose rows have unit 2-norm. The geometry
% is unchanged; only the storage layout differs.
%
% See also: spherefactory obliquecomplexfactory

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Dec. 30, 2012.
% Contributors: 
% Change log: 
%
%   July 16, 2013 (NB) :
%       Added 'transposed' option, mainly for ease of comparison with the
%       elliptope geometry.
%
%   Nov. 29, 2013 (NB) :
%       Added normalize_columns function to make it easier to exploit the
%       bsxfun formulation of column normalization, which avoids using for
%       loops and provides performance gains. The exponential still uses a
%       for loop.
%
%   April 4, 2015 (NB) :
%       Log function modified to avoid NaN's appearing for close by points.
%
%   April 13, 2015 (NB) :
%       Exponential now without for-loops.
%
%   Oct. 8, 2016 (NB)
%       Code for exponential was simplified to only treat the zero vector
%       as a particular case.
%
%   Oct. 21, 2016 (NB)
%       Bug caught in M.log: the function called v = M.proj(x1, x2 - x1),
%       which internally applies transp to inputs and outputs. But since
%       M.log had already taken care of transposing things, this introduced
%       a bug (which only triggered if using M.log in transposed mode.)
%       The code now calls "v = projection(x1, x2 - x1);" since projection
%       assumes the inputs and outputs do not need to be transposed.
%
%   July 20, 2017 (NB)
%       Distance function is now accurate for close-by points. See notes
%       inside the spherefactory file for details. Also improves distances
%       computation as part of the log function.

    
    % The third input is optional.
    if ~exist('transposed', 'var') || isempty(transposed)
        transposed = false;
    end
    
    % Internally, everything is computed in the n-by-m column layout.
    % trnsp converts between that layout and the one seen by the user.
    if ~transposed
        trnsp = @(X) X;
    else
        trnsp = @(X) X.';
    end

    M.name = @() sprintf('Oblique manifold OB(%d, %d)', n, m);
    
    M.dim = @() (n-1)*m;
    
    M.inner = @(x, d1, d2) d1(:).'*d2(:);
    
    M.norm = @(x, d) norm(d(:));
    
    % Per-column distances obtained from chordal distances, aggregated
    % with a 2-norm.
    M.dist = @(x, y) norm(real(2*asin(.5*sqrt(sum(trnsp(x - y).^2, 1)))));
    
    M.typicaldist = @() pi*sqrt(m);
    
    M.proj = @(X, U) trnsp(projection(trnsp(X), trnsp(U)));
    
    M.tangent = M.proj;
    
    % On a Riemannian submanifold, the Riemannian gradient is the
    % orthogonal projection of the Euclidean gradient.
    M.egrad2rgrad = M.proj;
    
    M.ehess2rhess = @ehess2rhess;
    function rhess = ehess2rhess(X, egrad, ehess, U)
        % Riemannian Hessian along U, from the Euclidean gradient and
        % Hessian at X.
        Xc = trnsp(X);
        Gc = trnsp(egrad);
        Hc = trnsp(ehess);
        Uc = trnsp(U);
        
        % One inner product <X(:, i), egrad(:, i)> per column.
        col_inners = sum(Xc.*Gc, 1);
        
        rhess = trnsp(projection(Xc, Hc) - bsxfun(@times, Uc, col_inners));
    end
    
    M.exp = @exponential;
    function y = exponential(x, d, t)
        % Exponential map on the oblique manifold; t defaults to 1.
        base = trnsp(x);
        step = trnsp(d);
        if nargin >= 3
            step = t*step;
        end

        step_norms = sqrt(sum(step.^2, 1));

        y = bsxfun(@times, base, cos(step_norms)) + ...
            bsxfun(@times, step, sin(step_norms) ./ step_norms);
        
        % Columns with a zero step yield NaN's above: those columns of
        % the result are simply the corresponding columns of the base.
        is_zero_step = (step_norms == 0);
        y(:, is_zero_step) = base(:, is_zero_step);

        y = trnsp(y);
    end

    M.log = @logarithm;
    function v = logarithm(x1, x2)
        % Logarithmic map: tangent vector at x1 pointing toward x2.
        base = trnsp(x1);
        target = trnsp(x2);
        
        v = projection(base, target - base);
        geo_dists = real(2*asin(.5*sqrt(sum((base - target).^2, 1))));
        v_norms = real(sqrt(sum(v.^2, 1)));
        scale = geo_dists./v_norms;
        % When the points are very close, both quantities above are nearly
        % zero and their ratio may be NaN, although it should be close to
        % 1: force it to 1 for those columns.
        scale(geo_dists <= 1e-10) = 1;
        
        v = trnsp(bsxfun(@times, v, scale));
    end

    M.retr = @retraction;
    function y = retraction(x, d, t)
        % Retraction on the oblique manifold: take the step in the ambient
        % space, then renormalize the columns. t defaults to 1.
        base = trnsp(x);
        step = trnsp(d);
        if nargin >= 3
            step = t*step;
        end
        
        y = trnsp(normalize_columns(base + step));
    end

    M.hash = @(x) ['z' hashmd5(x(:))];
    
    M.rand = @() trnsp(random(n, m));
    
    M.randvec = @(x) trnsp(randomvec(n, m, trnsp(x)));
    
    M.lincomb = @matrixlincomb;
    
    M.zerovec = @(x) trnsp(zeros(n, m));
    
    M.transp = @(x1, x2, d) M.proj(x2, d);
    
    M.pairmean = @pairmean;
    function y = pairmean(x1, x2)
        % Column-wise normalized sum of the two points.
        y = trnsp(normalize_columns(trnsp(x1+x2)));
    end

    % vec maps a tangent vector stored as a matrix to a column vector; mat
    % undoes this operation, so that vec and mat are inverse of each
    % other. Both are isometries between a subspace of R^nm and the
    % tangent space at x.
    vect = @(X) X(:);
    M.vec = @(x, u_mat) vect(trnsp(u_mat));
    M.mat = @(x, u_vec) trnsp(reshape(u_vec, [n, m]));
    M.vecmatareisometries = @() true;

end

% Rescales every column of the matrix X to unit 2-norm and returns the
% result.
function X = normalize_columns(X)
    % Computing the norms by hand is faster than norms(X, 2, 1) for small
    % X, and as fast for large X.
    inv_norms = 1./sqrt(sum(X.^2, 1));
    X = bsxfun(@times, X, inv_norms);
end

% Projects the ambient vector H orthogonally onto the tangent space at X,
% column by column.
function PXH = projection(X, H)

    % Row vector whose i-th entry is X(:, i).' * H(:, i), the inner
    % product of column i of H with its root point X(:, i).
    coeffs = sum(X.*H, 1);
    
    % Component of each H(:, i) that is parallel to X(:, i) ...
    parallel_part = bsxfun(@times, X, coeffs);
    
    % ... which is removed from H.
    PXH = H - parallel_part;

    % % Equivalent but slow code:
    % m = size(X, 2);
    % PXH = zeros(size(H));
    % for i = 1 : m
    %     PXH(:, i) = H(:, i) - X(:, i) * (X(:, i)'*H(:, i));
    % end

end

% Random n-by-m matrix with unit-norm columns: each column is a uniform
% sample on the sphere, obtained by normalizing Gaussian entries.
function x = random(n, m)

    gaussian = randn(n, m);
    x = normalize_columns(gaussian);

end

% Random tangent vector at x (an n-by-m point in column layout), scaled so
% that the 2-norm of its vectorized form is 1.
function d = randomvec(n, m, x)

    tangent = projection(x, randn(n, m));
    d = tangent / norm(tangent(:));

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/rotations/randrot.m
================================================
function R = randrot(n, N)
% Generates uniformly random rotation matrices.
%
% function R = randrot(n, N)
%
% Returns an n-by-n-by-N array R whose slices R(:, :, i) are orthogonal
% matrices of size n with determinant +1, i.e., elements of SO(n).
% N is optional and defaults to 1.
% Complexity: N times O(n^3).
% Uniformity on O(n) follows from Diaconis and Shahshahani 1987; details
% are in Mezzadri 2007,
% "How to generate random matrices from the classical compact groups."
% To land in SO(n), the first two columns are swapped whenever the
% determinant is -1.
%
% See also: randskew

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Sept. 25, 2012.
% Contributors: 
% Change log: 

    if nargin < 2
        N = 1;
    end
    
    % SO(1) contains the single element 1.
    if n == 1
        R = ones(1, 1, N);
        return;
    end
    
    R = zeros(n, n, N);
    
    for slice = 1 : N
        
        % QR factorization of a Gaussian matrix, with the signs of the
        % columns of the orthogonal factor fixed as in Mezzadri 2007:
        % this makes the factor uniformly distributed over O(n).
        gaussian = randn(n);
        [ortho, upper] = qr(gaussian);
        column_signs = sign(diag(upper));
        ortho = ortho * diag(column_signs);
        
        % A matrix in O(n) but not in SO(n) has determinant -1. Swapping
        % its first two columns flips the sign of the determinant, which
        % yields a uniformly distributed element of SO(n).
        if det(ortho) < 0
            first_column = ortho(:, 1);
            ortho(:, 1) = ortho(:, 2);
            ortho(:, 2) = first_column;
        end
        
        R(:, :, slice) = ortho;
        
    end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/rotations/randskew.m
================================================
function S = randskew(n, N)
% Generates random skew symmetric matrices with normal entries.
% 
% function S = randskew(n, N)
%
% Returns an n-by-n-by-N array S whose slices S(:, :, i), i = 1..N, are
% random skew-symmetric matrices. The strictly upper triangular entries
% are drawn independently from the standard normal distribution (zero
% mean, unit variance). N is optional and defaults to 1.
%
% See also: randrot

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Sept. 25, 2012.
% Contributors: 
% Change log: 


    if nargin < 2
        N = 1;
    end

    % Row and column subscripts of the strictly upper triangular entries
    % of a single n-by-n matrix.
    [rows, cols] = find(triu(ones(n), 1));
    
    % Slice subscript associated to each such entry, for all N slices:
    % column i of this matrix is filled with the value i.
    slices = repmat(1:N, n*(n-1)/2, 1);
    
    % Linear indices of the strictly upper triangular entries of every
    % slice of an n-by-n-by-N array.
    upper_idx = sub2ind([n n N], ...
                        repmat(rows, N, 1), repmat(cols, N, 1), slices(:));
    
    % Fill the upper triangular parts with Gaussian samples, then subtract
    % the slice-wise transpose to place the opposite values in the lower
    % triangular parts.
    S = zeros(n, n, N);
    S(upper_idx) = randn(size(upper_idx));
    S = S-multitransp(S);

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/rotations/rotationsfactory.m
================================================
function M = rotationsfactory(n, k)
% Returns a manifold structure to optimize over rotation matrices.
% 
% function M = rotationsfactory(n)
% function M = rotationsfactory(n, k)
%
% Special orthogonal group (the manifold of rotations): deals with matrices
% R of size n x n x k (or n x n if k = 1, which is the default) such that
% each n x n matrix is orthogonal, with determinant 1, i.e., X'*X = eye(n)
% if k = 1, or X(:, :, i)' * X(:, :, i) = eye(n) for i = 1 : k if k > 1.
%
% This is a description of SO(n)^k with the induced metric from the
% embedding space (R^nxn)^k, i.e., this manifold is a Riemannian
% submanifold of (R^nxn)^k endowed with the usual trace inner product.
%
% Tangent vectors are represented in the Lie algebra, i.e., as skew
% symmetric matrices. Use the function M.tangent2ambient(X, H) to switch
% from the Lie algebra representation to the embedding space
% representation. This is often necessary when defining
% problem.ehess(X, H).
%
% By default, the retraction is only a first-order approximation of the
% exponential. To force the use of a second-order approximation, call
% M.retr = M.retr2 after creating M. This switches from a QR-based
% computation to an SVD-based computation.
%
% By default, k = 1.
%
% See also: stiefelfactory

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Dec. 30, 2012.
% Contributors: 
% Change log:
%   Jan. 31, 2013 (NB)
%       Added egrad2rgrad and ehess2rhess
%   Oct. 21, 2016 (NB)
%       Added M.retr2: a second-order retraction based on SVD.

    
    if ~exist('k', 'var') || isempty(k)
        k = 1;
    end
    
    if k == 1
        M.name = @() sprintf('Rotations manifold SO(%d)', n);
    elseif k > 1
        M.name = @() sprintf('Product rotations manifold SO(%d)^%d', n, k);
    else
        error('k must be an integer no less than 1.');
    end
    
    % Dimension of the space of n-by-n skew-symmetric matrices, k times.
    % Written as n*(n-1)/2 rather than nchoosek(n, 2) so that n = 1 gives
    % dimension 0 instead of relying on nchoosek accepting k > n.
    M.dim = @() k*n*(n-1)/2;
    
    M.inner = @(x, d1, d2) d1(:).'*d2(:);
    
    M.norm = @(x, d) norm(d(:));
    
    M.typicaldist = @() pi*sqrt(n*k);
    
    % Projection of an ambient vector H to the Lie algebra representation.
    M.proj = @(X, H) multiskew(multiprod(multitransp(X), H));
    
    M.tangent = @(X, H) multiskew(H);
    
    % Lie algebra representation U -> ambient representation X*U.
    M.tangent2ambient = @(X, U) multiprod(X, U);
    
    M.egrad2rgrad = M.proj;
    
    M.ehess2rhess = @ehess2rhess;
    function Rhess = ehess2rhess(X, Egrad, Ehess, H)
        % Reminder : H contains skew-symmetric matrices. The actual
        % direction that the point X is moved along is X*H.
        Xt = multitransp(X);
        XtEgrad = multiprod(Xt, Egrad);
        symXtEgrad = multisym(XtEgrad);
        XtEhess = multiprod(Xt, Ehess);
        Rhess = multiskew( XtEhess - multiprod(H, symXtEgrad) );
    end
    
    M.retr = @retraction;
    function Y = retraction(X, U, t)
        % First-order, QR-based retraction; t defaults to 1.
        if nargin == 3
            tU = t*U;
        else
            tU = U;
        end
        Y = X + multiprod(X, tU);
        for i = 1 : k
            % This QR-based retraction is only a first-order approximation
            % of the exponential map, not a second-order one.
            [Q, R] = qr(Y(:, :, i));
            % The instruction with R ensures we are not flipping signs
            % of some columns, which should never happen in modern Matlab
            % versions but may be an issue with older versions.
            % (sign(sign(.)+.5) maps a zero diagonal entry to +1.)
            Y(:, :, i) = Q * diag(sign(sign(diag(R))+.5));
            % This is guaranteed to always yield orthogonal matrices with
            % determinant +1. Simply look at the eigenvalues of a skew
            % symmetric matrix, then at those of identity plus that matrix,
            % and compute their product for the determinant: it's strictly
            % positive in all cases.
        end
    end
    
    % A second order retraction is implemented here. To force its use,
    % after creating the factory M, execute M.retr = M.retr2.
    M.retr2 = @retraction2;
    function Y = retraction2(X, U, t)
        % SVD-based (polar) retraction; t defaults to 1.
        if nargin == 3
            tU = t*U;
        else
            tU = U;
        end
        Y = X + multiprod(X, tU);
        for i = 1 : k
            % Each slice is replaced by its own polar factor. The slice
            % index must be the loop variable i (not the slice count k),
            % otherwise only the last slice would be processed when k > 1.
            [Ui, ~, Vi] = svd(Y(:, :, i));
            Y(:, :, i) = Ui*Vi';
        end
    end
    
    M.exp = @exponential;
    function Y = exponential(X, U, t)
        % Exponential map: Y(:, :, i) = X(:, :, i)*expm(t*U(:, :, i)),
        % with t = 1 by default.
        if nargin == 3
            exptU = t*U;
        else
            exptU = U;
        end
        for i = 1 : k
            exptU(:, :, i) = expm(exptU(:, :, i));
        end
        Y = multiprod(X, exptU);
    end
    
    M.log = @logarithm;
    function U = logarithm(X, Y)
        % Logarithmic map, in Lie algebra representation.
        U = multiprod(multitransp(X), Y);
        for i = 1 : k
            % The result of logm should be real in theory, but it is
            % numerically useful to force it.
            U(:, :, i) = real(logm(U(:, :, i)));
        end
        % Ensure the tangent vector is in the Lie algebra.
        U = multiskew(U);
    end

    M.hash = @(X) ['z' hashmd5(X(:))];
    
    M.rand = @() randrot(n, k);
    
    M.randvec = @randomvec;
    function U = randomvec(X) %#ok<INUSD>
        % Random skew-symmetric matrices, normalized as a whole; the
        % result does not depend on X in this representation.
        U = randskew(n, k);
        nrmU = sqrt(U(:).'*U(:));
        U = U / nrmU;
    end
    
    M.lincomb = @matrixlincomb;
    
    M.zerovec = @(x) zeros(n, n, k);
    
    % Tangent vectors are stored in the Lie algebra: the transport leaves
    % the representation unchanged.
    M.transp = @(x1, x2, d) d;
    
    M.pairmean = @pairmean;
    function Y = pairmean(X1, X2)
        % Point reached from X1 after moving half-way along log(X1, X2).
        V = M.log(X1, X2);
        Y = M.exp(X1, .5*V);
    end
    
    M.dist = @(x, y) M.norm(x, M.log(x, y));
    
    M.vec = @(x, u_mat) u_mat(:);
    M.mat = @(x, u_vec) reshape(u_vec, [n, n, k]);
    M.vecmatareisometries = @() true;

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/specialeuclidean/specialeuclideanfactory.m
================================================
function M = specialeuclideanfactory(n, k)
% Returns a manifold structure to optimize over the special Euclidean group
% 
% function M = specialeuclideanfactory(n)
% function M = specialeuclideanfactory(n, k)
%
% The special Euclidean group (the manifold of rigid transformations) is
% modeled here as the product of the rotations group SO(n) and of the
% translation group R^n, copied k times. (See note below.)
%
% A point X on the manifold is a structure with two fields:
%   X.R is a 3D array of size nxnxk whose slices X.R(:, :, i) are rotation
%       matrices (orthogonal with determinant 1);
%   X.t is a matrix of size nxk whose columns X.t(:, i) are translation
%       vectors.
%
% Tangent vectors are structures with the same two fields. Their
% rotational components are represented in the Lie algebra: each slice
% Xdot.R(:, :, i) is a skew-symmetric matrix. Call
% M.tangent2ambient(X, Xdot) to get a representation in the ambient space,
% which is often necessary when defining problem.ehess(X, Xdot).
%
% This is a description of SE(n)^k with the induced metric from the
% embedding space (R^nxn)^k x (R^n)^k, i.e., this manifold is a Riemannian
% submanifold of the embedding Euclidean space with the usual inner
% product.
%
% By default, k = 1.
%
% Note: this is a product geometry: it may not be the "appropriate"
% geometry to give to SE(n) for your application. In particular, this is
% not the Lie geometry of SE(n), because SE(n) is not a direct product of
% SO(n) and R^n: it is only a semidirect product. Following a comment by
% Martijn Zeestraten on the Manopt forum, see this file for more
% information about the Lie geometry:
%   http://ethaneade.com/lie.pdf
%
% See rotationsfactory and euclideanfactory for details.
%
% See also: rotationsfactory euclideanfactory

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Sep. 23, 2014.
% Contributors: 
% Change log:

    
    % The number of copies is optional.
    if ~exist('k', 'var') || isempty(k)
        k = 1;
    end
    
    % One factor for the rotations (field R), one for the translations
    % (field t), combined into a product manifold.
    rotations = rotationsfactory(n, k);
    translations = euclideanfactory(n, k);
    
    M = productmanifold(struct('R', rotations, 't', translations));

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/sphere/spherecomplexfactory.m
================================================
function M = spherecomplexfactory(n, m)
% Returns a manifold struct to optimize over unit-norm complex matrices.
%
% function M = spherecomplexfactory(n)
% function M = spherecomplexfactory(n, m)
%
% Manifold of n-by-m complex matrices of unit Frobenius norm.
% By default, m = 1, which corresponds to the unit sphere in C^n. The
% metric is such that the sphere is a Riemannian submanifold of the space
% of 2nx2m real matrices with the usual trace inner product, i.e., the
% usual metric.
%
% Inputs:
%   n, m : size of the matrices. m is optional: if it is omitted or empty,
%          m = 1 is used.
%
% Output:
%   M : structure of function handles describing the manifold. Points and
%       tangent vectors are both n-by-m complex matrices. M.vec maps a
%       tangent vector to a real column vector of length 2*n*m (real parts
%       stacked on top of imaginary parts) and M.mat is its inverse.
% 
% See also: spherefactory

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Dec. 30, 2012.
% Contributors: 
% Change log: 
%
%   Sep. 4, 2014 (NB):
%       Added ehess2rhess.
%
%   April 7, 2015 (NB):
%       Added vec/mat pair (for use with hessianspectrum, for example).
%
%   April 13, 2015 (NB):
%       Added logarithm
%
%   Oct. 8, 2016 (NB)
%       Code for exponential was simplified to only treat the zero vector
%       as a particular case.
%
%   Oct. 22, 2016 (NB)
%       Distance function dist now significantly more accurate for points
%       within 1e-7 and less from each other.

    
    % An omitted or empty m selects the default (column vectors).
    if ~exist('m', 'var') || isempty(m)
        m = 1;
    end

    if m == 1
        M.name = @() sprintf('Complex sphere S^%d', n-1);
    else
        M.name = @() sprintf('Unit F-norm %dx%d complex matrices', n, m);
    end
    
    % Real dimension: 2*n*m real coordinates minus one norm constraint.
    M.dim = @() 2*(n*m)-1;
    
    % Real part of the complex inner product (x is unused).
    M.inner = @(x, d1, d2) real(d1(:)'*d2(:));
    
    M.norm = @(x, d) norm(d, 'fro');
    
    % Distance computed from the chordal distance. The call to real()
    % discards the imaginary part asin returns if rounding pushes its
    % argument slightly above 1.
    M.dist = @(x, y) real(2*asin(.5*norm(x - y, 'fro')));
    
    M.typicaldist = @() pi;
    
    % Removes from d its component along x (with respect to M.inner) and
    % returns the result as an n-by-m matrix.
    M.proj = @(x, d) reshape(d(:) - x(:)*(real(x(:)'*d(:))), n, m);
    
    % For Riemannian submanifolds, converting a Euclidean gradient into a
    % Riemannian gradient amounts to an orthogonal projection.
    M.egrad2rgrad = M.proj;
    
    M.ehess2rhess = @ehess2rhess;
    function rhess = ehess2rhess(x, egrad, ehess, u)
        % Projected Euclidean Hessian, minus a correction term along u
        % weighted by the real inner product of x and egrad.
        rhess = M.proj(x, ehess) - real((x(:)'*egrad(:)))*u;
    end
    
    M.tangent = M.proj;
    
    M.exp = @exponential;
    
    M.retr = @retraction;

    M.log = @logarithm;
    function v = logarithm(x1, x2)
        % The projected difference gives the direction; its length is then
        % rescaled to the distance between x1 and x2.
        v = M.proj(x1, x2 - x1);
        di = M.dist(x1, x2);
        % If the two points are "far apart", correct the norm.
        if di > 1e-6
            nv = norm(v, 'fro');
            v = v * (di / nv);
        end
    end
    
    M.hash = @(x) ['z' hashmd5([real(x(:)) ; imag(x(:))])];
    
    M.rand = @() random(n, m);
    
    M.randvec = @(x) randomvec(n, m, x);
    
    M.lincomb = @matrixlincomb;
    
    M.zerovec = @(x) zeros(n, m);
    
    % Transport by projection to the tangent space at x2 (x1 is unused).
    M.transp = @(x1, x2, d) M.proj(x2, d);
    
    M.pairmean = @pairmean;
    function y = pairmean(x1, x2)
        % Normalized sum of the two points.
        y = x1+x2;
        y = y / norm(y, 'fro');
    end

    mn = m*n;
    M.vec = @(x, u_mat) [real(u_mat(:)) ; imag(u_mat(:))];
    % The first mn entries of u_vec are the real parts and the remaining
    % ones the imaginary parts, both in column-major order of an n-by-m
    % matrix. The reshape must therefore use (n, m): using (m, n) returns
    % a matrix of the wrong size whenever n ~= m.
    M.mat = @(x, u_vec) reshape(u_vec(1:mn), n, m) + 1i*reshape(u_vec((mn+1):end), n, m);
    M.vecmatareisometries = @() true;

end

% Exponential on the sphere
function y = exponential(x, d, t)
% Exponential map on the sphere: moves from x along the tangent vector d.
%
% With two inputs the step is d itself (as if t = 1); otherwise the step
% is t*d. A step of zero length returns x unchanged, which also avoids a
% division by zero.

    if nargin == 2
        step = d;
    else
        step = t*d;
    end

    step_len = norm(step, 'fro');

    % Anything that is not a strictly positive length leaves x in place.
    if ~(step_len > 0)
        y = x;
        return;
    end

    y = x*cos(step_len) + step*(sin(step_len)/step_len);

end

% Retraction on the sphere
function y = retraction(x, d, t)
% Retraction on the sphere: steps to x + t*d, then rescales the result to
% unit Frobenius norm. The step size t defaults to 1 when only two inputs
% are given.

    if nargin == 2
        t = 1;
    end

    moved = x+t*d;
    y = moved/norm(moved, 'fro');

end

% Uniform random sampling on the sphere.
function x = random(n, m)
% Draws an n-by-m complex matrix with Gaussian real and imaginary parts
% and scales it to unit Frobenius norm.

    gaussian = randn(n, m) + 1i*randn(n, m);
    x = gaussian/norm(gaussian, 'fro');

end

% Random normalized tangent vector at x.
function d = randomvec(n, m, x)
% Draws a complex Gaussian n-by-m matrix, removes its component along x
% (in the real inner product) and scales the result to unit Frobenius
% norm.

    raw = randn(n, m) + 1i*randn(n, m);
    along_x = real(x(:)'*raw(:));
    tangent = reshape(raw(:) - x(:)*(along_x), n, m);
    d = tangent / norm(tangent, 'fro');

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/sphere/spherefactory.m
================================================
function M = spherefactory(n, m)
% Returns a manifold struct to optimize over unit-norm vectors or matrices.
%
% function M = spherefactory(n)
% function M = spherefactory(n, m)
%
% The manifold is the set of real n-by-m matrices whose Frobenius norm is
% equal to 1. If m is omitted, m = 1 is used, which gives the unit sphere
% in R^n. The geometry is that of a Riemannian submanifold of the space of
% nxm matrices equipped with the usual trace inner product.
% 
% See also: obliquefactory spherecomplexfactory

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Dec. 30, 2012.
% Contributors: 
% Change log: 
%
%   Oct. 8, 2016 (NB)
%       Code for exponential was simplified to only treat the zero vector
%       as a particular case.
%
%   Oct. 22, 2016 (NB)
%       Distance function dist now significantly more accurate for points
%       within 1e-7 and less from each other.
%
%   July 20, 2017 (NB)
%       Following conversations with Bruno Iannazzo and P.-A. Absil,
%       the distance function is now even more accurate.
%
%   Sep. 7, 2017 (NB)
%       New isometric vector transport available in M.isotransp,
%       contributed by Changshuo Liu.

    
    % Column vectors are the default shape.
    if nargin < 2
        m = 1;
    end

    if m == 1
        M.name = @() sprintf('Sphere S^%d', n-1);
    else
        M.name = @() sprintf('Unit F-norm %dx%d matrices', n, m);
    end
    
    M.dim = @() n*m-1;
    
    M.inner = @(x, u, v) u(:).'*v(:);
    
    M.norm = @(x, u) norm(u, 'fro');
    
    % Distance obtained from the chordal distance norm(x - y, 'fro').
    % This is mathematically equivalent to acos(x(:)'*y(:)) but is much
    % more accurate when x and y are close. For almost antipodal points
    % the accuracy is good but not optimal; branching on a chordal
    % distance above 1.9 to return pi - dist(x, -y) would improve it. Full
    % accuracy there is rarely necessary in Manopt, hence the simpler
    % code.
    M.dist = @(x, y) real(2*asin(.5*norm(x - y, 'fro')));
    
    M.typicaldist = @() pi;
    
    M.proj = @tangent_part;
    function ud = tangent_part(x, d)
        % Subtracts from d its component along x.
        ud = d - x*(x(:).'*d(:));
    end
    
    M.tangent = @tangent_part;
    
    % For Riemannian submanifolds, converting a Euclidean gradient into a
    % Riemannian gradient amounts to an orthogonal projection.
    M.egrad2rgrad = @tangent_part;
    
    M.ehess2rhess = @hessian_conversion;
    function rhess = hessian_conversion(x, egrad, ehess, u)
        % Projected Euclidean Hessian, minus a correction term along u.
        correction = (x(:)'*egrad(:))*u;
        rhess = tangent_part(x, ehess) - correction;
    end
    
    M.exp = @exponential;
    
    M.retr = @retraction;

    M.log = @logarithm;
    function v = logarithm(x1, x2)
        % Direction from the projected difference, length from M.dist.
        v = tangent_part(x1, x2 - x1);
        geodesic_len = M.dist(x1, x2);
        % If the two points are "far apart", correct the norm.
        if geodesic_len > 1e-6
            v = v * (geodesic_len / norm(v, 'fro'));
        end
    end
    
    M.hash = @(x) ['z' hashmd5(x(:))];
    
    M.rand = @() random(n, m);
    
    M.randvec = @(x) randomvec(n, m, x);
    
    M.lincomb = @matrixlincomb;
    
    M.zerovec = @(x) zeros(n, m);
    
    % Transport by projection to the tangent space at x2 (x1 is unused).
    M.transp = @(x1, x2, d) tangent_part(x2, d);
    
    % Isometric vector transport of d from the tangent space at x1 to x2.
    % This is actually a parallel vector transport, see 5 in
    % http://epubs.siam.org/doi/pdf/10.1137/16M1069298
    % "A Riemannian Gradient Sampling Algorithm for Nonsmooth Optimization
    %  on Manifolds", by Hosseini and Uschmajew, SIOPT 2017
    M.isotransp = @(x1, x2, d) parallel_transport(x1, x2, d);
    function Td = parallel_transport(x1, x2, d)
        towards_x2 = logarithm(x1, x2);
        theta = norm(towards_x2, 'fro');
        if theta > 0
            unit_dir = towards_x2 / theta;
            coeff = unit_dir(:)'*d(:);
            Td = d + (cos(theta)-1)*coeff*unit_dir ...
                    -  sin(theta)   *coeff*x1;
        else
            % x1 == x2, so the transport is identity
            Td = d;
        end
    end
    
    M.pairmean = @normalized_sum;
    function y = normalized_sum(x1, x2)
        % Sum of the two points, scaled back to unit norm.
        total = x1+x2;
        y = total / norm(total, 'fro');
    end

    % Tangent vectors are vectorized column by column; mat undoes vec.
    M.vec = @(x, u_mat) u_mat(:);
    M.mat = @(x, u_vec) reshape(u_vec, [n, m]);
    M.vecmatareisometries = @() true;

end

% Exponential on the sphere
function y = exponential(x, d, t)
% Exponential map on the sphere: moves from x along the tangent vector d.
%
% With two inputs the step is d itself (as if t = 1); otherwise the step
% is t*d. Only the exactly-zero step is treated as a special case:
% former versions of Manopt avoided the computation of sin(a)/a for small
% a, but further investigations suggest this computation is well-behaved
% numerically.

    if nargin == 2
        step = d;
    else
        step = t*d;
    end

    step_len = norm(step, 'fro');

    % Anything that is not a strictly positive length leaves x in place.
    if ~(step_len > 0)
        y = x;
        return;
    end

    y = x*cos(step_len) + step*(sin(step_len)/step_len);

end

% Retraction on the sphere
function y = retraction(x, d, t)
% Retraction on the sphere: adds the step to x, then rescales the result
% to unit Frobenius norm. With two inputs the step is d itself (as if
% t = 1); otherwise it is t*d.

    if nargin == 2
        moved = x + d;
    else
        moved = x + t*d;
    end

    y = moved / norm(moved, 'fro');

end

% Uniform random sampling on the sphere.
function x = random(n, m)
% Draws an n-by-m matrix with Gaussian entries and scales it to unit
% Frobenius norm.

    gaussian = randn(n, m);
    x = gaussian / norm(gaussian, 'fro');

end

% Random normalized tangent vector at x.
function d = randomvec(n, m, x)
% Draws a Gaussian n-by-m matrix, removes its component along x and
% scales the result to unit Frobenius norm.

    raw = randn(n, m);
    tangent = raw - x*(x(:).'*raw(:));
    d = tangent / norm(tangent, 'fro');

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/sphere/spheresymmetricfactory.m
================================================
function M = spheresymmetricfactory(n)
% Returns a manifold struct to optimize over unit-norm symmetric matrices.
%
% function M = spheresymmetricfactory(n)
%
% Manifold of n-by-n real symmetric matrices of unit Frobenius norm.
% The metric is such that the sphere is a Riemannian submanifold of the
% space of nxn symmetric matrices with the usual trace inner product, i.e.,
% the usual metric <A, B> = trace(A'*B).
%
% Input:
%   n : size of the (square) matrices.
%
% Output:
%   M : structure of function handles describing the manifold. Points and
%       tangent vectors are both n-by-n matrices. M.vec maps a tangent
%       vector to a column vector of length n^2 and M.mat reshapes such a
%       vector back to an n-by-n matrix.
% 
% See also: spherefactory obliquefactory spherecomplexfactory

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, April 17, 2015.
% Contributors: 
% Change log: 
%
%   Oct. 8, 2016 (NB)
%       Code for exponential was simplified to only treat the zero vector
%       as a particular case.
%
%   Oct. 22, 2016 (NB)
%       Distance function dist now significantly more accurate for points
%       within 1e-7 and less from each other.
%
%   July 20, 2017 (NB)
%       The distance function is now even more accurate.


    M.name = @() sprintf('Sphere of symmetric matrices of size %d', n);
    
    % n*(n+1)/2 free entries of a symmetric matrix, minus one norm
    % constraint.
    M.dim = @() n*(n+1)/2 - 1;
    
    M.inner = @(x, d1, d2) d1(:).'*d2(:);
    
    M.norm = @(x, d) norm(d, 'fro');
    
    % Distance computed from the chordal distance. The call to real()
    % discards the imaginary part asin returns if rounding pushes its
    % argument slightly above 1.
    M.dist = @(x, y) real(2*asin(.5*norm(x - y, 'fro')));
    
    M.typicaldist = @() pi;
    
    M.proj = @proj;
    function xdot = proj(x, d)
        % Symmetrize d first, then remove its component along x.
        d = (d+d.')/2;
        xdot = d - x*(x(:).'*d(:));
    end
    
    M.tangent = @proj;
    
    % For Riemannian submanifolds, converting a Euclidean gradient into a
    % Riemannian gradient amounts to an orthogonal projection.
    M.egrad2rgrad = @proj;
    
    M.ehess2rhess = @ehess2rhess;
    function rhess = ehess2rhess(x, egrad, ehess, u)
        % these are not explicitly required, given the use.
        % egrad = (egrad + egrad.')/2;
        % ehess = (ehess + ehess.')/2;
        rhess = proj(x, ehess) - (x(:)'*egrad(:))*u;
    end
    
    M.exp = @exponential;
    
    M.retr = @retraction;

    M.log = @logarithm;
    function v = logarithm(x1, x2)
        % The projected difference gives the direction; its length is then
        % rescaled to the distance between x1 and x2.
        v = proj(x1, x2 - x1);
        di = M.dist(x1, x2);
        % If the two points are "far apart", correct the norm.
        if di > 1e-6
            nv = norm(v, 'fro');
            v = v * (di / nv);
        end
    end
    
    M.hash = @(x) ['z' hashmd5(x(:))];
    
    M.rand = @() random(n);
    
    M.randvec = @(x) randomvec(n, x);
    
    M.lincomb = @matrixlincomb;
    
    M.zerovec = @(x) zeros(n);
    
    % Transport by projection to the tangent space at x2 (x1 is unused).
    M.transp = @(x1, x2, d) proj(x2, d);
    
    M.pairmean = @pairmean;
    function y = pairmean(x1, x2)
        % Normalized sum of the two points.
        y = x1+x2;
        y = y / norm(y, 'fro');
    end

    % TODO : check isometry and fix.
    M.vec = @(x, u_mat) u_mat(:);
    % Tangent vectors are n-by-n. This factory has no variable m, so the
    % target size must be [n, n]; reshaping to [n, m] made every call to
    % M.mat fail.
    M.mat = @(x, u_vec) reshape(u_vec, [n, n]);
    M.vecmatareisometries = @() false;

end

% Exponential on the sphere
function y = exponential(x, d, t)
% Exponential map on the sphere: moves from x along the tangent vector d.
%
% With two inputs the step is d itself (as if t = 1); otherwise the step
% is t*d. A step of zero length returns x unchanged, which also avoids a
% division by zero.

    if nargin == 2
        step = d;
    else
        step = t*d;
    end

    step_len = norm(step, 'fro');

    % Anything that is not a strictly positive length leaves x in place.
    if ~(step_len > 0)
        y = x;
        return;
    end

    y = x*cos(step_len) + step*(sin(step_len)/step_len);

end

% Retraction on the sphere
function y = retraction(x, d, t)
% Retraction on the sphere: steps to x + t*d, then rescales the result to
% unit Frobenius norm. The step size t defaults to 1 when only two inputs
% are given.

    if nargin == 2
        t = 1;
    end

    moved = x + t*d;
    y = moved / norm(moved, 'fro');

end

% Uniform random sampling on the sphere.
function x = random(n)
% Draws an n-by-n Gaussian matrix, keeps its symmetric part and scales it
% to unit Frobenius norm.

    gaussian = randn(n);
    symmetric = (gaussian + gaussian.')/2;
    x = symmetric/norm(symmetric, 'fro');

end

% Random normalized tangent vector at x.
function d = randomvec(n, x)
% Draws an n-by-n Gaussian matrix, keeps its symmetric part, removes the
% component along x and scales the result to unit Frobenius norm.

    gaussian = randn(n);
    symmetric = (gaussian + gaussian.')/2;
    tangent = symmetric - x*(x(:).'*symmetric(:));
    d = tangent / norm(tangent, 'fro');

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/stiefel/stiefelcomplexfactory.m
================================================
function M = stiefelcomplexfactory(n, p, k)
% Returns a manifold struct. to optimize over complex orthonormal matrices.
%
% function M = stiefelcomplexfactory(n, p)
% function M = stiefelcomplexfactory(n, p, k)
%
% The complex Stiefel manifold is the set of complex orthonormal nxp
% matrices. If k is larger than 1, this is the Cartesian product of the
% complex Stiefel manifold taken k times. The metric is such that the
% manifold is a Riemannian submanifold of C^nxp equipped with the usual
% real-trace inner product, that is, it is the usual metric for the complex
% plane identified with R^2.
%
% Points are represented as matrices X of size n x p x k (or n x p if k=1,
% which is the default) such that each complex n x p matrix is orthonormal,
% i.e., X'*X = eye(p) if k = 1, or X(:, :, i)' * X(:, :, i) = eye(p) for
% i = 1 : k if k > 1. Tangent vectors are represented as matrices the same
% size as points.
%
% By default, k = 1.
%
%
% Please cite the Manopt paper as well as either of these research papers
% pertaining to this specific geometry:
% @InProceedings{sato2013complex,
%   Title        = {A complex singular value decomposition algorithm based on the {R}iemannian {N}ewton method},
%   Author       = {Sato, H. and Iwai, T.},
%   Booktitle    = {Decision and Control ({CDC}), 2013 {IEEE} 52nd Annual Conference on},
%   Year         = {2013},
%   Organization = {IEEE},
%   Pages        = {2972--2978}
% }
% @InProceedings{sato2014Riemannian,
%   Title        = {{R}iemannian conjugate gradient method for complex singular value decomposition problem},
%   Author       = {Sato, H.},
%   Booktitle    = {Decision and Control ({CDC}), 2014 {IEEE} 53rd Annual Conference on},
%   Year         = {2014},
%   Organization = {IEEE},
%   Pages        = {5849--5854}
% }
%
%
% See also: stiefelfactory

% This file is part of Manopt: www.manopt.org.
% Original author: Hiroyuki Sato, April 27, 2015.
% Contributors: 
% Change log: 
    
    % A single copy of the manifold is the default.
    if nargin < 3 || isempty(k)
        k = 1;
    end
    
    if k == 1
        M.name = @() sprintf('Complex Stiefel manifold St(%d, %d)', n, p);
    elseif k > 1
        M.name = @() sprintf('Product complex Stiefel manifold St(%d, %d)^%d', n, p, k);
    else
        error('k must be an integer no less than 1.');
    end
    
    % Complex counterpart of the real formula k*(n*p - .5*p*(p+1)).
    M.dim = @() k*(2*n*p - p^2);
    
    % Real-trace inner product (x is unused).
    M.inner = @(x, u, v) real(u(:)'*v(:));
    
    M.norm = @(x, u) norm(u(:));
    
    M.dist = @(x, y) error('stiefel.dist not implemented yet.');
    
    M.typicaldist = @() sqrt(p*k);
    
    M.proj = @tangent_projection;
    function Uproj = tangent_projection(X, U)
        % Same formula as the real Stiefel projection, with conjugate
        % transposes (multihconj) and Hermitian parts (multiherm) taking
        % the place of multitransp and multisym.
        herm_part = multiherm(multiprod(multihconj(X), U));
        Uproj = U - multiprod(X, herm_part);
    end
    
    M.tangent = @tangent_projection;
    
    % For Riemannian submanifolds, converting a Euclidean gradient into a
    % Riemannian gradient amounts to an orthogonal projection.
    M.egrad2rgrad = @tangent_projection;
    
    M.ehess2rhess = @hessian_conversion;
    function rhess = hessian_conversion(X, egrad, ehess, H)
        % Subtract H times the Hermitian part of X^H*egrad from the
        % Euclidean Hessian, then project onto the tangent space.
        herm_XHG = multiherm(multiprod(multihconj(X), egrad));
        rhess = tangent_projection(X, ehess - multiprod(H, herm_XHG));
    end
    
    M.retr = @qr_retraction;
    function Xnew = qr_retraction(X, U, t)
        if nargin < 3
            t = 1.0;
        end
        Xnew = X + t*U;
        for slice = 1 : k
            [Qfac, Rfac] = qr(Xnew(:, :, slice), 0);
            % The instruction with R assures we are not flipping signs
            % of some columns, which should never happen in modern Matlab
            % versions but may be an issue with older versions.
            column_signs = sign(sign(diag(Rfac))+.5);
            Xnew(:, :, slice) = Qfac * diag(column_signs);
        end
    end
    
    M.exp = @exponential_map;
    function Xnew = exponential_map(X, U, t)
        if nargin == 2
            t = 1;
        end
        step = t*U;
        Xnew = zeros(size(X));
        for slice = 1 : k
            % From a formula by Ross Lippert, Example 5.4.2 in AMS08.
            Xs = X(:, :, slice);
            Us = step(:, :, slice);
            XhU = Xs'*Us;
            big_exp = expm([XhU , -Us'*Us ; eye(p) , XhU]);
            right_factor = [ expm(-Xs'*Us) ; zeros(p) ];
            Xnew(:, :, slice) = [Xs Us] * big_exp * right_factor;
        end
        
    end

    % Real and imaginary parts are hashed together.
    M.hash = @(X) ['z' hashmd5([real(X(:)) ; imag(X(:))])];
    
    M.rand = @random_point;
    function X = random_point()
        % Each slice is the Q factor of a complex Gaussian matrix.
        X = zeros(n, p, k);
        for slice = 1 : k
            [Qfac, unused] = qr(randn(n, p) + 1i*randn(n,p), 0); %#ok<NASGU>
            X(:, :, slice) = Qfac;
        end
    end
    
    M.randvec = @random_tangent;
    function U = random_tangent(X)
        % Projected complex Gaussian array, normalized as a whole.
        U = tangent_projection(X, randn(n, p, k) + 1i*randn(n, p, k));
        U = U / norm(U(:));
    end
    
    M.lincomb = @matrixlincomb;
    
    M.zerovec = @(x) zeros(n, p, k);
    
    % Transport by projection to the tangent space at x2 (x1 is unused).
    M.transp = @(x1, x2, d) tangent_projection(x2, d);
    
    % vec stacks real parts on top of imaginary parts; mat undoes it.
    M.vec = @(x, u_mat) [real(u_mat(:)) ; imag(u_mat(:))];
    M.mat = @(x, u_vec) reshape(u_vec(1:(n*p*k)) + 1i*u_vec((n*p*k+1):end), [n, p, k]);
    M.vecmatareisometries = @() true; % TODO : to check.

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/stiefel/stiefelfactory.m
================================================
function M = stiefelfactory(n, p, k)
% Returns a manifold structure to optimize over orthonormal matrices.
%
% function M = stiefelfactory(n, p)
% function M = stiefelfactory(n, p, k)
%
% The Stiefel manifold is the set of orthonormal nxp matrices. If k
% is larger than 1, this is the Cartesian product of the Stiefel manifold
% taken k times. The metric is such that the manifold is a Riemannian
% submanifold of R^nxp equipped with the usual trace inner product, that
% is, it is the usual metric.
%
% Points are represented as matrices X of size n x p x k (or n x p if k=1,
% which is the default) such that each n x p matrix is orthonormal,
% i.e., X'*X = eye(p) if k = 1, or X(:, :, i)' * X(:, :, i) = eye(p) for
% i = 1 : k if k > 1. Tangent vectors are represented as matrices the same
% size as points.
%
% By default, k = 1.
%
% See also: grassmannfactory rotationsfactory

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Dec. 30, 2012.
% Contributors: 
% Change log: 
%  July  5, 2013 (NB) : Added ehess2rhess.
%  Jan. 27, 2014 (BM) : Bug in ehess2rhess corrected.
%  June 24, 2014 (NB) : Added true exponential map and changed the randvec
%                       function so that it now returns a globally
%                       normalized vector, not a vector where each
%                       component is normalized (this only matters if k>1).

    
    % A single copy of the manifold is the default.
    if nargin < 3 || isempty(k)
        k = 1;
    end
    
    if k == 1
        M.name = @() sprintf('Stiefel manifold St(%d, %d)', n, p);
    elseif k > 1
        M.name = @() sprintf('Product Stiefel manifold St(%d, %d)^%d', n, p, k);
    else
        error('k must be an integer no less than 1.');
    end
    
    M.dim = @() k*(n*p - .5*p*(p+1));
    
    M.inner = @(x, u, v) u(:).'*v(:);
    
    M.norm = @(x, u) norm(u(:));
    
    M.dist = @(x, y) error('stiefel.dist not implemented yet.');
    
    M.typicaldist = @() sqrt(p*k);
    
    M.proj = @tangent_projection;
    function Uproj = tangent_projection(X, U)
        % Slice by slice, this computes Ui - Xi*sym(Xi'*Ui), where
        % sym(A) = .5*(A+A'). The multi* tools apply the operation to
        % all k slices at once, which is much faster than an explicit
        % loop over the slices.
        sym_part = multisym(multiprod(multitransp(X), U));
        Uproj = U - multiprod(X, sym_part);
    end
    
    M.tangent = @tangent_projection;
    
    % For Riemannian submanifolds, converting a Euclidean gradient into a
    % Riemannian gradient amounts to an orthogonal projection.
    M.egrad2rgrad = @tangent_projection;
    
    M.ehess2rhess = @hessian_conversion;
    function rhess = hessian_conversion(X, egrad, ehess, H)
        % Subtract H times the symmetric part of X'*egrad from the
        % Euclidean Hessian, then project onto the tangent space.
        sym_XtG = multisym(multiprod(multitransp(X), egrad));
        rhess = tangent_projection(X, ehess - multiprod(H, sym_XtG));
    end
    
    M.retr = @qr_retraction;
    function Xnew = qr_retraction(X, U, t)
        if nargin < 3
            t = 1.0;
        end
        Xnew = X + t*U;
        for slice = 1 : k
            [Qfac, Rfac] = qr(Xnew(:, :, slice), 0);
            % The instruction with R assures we are not flipping signs
            % of some columns, which should never happen in modern Matlab
            % versions but may be an issue with older versions.
            column_signs = sign(sign(diag(Rfac))+.5);
            Xnew(:, :, slice) = Qfac * diag(column_signs);
        end
    end
    
    M.exp = @exponential_map;
    function Xnew = exponential_map(X, U, t)
        if nargin == 2
            t = 1;
        end
        step = t*U;
        Xnew = zeros(size(X));
        for slice = 1 : k
            % From a formula by Ross Lippert, Example 5.4.2 in AMS08.
            Xs = X(:, :, slice);
            Us = step(:, :, slice);
            XtU = Xs'*Us;
            big_exp = expm([XtU , -Us'*Us ; eye(p) , XtU]);
            right_factor = [ expm(-Xs'*Us) ; zeros(p) ];
            Xnew(:, :, slice) = [Xs Us] * big_exp * right_factor;
        end
        
    end

    M.hash = @(X) ['z' hashmd5(X(:))];
    
    M.rand = @random_point;
    function X = random_point()
        % Each slice is the Q factor of a Gaussian matrix.
        X = zeros(n, p, k);
        for slice = 1 : k
            [Qfac, unused] = qr(randn(n, p), 0); %#ok<NASGU>
            X(:, :, slice) = Qfac;
        end
    end
    
    M.randvec = @random_tangent;
    function U = random_tangent(X)
        % Projected Gaussian array, normalized as a whole (not slice by
        % slice).
        U = tangent_projection(X, randn(n, p, k));
        U = U / norm(U(:));
    end
    
    M.lincomb = @matrixlincomb;
    
    M.zerovec = @(x) zeros(n, p, k);
    
    % Transport by projection to the tangent space at x2 (x1 is unused).
    M.transp = @(x1, x2, d) tangent_projection(x2, d);
    
    % Tangent vectors are vectorized column by column; mat undoes vec.
    M.vec = @(x, u_mat) u_mat(:);
    M.mat = @(x, u_vec) reshape(u_vec, [n, p, k]);
    M.vecmatareisometries = @() true;

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/stiefel/stiefelgeneralizedfactory.m
================================================
function M = stiefelgeneralizedfactory(n, p, B)
% Returns a manifold structure of "scaled" orthonormal matrices.
%
% function M = stiefelgeneralizedfactory(n, p)
% function M = stiefelgeneralizedfactory(n, p, B)
%
% The generalized Stiefel manifold is the set of "scaled" orthonormal 
% nxp matrices X such that X'*B*X is identity. B must be positive definite.
% If B is identity, then this is the standard Stiefel manifold.
%
% The generalized Stiefel manifold is endowed with a scaled metric
% by making it a Riemannian submanifold of the Euclidean space,
% again endowed with the scaled inner product.
%
% Inputs:
%   n, p : size of the matrices X.
%   B    : n-by-n matrix defining the constraint X'*B*X = eye(p) and the
%          inner product trace(eta'*(B*zeta)). Optional: if omitted or
%          empty, B = speye(n) is used.
%
% Output:
%   M : structure of function handles describing the manifold. Points and
%       tangent vectors are both n-by-p matrices.
%
% Some notions (not all) are from Section 4.5 of the paper
% "The geometry of algorithms with orthogonality constraints",
% A. Edelman, T. A. Arias, S. T. Smith, SIMAX, 1998.
%
% Paper link: http://arxiv.org/abs/physics/9806030.
%
% Note: egrad2rgrad and ehess2rhess involve solving linear systems in B. If
% this is a bottleneck for a specific application, then a way forward is to
% create a modified version of this file which preprocesses B to speed this
% up (typically, by computing a Cholesky factorization of it, then calling
% an appropriate solver).
%
% See also: stiefelfactory  grassmannfactory  grassmanngeneralizedfactory 

% This file is part of Manopt: www.manopt.org.
% Original author: Bamdev Mishra, June 30, 2015.
% Contributors:
%
% Change log:
%   

    
    if ~exist('B', 'var') || isempty(B)
        B = speye(n); % Standard Stiefel manifold.
    end
    
    M.name = @() sprintf('Generalized Stiefel manifold St(%d, %d)', n, p);
    
    M.dim = @() (n*p - .5*p*(p+1));
    
    M.inner = @(X, eta, zeta) trace(eta'*(B*zeta)); % Scaled metric.
    
    % Norm induced by the scaled metric above.
    M.norm = @(X, eta) sqrt(M.inner(X, eta, eta));
    
    M.dist = @(X, Y) error('stiefelgeneralizedfactory.dist not implemented yet.');
    
    M.typicaldist = @() sqrt(p);
    
    % Orthogonal projection of an ambient vector U to the tangent space
    % at X.
    M.proj = @projection;
    function Up = projection(X, U)
        BX = B*X;
        
        % Projection onto the tangent space
        Up = U - X*symm(BX'*U);  
    end
    
    M.tangent = M.proj;
    
    M.egrad2rgrad = @egrad2rgrad;
    function rgrad = egrad2rgrad(X, egrad)
        
        % First, scale egrad according to the scaled metric in the
        % Euclidean space.
        egrad_scaled = B\egrad;
        
        % Second, project onto the tangent space.
        % rgrad = egrad_scaled - X*symm((B*X)'*egrad_scaled);
        %
        % Verify that symm(BX'*egrad_scaled) = symm(X'*egrad).
        % (This follows from (B*X)'*(B\egrad) = X'*egrad when B is
        % symmetric, which avoids forming B*X here.)
        
        rgrad = egrad_scaled - X*symm(X'*egrad);
    end
    
    
    M.ehess2rhess = @ehess2rhess;
    function rhess = ehess2rhess(X, egrad, ehess, H)
        % ehess is treated as the directional derivative of egrad along
        % H, and H as the derivative of X.
        egraddot = ehess;
        Xdot = H;
        
        % Directional derivative of the Riemannian gradient.
        % This is the product rule applied to the expression for rgrad in
        % egrad2rgrad.
        egrad_scaleddot = B\egraddot;
        rgraddot = egrad_scaleddot - Xdot*symm(X'*egrad)...
            - X*symm(Xdot'*egrad)...
            - X*symm(X'*egraddot);
        
        % Project onto the tangent space.
        rhess = M.proj(X, rgraddot);
    end
    
    
    M.retr = @retraction;
    function Y = retraction(X, U, t)
        % The step size t defaults to 1 when it is not provided.
        if nargin < 3
            t = 1.0;
        end
        Y = guf(X + t*U); % Ensure that Y'*B*Y is identity.
    end
    
    
    M.exp = @exponential;
    function Y = exponential(X, Z, t)
        % Not a true exponential map: this returns the retraction and
        % issues a warning on every call.
        if nargin < 3
            t = 1.0;
        end
        Y = retraction(X, Z, t);
        warning('manopt:stiefelgeneralizedfactory:exp', ...
               ['Exponential for generalized Stiefel manifold ' ...
                'manifold not implemented yet. Used retraction instead.']);
    end


    M.hash = @(X) ['z' hashmd5(X(:))];
    
    M.rand = @random;
    function X = random()
        X = guf(randn(n, p)); % Ensure that X'*B*X is identity;
    end
    
    M.randvec = @randomvec;
    function U = randomvec(X)
        U = projection(X, randn(n, p));
        % NOTE(review): the normalization below uses the Euclidean norm of
        % U(:), not M.norm (which involves B). Unless B is identity, the
        % returned vector need not have unit norm in the metric of this
        % manifold -- confirm whether that is intended.
        U = U / norm(U(:));
    end
    
    M.lincomb = @matrixlincomb;
    
    M.zerovec = @(X) zeros(n, p);
    
    % This transport is compatible with the generalized polar retraction.
    M.transp = @(X1, X2, d) projection(X2, d);
    
    M.vec = @(X, u_mat) u_mat(:);
    M.mat = @(X, u_vec) reshape(u_vec, [n, p]);
    M.vecmatareisometries = @() false;
    
    % Some auxiliary functions
    % symm returns the symmetric part of a square matrix. It is assigned
    % here, after the nested functions that use it have been defined:
    % they only read it when they are called, once this factory has run.
    symm = @(D) (D + D')/2;
    
    function X = guf(Y)
        % Generalized polar decomposition of an n-by-p matrix Y.
        % X'*B*X is identity.
        
        % Method 1
        % Thin SVD of Y; only the singular vectors u and v are used.
        [u, ~, v] = svd(Y, 0);
  
        % Instead of the following three steps, an equivalent, but an 
        % expensive way is to do X = u*(sqrtm(u'*(B*u))\(v')).
        % With u'*(B*u) = q*ssquare*q', the product qsinv*q' below is the
        % inverse square root of u'*(B*u).
        [q, ssquare] = eig(u'*(B*u));
        qsinv = q/sparse(diag(sqrt(diag(ssquare))));
        X = u*((qsinv*q')*v'); % X'*B*X is identity.
        
        
        % Another computation using restricted_svd
        % [u, ~, v] = restricted_svd(Y);
        % X = u*v'; % X'*B*X is identity.
        
    end
    
    % NOTE(review): restricted_svd is only referenced from the
    % commented-out alternative in guf above; no executable line in this
    % function calls it.
    function [u, s, v] = restricted_svd(Y)
        % We compute a thin svd-like decomposition of an n-by-p matrix Y 
        % into matrices u, s, and v such that u is an n-by-p matrix
        % with u'*B*u being identity, s is a p-by-p diagonal matrix 
        % with positive entries, and v is a p-by-p orthogonal matrix.
        % Y = u*s*v'.
        [v, ssquare] = eig(symm(Y'*(B*Y))); % Y'*B*Y is positive definite
        ssquarevec = diag(ssquare);
        
        s = sparse(diag(abs(sqrt(ssquarevec))));
        u = Y*(v/s); % u'*B*u is identity.
    end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/stiefel/stiefelstackedfactory.m
================================================
function M = stiefelstackedfactory(m, d, k)
% Stiefel(k, d)^m, represented as matrices of size m*d-by-k.
%
% function M = stiefelstackedfactory(m, d, k)
%
% Points on this manifold are matrices Y of size n x k, with n = m*d.
% Y is thought of as m matrices of size d x k each, stacked on top of each
% other. Call them Y1, ..., Ym. Each Yi is an orthonormal matrix, that is,
% its d rows are unit norm and are orthogonal to each other. Thus, this
% geometry is a product of Stiefel manifolds.
% 
% To easily transform matrices Y to 3D arrays Y3 of size d x k x m such
% that each slice Y3(:, :, i) corresponds to one of the matrices Yi, use
% the functions
% 
%    Y3 = M.to3D(Y)   and   Y = M.to2D(Y3).
%
% The ambient space R^(nxk) is endowed with the usual inner product
% <A, B> = trace(A'*B). This inner product is restricted to the tangent
% spaces of the present manifold, thus making it a Riemannian submanifold
% of the Euclidean space R^(nxk). Tangent vectors are represented as
% matrices of the same size as Y, and can likewise be converted to 3D
% arrays and back using to3D() and to2D().
%
% In dealing with this geometry, especially when dealing with the 3D array
% representations of points and tangent vectors, the tools multiprod,
% multitransp, multitrace, multiscale etc. available in Manopt are often
% useful.
%
% See also: stiefelfactory obliquefactory multiprod multitransp

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, May 4, 2015.
% Contributors: 
% Change log: 

    % Each d-by-k block has d orthonormal rows of length k, which requires
    % k >= d.
    assert(k >= d, 'k must be at least as large as d.');

    % Number of rows of the stacked (2D) representation of a point.
    n = m*d;
    
    M.name = @() sprintf('Manifold of %d orthonormal matrices of size %dx%d, stacked', m, d, k);
    
    % m times (k*d - d*(d+1)/2).
    M.dim = @() m*(k*d - .5*d*(d+1));
    
    % Returns the three parameters [m, d, k] the factory was called with.
    M.size = @() [m, d, k];
    
    % Euclidean inner product of the two arguments seen as long vectors;
    % the base point x is not used.
    M.inner = @(x, d1, d2) d1(:).'*d2(:);
    
    % Euclidean norm of the tangent vector d seen as a long vector; the
    % base point x is not used.
    M.norm = @(x, d) norm(d(:));
    
    % No distance is available: calling M.dist raises an error.
    M.dist = @(x, y) error('stiefelstackedfactory.dist not implemented yet.');
    
    M.typicaldist = @() sqrt(M.dim());

    % Stack the m slices of a d-by-k-by-m array on top of each other to
    % obtain an n-by-k matrix (n = m*d).
    M.to2D = @to2D;
    function A2 = to2D(A3)
        % Transpose every slice, lay the transposed slices side by side as
        % a k-by-(m*d) matrix, then transpose the result.
        slices_t = multitransp(A3);
        side_by_side = reshape(slices_t, [k, m*d]);
        A2 = side_by_side';
    end

    % Split an n-by-k matrix (n = m*d) into its m blocks of size d-by-k and
    % return them as the slices of a d-by-k-by-m array.
    M.to3D = @to3D;
    function A3 = to3D(A2)
        % Work on the transpose so that each block becomes a contiguous
        % k-by-d slice, then transpose every slice back.
        side_by_side = A2';
        slices_t = reshape(side_by_side, [k, d, m]);
        A3 = multitransp(slices_t);
    end

    % "Symmetric-block-diagonal": for two arrays A and B of size dxkxm,
    % returns an array C of size dxdxm whose slice C(:, :, i) is the
    % symmetric part of A(:, :, i) * B(:, :, i)'. In terms of the 2D
    % equivalents obtained via to2D, these are the symmetric parts of the
    % diagonal blocks of A*B'.
    M.symbdiag = @symbdiag;
    function C = symbdiag(A, B)
        Bt = multitransp(B);
        slicewise_products = multiprod(A, Bt);
        C = multisym(slicewise_products);
    end
    
    % Orthogonal projection of an ambient vector Z in R^(nxk) to the
    % tangent space at Y.
    M.proj = @projection;
    function Zt = projection(Y, Z)
        Y3 = to3D(Y);
        Z3 = to3D(Z);
        % Slice by slice, remove sym(Yi*Zi')*Yi from Zi.
        normal_part = multiprod(symbdiag(Y3, Z3), Y3);
        Zt = to2D(Z3 - normal_part);
    end    
    
    % Tangentialization uses the same orthogonal projection as M.proj.
    M.tangent = M.proj;
    
    % The Euclidean gradient is converted to the Riemannian gradient by the
    % same orthogonal projection.
	M.egrad2rgrad = M.proj;
    
    % Conversion of the Euclidean gradient and Hessian at Y along Ydot to
    % the Riemannian Hessian along Ydot.
    M.ehess2rhess = @ehess2rhess;
    function rhess = ehess2rhess(Y, egrad, ehess, Ydot)
        % Slice-wise symmetric parts of Yi*egradi'.
        S = symbdiag(to3D(Y), to3D(egrad));
        % Correction term: each d-by-d slice of S applied to the matching
        % slice of Ydot.
        correction = to2D(multiprod(S, to3D(Ydot)));
        rhess = projection(Y, ehess - correction);
    end
    
    % Retraction: move from Y along t*U in the ambient space, then
    % re-orthonormalize the rows of every d-by-k block.
    M.retr = @retraction;
    function Y = retraction(Y, U, t)
        if nargin < 3
            t = 1.0;
        end
        Y3 = to3D(Y + t*U);
        for slice = 1 : m
            % Polar factor of the slice, obtained from its thin SVD.
            [left, sv, right] = svd(Y3(:, :, slice), 'econ'); %#ok<ASGLU>
            Y3(:, :, slice) = left*right';
            % A QR-based alternative is possible. The sign correction on
            % the diagonal of R avoids flipping the signs of some columns:
            % [Q, R] = qr(Y3(:, :, slice)', 0);
            % Y3(:, :, slice) = (Q * diag(sign(sign(diag(R))+.5)))';
        end
        Y = to2D(Y3);
    end
    
    % Exponential map, computed block by block on the transposed (k-by-d)
    % blocks.
    M.exp = @exponential;
    function Y = exponential(Y, U, t)
        if nargin == 2
            t = 1;
        end
        steps = multitransp(to3D(t*U));
        points = multitransp(to3D(Y));
        Id = eye(d);
        Zd = zeros(d);
        % From a formula by Ross Lippert, Example 5.4.2 in AMS08.
        for slice = 1 : m
            X = points(:, :, slice);
            Z = steps(:, :, slice);
            XtZ = X'*Z;
            moved = [X, Z] * ...
                    expm([ XtZ , -Z'*Z ; Id , XtZ ]) * ...
                    [ expm(-XtZ) ; Zd ];
            % Orthonormality may be lost numerically here, so the result is
            % replaced by its polar factor.
            [left, sv, right] = svd(moved, 'econ'); %#ok<ASGLU>
            points(:, :, slice) = left*right';
        end
        Y = to2D(multitransp(points));
    end

    % Hash of a point: the character 'z' followed by the md5 hash of its
    % entries.
    M.hash = @(Y) ['z' hashmd5(Y(:))];
    
    % Random point: every d-by-k block is the transpose of the Q-factor of
    % a k-by-d Gaussian matrix.
    M.rand = @random;
    function Y = random()
        blocks = zeros(d, k, m);
        for slice = 1 : m
            gaussian = randn(k, d);
            [Q, unused] = qr(gaussian, 0); %#ok<NASGU>
            blocks(:, :, slice) = Q';
        end
        Y = to2D(blocks);
    end
    
    % Random tangent vector at Y with unit norm: a Gaussian n-by-k matrix
    % is projected to the tangent space, then normalized.
    M.randvec = @randomvec;
    function U = randomvec(Y)
        ambient = randn(n, k);
        tangent = projection(Y, ambient);
        U = tangent / M.norm(Y, tangent);
    end
    
    % Linear combinations of tangent vectors are delegated to matrixlincomb.
    M.lincomb = @matrixlincomb;
    
    % Zero tangent vector: an n-by-k matrix of zeros; the point is unused.
    M.zerovec = @(x) zeros(n, k);
    
    % Transport from x1 to x2 by projection to the tangent space at x2;
    % x1 is unused.
    M.transp = @(x1, x2, u) projection(x2, u);
    
    % Conversion between n-by-k matrices and column vectors of length n*k.
    M.vec = @(x, u_mat) u_mat(:);
    M.mat = @(x, u_vec) reshape(u_vec, [n, k]);
    M.vecmatareisometries = @() true;

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/symfixedrank/elliptopefactory.m
================================================
function M = elliptopefactory(n, k)
% Manifold of n-by-n psd matrices of rank k with unit diagonal elements.
%
% function M = elliptopefactory(n, k)
%
% A point X on the manifold is parameterized as YY^T where Y is a matrix of
% size nxk. As such, X is symmetric, positive semidefinite. We restrict to
% full-rank Y's, such that X has rank exactly k. The point X is numerically
% represented by Y (this is more efficient than working with X, which may
% be big). Tangent vectors are represented as matrices of the same size as
% Y, call them Ydot, so that Xdot = Y Ydot' + Ydot Y' and diag(Xdot) == 0.
% The metric is the canonical Euclidean metric on Y.
% 
% The diagonal constraints on X (X(i, i) == 1 for all i) translate to
% unit-norm constraints on the rows of Y: norm(Y(i, :)) == 1 for all i.
% The set of such Y's forms the oblique manifold. But because for any
% orthogonal Q of size k, it holds that (YQ)(YQ)' = YY', we "group" all
% matrices of the form YQ in an equivalence class. The set of equivalence
% classes is a Riemannian quotient manifold, implemented here.
%
% Note that this geometry formally breaks down at rank-deficient Y's.
% This does not appear to be a major issue in practice when optimization
% algorithms converge to rank-deficient Y's, but convergence theorems no
% longer hold. As an alternative, you may use the oblique manifold (it has
% larger dimension, but does not break down at rank drop.)
%
% The geometry is taken from the 2010 paper:
% M. Journee, P.-A. Absil, F. Bach and R. Sepulchre,
% "Low-Rank Optimization on the Cone of Positive Semidefinite Matrices".
% Paper link: http://www.di.ens.fr/~fbach/journee2010_sdp.pdf
% 
% 
% Please cite the Manopt paper as well as the research paper:
%     @Article{journee2010low,
%       Title   = {Low-rank optimization on the cone of positive semidefinite matrices},
%       Author  = {Journ{\'e}e, M. and Bach, F. and Absil, P.-A. and Sepulchre, R.},
%       Journal = {SIAM Journal on Optimization},
%       Year    = {2010},
%       Number  = {5},
%       Pages   = {2327--2351},
%       Volume  = {20},
%       Doi     = {10.1137/080731359}
%     }
% 
%
% See also: obliquefactory symfixedrankYYfactory spectrahedronfactory

% This file is part of Manopt: www.manopt.org.
% Original author: Bamdev Mishra, July 12, 2013.
% Contributors:
% Change log:
%   July 18, 2013 (NB):
%       Fixed projection operator for rank-deficient Y'Y.
% 
%   Aug.  8, 2013 (NB):
%       No longer using nested functions, to aim at Octave compatibility.
%       Sign error in right hand side of the call to minres corrected.
% 
%   June 24, 2014 (NB):
%       Used code snippets from obliquefactory to speed up projection,
%       retraction, egrad2rgrad and rand: the code now uses bsxfun for this.
% 
%   April 3, 2015 (NB):
%       Replaced trace(A'*B) by A(:)'*B(:) : equivalent but faster.

% TODO: modify normalize_rows and project_rows to work without transposes.
% TODO: enhance ehess2rhess to also use bsxfun.
    
	
	% The projection in this file first tries lyap to solve a Lyapunov
	% equation and falls back to an iterative solver when that call fails.
	% Warn up front if lyap cannot be found.
	if ~exist('lyap', 'file')
		warning('manopt:elliptopefactory:slowlyap', ...
		       ['The function lyap to solve Lyapunov equations seems not to ' ...
				'be available. This may slow down optimization over this ' ...
				'manifold significantly. lyap is part of the control system ' ...
				'toolbox.']);
    end
    
    % k = 1 is not rejected, only flagged: the set is then discrete.
    if k < 2
        warning('manopt:elliptopefactory:lowk', ...
                'k should be an integer >= 2. At k = 1, the set is discrete.');
    end
    
    
    % The matrices X = YY' are n-by-n, so n is passed twice. With only two
    % arguments for three %d conversions, sprintf stops before the rank and
    % the name comes out truncated.
    M.name = @() sprintf('YY'' quotient manifold of %dx%d psd matrices of rank %d with diagonal elements being 1', n, n, k);
    
    % n*(k-1): each of the n rows of Y has k entries and one unit-norm
    % constraint (the diagonal constraint on X). k*(k-1)/2 is then
    % subtracted for the quotient by orthogonal matrices of size k.
    M.dim = @() n*(k-1) - k*(k-1)/2;
    
    % Euclidean metric on the total space
    M.inner = @(Y, eta, zeta) eta(:)'*zeta(:);
    
    M.norm = @(Y, eta) sqrt(M.inner(Y, eta, eta));
    
    % No distance is available: calling M.dist raises an error.
    M.dist = @(Y, Z) error('elliptopefactory.dist not implemented yet.');
    
    M.typicaldist = @() 10*k;
    
    M.proj = @projection;
    
    % Tangent vectors are represented in the ambient space directly.
    M.tangent = M.proj;
    M.tangent2ambient = @(Y, eta) eta;
    
    M.retr = @retraction;
    
    M.egrad2rgrad = @egrad2rgrad;
    
    M.ehess2rhess = @ehess2rhess;
    
    M.exp = @exponential;
    
    % Notice that the hash of two equivalent points will be different...
    M.hash = @(Y) ['z' hashmd5(Y(:))];
    
    M.rand = @() random(n, k);
    
    M.randvec = @randomvec;
    
    M.lincomb = @matrixlincomb;
    
    M.zerovec = @(Y) zeros(n, k);
    
    % Transport from Y1 to Y2 by projection at Y2; Y1 is unused.
    M.transp = @(Y1, Y2, d) projection(Y2, d);
    
    % Conversion between n-by-k matrices and column vectors of length n*k.
    M.vec = @(Y, u_mat) u_mat(:);
    M.mat = @(Y, u_vec) reshape(u_vec, [n, k]);
    M.vecmatareisometries = @() true;
    
end

% Given a matrix X, returns the same matrix but with each row scaled so
% that it has unit 2-norm.
% The norms are taken along the second dimension directly, so no transpose
% of X is needed. A row of zeros yields NaN entries, since it is divided by
% a zero norm.
% See obliquefactory.
function X = normalize_rows(X)
    norms = sqrt(sum(X.^2, 2)); % column vector, one norm per row
    X = bsxfun(@times, X, 1./norms);
end

% Orthogonal projection of each row of H to the tangent space at the
% corresponding row of X, seen as a point on a sphere.
% X and H must have the same size; the output has that size too.
% The row-wise inner products are taken along the second dimension
% directly, so no transposes are needed.
% See obliquefactory.
function PXH = project_rows(X, H)
    % Inner product between each row H(i, :) and its root point X(i, :).
    % Returns a column vector.
    inners = sum(X.*H, 2);
    % Subtract from H the components of the rows H(i, :) that are parallel
    % to the root points X(i, :).
    PXH = H - bsxfun(@times, X, inners);
end


% Projection of an ambient vector eta at Y: first onto the tangent space of
% the constraints ||Y(i, :)|| = 1, then onto the horizontal space.
function etaproj = projection(Y, eta)
    [nrows, k] = size(Y); %#ok<ASGLU>

    % Step 1: row-wise projection (tangent space of the unit-norm rows).
    eta = project_rows(Y, eta);

    % Step 2: horizontal space. Omega solves the Lyapunov equation
    % gram*Omega + Omega*gram = skew_rhs.
    gram = Y'*Y;
    skew_rhs = Y'*eta - eta'*Y;
    try
        % Expected to work and to return a skew-symmetric solution Omega.
        Omega = lyap(gram, -skew_rhs);
    catch lyap_failure %#ok<NASGU>
        % gram may be rank deficient. The Lyapunov equation still has a
        % unique skew-symmetric solution then, but solutions with a
        % symmetric part exist as well, which lyap does not accept. The
        % minimum norm solution is extracted instead. This path is also
        % taken when lyap is unavailable (it is part of the control system
        % toolbox).
        as_matrix = @(x) reshape(x, [k k]);
        as_vector = @(X) X(:);
        lyap_operator = @(x) as_vector(gram*as_matrix(x) + as_matrix(x)*gram);
        if exist('OCTAVE_VERSION', 'builtin')
            [vecomega, unused] = gmres(lyap_operator, as_vector(skew_rhs)); %#ok<NASGU>
        else
            [vecomega, unused] = minres(lyap_operator, as_vector(skew_rhs)); %#ok<NASGU>
        end
        Omega = as_matrix(vecomega);
    end
    % % Enforcing skew-symmetry of the result does not seem necessary:
    % Omega = (Omega-Omega')/2;
    etaproj = eta - Y*Omega;
end

% Retraction: step from Y along t*eta (t defaults to 1), then rescale every
% row back to unit norm.
function Ynew = retraction(Y, eta, t)
    if nargin < 3
        t = 1.0;
    end
    Ynew = normalize_rows(Y + t*eta);
end

% Exponential map.
% The exponential is not implemented for this manifold: the retraction is
% returned instead and a warning with identifier
% 'manopt:elliptopefactory:exp' is issued on every call. t defaults to 1.
function Ynew = exponential(Y, eta, t)
    if nargin < 3
        t = 1.0;
    end

    Ynew = retraction(Y, eta, t);
    % The message names the elliptope (this factory), not the
    % spectrahedron. The identifier is unchanged.
    warning('manopt:elliptopefactory:exp', ...
        ['Exponential for the elliptope ' ...
        'manifold not implemented yet. Used retraction instead.\n' ...
        'To disable this warning: warning(''off'', ''manopt:elliptopefactory:exp'')']);
end

% Euclidean gradient to Riemannian gradient conversion.
% Only the row-wise projection (project_rows) is applied: the remainder of
% the projection function is not necessary because the Euclidean gradient
% must already be orthogonal to the vertical space.
%
% Y is the current point and egrad the Euclidean gradient at Y, both
% passed to project_rows as given.
function rgrad = egrad2rgrad(Y, egrad)
    rgrad = project_rows(Y, egrad);
end

% Euclidean Hessian to Riemannian Hessian conversion.
% Y is the point, egrad and ehess are the Euclidean gradient and Hessian at
% Y along eta, and eta is the direction.
function Hess = ehess2rhess(Y, egrad, ehess, eta)
    % Row-wise inner products, as column vectors of length n.
    grad_dots = sum(egrad.*Y, 2);
    hess_dots = sum(eta.*egrad + Y.*ehess, 2);

    % Directional derivative of the Riemannian gradient: each row of eta
    % is scaled by the matching entry of grad_dots ...
    Hess = ehess - bsxfun(@times, eta, grad_dots);
    % ... and each row of Y by the matching entry of hess_dots (the
    % directional derivative of the scaling itself).
    Hess = Hess - bsxfun(@times, Y, hess_dots);

    % Project on the horizontal space
    Hess = projection(Y, Hess);
end

% Random point on the manifold: an n-by-k Gaussian matrix whose rows are
% rescaled to unit norm.
function Y = random(n, k)
    Y = normalize_rows(randn(n, k));
end

% Random vector at Y: a Gaussian matrix of the size of Y is passed through
% projection, then scaled to unit Frobenius norm.
function eta = randomvec(Y)
    gaussian = randn(size(Y));
    eta = projection(Y, gaussian);
    eta = eta / norm(eta, 'fro');
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/symfixedrank/spectrahedronfactory.m
================================================
function M = spectrahedronfactory(n, k)
% Manifold of n-by-n symmetric positive semidefinite matrices of rank k
% with trace (sum of diagonal elements) equal to 1.
%
% function M = spectrahedronfactory(n, k)
%
% A point X on the manifold is parameterized as YY^T where Y is a matrix of
% size nxk. As such, X is symmetric, positive semidefinite. We restrict to
% full-rank Y's, such that X has rank exactly k. The point X is numerically
% represented by Y (this is more efficient than working with X, which may
% be big). Tangent vectors are represented as matrices of the same size as
% Y, call them Ydot, so that Xdot = Y Ydot' + Ydot Y' and trace(Xdot) == 0.
% The metric is the canonical Euclidean metric on Y.
% 
% The trace constraint on X (trace(X) == 1) translates to a unit Frobenius
% norm constraint on Y: trace(X) = norm(Y, 'fro')^2 == 1. The set of such
% Y's forms the unit sphere in R^(nxk): see spherefactory. But because for
% any orthogonal Q of size k, it holds that (YQ)(YQ)' = YY', we "group" all
% matrices of the form YQ in an equivalence class. The set of equivalence
% classes is a Riemannian quotient manifold, implemented here.
%
%
% Note that this geometry formally breaks down at rank-deficient Y's.
% As an alternative, you may use the sphere manifold (it has larger
% dimension (by 1), but does not break down at rank drop.)
%
% The geometry is taken from the 2010 paper:
% M. Journee, P.-A. Absil, F. Bach and R. Sepulchre,
% "Low-Rank Optimization on the Cone of Positive Semidefinite Matrices".
% Paper link: http://www.di.ens.fr/~fbach/journee2010_sdp.pdf
% 
% 
% Please cite the Manopt paper as well as the research paper:
%     @Article{journee2010low,
%       Title   = {Low-rank optimization on the cone of positive semidefinite matrices},
%       Author  = {Journ{\'e}e, M. and Bach, F. and Absil, P.-A. and Sepulchre, R.},
%       Journal = {SIAM Journal on Optimization},
%       Year    = {2010},
%       Number  = {5},
%       Pages   = {2327--2351},
%       Volume  = {20},
%       Doi     = {10.1137/080731359}
%     }
% 
%
% See also: spherefactory elliptopefactory symfixedrankYYfactory

% This file is part of Manopt: www.manopt.org.
% Original author: Bamdev Mishra, July 11, 2013.
% Contributors: Nicolas Boumal
% Change log:
%
%   April 2, 2015 (NB):
%       Replaced trace(A'*B) by A(:)'*B(:) (equivalent but faster).
%       Updated documentation.
    
    
    % The matrices X = YY' are n-by-n, so n is passed twice. With only two
    % arguments for three %d conversions, sprintf stops before the rank and
    % the name comes out truncated.
    M.name = @() sprintf('YY'' quotient manifold of %dx%d psd matrices of rank %d with trace 1', n, n, k);
    
    % n*k entries in Y, minus 1 for the unit Frobenius norm constraint,
    % minus k*(k-1)/2 for the quotient by orthogonal matrices of size k.
    M.dim = @() n*k - 1 - k*(k-1)/2;
    
    % Euclidean metric on the total space
    M.inner = @(Y, eta, zeta) eta(:)'*zeta(:);
    
    M.norm = @(Y, eta) sqrt(M.inner(Y, eta, eta));
    
    % No distance is available: calling M.dist raises an error.
    M.dist = @(Y, Z) error('spectrahedronfactory.dist not implemented yet.');
    
    M.typicaldist = @() 10*k;
    
    M.proj = @projection;
    function etaproj = projection(Y, eta)
        % Two-stage projection of an ambient vector eta at Y.
        
        % Stage 1: tangent space of the constraint ||Y|| = 1, obtained by
        % removing the component of eta along Y.
        along_Y = eta(:)'*Y(:);
        eta = eta - along_Y*Y;
        
        % Stage 2: horizontal space. Omega solves the Lyapunov equation
        % gram*Omega + Omega*gram = skew_rhs.
        gram = Y'*Y;
        skew_rhs = Y'*eta - eta'*Y;
        Omega = lyap(gram, -skew_rhs);
        etaproj = eta - Y*Omega;
    end
    
    % Tangent vectors are represented in the ambient space directly.
    M.tangent = M.proj;
    M.tangent2ambient = @(Y, eta) eta;
    
    % Retraction: step from Y along t*eta (t defaults to 1), then rescale
    % to unit Frobenius norm.
    M.retr = @retraction;
    function Ynew = retraction(Y, eta, t)
        if nargin < 3
            t = 1.0;
        end
        moved = Y + t*eta;
        Ynew = moved/norm(moved, 'fro');
    end
    
    
    % Riemannian gradient: the Euclidean gradient (called eta here) with
    % its component along Y removed.
    M.egrad2rgrad = @(Y, eta) eta - (eta(:)'*Y(:))*Y;
    
    % Riemannian Hessian along eta from the Euclidean gradient and Hessian.
    M.ehess2rhess = @ehess2rhess;
    function Hess = ehess2rhess(Y, egrad, ehess, eta)
        % Scalar inner products entering the directional derivative of the
        % Riemannian gradient.
        egrad_dot_Y = egrad(:)'*Y(:);
        ehess_dot_Y = ehess(:)'*Y(:);
        eta_dot_egrad = eta(:)'*egrad(:);
        
        Hess = ehess - egrad_dot_Y*eta - (ehess_dot_Y + eta_dot_egrad)*Y;
        
        % Remove the component along Y.
        Hess = Hess - (Hess(:)'*Y(:))*Y;
        
        % Project on the horizontal space
        Hess = M.proj(Y, Hess);
    end
    
    % Exponential map. It is not implemented for this manifold: the
    % retraction is returned instead and a warning with identifier
    % 'manopt:spectrahedronfactory:exp' is issued on every call.
    % t defaults to 1.
    M.exp = @exponential;
    function Ynew = exponential(Y, eta, t)
        if nargin < 3
            t = 1.0;
        end
        
        Ynew = retraction(Y, eta, t);
        % Spelling of "implemented" corrected in the message; the
        % identifier is unchanged.
        warning('manopt:spectrahedronfactory:exp', ...
            ['Exponential for fixed rank spectrahedron ' ...
            'manifold not implemented yet. Used retraction instead.']);
    end
    
    % The hash is computed from the entries of Y, so two equivalent points
    % get different hashes.
    M.hash = @(Y) ['z' hashmd5(Y(:))];
    
    % Random point: an n-by-k Gaussian matrix scaled to unit Frobenius norm.
    M.rand = @random;
    
    function Y = random()
        gaussian = randn(n, k);
        Y = gaussian/norm(gaussian,'fro');
    end
    
    % Random tangent vector at Y with unit norm: a Gaussian n-by-k matrix
    % is passed through projection, then normalized with M.norm.
    M.randvec = @randomvec;
    function eta = randomvec(Y)
        gaussian = randn(n, k);
        eta = projection(Y, gaussian);
        eta = eta / M.norm(Y, eta);
    end
    
    % Linear combinations of tangent vectors are delegated to matrixlincomb.
    M.lincomb = @matrixlincomb;
    
    % Zero tangent vector: an n-by-k matrix of zeros; the point is unused.
    M.zerovec = @(Y) zeros(n, k);
    
    % Transport from Y1 to Y2 by projection at Y2; Y1 is unused.
    M.transp = @(Y1, Y2, d) projection(Y2, d);
    
    % Conversion between n-by-k matrices and column vectors of length n*k.
    M.vec = @(Y, u_mat) u_mat(:);
    M.mat = @(Y, u_vec) reshape(u_vec, [n, k]);
    M.vecmatareisometries = @() true;
    
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/symfixedrank/symfixedrankYYcomplexfactory.m
================================================
function M = symfixedrankYYcomplexfactory(n, k)
% Manifold of n x n complex Hermitian pos. semidefinite matrices of rank k.
%
% function M = symfixedrankYYcomplexfactory(n, k)
%
% Manifold of n-by-n complex Hermitian positive semidefinite matrices of 
% fixed rank k. This follows the quotient geometry described 
% in Sarod Yatawatta's 2013 paper:
% "Radio interferometric calibration using a Riemannian manifold", ICASSP.
%
% Paper link: http://dx.doi.org/10.1109/ICASSP.2013.6638382.
%
% A point X on the manifold M is parameterized as YY^*, where 
% Y is a complex matrix of size nxk. For any point Y on the manifold M, 
% given any kxk complex unitary matrix U, we say Y*U  is equivalent to Y, 
% i.e., YY^* does not change. Therefore, M is the set of equivalence 
% classes and is a Riemannian quotient manifold C^{nk}/SU(k). 
% The metric is the usual real-trace inner product, that is, 
% it is the usual metric for the complex plane identified with R^2.
%
% Notice that this manifold is not complete: if optimization leads Y to be
% rank-deficient, the geometry will break down. Hence, this geometry should
% only be used if it is expected that the points of interest will have rank
% exactly k. Reduce k if that is not the case.
%
% The geometry is based on the following papers (and references therein).
% Please cite the Manopt paper as well as the research papers:
%
% @INPROCEEDINGS{Yatawatta2013A,
%  author={Yatawatta, S.},
%  booktitle={Acoustics, Speech and Signal Processing (ICASSP), 2013 IEEE International Conference on},
%  title={Radio interferometric calibration using a {R}iemannian manifold},
%  year={2013},
%  month={May},
%  pages={3866--3870},
%  doi={10.1109/ICASSP.2013.6638382},
%  ISSN={1520-6149},
% }
%
% @article{Yatawatta2013B,
%  author = {Yatawatta, S.}, 
%  title = {On the interpolation of calibration solutions obtained in radio interferometry},
%  volume = {428}, 
%  number = {1}, 
%  pages = {828--833}, 
%  year = {2013}, 
%  doi = {10.1093/mnras/sts069}, 
%  journal = {Monthly Notices of the Royal Astronomical Society} 
% }
%
% See also: symfixedrankYYfactory sympositivedefinitefactory


% This file is part of Manopt: www.manopt.org.
% Original author: Sarod Yatawatta, June 29, 2015.
% Contributors: Bamdev Mishra.
% Change log:
%
%   June 28, 2016 (NB):
%       Metric scaled down by factor 2 to match the metric used in
%       euclideancomplexfactory.
    
    % Human-readable name; the matrices YY' are n-by-n, hence n twice.
    M.name = @() sprintf('YY'' quotient manifold of Hermitian %dx%d complex matrices of rank %d.', n, n, k);
    
    M.dim = @() 2*k*n - k*k; % SY: dim of ambient space (2*k*n) - dim of kxk unitary matrix  (k^2).
    
    % Euclidean metric on the total space.
    % BM: equivalent to real(trace(eta'*zeta)), but more efficient.
    M.inner = @(Y, eta, zeta) real(eta(:)'*zeta(:));
    
    % Norm induced by M.inner.
    M.norm = @(Y, eta) sqrt(M.inner(Y, eta, eta));
    
    % Distance between the equivalence classes of Y and Z: find the unitary
    % U minimizing ||Y - Z*U|| in Frobenius norm, i.e., solve the
    % Procrustes problem with svd(Z'*Y), and return that minimal norm.
    %
    % The handle must point to the function itself: the former
    % @(Y, Z) distance called distance without any argument, so that
    % every call to M.dist failed.
    M.dist = @distance;
    function distval = distance(Y, Z)
        [u, ignore, v] = svd(Z'*Y); %#ok<ASGLU>
        E = Y - Z*u*v'; % SY: checked.
        % Square root of the squared Frobenius norm of the residual, so
        % that the value is on the same scale as M.norm.
        distval = sqrt(real(E(:)'*E(:)));
    end
    
    M.typicaldist = @() 10*k; % BM: To do.
    
    % Projection of an ambient vector eta onto the horizontal space at Y.
    M.proj = @projection;
    function etaproj = projection(Y, eta)
        % Omega solves the Lyapunov equation
        % gram*Omega + Omega*gram' = skew_rhs.
        gram = Y'*Y;
        skew_rhs = Y'*eta - eta'*Y;
        Omega = lyap(gram, -skew_rhs);
        etaproj = eta - Y*Omega;
    end
    
    % Tangent vectors are represented in the ambient space directly.
    M.tangent = M.proj;
    M.tangent2ambient = @(Y, eta) eta;
    
    % Retraction: a plain step from Y along t*eta (t defaults to 1).
    M.retr = @retraction;
    function Ynew = retraction(Y, eta, t)
        if nargin < 3
            t = 1.0;
        end
        step = t*eta;
        Ynew = Y + step;
    end
    
    
    % The Euclidean gradient (called eta here) is returned unchanged as the
    % Riemannian gradient.
    M.egrad2rgrad = @(Y, eta) eta;
    % The Riemannian Hessian is the projection of the Euclidean Hessian at
    % Y; egrad and the direction U are not used.
    M.ehess2rhess = @(Y, egrad, ehess, U) M.proj(Y, ehess);
    
    
    % Exponential map. It is not implemented for this manifold: the
    % retraction is returned instead and a warning is issued on every call.
    M.exp = @exponential;
    function Ynew = exponential(Y, eta, t)
        % Without t, rely on the default step of the retraction (1.0).
        if nargin < 3
            Ynew = retraction(Y, eta);
        else
            Ynew = retraction(Y, eta, t);
        end
        
        message = ['Exponential for symmetric fixed-rank complex ' ...
            'manifold not implemented yet. Used retraction instead.'];
        warning('manopt:symfixedrankYYcomplexfactory:exp', message);
    end
    
    % The hash is computed from the real and imaginary parts of the entries
    % of Y, so two equivalent points get different hashes.
    M.hash = @(Y) ['z' hashmd5([real(Y(:)); imag(Y(:))])];
    
    % Random point: an n-by-k matrix with Gaussian real and imaginary parts.
    M.rand = @random;
    function Y = random()
        real_part = randn(n, k);
        imag_part = randn(n,k);
        Y = real_part + 1i*imag_part;
    end
    
    % Random tangent vector at Y with unit norm: a complex Gaussian n-by-k
    % matrix is passed through projection, then normalized with M.norm.
    M.randvec = @randomvec;
    function eta = randomvec(Y)
        real_part = randn(n, k);
        imag_part = randn(n,k);
        eta = projection(Y, real_part + 1i*imag_part);
        eta = eta / M.norm(Y, eta);
    end
    
    % Linear combinations of tangent vectors are delegated to matrixlincomb.
    M.lincomb = @matrixlincomb;
    
    % Zero tangent vector: an n-by-k matrix of zeros; the point is unused.
    M.zerovec = @(Y) zeros(n, k);
    
    % Transport from Y1 to Y2 by projection at Y2; Y1 is unused.
    M.transp = @(Y1, Y2, d) projection(Y2, d);
    
    % vec stacks the real parts of the entries on top of the imaginary
    % parts (a real column vector of length 2*n*k); mat reverses this and
    % rebuilds the complex n-by-k matrix.
    M.vec = @(Y, u_mat) [real(u_mat(:)); imag(u_mat(:))];
    M.mat = @(Y, u_vec) reshape(u_vec(1 : n*k), [n, k]) + 1i*reshape(u_vec(n*k + 1: end), [n, k]);
    M.vecmatareisometries = @() true; 
    
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/symfixedrank/symfixedrankYYfactory.m
================================================
function M = symfixedrankYYfactory(n, k)
% Manifold of n-by-n symmetric positive semidefinite matrices of rank k.
%
% function M = symfixedrankYYfactory(n, k)
%
% A point X on the manifold is parameterized as YY^T where Y is a matrix of
% size nxk. As such, X is symmetric, positive semidefinite. We restrict to
% full-rank Y's, such that X has rank exactly k. The point X is numerically
% represented by Y (this is more efficient than working with X, which may
% be big). Tangent vectors are represented as matrices of the same size as
% Y, call them Ydot, so that Xdot = Y Ydot' + Ydot Y'. The metric is the
% canonical Euclidean metric on Y.
% 
% Since for any orthogonal Q of size k, it holds that (YQ)(YQ)' = YY',
% we "group" all matrices of the form YQ in an equivalence class. The set
% of equivalence classes is a Riemannian quotient manifold, implemented
% here.
%
% Notice that this manifold is not complete: if optimization leads Y to be
% rank-deficient, the geometry will break down. Hence, this geometry should
% only be used if it is expected that the points of interest will have rank
% exactly k. Reduce k if that is not the case.
% 
% An alternative, complete, geometry for positive semidefinite matrices of
% rank k is described in Bonnabel and Sepulchre 2009, "Riemannian Metric
% and Geometric Mean for Positive Semidefinite Matrices of Fixed Rank",
% SIAM Journal on Matrix Analysis and Applications.
%
%
% The geometry here implemented is the simplest case of the 2010 paper:
% M. Journee, P.-A. Absil, F. Bach and R. Sepulchre,
% "Low-Rank Optimization on the Cone of Positive Semidefinite Matrices".
% Paper link: http://www.di.ens.fr/~fbach/journee2010_sdp.pdf
% 
% 
% Please cite the Manopt paper as well as the research paper:
%     @Article{journee2010low,
%       Title   = {Low-rank optimization on the cone of positive semidefinite matrices},
%       Author  = {Journ{\'e}e, M. and Bach, F. and Absil, P.-A. and Sepulchre, R.},
%       Journal = {SIAM Journal on Optimization},
%       Year    = {2010},
%       Number  = {5},
%       Pages   = {2327--2351},
%       Volume  = {20},
%       Doi     = {10.1137/080731359}
%     }
%
% See also: elliptopefactory spectrahedronfactory

% This file is part of Manopt: www.manopt.org.
% Original author: Bamdev Mishra, Dec. 30, 2012.
% Contributors:
% Change log:
%
%  July 10, 2013 (NB):
%       Added vec, mat, tangent, tangent2ambient ;
%       Correction for the dimension of the manifold.
%
%   April 2, 2015 (NB):
%       Replaced trace(A'*B) by A(:)'*B(:) (equivalent but faster).


	% The matrices X = YY' are n-by-n, so n is passed twice. With only two
	% arguments for three %d conversions, sprintf stops before the rank and
	% the name comes out truncated.
	M.name = @() sprintf('YY'' quotient manifold of %dx%d psd matrices of rank %d', n, n, k);

	% k*n entries in Y, minus k*(k-1)/2 for the quotient by orthogonal
	% matrices of size k.
	M.dim = @() k*n - k*(k-1)/2;

	% Euclidean metric on the total space
	M.inner = @(Y, eta, zeta) eta(:)'*zeta(:);

	M.norm = @(Y, eta) sqrt(M.inner(Y, eta, eta));

	% No distance is available: calling M.dist raises an error.
	M.dist = @(Y, Z) error('symfixedrankYYfactory.dist not implemented yet.');

	M.typicaldist = @() 10*k;

	% Projection of an ambient vector eta onto the horizontal space at Y.
	M.proj = @projection;
	function etaproj = projection(Y, eta)
		% Omega solves the Lyapunov equation
		% gram*Omega + Omega*gram = skew_rhs.
		gram = Y'*Y;
		skew_rhs = Y'*eta - eta'*Y;
		Omega = lyap(gram, -skew_rhs);
		etaproj = eta - Y*Omega;
	end

	% Tangent vectors are represented in the ambient space directly.
	M.tangent = M.proj;
	M.tangent2ambient = @(Y, eta) eta;

	% Retraction: a plain step from Y along t*eta (t defaults to 1).
	M.retr = @retraction;
	function Ynew = retraction(Y, eta, t)
		if nargin < 3
			t = 1.0;
		end
		step = t*eta;
		Ynew = Y + step;
	end


	% The Euclidean gradient (called eta here) is returned unchanged as the
	% Riemannian gradient.
	M.egrad2rgrad = @(Y, eta) eta;
	% The Riemannian Hessian is the projection of the Euclidean Hessian at
	% Y; egrad and the direction U are not used.
	M.ehess2rhess = @(Y, egrad, ehess, U) M.proj(Y, ehess);

	% Exponential map. It is not implemented for this manifold: the
	% retraction is returned instead and a warning is issued on every call.
	M.exp = @exponential;
	function Ynew = exponential(Y, eta, t)
		% Without t, rely on the default step of the retraction (1.0).
		if nargin < 3
			Ynew = retraction(Y, eta);
		else
			Ynew = retraction(Y, eta, t);
		end
		
		message = ['Exponential for symmetric, fixed-rank ' ...
			'manifold not implemented yet. Used retraction instead.'];
		warning('manopt:symfixedrankYYfactory:exp', message);
	end

	% The hash is computed from the entries of Y, so two equivalent points
	% get different hashes.
	M.hash = @(Y) ['z' hashmd5(Y(:))];

	% Random point: an n-by-k matrix with i.i.d. standard normal entries.
	M.rand = @random;
	function Y = random()
		dims = [n, k];
		Y = randn(dims(1), dims(2));
	end

	% Random tangent vector at Y with unit norm: a Gaussian n-by-k matrix
	% is passed through projection, then normalized with M.norm.
	M.randvec = @randomvec;
	function eta = randomvec(Y)
		gaussian = randn(n, k);
		eta = projection(Y, gaussian);
		eta = eta / M.norm(Y, eta);
	end

	% Linear combinations of tangent vectors are delegated to matrixlincomb.
	M.lincomb = @matrixlincomb;

	% Zero tangent vector: an n-by-k matrix of zeros; the point is unused.
	M.zerovec = @(Y) zeros(n, k);

	% Transport from Y1 to Y2 by projection at Y2; Y1 is unused.
	M.transp = @(Y1, Y2, d) projection(Y2, d);
		
	% Conversion between n-by-k matrices and column vectors of length n*k.
	M.vec = @(Y, u_mat) u_mat(:);
	M.mat = @(Y, u_vec) reshape(u_vec, [n, k]);
	M.vecmatareisometries = @() true;

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/manifolds/symfixedrank/sympositivedefinitefactory.m
================================================
function M = sympositivedefinitefactory(n)
% Manifold of n-by-n symmetric positive definite matrices with
% the bi-invariant geometry.
%
% function M = sympositivedefinitefactory(n)
%
% Points on the manifold are stored as n-by-n symmetric positive definite
% matrices X. Tangent vectors are n-by-n symmetric matrices (they need not
% be definite).
%
% The Riemannian metric is the bi-invariant metric, described notably in
% Chapter 6 of the 2007 book "Positive definite matrices"
% by Rajendra Bhatia, Princeton University Press.
%
%
% The exponential map relies on expm (the matrix exponential). If too large
% a vector is retracted / exponentiated (e.g., a solver tries to make too
% big a step), the returned point may contain NaN's, which most likely
% propagate to the cost / gradient / ... and make the optimization fail.
% For trustregions, this can be controlled by setting options.Delta0 and
% options.Delta_bar, to prevent too large steps.
%
%
% Many of the functions below solve linear systems in X (a point on the
% manifold), or take matrix exponentials and logarithms, etc. It could
% therefore pay off to precompute something about X (an eigenvalue
% decomposition for example) and to store both X and that preprocessing in
% a structure. Doing so would require modifying the present factory to work
% with such structures to represent both points and tangent vectors. This
% is omitted in favor of simplicity, but keep it in mind if efficiency
% becomes an issue in your application.

% This file is part of Manopt: www.manopt.org.
% Original author: Bamdev Mishra, August 29, 2013.
% Contributors: Nicolas Boumal
% Change log:
%
%   March 5, 2014 (NB)
%       There were a number of mistakes in the code owing to the tacit
%       assumption that if X and eta are symmetric, then X\eta is
%       symmetric too, which is not the case. See discussion on the Manopt
%       forum started on Jan. 19, 2014. Functions norm, dist, exp and log
%       were modified accordingly. Furthermore, they only require matrix
%       inversion (as well as matrix log or matrix exp), not matrix square
%       roots or their inverse.
%
%   July 28, 2014 (NB)
%       The dim() function returned n*(n-1)/2 instead of n*(n+1)/2.
%       Implemented proper parallel transport from Sra and Hosseini (not
%       used by default).
%       Also added symmetrization in exp and log (to be sure).
%
%   April 3, 2015 (NB):
%       Replaced trace(A*B) by a faster equivalent that does not compute
%       the whole product A*B, for inner product, norm and distance.
%
%   May 23, 2017 (NB):
%       As seen in a talk of Wen Huang at the SIAM Optimization Conference
%       today, replaced the retraction of this factory (which was simply
%       equal to the exponential map) with a simpler, second-order
%       retraction. That this retraction is second order can be verified
%       numerically with checkretraction(sympositivedefinitefactory(5));
%       Notice that, for this retraction, it would be cheap to evaluate for
%       many values of t, that is, it is cheap to retract many points along
%       the same tangent direction. This could in principle be exploited to
%       speed up line-searches.

    % Symmetric part of a square matrix.
    symm = @(A) 0.5*(A + A');

    M.name = @() sprintf('Symmetric positive definite geometry of %dx%d matrices', n, n);

    M.dim = @() n*(n+1)/2;

    % Trace helpers that never form the full matrix product:
    %   trprod(A, B) = trace(A*B),   trnrm(A) = sqrt(trace(A^2)).
    flat   = @(A) A(:);
    trprod = @(A, B) flat(A')'*flat(B);
    trnrm  = @(A) sqrt(trprod(A, A));

    % Bi-invariant metric on the positive definite cone:
    %   <eta, zeta>_X = trace( (X\eta) * (X\zeta) ).
    M.inner = @(X, eta, zeta) trprod(X\eta, X\zeta);

    % Norm induced by the metric: sqrt(trace((X\eta)^2)). Note that X\eta
    % is *not* symmetric in general. Taking the real part should be a
    % no-op, but rounding errors may create a small imaginary part, which
    % is discarded.
    M.norm = @(X, eta) real(trnrm(X\eta));

    % Distance between X and Y. X\Y is not symmetric in general either,
    % and the same remark about real parts applies.
    M.dist = @(X, Y) real(trnrm(real(logm(X\Y))));

    M.typicaldist = @() sqrt(n*(n+1)/2);

    % Euclidean gradient to Riemannian gradient.
    M.egrad2rgrad = @(X, egrad) X*symm(egrad)*X;

    % Euclidean gradient and Hessian to Riemannian Hessian along eta.
    M.ehess2rhess = @ehess2rhess;
    function rhess = ehess2rhess(X, egrad, ehess, eta)
        coupling = symm(eta*symm(egrad)*X);

        % Directional derivative of the Riemannian gradient ...
        rhess = X*symm(ehess)*X + 2*coupling;

        % ... minus the correction term for the non-constant metric.
        rhess = rhess - coupling;
    end

    % All tangent spaces are the symmetric matrices: projecting is
    % symmetrizing.
    M.proj = @(X, eta) symm(eta);

    M.tangent = M.proj;
    M.tangent2ambient = @(X, eta) eta;

    % Second-order retraction (see the May 23, 2017 change log entry).
    M.retr = @retraction;
    function Y = retraction(X, eta, t)
        if nargin >= 3
            eta = t*eta;
        end
        % symm() is mathematically redundant here, but numerically needed.
        Y = symm(X + eta + .5*eta*(X\eta));
    end

    % Exponential map; t defaults to 1.
    M.exp = @exponential;
    function Y = exponential(X, eta, t)
        if nargin >= 3
            eta = t*eta;
        end
        % symm() and real() are mathematically redundant here, but
        % numerically needed.
        Y = symm(X*real(expm(X\eta)));
    end

    % Logarithmic map; same remark regarding symm() and real().
    M.log = @(X, Y) symm(X*real(logm(X\Y)));

    M.hash = @(X) ['z' hashmd5(X(:))];

    % Random symmetric positive definite matrix: Q*D*Q' with Q taken from
    % the QR factorization of a Gaussian matrix and D diagonal with entries
    % drawn by 1+rand. The choice of distribution is arbitrary; specific
    % applications might require a different one.
    M.rand = @random;
    function X = random()
        spectrum = diag(1+rand(n, 1));
        [Q, R_unused] = qr(randn(n)); %#ok<NASGU>
        X = Q*spectrum*Q';
    end

    % Random tangent vector at X, normalized to unit norm w.r.t. the metric.
    M.randvec = @randomvec;
    function eta = randomvec(X)
        eta = symm(randn(n));
        eta = eta / M.norm(X, eta);
    end

    M.lincomb = @matrixlincomb;

    M.zerovec = @(X) zeros(n);

    % Poor man's vector transport: every tangent space is the set of
    % symmetric matrices, so the identity map acts as a sort of vector
    % transport. It may perform poorly when X1 and X2 are far apart, which
    % should not happen for typical optimization algorithms since they
    % perform small steps.
    M.transp = @(X1, X2, eta) eta;

    % For reference, a proper vector transport is given here, following
    % work by Sra and Hosseini: "Conic geometric optimisation on the
    % manifold of positive definite matrices", to appear in SIAM J. Optim.
    % in 2015; also available here: http://arxiv.org/abs/1312.1039
    % This is not used by default. To force the use of this transport,
    % execute "M.transp = M.paralleltransp;" on the M returned by the
    % present factory.
    M.paralleltransp = @parallel_transport;
    function zeta = parallel_transport(X, Y, eta)
        root = sqrtm((Y/X));
        zeta = root*eta*root';
    end

    % vec and mat are not isometries, because of the unusual inner metric.
    M.vec = @(X, U) U(:);
    M.mat = @(X, u) reshape(u, [n, n]);
    M.vecmatareisometries = @() false;

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/readme
================================================
test


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/solvers/barzilaiborwein/barzilaiborwein.m
================================================
function [x, cost, info, options] = barzilaiborwein(problem, x, options)
% Riemannian Barzilai-Borwein solver with non-monotone line-search.
%
% function [x, cost, info, options] = barzilaiborwein(problem)
% function [x, cost, info, options] = barzilaiborwein(problem, x0)
% function [x, cost, info, options] = barzilaiborwein(problem, x0, options)
% function [x, cost, info, options] = barzilaiborwein(problem, [], options)
%
% Apply the Barzilai-Borwein minimization algorithm to the problem defined
% in the problem structure, starting at x0 if it is provided (otherwise, at
% a random point on the manifold). To specify options whilst not specifying
% an initial guess, give x0 as [] (the empty matrix).
%
% The algorithm uses its own special non-monotone line-search strategy.
% Therefore, no line-search algorithm should be specified in the problem
% structure or in the options structure.
%
% In most of the examples bundled with the toolbox (see link below), the
% solver can be replaced by the present one if need be.
%
% The outputs x and cost are the last reached point on the manifold and its
% cost. This is not necessarily the best point generated since the method
% is not monotone. The struct-array info contains information about the
% iterations:
%   iter : the iteration number (0 for the initial guess)
%   cost : cost value
%   time : elapsed time in seconds
%   gradnorm : Riemannian norm of the gradient
%   stepsize : norm of the last tangent vector retracted
%   linesearch : information logged by the line-search algorithm
%   And possibly additional information logged by options.statsfun.
% For example, type [info.gradnorm] to obtain a vector of the successive
% gradient norms reached.
%
% The options structure is used to overwrite the default values. All
% options have a default value and are hence optional. To force an option
% value, pass an options structure with a field options.optionname, where
% optionname is one of the following and the default value is indicated
% between parentheses:
%
%   tolgradnorm (1e-6)
%       The algorithm terminates if the norm of the gradient drops below this.
%   maxiter (1000)
%       The algorithm terminates if maxiter iterations have been executed.
%   maxtime (Inf)
%       The algorithm terminates if maxtime seconds elapsed.
%   minstepsize (1e-10)
%       The algorithm terminates if the linesearch returns a displacement
%       vector (to be retracted) smaller in norm than this value.
%   linesearch (@linesearch_hint)
%       This option should not be changed, as the present solver has its
%       own dedicated line-search strategy. Specifying any other
%       line-search raises the error manopt:BB:ls. The value
%       @linesearch_hint itself is accepted, so that the options structure
%       returned by a previous call can be passed back in.
%   strategy ('direct')
%       The strategy used for the Barzilai-Borwein stepsize
%       'direct', compute the direct step <s_k,s_k>/<s_k,y_k>
%       'inverse', compute the inverse step <s_k,y_k>/<y_k,y_k>
%       'alternate', alternates between direct and inverse step
%       Any other value triggers the warning manopt:BB:strategy and
%       disables the Barzilai-Borwein stepsize update.
%   lambdamax (1e3)
%       The maximum stepsize allowed by the Barzilai-Borwein method
%   lambdamin (1e-3)
%       The minimum stepsize allowed by the Barzilai-Borwein method
%   lambda0 (1/10)
%       The initial stepsize of the Barzilai-Borwein method
%   ls_suff_decr (1e-4)
%       The Armijo sufficient decrease parameter of the line-search.
%   ls_nmsteps (10)
%       The non-monotone line-search checks a sufficient decrease with respect
%       to the previous ls_nmsteps objective function values.
%   statsfun (none)
%       Function handle to a function that will be called after each
%       iteration to provide the opportunity to log additional statistics.
%       They will be returned in the info struct. See the generic Manopt
%       documentation about solvers for further information.
%   stopfun (none)
%       Function handle to a function that will be called at each iteration
%       to provide the opportunity to specify additional stopping criteria.
%       See the generic Manopt documentation about solvers for further
%       information.
%   verbosity (3)
%       Integer number used to tune the amount of output the algorithm
%       generates during execution (mostly as text in the command window).
%       The higher, the more output. 0 means silent.
%   storedepth (2)
%       Maximum number of different points x of the manifold for which a
%       store structure will be kept in memory in the storedb. If the
%       caching features of Manopt are not used, this is irrelevant. For
%       this algorithm, a store depth of 2 should always be sufficient.
%   
%
% The implementation of the Barzilai-Borwein method is based on the paper:
%
% B. Iannazzo, M. Porcelli, "The Riemannian Barzilai-Borwein method with 
% nonmonotone line-search and the matrix geometric mean computation",
% IMA Journal of Numerical Analysis, to appear, https://doi.org/10.1093/imanum/drx015.
%
% See also: steepestdescent conjugategradient trustregions

% This file is part of Manopt: www.manopt.org.
% Original author: Margherita Porcelli, May 31, 2017
% Contributors: Nicolas Boumal, Bruno Iannazzo
% Change log: 

    
    % Verify that the problem description is sufficient for the solver.
    if ~canGetCost(problem)
        warning('manopt:getCost', ...
                'No cost provided. The algorithm will likely abort.');  
    end
    if ~canGetGradient(problem) && ~canGetApproxGradient(problem)
        % Note: we do not give a warning if an approximate gradient is
        % explicitly given in the problem description, as in that case the
        % user seems to be aware of the issue.
        warning('manopt:getGradient:approx', ...
               ['No gradient provided. Using an FD approximation instead (slow).\n' ...
                'It may be necessary to increase options.tolgradnorm.\n' ...
                'To disable this warning: warning(''off'', ''manopt:getGradient:approx'')']);
        problem.approxgrad = approxgradientFD(problem);
    end

    % Ensure options exists as a structure
    if ~exist('options', 'var') || isempty(options)
        options = struct();
    end
    
    % Set local defaults here
    localdefaults.minstepsize = 1e-10;
    localdefaults.maxiter = 1000;
    localdefaults.tolgradnorm = 1e-6;

    % Upper and lower bound for the Barzilai-Borwein stepsize
    localdefaults.lambdamax = 1e3;
    localdefaults.lambdamin = 1e-3;
    % Initial Barzilai-Borwein stepsize
    localdefaults.lambda0 = 1/10;

    % Barzilai-Borwein strategy (direct, inverse or alternate)
    localdefaults.strategy = 'direct';

    % Line-search parameters
    % 1) Make sure the user didn't try to define a line search.
    %    The options structure returned by this solver carries
    %    options.linesearch = @linesearch_hint (set below): that exact value
    %    is tolerated so that returned options can be fed back to the
    %    solver without triggering the error.
    user_linesearch = isfield(options, 'linesearch') && ...
                      ~isequal(options.linesearch, @linesearch_hint);
    if canGetLinesearch(problem) || user_linesearch
        error('manopt:BB:ls', ...
              ['The problem structure may not specify a line-search ' ...
               'hint for the BB solver,\nand the options structure ' ...
               'may not specify a line-search algorithm for BB.']);
    end
    % 2) Define the line-search parameters: the hint always proposes a unit
    %    multiplier for the (already scaled) descent direction.
    problem.linesearch = @(x, d, storedb, key) 1;
    options.linesearch = @linesearch_hint;
    % The Armijo sufficient decrease parameter
    localdefaults.ls_suff_decr = 1e-4;
    % The previous steps checked in the non-monotone line-search strategy
    localdefaults.ls_nmsteps = 10;
    
    
    % Merge global and local defaults, then merge w/ user options, if any.
    localdefaults = mergeOptions(getGlobalDefaults(), localdefaults);
    options = mergeOptions(localdefaults, options); 

    
    % Shorthands for some parameters
    strategy = options.strategy;
    lambdamax = options.lambdamax;
    lambdamin = options.lambdamin;
    lambda0 = options.lambda0;
    
    % An unrecognized strategy matches none of the branches in the main
    % loop, in which case the Barzilai-Borwein update is never applied.
    % Tell the user instead of failing silently.
    known_strategy = strcmp(strategy, 'direct') | ...
                     strcmp(strategy, 'inverse') | ...
                     strcmp(strategy, 'alternate');
    if isempty(known_strategy) || ~all(known_strategy(:))
        warning('manopt:BB:strategy', ...
               ['options.strategy should be ''direct'', ''inverse'' or ' ...
                '''alternate''; the Barzilai-Borwein stepsize update ' ...
                'is disabled.']);
    end
    
    timetic = tic();
    
    
    % If no initial point x is given by the user, generate one at random.
    if ~exist('x', 'var') || isempty(x)
        x = problem.M.rand();
    end

    % Create a store database and get a key for the current x
    storedb = StoreDB(options.storedepth);
    key = storedb.getNewKey();
    
    % Compute objective-related quantities for x
    [cost, grad] = getCostGrad(problem, x, storedb, key);
    gradnorm = problem.M.norm(x, grad);

    % Some variables below need to store information about iterations. We
    % preallocate for a reasonable amount of intended iterations to avoid
    % memory re-allocations.
    mem_init_size = min(10000, options.maxiter+1);
    
    % Store the cost value (f(j) is the cost at iteration j-1)
    f = zeros(mem_init_size, 1);
    f(1) = cost;
    
    % Iteration counter (at any point, iter is the number of fully executed
    % iterations so far)
    iter = 0;
    
    % Save stats in a struct array info, and preallocate.
    stats = savestats();
    info(1) = stats;
    info(mem_init_size).iter = [];
    
    if options.verbosity >= 2
        fprintf(' iter\t                cost val\t     grad. norm\n');
    end

    % Set the initial Barzilai-Borwein stepsize
    lambda = lambda0;

    % Start iterating until stopping criterion triggers
    while true

        % Display iteration information
        if options.verbosity >= 2
            fprintf('%5d\t%+.16e\t%.8e\n', iter, cost, gradnorm);
        end
        
        % Start timing this iteration
        timetic = tic();
        
        % Run standard stopping criterion checks
        [stop, reason] = stoppingcriterion(problem, x, options, ...
                                                             info, iter+1);
        
        % If none triggered, run specific stopping criterion check
        % (at iter == 0 the stepsize is NaN, so this cannot trigger).
        if ~stop && stats.stepsize < options.minstepsize
            stop = true;
            reason = sprintf(['Last stepsize smaller than minimum '  ...
                              'allowed; options.minstepsize = %g.'], ...
                              options.minstepsize);
        end
    
        if stop
            if options.verbosity >= 1
                % Print the reason verbatim: it must not be interpreted as
                % a format string, since it may contain '%' or '\'.
                fprintf('%s\n', reason);
            end
            break;
        end

        % Pick the descent direction as minus the gradient (scaled)
        desc_dir = problem.M.lincomb(x, -lambda, grad);

        % Execute the nonmonotone line search: the reference value is the
        % largest of the last ls_nmsteps recorded costs.
        k = iter + 1; 
        start = max(1, k - options.ls_nmsteps + 1);
        
        [stepsize, newx, newkey, lsstats] = ...
            options.linesearch(problem, x, desc_dir, max(f(start:k)), ...
                            -lambda * gradnorm^2, options, storedb, key);

        % Updates the value of lambda
        lambda = lambda * lsstats.alpha;

        % Compute the new cost-related quantities for newx
        [newcost, newgrad] = getCostGrad(problem, newx, storedb, newkey);
        newgradnorm = problem.M.norm(newx, newgrad);

        %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
        % BARZILAI-BORWEIN STRATEGY

        % Store the cost value
        f(iter+2) = newcost;
       
        % Transport the old gradient to newx
        grad_transp = problem.M.transp(x, newx, grad);

        % Compute the difference between gradients 
        Y = problem.M.lincomb(newx, 1, newgrad, -1, grad_transp);

        % Compute the transported step. grad_transp is a tangent vector at
        % newx, and Stransp is only used in inner products and norms at
        % newx, so the linear combination is formed at newx as well.
        Stransp = problem.M.lincomb(newx, -lambda, grad_transp); 

        % Compute the new Barzilai-Borwein step following the strategy
        if strcmp(strategy, 'direct')
            % direct strategy: <s,s>/<s,y>
            num = problem.M.norm(newx, Stransp)^2; 
            den = problem.M.inner(newx, Stransp, Y);
            if den > 0
                lambda = min( lambdamax, max(lambdamin, num/den) );
            else
                lambda = lambdamax;
            end

        elseif strcmp(strategy, 'inverse')
            % inverse strategy: <s,y>/<y,y>
            num = problem.M.inner(newx, Stransp, Y);
            den = problem.M.norm(newx, Y)^2;
            if num > 0  
                lambda = min( lambdamax, max(lambdamin, num/den) );
            else
                lambda = lambdamax;
            end

        elseif strcmp(strategy, 'alternate')
            % alternate strategy: direct step on even iterations, inverse
            % step on odd iterations
            num = problem.M.norm(newx, Stransp)^2; 
            den = problem.M.inner(newx, Stransp, Y);
            den2 = problem.M.norm(newx, Y)^2;
            if den > 0
                if mod(iter, 2) == 0
                    lambda = min( lambdamax, max(lambdamin, num/den) );
                else
                    lambda = min( lambdamax, max(lambdamin, den/den2) );
                end
            else
                lambda = lambdamax;
            end
        end
        
        %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

        % Make sure we don't use too much memory for the store database
        storedb.purge();
        
        % Update iterate info
        x = newx;
        key = newkey;
        cost = newcost;
        grad = newgrad;
        gradnorm = newgradnorm;

        % iter is the number of iterations we have accomplished.
        iter = iter + 1;
        
        % Log statistics for freshly executed iteration
        stats = savestats();
        info(iter+1) = stats;
        
    end
    
    % Drop the unused preallocated entries
    info = info(1:iter+1);

    if options.verbosity >= 1
        fprintf('Total time is %f [s] (excludes statsfun)\n', ...
                info(end).time);
    end
    
    
    % Routine in charge of collecting the current iteration stats.
    % It reads the solver state (iter, cost, gradnorm, stepsize, lsstats,
    % timetic, info) from the enclosing function's workspace; the time
    % field accumulates the time of the previous entry and the time elapsed
    % since timetic was last reset.
    function stats = savestats()
        stats.iter = iter;
        stats.cost = cost;
        stats.gradnorm = gradnorm;
        if iter == 0
            stats.stepsize = NaN;
            stats.time = toc(timetic);
            stats.linesearch = [];
        else
            stats.stepsize = stepsize;
            stats.time = info(iter).time + toc(timetic);
            stats.linesearch = lsstats;
        end
        stats = applyStatsfun(problem, x, storedb, key, options, stats);
    end
    
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/solvers/bfgs/rlbfgs.m
================================================
function [x, cost, info, options] = rlbfgs(problem, x0, options)
% Riemannian limited memory BFGS solver for smooth objective functions.
%
% function [x, cost, info, options] = rlbfgs(problem)
% function [x, cost, info, options] = rlbfgs(problem, x0)
% function [x, cost, info, options] = rlbfgs(problem, x0, options)
% function [x, cost, info, options] = rlbfgs(problem, [], options)
%
%
% This is a Riemannian limited memory BFGS solver (quasi-Newton method), 
% which aims to minimize the cost function in the given problem structure.
% It requires access to the gradient of the cost function.
%
% Parameter options.memory can be used to specify the number of iterations
% the algorithm remembers and uses to approximate the inverse Hessian of
% the cost. Default value is 30.
% For unlimited memory, set options.memory = Inf.
%
%
% For a description of the algorithm and theorems offering convergence
% guarantees, see the references below.
%
% The initial iterate is x0 if it is provided. Otherwise, a random point on
% the manifold is picked. To specify options whilst not specifying an
% initial iterate, give x0 as [] (the empty matrix).
%
% The two outputs 'x' and 'cost' are the last reached point on the manifold
% and its cost. 
% 
% The output 'info' is a struct-array which contains information about the
% iterations:
%   iter (integer)
%       The iteration number. The initial guess is 0.
%	cost (double)
%       The corresponding cost value.
%	gradnorm (double)
%       The (Riemannian) norm of the gradient.
%	time (double)
%       The total elapsed time in seconds to reach the corresponding cost.
%	stepsize (double)
%       The size of the step from the previous to the new iterate.
%   accepted (Boolean)
%       true if step is accepted in the cautious update. 0 otherwise.
%   And possibly additional information logged by options.statsfun.
% For example, type [info.gradnorm] to obtain a vector of the successive
% gradient norms reached at each iteration.
%
% The options structure is used to overwrite the default values. All
% options have a default value and are hence optional. To force an option
% value, pass an options structure with a field options.optionname, where
% optionname is one of the following and the default value is indicated
% between parentheses:
%
%   tolgradnorm (1e-6)
%       The algorithm terminates if the norm of the gradient drops below
%       this. For well-scaled problems, a rule of thumb is that you can
%       expect to reduce the gradient norm by 8 orders of magnitude
%       (sqrt(eps)) compared to the gradient norm at a "typical" point (a
%       rough initial iterate for example). Further decrease is sometimes
%       possible, but inexact floating point arithmetic will eventually
%       limit the final accuracy. If tolgradnorm is set too low, the
%       algorithm may end up iterating forever (or at least until another
%       stopping criterion triggers).
%   maxiter (1000)
%       The algorithm terminates if maxiter iterations were executed.
%   maxtime (Inf)
%       The algorithm terminates if maxtime seconds elapsed.
%   minstepsize (1e-10)
%     The minimum norm of the tangent vector that points from the current
%     point to the next point. If the norm is less than minstepsize, the 
%     program will terminate.
%   memory (30)
%     The number of previous iterations the program remembers. This is used 
%     to approximate the inverse Hessian at the current point. Because of
%     difficulty of maintaining a representation of operators in terms of
%     coordinates, a recursive method is used. The number of steps in the
%     recursion is at most options.memory. This parameter can take any
%     integer value >= 0, or Inf, which is taken to be options.maxiter. If
%     options.maxiter has value Inf, then it will take value 10000 and a
%     warning will be displayed.
%   linesearch (@linesearch_hint)
%       Function handle to a line search function. The options structure is
%       passed to the line search too, so you can pass it parameters. See
%       each line search's documentation for info.
%       By default, the initial multiplier tried is alpha = 1. This can be
%       changed with options.linesearch: see help of linesearch_hint.
%   strict_inc_func (@(t) t)
%     The Cautious step needs a real function that has value 0 at t = 0,
%     and  is strictly increasing. See details in Wen Huang's paper
%     "A Riemannian BFGS Method without Differentiated Retraction for 
%     Nonconvex Optimization Problems"
%   statsfun (none)
%       Function handle to a function that will be called after each
%       iteration to provide the opportunity to log additional statistics.
%       They will be returned in the info struct. See the generic Manopt
%       documentation about solvers for further information. statsfun is
%       called with the point x that was reached last.
%   stopfun (none)
%       Function handle to a function that will be called at each iteration
%       to provide the opportunity to specify additional stopping criteria.
%       See the generic Manopt documentation about solvers for further
%       information.
%   verbosity (2)
%       Integer number used to tune the amount of output the algorithm
%       generates during execution (mostly as text in the command window).
%       The higher, the more output. 0 means silent. 3 and above includes a
%       display of the options structure at the beginning of the execution.
%   debug (false)
%       Set to true to allow the algorithm to perform additional
%       computations for debugging purposes. If a debugging test fails, you
%       will be informed of it, usually via the command window. Be aware
%       that these additional computations appear in the algorithm timings
%       too, and may interfere with operations such as counting the number
%       of cost evaluations, etc. (the debug calls get storedb too).
%   storedepth (30)
%       Maximum number of different points x of the manifold for which a
%       store structure will be kept in memory in the storedb. If the
%       caching features of Manopt are not used, this is irrelevant. If
%       memory usage is an issue, you may try to lower this number.
%       Profiling may then help to investigate if a performance hit was
%       incurred as a result.
%
%
% Please cite the Manopt paper as well as the research paper:
% @InBook{Huang2016,
%   title     = {A {R}iemannian {BFGS} Method for Nonconvex Optimization Problems},
%   author    = {Huang, W. and Absil, P.-A. and Gallivan, K.A.},
%   year      = {2016},
%   publisher = {Springer International Publishing},
%   editor    = {Karas{\"o}zen, B{\"u}lent and Manguo{\u{g}}lu, Murat and Tezer-Sezgin, M{\"u}nevver and G{\"o}ktepe, Serdar and U{\u{g}}ur, {\"O}m{\"u}r},
%   address   = {Cham},
%   booktitle = {Numerical Mathematics and Advanced Applications ENUMATH 2015},
%   pages     = {627--634},
%   doi       = {10.1007/978-3-319-39929-4_60}
% }
%


% This file is part of Manopt: www.manopt.org.
% Original author: Changshuo Liu, July 19, 2017.
% Contributors: Nicolas Boumal
% Change log: 


    % Verify that the problem description is sufficient for the solver.
    if ~canGetCost(problem)
        warning('manopt:getCost', ...
            'No cost provided. The algorithm will likely abort.');
    end
    if ~canGetGradient(problem) && ~canGetApproxGradient(problem)
        % Note: we do not give a warning if an approximate gradient is
        % explicitly given in the problem description, as in that case the user
        % seems to be aware of the issue.
        warning('manopt:getGradient:approx', ...
           ['No gradient provided. Using an FD approximation instead (slow).\n' ...
            'It may be necessary to increase options.tolgradnorm.\n' ...
            'To disable this warning: warning(''off'', ''manopt:getGradient:approx'')']);
        problem.approxgrad = approxgradientFD(problem);
    end
    
    % Local defaults for the program
    localdefaults.minstepsize = 1e-10;
    localdefaults.maxiter = 1000;
    localdefaults.tolgradnorm = 1e-6;
    localdefaults.memory = 30;
    localdefaults.strict_inc_func = @(t) t;
    localdefaults.ls_max_steps  = 25;
    localdefaults.storedepth = 30;
    localdefaults.linesearch = @linesearch_hint;
    
    % Merge global and local defaults, then merge w/ user options, if any.
    localdefaults = mergeOptions(getGlobalDefaults(), localdefaults);
    if ~exist('options', 'var') || isempty(options)
        options = struct();
    end
    options = mergeOptions(localdefaults, options);
    
    % To make sure memory in range [0, Inf)
    options.memory = max(options.memory, 0);
    if options.memory == Inf
        if isinf(options.maxiter)
            options.memory = 10000;
            warning('rlbfgs:memory', ['options.memory and options.maxiter' ...
              ' are both Inf; options.memory has been changed to 10000.']);
        else
            options.memory = options.maxiter;
        end
    end
    
    M = problem.M;
    
    % Create a random starting point if no starting point is provided.
    if ~exist('x0', 'var')|| isempty(x0)
        xCur = M.rand(); 
    else
        xCur = x0;
    end
    
    timetic = tic();
    
    % Create a store database and get a key for the current x
    storedb = StoreDB(options.storedepth);
    key = storedb.getNewKey();
    
    % __________Initialization of variables______________
    % Number of iterations since the last restart
    k = 0;  
    % Total number of BFGS iterations
    iter = 0; 
    
    % This cell stores step vectors which point from x_{t} to x_{t+1} for t
    % indexing the last iterations, capped at options.memory.
    % That is, it stores up to options.memory of the most recent step
    % vectors. However, the implementation below does not need step vectors 
    % in their respective tangent spaces at x_{t}'s. Rather, it requires
    % them transported to the current point's tangent space by vector
    % transport. For details regarding the requirements on the vector
    % transport, see the reference paper by Huang et al.
    % In this implementation, those step vectors are iteratively 
    % transported to the current point's tangent space after every
    % iteration. Thus, at every iteration, vectors in sHistory are in the
    % current point's tangent space.
    sHistory = cell(1, options.memory);
    
    % This cell stores the differences for latest t's of the gradient at
    % x_{t+1} and the gradient at x_{t}, transported to x_{t+1}'s tangent
    % space. The memory is also capped at options.memory.
    yHistory = cell(1, options.memory);
    
    % rhoHistory{t} stores the reciprocal of the inner product between
    % sHistory{t} and yHistory{t}.
    rhoHistory = cell(1, options.memory);
    
    % Scaling of direction given by getDirection for acceptable step
    alpha = 1; 
    
    % Scaling of initial matrix, Barzilai-Borwein.
    scaleFactor = 1;
    
    % Norm of the step
    stepsize = 1;
    
    % Stores whether the step is accepted by the cautious update check.
    accepted = true;
    
    % Query the cost function and its gradient
    [xCurCost, xCurGradient] = getCostGrad(problem, xCur, storedb, key);
    
    xCurGradNorm = M.norm(xCur, xCurGradient);
    
    % Line-search statistics for recording in info.
    lsstats = [];
    
    % Flag to control restarting scheme to avoid infinite loops (see below)
    ultimatum = false;
    
    % Save stats in a struct array info, and preallocate.
    stats = savestats();
    info(1) = stats;
    info(min(10000, options.maxiter+1)).iter = [];
    
    if options.verbosity >= 2
        fprintf(' iter                   cost val            grad. norm           alpha\n');
    end
    
    % Main iteration
    while true

        % Display iteration information
        if options.verbosity >= 2
        fprintf('%5d    %+.16e        %.8e      %.4e\n', ...
                iter, xCurCost, xCurGradNorm, alpha);
        end
        
        % Start timing this iteration
        timetic = tic();
        
        % Run standard stopping criterion checks
        [stop, reason] = stoppingcriterion(problem, xCur, options, ...
                                           info, iter+1);
        
        % If none triggered, run specific stopping criterion check
        if ~stop 
            if stats.stepsize < options.minstepsize
                % To avoid infinite loop and to push the search further
                % in case BFGS approximation of Hessian is off towards
                % the end, we erase the memory by setting k = 0;
                % In this way, it starts off like a steepest descent.
                % If even steepest descent does not work, then it is 
                % hopeless and we will terminate.
                if ~ultimatum
                    if options.verbosity >= 2
                        fprintf(['stepsize is too small, restarting ' ...
                            'the bfgs procedure at the current point.\n']);
                    end
                    k = 0;
                    ultimatum = true;
                else
                    stop = true;
                    reason = sprintf(['Last stepsize smaller than '  ...
                        'minimum allowed; options.minstepsize = %g.'], ...
                        options.minstepsize);
                end
            else
                % We are not in trouble: lift the ultimatum if it was on.
                ultimatum = false;
            end
        end  
        
        if stop
            if options.verbosity >= 1
                fprintf([reason '\n']);
            end
            break;
        end

        
        % Compute BFGS direction
        p = getDirection(M, xCur, xCurGradient, sHistory,...
                yHistory, rhoHistory, scaleFactor, min(k, options.memory));

        % Execute line-search
        [stepsize, xNext, newkey, lsstats] = ...
            linesearch_hint(problem, xCur, p, xCurCost, ...
                            M.inner(xCur, xCurGradient, p), ...
                            options, storedb, key);
        
        % Record the BFGS step-multiplier alpha which was effectively
        % selected. Toward convergence, we hope to see alpha = 1.
        alpha = stepsize/M.norm(xCur, p);
        step = M.lincomb(xCur, alpha, p);
        
        
        % Query cost and gradient at the candidate new point.
        [xNextCost, xNextGrad] = getCostGrad(problem, xNext, storedb, newkey);
        
        % Compute sk and yk
        sk = M.transp(xCur, xNext, step);
        yk = M.lincomb(xNext, 1, xNextGrad, ...
                             -1, M.transp(xCur, xNext, xCurGradient));

        % Computation of the BFGS step is invariant under scaling of sk and
        % yk by a common factor. For numerical reasons, we scale sk and yk
        % so that sk is a unit norm vector.
        norm_sk = M.norm(xNext, sk);
        sk = M.lincomb(xNext, 1/norm_sk, sk);
        yk = M.lincomb(xNext, 1/norm_sk, yk);
        
        inner_sk_yk = M.inner(xNext, sk, yk);
        inner_sk_sk = M.norm(xNext, sk)^2;    % ensures nonnegativity
        
        
        % If the cautious step is accepted (which is the intended
        % behavior), we record sk, yk and rhok and need to do some
        % housekeeping. If the cautious step is rejected, these are not
        % recorded. In all cases, xNext is the next iterate: the notion of
        % accept/reject here is limited to whether or not we keep track of
        % sk, yk, rhok to update the BFGS operator.
        cap = options.strict_inc_func(xCurGradNorm);
        if inner_sk_sk ~= 0 && (inner_sk_yk / inner_sk_sk) >= cap
            
            accepted = true;
            
            rhok = 1/inner_sk_yk;
            
            scaleFactor = inner_sk_yk / M.norm(xNext, yk)^2;
            
            % Time to store the vectors sk, yk and the scalar rhok.
            % Remember: we need to transport all vectors to the most
            % current tangent space.
            
            % If we are out of memory
            if k >= options.memory
                
                % sk and yk are saved from 1 to the end with the most 
                % current recorded to the rightmost hand side of the cells
                % that are occupied. When memory is full, do a shift so
                % that the rightmost is earliest and replace it with the
                % most recent sk, yk.
                for  i = 2 : options.memory
                    sHistory{i} = M.transp(xCur, xNext, sHistory{i});
                    yHistory{i} = M.transp(xCur, xNext, yHistory{i});
                end
                if options.memory > 1
                    sHistory = sHistory([2:end, 1]);
                    yHistory = yHistory([2:end, 1]);
                    rhoHistory = rhoHistory([2:end 1]);
                end
                if options.memory > 0
                    sHistory{options.memory} = sk;
                    yHistory{options.memory} = yk;
                    rhoHistory{options.memory} = rhok;
                end
                
            % If we are not out of memory
            else
                
                for  i = 1:k
                    sHistory{i} = M.transp(xCur, xNext, sHistory{i});
                    yHistory{i} = M.transp(xCur, xNext, yHistory{i});
                end
                sHistory{k+1} = sk;
                yHistory{k+1} = yk;
                rhoHistory{k+1} = rhok;
                
            end
            
            k = k + 1;
            
        % The cautious step is rejected: we do not store sk, yk, rhok but
        % we still need to transport stored vectors to the new tangent
        % space.
        else
            
            accepted = false;
            
            for  i = 1 : min(k, options.memory)
                sHistory{i} = M.transp(xCur, xNext, sHistory{i});
                yHistory{i} = M.transp(xCur, xNext, yHistory{i});
            end
            
        end
        
        % Update variables to new iterate
        iter = iter + 1;
        xCur = xNext;
        key = newkey;
        xCurGradient = xNextGrad;
        xCurGradNorm = M.norm(xNext, xNextGrad);
        xCurCost = xNextCost;
        
        
        % Make sure we don't use too much memory for the store database
        % (this is independent from the BFGS memory.)
        storedb.purge();
        
        
        % Log statistics for freshly executed iteration
        stats = savestats();
        info(iter+1) = stats; 
        
    end

    
    % Housekeeping before we return
    info = info(1:iter+1);
    x = xCur;
    cost = xCurCost;

    if options.verbosity >= 1
        fprintf('Total time is %f [s] (excludes statsfun)\n', ...
                info(end).time);
    end

    
    % Routine in charge of collecting the current iteration stats
    function stats = savestats()
        % Build the statistics record for the iteration just completed.
        % All quantities are read from the enclosing solver's workspace.
        % Fields are created in a fixed order so that each record can be
        % stored in the info struct-array.
        stats.iter = iter;
        stats.cost = xCurCost;
        stats.gradnorm = xCurGradNorm;
        if iter ~= 0
            % Regular iteration: report the last step and accumulate the
            % elapsed time on top of the previous record's time.
            stats.stepsize = stepsize;
            stats.time = info(iter).time + toc(timetic);
            stats.accepted = accepted;
        else
            % Initial record: no step has been taken yet.
            stats.stepsize = NaN;
            stats.time = toc(timetic);
            stats.accepted = NaN;
        end
        stats.linesearch = lsstats;
        % Give the user-supplied statsfun (if any) a chance to add fields.
        stats = applyStatsfun(problem, xCur, storedb, key, options, stats);
    end

end


% BFGS step, see Wen's paper for details. This function takes in a tangent
% vector g, and applies an approximate inverse Hessian P to it to get Pg.
% Then, -Pg is returned.
%
% Theory requires the vector transport to be isometric and to satisfy the
% locking condition (see paper), but these properties do not seem to be
% crucial in practice. If your manifold provides M.isotransp, it may be
% good to do M.transp = M.isotransp; after loading M with a factory.
%
% This implementation operates in the tangent space of the most recent
% point since all vectors in sHistory and yHistory have been transported
% there.
function dir = getDirection(M, xCur, xCurGradient, sHistory, yHistory, ...
                            rhoHistory, scaleFactor, k)
% Two-loop recursion: applies the limited-memory inverse-Hessian
% approximation to xCurGradient and returns the negated result.
%
%   M            : structure providing M.inner(x, u, v) and M.lincomb(x, ...)
%   xCur         : point passed as first argument to M.inner / M.lincomb
%   xCurGradient : vector the operator is applied to
%   sHistory, yHistory, rhoHistory : cells; only entries 1..k are read
%   scaleFactor  : scalar multiplying the vector between the two loops
%   k            : number of stored (s, y, rho) triplets to use
%
% With k = 0 the result is simply -scaleFactor * xCurGradient.

    % Coefficients computed in the backward sweep, reused in the forward one.
    coeffs = zeros(1, k);
    
    vec = xCurGradient;
    
    % Backward sweep: from the most recent pair down to the oldest one.
    for idx = k : -1 : 1
        coeffs(1, idx) = rhoHistory{idx} * M.inner(xCur, sHistory{idx}, vec);
        vec = M.lincomb(xCur, 1, vec, -coeffs(1, idx), yHistory{idx});
    end
    
    % Apply the scaled-identity initial operator.
    vec = M.lincomb(xCur, scaleFactor, vec);
    
    % Forward sweep: from the oldest pair up to the most recent one.
    for idx = 1 : k
        correction = rhoHistory{idx} * M.inner(xCur, yHistory{idx}, vec);
        vec = M.lincomb(xCur, 1, vec, coeffs(1, idx)-correction, sHistory{idx});
    end
    
    % Flip the sign of the result before returning it.
    dir = M.lincomb(xCur, -1, vec);
    
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/solvers/conjugategradient/conjugategradient.m
================================================
function [x, cost, info, options] = conjugategradient(problem, x, options)
% Conjugate gradient minimization algorithm for Manopt.
%
% function [x, cost, info, options] = conjugategradient(problem)
% function [x, cost, info, options] = conjugategradient(problem, x0)
% function [x, cost, info, options] = conjugategradient(problem, x0, options)
% function [x, cost, info, options] = conjugategradient(problem, [], options)
%
% Apply the conjugate gradient minimization algorithm to the problem
% defined in the problem structure, starting at x0 if it is provided
% (otherwise, at a random point on the manifold). To specify options whilst
% not specifying an initial guess, give x0 as [] (the empty matrix).
%
% The outputs x and cost are the last reached point on the manifold and its
% cost. The struct-array info contains information about the iterations:
%   iter : the iteration number (0 for the initial guess)
%   cost : cost value
%   time : elapsed time in seconds
%   gradnorm : Riemannian norm of the gradient
%   stepsize : norm of the last tangent vector retracted
%   beta : value of the beta parameter (see options.beta_type)
%   linesearch : information logged by options.linesearch
%   And possibly additional information logged by options.statsfun.
% For example, type [info.gradnorm] to obtain a vector of the successive
% gradient norms reached.
%
% The options structure is used to overwrite the default values. All
% options have a default value and are hence optional. To force an option
% value, pass an options structure with a field options.optionname, where
% optionname is one of the following and the default value is indicated
% between parentheses:
%
%   tolgradnorm (1e-6)
%       The algorithm terminates if the norm of the gradient drops below this.
%   maxiter (1000)
%       The algorithm terminates if maxiter iterations have been executed.
%   maxtime (Inf)
%       The algorithm terminates if maxtime seconds elapsed.
%   minstepsize (1e-10)
%       The algorithm terminates if the linesearch returns a displacement
%       vector (to be retracted) smaller in norm than this value.
%   beta_type ('H-S')
%       Conjugate gradient beta rule used to construct the new search
%       direction, based on a linear combination of the previous search
%       direction and the new (preconditioned) gradient. Possible values
%       for this parameter are:
%           'S-D', 'steep' for beta = 0 (preconditioned steepest descent)
%           'F-R' for Fletcher-Reeves's rule
%           'P-R' for Polak-Ribiere's modified rule
%           'H-S' for Hestenes-Stiefel's modified rule
%           'H-Z' for Hager-Zhang's modified rule
%       See Hager and Zhang 2006, "A survey of nonlinear conjugate gradient
%       methods" for a description of these rules in the Euclidean case and
%       for an explanation of how to adapt them to the preconditioned case.
%       The adaption to the Riemannian case is straightforward: see in code
%       for details. Modified rules take the max between 0 and the computed
%       beta value, which provides automatic restart, except for H-Z which
%       uses a different modification.
%   orth_value (Inf)
%       Following Powell's restart strategy (Math. prog. 1977), restart CG
%       (that is, make a -preconditioned- gradient step) if two successive
%       -preconditioned- gradients are "too" parallel. See for example
%       Hager and Zhang 2006, "A survey of nonlinear conjugate gradient
%       methods", page 12. An infinite value disables this strategy. See in
%       code formula for the specific criterion used.
%   linesearch (@linesearch_adaptive or @linesearch_hint)
%       Function handle to a line search function. The options structure is
%       passed to the line search too, so you can pass it parameters. See
%       each line search's documentation for info. Another available line
%       search in manopt is @linesearch, in /manopt/linesearch/linesearch.m
%       If the problem structure includes a line search hint, then the
%       default line search used is @linesearch_hint.
%   statsfun (none)
%       Function handle to a function that will be called after each
%       iteration to provide the opportunity to log additional statistics.
%       They will be returned in the info struct. See the generic Manopt
%       documentation about solvers for further information.
%   stopfun (none)
%       Function handle to a function that will be called at each iteration
%       to provide the opportunity to specify additional stopping criteria.
%       See the generic Manopt documentation about solvers for further
%       information.
%   verbosity (3)
%       Integer number used to tune the amount of output the algorithm
%       generates during execution (mostly as text in the command window).
%       The higher, the more output. 0 means silent.
%   storedepth (20)
%       Maximum number of different points x of the manifold for which a
%       store structure will be kept in memory in the storedb. If the
%       caching features of Manopt are not used, this is irrelevant. For
%       the CG algorithm, a store depth of 2 should always be sufficient;
%       the default set in the code is 20.
%
%
% In most of the examples bundled with the toolbox (see link below), the
% solver can be replaced by the present one if need be.
%
% See also: steepestdescent trustregions manopt/solvers/linesearch manopt/examples

% An explicit, general listing of this algorithm, with preconditioning,
% can be found in the following paper:
%     @Article{boumal2015lowrank,
%       Title   = {Low-rank matrix completion via preconditioned optimization on the {G}rassmann manifold},
%       Author  = {Boumal, N. and Absil, P.-A.},
%       Journal = {Linear Algebra and its Applications},
%       Year    = {2015},
%       Pages   = {200--239},
%       Volume  = {475},
%       Doi     = {10.1016/j.laa.2015.02.027},
%     }

% This file is part of Manopt: www.manopt.org.
% Original author: Bamdev Mishra, Dec. 30, 2012.
% Contributors: Nicolas Boumal
% Change log: 
%
%   March 14, 2013, NB:
%       Added preconditioner support : see Section 8 in
%       https://www.math.lsu.edu/~hozhang/papers/cgsurvey.pdf
%    
%   Sept. 13, 2013, NB:
%       Now logging beta parameter too.
%    
%   Nov. 7, 2013, NB:
%       The search direction is no longer normalized before it is passed
%       to the linesearch. This way, it is up to the designers of the
%       linesearch to decide whether they want to use the norm of the
%       search direction in their algorithm or not. There are reasons
%       against it, but practical evidence that it may help too, so we
%       allow it. The default linesearch_adaptive used does exploit the
%       norm information. The base linesearch does not. You may select it
%       by setting options.linesearch = @linesearch;
%
%   Nov. 29, 2013, NB:
%       Documentation improved: options are now explicitly described.
%       Removed the Daniel rule for beta: it was not appropriate for
%       preconditioned CG and I could not find a proper reference for it.
%
%   April 3, 2015 (NB):
%       Works with the new StoreDB class system.

% Verify that the problem description is sufficient for the solver.
if ~canGetCost(problem)
    warning('manopt:getCost', ...
        'No cost provided. The algorithm will likely abort.');
end
if ~canGetGradient(problem) && ~canGetApproxGradient(problem)
    warning('manopt:getGradient:approx', ...
           ['No gradient provided. Using an FD approximation instead (slow).\n' ...
            'It may be necessary to increase options.tolgradnorm.\n' ...
            'To disable this warning: warning(''off'', ''manopt:getGradient:approx'')']);
    problem.approxgrad = approxgradientFD(problem);
end

% Set local defaults here
localdefaults.minstepsize = 1e-10;
localdefaults.maxiter = 1000;
localdefaults.tolgradnorm = 1e-6;
localdefaults.storedepth = 20;
% Changed by NB : H-S has the "auto restart" property.
% See Hager-Zhang 2005/2006 survey about CG methods.
% The auto restart comes from the 'max(0, ...)', not so much from the
% reason stated in Hager-Zhang I think. P-R also has auto restart.
localdefaults.beta_type = 'H-S';
localdefaults.orth_value = Inf; % by BM as suggested in Nocedal and Wright

    
% Depending on whether the problem structure specifies a hint for
% line-search algorithms, choose a default line-search that works on
% its own (typical) or that uses the hint.
if ~canGetLinesearch(problem)
    localdefaults.linesearch = @linesearch_adaptive;
else
    localdefaults.linesearch = @linesearch_hint;
end

% Merge global and local defaults, then merge w/ user options, if any.
localdefaults = mergeOptions(getGlobalDefaults(), localdefaults);
if ~exist('options', 'var') || isempty(options)
    options = struct();
end
options = mergeOptions(localdefaults, options);

% For convenience
inner = problem.M.inner;
lincomb = problem.M.lincomb;

timetic = tic();

% If no initial point x is given by the user, generate one at random.
if ~exist('x', 'var') || isempty(x)
    x = problem.M.rand();
end

% Create a store database and generate a key for the current x
storedb = StoreDB(options.storedepth);
key = storedb.getNewKey();

% Compute cost-related quantities for x
[cost, grad] = getCostGrad(problem, x, storedb, key);
gradnorm = problem.M.norm(x, grad);
Pgrad = getPrecon(problem, x, grad, storedb, key);
gradPgrad = inner(x, grad, Pgrad);

% Iteration counter (at any point, iter is the number of fully executed
% iterations so far)
iter = 0;

% Save stats in a struct array info and preallocate.
stats = savestats();
info(1) = stats;
info(min(10000, options.maxiter+1)).iter = [];


if options.verbosity >= 2
    fprintf(' iter\t               cost val\t    grad. norm\n');
end

% Compute a first descent direction (not normalized)
desc_dir = lincomb(x, -1, Pgrad);


% Start iterating until stopping criterion triggers
while true
    
    % Display iteration information
    if options.verbosity >= 2
        fprintf('%5d\t%+.16e\t%.8e\n', iter, cost, gradnorm);
    end
    
    % Start timing this iteration
    timetic = tic();
    
    % Run standard stopping criterion checks
    [stop, reason] = stoppingcriterion(problem, x, options, info, iter+1);
    
    % Run specific stopping criterion check.
    % At iter == 0, stats.stepsize is NaN so this comparison is false.
    if ~stop && abs(stats.stepsize) < options.minstepsize
        stop = true;
        reason = sprintf(['Last stepsize smaller than minimum '  ...
                          'allowed; options.minstepsize = %g.'], ...
                          options.minstepsize);
    end
    
    if stop
        if options.verbosity >= 1
            fprintf([reason '\n']);
        end
        break;
    end
    
    
    % The line search algorithms require the directional derivative of the
    % cost at the current point x along the search direction.
    df0 = inner(x, grad, desc_dir);
        
    % If we didn't get a descent direction: restart, i.e., switch to the
    % negative gradient. Equivalent to resetting the CG direction to a
    % steepest descent step, which discards the past information.
    if df0 >= 0
        
        % Or we switch to the negative gradient direction.
        if options.verbosity >= 3
            fprintf(['Conjugate gradient info: got an ascent direction '...
                     '(df0 = %2e), reset to the (preconditioned) '...
                     'steepest descent direction.\n'], df0);
        end
        % Reset to negative gradient: this discards the CG memory.
        desc_dir = lincomb(x, -1, Pgrad);
        df0 = -gradPgrad;
        
    end
    
    
    % Execute line search
    [stepsize, newx, newkey, lsstats] = options.linesearch( ...
                   problem, x, desc_dir, cost, df0, options, storedb, key);
               
    
    % Compute the new cost-related quantities for newx
    [newcost, newgrad] = getCostGrad(problem, newx, storedb, newkey);
    newgradnorm = problem.M.norm(newx, newgrad);
    Pnewgrad = getPrecon(problem, newx, newgrad, storedb, newkey);
    newgradPnewgrad = inner(newx, newgrad, Pnewgrad);
    
    
    % Apply the CG scheme to compute the next search direction.
    %
    % This paper https://www.math.lsu.edu/~hozhang/papers/cgsurvey.pdf
    % by Hager and Zhang lists many known beta rules. The rules defined
    % here can be found in that paper (or are provided with additional
    % references), adapted to the Riemannian setting.
    % 
    if strcmpi(options.beta_type, 'steep') || ...
       strcmpi(options.beta_type, 'S-D')              % Gradient Descent
        
        beta = 0;
        % Pnewgrad is a tangent vector at newx, so newx is the base point
        % of the linear combination (as in the general CG update below).
        desc_dir = lincomb(newx, -1, Pnewgrad);
        
    else
        
        % Transport the previous gradient to the tangent space at newx so
        % that it can be compared with quantities living there.
        oldgrad = problem.M.transp(x, newx, grad);
        orth_grads = inner(newx, oldgrad, Pnewgrad) / newgradPnewgrad;
        
        % Powell's restart strategy (see page 12 of Hager and Zhang's
        % survey on conjugate gradient methods, for example)
        if abs(orth_grads) >= options.orth_value
            beta = 0;
            % Same remark as above: the base point must be newx.
            desc_dir = lincomb(newx, -1, Pnewgrad);
            
        else % Compute the CG modification
            
            % Bring the previous search direction to the tangent space at
            % newx before combining it with the new gradient.
            desc_dir = problem.M.transp(x, newx, desc_dir);
            
            switch upper(options.beta_type)
            
                case 'F-R'  % Fletcher-Reeves
                    beta = newgradPnewgrad / gradPgrad;
                
                case 'P-R'  % Polak-Ribiere+
                    % vector grad(new) - transported grad(current)
                    diff = lincomb(newx, 1, newgrad, -1, oldgrad);
                    ip_diff = inner(newx, Pnewgrad, diff);
                    beta = ip_diff / gradPgrad;
                    beta = max(0, beta);
                
                case 'H-S'  % Hestenes-Stiefel+
                    diff = lincomb(newx, 1, newgrad, -1, oldgrad);
                    ip_diff = inner(newx, Pnewgrad, diff);
                    beta = ip_diff / inner(newx, diff, desc_dir);
                    beta = max(0, beta);

                case 'H-Z' % Hager-Zhang+
                    diff = lincomb(newx, 1, newgrad, -1, oldgrad);
                    Poldgrad = problem.M.transp(x, newx, Pgrad);
                    Pdiff = lincomb(newx, 1, Pnewgrad, -1, Poldgrad);
                    deno = inner(newx, diff, desc_dir);
                    numo = inner(newx, diff, Pnewgrad);
                    numo = numo - 2*inner(newx, diff, Pdiff)*...
                                     inner(newx, desc_dir, newgrad) / deno;
                    beta = numo / deno;

                    % Robustness (see Hager-Zhang paper mentioned above).
                    % Note: gradnorm still holds the gradient norm at the
                    % previous iterate x here (it is updated further down).
                    desc_dir_norm = problem.M.norm(newx, desc_dir);
                    eta_HZ = -1 / ( desc_dir_norm * min(0.01, gradnorm) );
                    beta = max(beta, eta_HZ);

                otherwise
                    error(['Unknown options.beta_type. ' ...
                           'Should be steep, S-D, F-R, P-R, H-S or H-Z.']);
            end
            
            desc_dir = lincomb(newx, -1, Pnewgrad, beta, desc_dir);
        
        end
        
    end
    
    % Make sure we don't use too much memory for the store database
    storedb.purge();
    
    % Transfer iterate info
    x = newx;
    key = newkey;
    cost = newcost;
    grad = newgrad;
    Pgrad = Pnewgrad;
    gradnorm = newgradnorm;
    gradPgrad = newgradPnewgrad;
    
    % iter is the number of iterations we have accomplished.
    iter = iter + 1;
    
    % Log statistics for freshly executed iteration
    stats = savestats();
    info(iter+1) = stats; %#ok<AGROW>
    
end


% Drop the preallocated but unused entries of info.
info = info(1:iter+1);

if options.verbosity >= 1
    fprintf('Total time is %f [s] (excludes statsfun)\n', info(end).time);
end


% Routine in charge of collecting the current iteration stats.
% Nested function: reads iter, cost, gradnorm, stepsize, lsstats, beta,
% timetic and info from the enclosing workspace.
function stats = savestats()
    stats.iter = iter;
    stats.cost = cost;
    stats.gradnorm = gradnorm;
    if iter == 0
        % Initial record: no line search has been run yet.
        stats.stepsize = nan;
        stats.time = toc(timetic);
        stats.linesearch = [];
        stats.beta = 0;
    else
        stats.stepsize = stepsize;
        % Cumulative time: previous record's time plus this iteration's.
        stats.time = info(iter).time + toc(timetic);
        stats.linesearch = lsstats;
        stats.beta = beta;
    end
    stats = applyStatsfun(problem, x, storedb, key, options, stats);
end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/solvers/conjugategradient/linear_conjugategradient.m
================================================
function [x, cost, info, options] = linear_conjugategradient(problem, x, options)
% Conjugate gradient minimization algorithm for Manopt.
%
% function [x, cost, info, options] = linear_conjugategradient(problem)
% function [x, cost, info, options] = linear_conjugategradient(problem, x0)
% function [x, cost, info, options] = linear_conjugategradient(problem, x0, options)
% function [x, cost, info, options] = linear_conjugategradient(problem, [], options)
%
% Apply the conjugate gradient minimization algorithm to the problem
% defined in the problem structure, starting at x0 if it is provided
% (otherwise, at a random point on the manifold). To specify options whilst
% not specifying an initial guess, give x0 as [] (the empty matrix).
%
% The outputs x and cost are the last reached point on the manifold and its
% cost. The struct-array info contains information about the iterations:
%   iter : the iteration number (0 for the initial guess)
%   cost : cost value
%   time : elapsed time in seconds
%   gradnorm : Riemannian norm of the gradient
%   stepsize : norm of the last tangent vector retracted
%   beta : value of the beta parameter (see options.beta_type)
%   linesearch : information logged by options.linesearch
%   And possibly additional information logged by options.statsfun.
% For example, type [info.gradnorm] to obtain a vector of the successive
% gradient norms reached.
%
% The output options is the options structure actually used, that is, the
% user-supplied options merged with all default values.
%
% The options structure is used to overwrite the default values. All
% options have a default value and are hence optional. To force an option
% value, pass an options structure with a field options.optionname, where
% optionname is one of the following and the default value is indicated
% between parentheses:
%
%   tolgradnorm (1e-6)
%       The algorithm terminates if the norm of the gradient drops below this.
%   maxiter (1000)
%       The algorithm terminates if maxiter iterations have been executed.
%   maxtime (Inf)
%       The algorithm terminates if maxtime seconds elapsed.
%   minstepsize (1e-10)
%       The algorithm terminates if the linesearch returns a displacement
%       vector (to be retracted) smaller in norm than this value.
%   beta_type ('H-S')
%       Conjugate gradient beta rule used to construct the new search
%       direction, based on a linear combination of the previous search
%       direction and the new (preconditioned) gradient. Possible values
%       for this parameter are:
%           'S-D', 'steep' for beta = 0 (preconditioned steepest descent)
%           'F-R' for Fletcher-Reeves's rule
%           'P-R' for Polak-Ribiere's modified rule
%           'H-S' for Hestenes-Stiefel's modified rule
%           'H-Z' for Hager-Zhang's modified rule
%       See Hager and Zhang 2006, "A survey of nonlinear conjugate gradient
%       methods" for a description of these rules in the Euclidean case and
%       for an explanation of how to adapt them to the preconditioned case.
%       The adaption to the Riemannian case is straightforward: see in code
%       for details. Modified rules take the max between 0 and the computed
%       beta value, which provides automatic restart, except for H-Z which
%       uses a different modification.
%   orth_value (Inf)
%       Following Powell's restart strategy (Math. prog. 1977), restart CG
%       (that is, make a -preconditioned- gradient step) if two successive
%       -preconditioned- gradients are "too" parallel. See for example
%       Hager and Zhang 2006, "A survey of nonlinear conjugate gradient
%       methods", page 12. An infinite value disables this strategy. See in
%       code formula for the specific criterion used.
%   linesearch (@linesearch_adaptive or @linesearch_hint)
%       Function handle to a line search function. The options structure is
%       passed to the line search too, so you can pass it parameters. See
%       each line search's documentation for info. Another available line
%       search in manopt is @linesearch, in /manopt/linesearch/linesearch.m
%       If the problem structure includes a line search hint, then the
%       default line search used is @linesearch_hint.
%   statsfun (none)
%       Function handle to a function that will be called after each
%       iteration to provide the opportunity to log additional statistics.
%       They will be returned in the info struct. See the generic Manopt
%       documentation about solvers for further information.
%   stopfun (none)
%       Function handle to a function that will be called at each iteration
%       to provide the opportunity to specify additional stopping criteria.
%       See the generic Manopt documentation about solvers for further
%       information.
%   verbosity (3)
%       Integer number used to tune the amount of output the algorithm
%       generates during execution (mostly as text in the command window).
%       The higher, the more output. 0 means silent.
%   storedepth (20)
%       Maximum number of different points x of the manifold for which a
%       store structure will be kept in memory in the storedb. If the
%       caching features of Manopt are not used, this is irrelevant. For
%       the CG algorithm, a store depth of 2 should always be sufficient.
%
%
% In most of the examples bundled with the toolbox (see link below), the
% solver can be replaced by the present one if need be.
%
% See also: steepestdescent trustregions manopt/solvers/linesearch manopt/examples

% An explicit, general listing of this algorithm, with preconditioning,
% can be found in the following paper:
%     @Article{boumal2015lowrank,
%       Title   = {Low-rank matrix completion via preconditioned optimization on the {G}rassmann manifold},
%       Author  = {Boumal, N. and Absil, P.-A.},
%       Journal = {Linear Algebra and its Applications},
%       Year    = {2015},
%       Pages   = {200--239},
%       Volume  = {475},
%       Doi     = {10.1016/j.laa.2015.02.027},
%     }

% This file is part of Manopt: www.manopt.org.
% Original author: Bamdev Mishra, Dec. 30, 2012.
% Contributors: Nicolas Boumal
% Change log: 
%
%   March 14, 2013, NB:
%       Added preconditioner support : see Section 8 in
%       https://www.math.lsu.edu/~hozhang/papers/cgsurvey.pdf
%    
%   Sept. 13, 2013, NB:
%       Now logging beta parameter too.
%    
%   Nov. 7, 2013, NB:
%       The search direction is no longer normalized before it is passed
%       to the linesearch. This way, it is up to the designers of the
%       linesearch to decide whether they want to use the norm of the
%       search direction in their algorithm or not. There are reasons
%       against it, but practical evidence that it may help too, so we
%       allow it. The default linesearch_adaptive used does exploit the
%       norm information. The base linesearch does not. You may select it
%       by setting options.linesearch = @linesearch;
%
%   Nov. 29, 2013, NB:
%       Documentation improved: options are now explicitly described.
%       Removed the Daniel rule for beta: it was not appropriate for
%       preconditioned CG and I could not find a proper reference for it.
%
%   April 3, 2015 (NB):
%       Works with the new StoreDB class system.

% Verify that the problem description is sufficient for the solver.
if ~canGetCost(problem)
    warning('manopt:getCost', ...
        'No cost provided. The algorithm will likely abort.');
end
if ~canGetGradient(problem) && ~canGetApproxGradient(problem)
    warning('manopt:getGradient:approx', ...
           ['No gradient provided. Using an FD approximation instead (slow).\n' ...
            'It may be necessary to increase options.tolgradnorm.\n' ...
            'To disable this warning: warning(''off'', ''manopt:getGradient:approx'')']);
    problem.approxgrad = approxgradientFD(problem);
end

% Set local defaults here
localdefaults.minstepsize = 1e-10;
localdefaults.maxiter = 1000;
localdefaults.tolgradnorm = 1e-6;
localdefaults.storedepth = 20;
% Changed by NB : H-S has the "auto restart" property.
% See Hager-Zhang 2005/2006 survey about CG methods.
% The auto restart comes from the 'max(0, ...)', not so much from the
% reason stated in Hager-Zhang I think. P-R also has auto restart.
localdefaults.beta_type = 'H-S';
localdefaults.orth_value = Inf; % by BM as suggested in Nocedal and Wright

    
% Depending on whether the problem structure specifies a hint for
% line-search algorithms, choose a default line-search that works on
% its own (typical) or that uses the hint.
if ~canGetLinesearch(problem)
    localdefaults.linesearch = @linesearch_adaptive;
else
    localdefaults.linesearch = @linesearch_hint;
end

% Merge global and local defaults, then merge w/ user options, if any.
localdefaults = mergeOptions(getGlobalDefaults(), localdefaults);
if ~exist('options', 'var') || isempty(options)
    options = struct();
end
options = mergeOptions(localdefaults, options);

% For convenience
inner = problem.M.inner;
lincomb = problem.M.lincomb;

timetic = tic();

% If no initial point x is given by the user, generate one at random.
if ~exist('x', 'var') || isempty(x)
    x = problem.M.rand();
end

% Create a store database and generate a key for the current x
storedb = StoreDB(options.storedepth);
key = storedb.getNewKey();

% Compute cost-related quantities for x
[cost, grad] = getCostGrad(problem, x, storedb, key);
gradnorm = problem.M.norm(x, grad);
Pgrad = getPrecon(problem, x, grad, storedb, key);
gradPgrad = inner(x, grad, Pgrad);

% Iteration counter (at any point, iter is the number of fully executed
% iterations so far)
iter = 0;

% Save stats in a struct array info and preallocate.
stats = savestats();
info(1) = stats;
info(min(10000, options.maxiter+1)).iter = [];


if options.verbosity >= 2
    fprintf(' iter\t               cost val\t    grad. norm\n');
end

% Compute a first descent direction (not normalized)
desc_dir = lincomb(x, -1, Pgrad);


% Start iterating until stopping criterion triggers
while true
    
    % Display iteration information
    if options.verbosity >= 2
        fprintf('%5d\t%+.16e\t%.8e\n', iter, cost, gradnorm);
    end
    
    % Start timing this iteration
    timetic = tic();
    
    % Run standard stopping criterion checks
    [stop, reason] = stoppingcriterion(problem, x, options, info, iter+1);
    
    % Run specific stopping criterion check. At iter == 0 the logged step
    % size is NaN, so this comparison is false and does not trigger.
    if ~stop && abs(stats.stepsize) < options.minstepsize
        stop = true;
        reason = sprintf(['Last stepsize smaller than minimum '  ...
                          'allowed; options.minstepsize = %g.'], ...
                          options.minstepsize);
    end
    
    if stop
        if options.verbosity >= 1
            % The reason is an already formatted message: print it
            % verbatim rather than using it as a format string, so that
            % any '%' or '\' it may contain is not reinterpreted.
            fprintf('%s\n', reason);
        end
        break;
    end
    
    
    % The line search algorithms require the directional derivative of the
    % cost at the current point x along the search direction.
    df0 = inner(x, grad, desc_dir);
        
    % If we didn't get a descent direction: restart, i.e., switch to the
    % negative gradient. Equivalent to resetting the CG direction to a
    % steepest descent step, which discards the past information.
    if df0 >= 0
        
        if options.verbosity >= 3
            fprintf(['Conjugate gradient info: got an ascent direction '...
                     '(df0 = %2e), reset to the (preconditioned) '...
                     'steepest descent direction.\n'], df0);
        end
        % Reset to negative gradient: this discards the CG memory.
        desc_dir = lincomb(x, -1, Pgrad);
        df0 = -gradPgrad;
        
    end
    
    
    % Execute line search
    [stepsize, newx, newkey, lsstats] = options.linesearch( ...
                   problem, x, desc_dir, cost, df0, options, storedb, key);
               
    
    % Compute the new cost-related quantities for newx
    [newcost, newgrad] = getCostGrad(problem, newx, storedb, newkey);
    newgradnorm = problem.M.norm(newx, newgrad);
    Pnewgrad = getPrecon(problem, newx, newgrad, storedb, newkey);
    newgradPnewgrad = inner(newx, newgrad, Pnewgrad);
    
    
    % Apply the CG scheme to compute the next search direction.
    %
    % This paper https://www.math.lsu.edu/~hozhang/papers/cgsurvey.pdf
    % by Hager and Zhang lists many known beta rules. The rules defined
    % here can be found in that paper (or are provided with additional
    % references), adapted to the Riemannian setting.
    % 
    if strcmpi(options.beta_type, 'steep') || ...
       strcmpi(options.beta_type, 'S-D')              % Gradient Descent
        
        beta = 0;
        % Pnewgrad is a tangent vector at newx (not at x), and the
        % direction built here is used at newx in the next iteration.
        desc_dir = lincomb(newx, -1, Pnewgrad);
        
    else
        
        % Bring the previous gradient to the tangent space at newx so that
        % it can be compared with the new one.
        oldgrad = problem.M.transp(x, newx, grad);
        orth_grads = inner(newx, oldgrad, Pnewgrad) / newgradPnewgrad;
        
        % Powell's restart strategy (see page 12 of Hager and Zhang's
        % survey on conjugate gradient methods, for example)
        if abs(orth_grads) >= options.orth_value
            beta = 0;
            % Same remark as above: the base point of Pnewgrad is newx.
            desc_dir = lincomb(newx, -1, Pnewgrad);
            
        else % Compute the CG modification
            
            % Transport the previous search direction to newx as well.
            desc_dir = problem.M.transp(x, newx, desc_dir);
            
            switch upper(options.beta_type)
            
                case 'F-R'  % Fletcher-Reeves
                    beta = newgradPnewgrad / gradPgrad;
                
                case 'P-R'  % Polak-Ribiere+
                    % vector grad(new) - transported grad(current)
                    diff = lincomb(newx, 1, newgrad, -1, oldgrad);
                    ip_diff = inner(newx, Pnewgrad, diff);
                    beta = ip_diff / gradPgrad;
                    beta = max(0, beta);
                
                case 'H-S'  % Hestenes-Stiefel+
                    diff = lincomb(newx, 1, newgrad, -1, oldgrad);
                    ip_diff = inner(newx, Pnewgrad, diff);
                    beta = ip_diff / inner(newx, diff, desc_dir);
                    beta = max(0, beta);

                case 'H-Z' % Hager-Zhang+
                    diff = lincomb(newx, 1, newgrad, -1, oldgrad);
                    Poldgrad = problem.M.transp(x, newx, Pgrad);
                    Pdiff = lincomb(newx, 1, Pnewgrad, -1, Poldgrad);
                    deno = inner(newx, diff, desc_dir);
                    numo = inner(newx, diff, Pnewgrad);
                    numo = numo - 2*inner(newx, diff, Pdiff)*...
                                     inner(newx, desc_dir, newgrad) / deno;
                    beta = numo / deno;

                    % Robustness (see Hager-Zhang paper mentioned above).
                    % The lower bound uses the gradient norm at the
                    % previous point x (gradnorm), not at newx.
                    desc_dir_norm = problem.M.norm(newx, desc_dir);
                    eta_HZ = -1 / ( desc_dir_norm * min(0.01, gradnorm) );
                    beta = max(beta, eta_HZ);

                otherwise
                    error(['Unknown options.beta_type. ' ...
                           'Should be steep, S-D, F-R, P-R, H-S or H-Z.']);
            end
            
            desc_dir = lincomb(newx, -1, Pnewgrad, beta, desc_dir);
        
        end
        
    end
    
    % Make sure we don't use too much memory for the store database
    storedb.purge();
    
    % Transfer iterate info
    x = newx;
    key = newkey;
    cost = newcost;
    grad = newgrad;
    Pgrad = Pnewgrad;
    gradnorm = newgradnorm;
    gradPgrad = newgradPnewgrad;
    
    % iter is the number of iterations we have accomplished.
    iter = iter + 1;
    
    % Log statistics for freshly executed iteration
    stats = savestats();
    info(iter+1) = stats; %#ok<AGROW>
    
end


% Drop the preallocated but unused entries.
info = info(1:iter+1);

if options.verbosity >= 1
    fprintf('Total time is %f [s] (excludes statsfun)\n', info(end).time);
end


% Routine in charge of collecting the current iteration stats.
% Nested function: it reads iter, cost, gradnorm, timetic, info, stepsize,
% lsstats and beta directly from the workspace of the solver above.
function stats = savestats()
    stats.iter = iter;
    stats.cost = cost;
    stats.gradnorm = gradnorm;
    if iter == 0
        % Initial guess: no step, line search or beta to report yet.
        stats.stepsize = nan;
        stats.time = toc(timetic);
        stats.linesearch = [];
        stats.beta = 0;
    else
        stats.stepsize = stepsize;
        % Cumulative time: previous record plus this iteration's time.
        stats.time = info(iter).time + toc(timetic);
        stats.linesearch = lsstats;
        stats.beta = beta;
    end
    stats = applyStatsfun(problem, x, storedb, key, options, stats);
end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/solvers/gradientapproximations/approxgradientFD.m
================================================
function gradfun = approxgradientFD(problem, options)
% Gradient approximation function handle based on finite differences of the cost.
%
% function gradfun = approxgradientFD(problem)
% function gradfun = approxgradientFD(problem, options)
%
% Input:
%
% problem is a Manopt problem structure which already contains the manifold
% (problem.M) and enough information to compute the cost. options is an
% optional structure; two fields are recognized:
%    options.stepsize (positive double; default: 2^-23)
%        Length of the finite-difference step.
%    options.subspacedim (positive integer; default: [], for M.dim())
%        Dimension of the random subspace of the tangent space on which
%        the gradient is approximated. [] means the full tangent space.
%
% A warning is issued if the cost cannot be computed on 'problem'.
%
% Output:
%
% gradfun is a function handle which wraps a generic finite-difference
% approximation of the gradient of the problem cost. The finite difference
% is based on M.dim()+1 computations of the cost.
%
% gradfun can be called in any of the following ways:
%
%   gradfd = gradfun(x)
%   gradfd = gradfun(x, storedb)
%   gradfd = gradfun(x, storedb, key)
%
% where x is a point on the manifold problem.M, storedb is a StoreDB
% object, and key is the StoreDB key to point x.
%
% Usage:
%
% Typically, the user will set problem.M and other fields to define the
% cost (typically, problem.cost). Then, to use this generic purpose
% gradient approximation:
%
%   problem.approxgrad = approxgradientFD(problem, options);
%
% See also: steepestdescent conjugategradient

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Nov. 1, 2016.
% Contributors: 
% Change log: 

    % Everything below relies on cost evaluations only: warn right away if
    % the problem structure cannot provide them.
    if ~canGetCost(problem)
        warning('manopt:approxgradFD:nocost', ...
                'approxgradFD requires the cost to be computable.');
    end

    % Values supplied by the user take precedence over these defaults.
    if ~exist('options', 'var') || isempty(options)
        options = struct();
    end
    defaults.stepsize = 2^-23;
    defaults.subspacedim = [];
    options = mergeOptions(defaults, options);

    % The returned handle points to a nested function, which is what makes
    % it possible for callers to leave out storedb and key.
    gradfun = @fdgradient;

    function gradfd = fdgradient(x, storedb, key)
        % No key supplied: create one, and a fresh StoreDB too if needed.
        if nargin < 3
            if nargin < 2
                storedb = StoreDB();
            end
            key = storedb.getNewKey();
        end
        % options.stepsize: how far to look along each basis direction.
        % options.subspacedim: dimension of the subspace ([] for full).
        gradfd = gradientFD(options.stepsize, options.subspacedim, ...
                            problem, x, storedb, key);
    end

end


function gradfd = gradientFD(stepsize, subspacedim, problem, x, storedb, key)
% Worker for approxgradientFD: finite-difference gradient estimate at x.
%
% stepsize is the length of the finite-difference step, subspacedim is
% forwarded to tangentorthobasis ([] selects the full tangent space), and
% storedb/key are the StoreDB object and the key associated to x.
%
% Original code: Nov. 1, 2016 (NB).

    M = problem.M;

    % Reference value: the cost at the point x itself.
    f_ref = getCost(problem, x, storedb, key);

    % Basis of the tangent space at x (or of a subspace of it). When a
    % strict subspace is used, the result approximates the orthogonal
    % projection of the gradient to that subspace.
    basis = tangentorthobasis(M, x, subspacedim);

    % One forward difference per basis vector: step from x along that
    % vector, evaluate the cost there, and divide the change by the step.
    slopes = zeros(size(basis));
    for idx = 1 : numel(basis)
        x_shifted = M.retr(x, basis{idx}, stepsize);
        f_shifted = getCost(problem, x_shifted, storedb);
        slopes(idx) = (f_shifted - f_ref)/stepsize;
    end

    % Combine the basis vectors, weighted by the estimated slopes.
    gradfd = lincomb(M, x, basis, slopes);

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/solvers/hessianapproximations/approxhessianFD.m
================================================
function hessfun = approxhessianFD(problem, options)
% Hessian approximation function handle based on finite differences of the gradient.
%
% function hessfun = approxhessianFD(problem)
% function hessfun = approxhessianFD(problem, options)
%
% Input:
%
% problem is a Manopt problem structure which already contains the manifold
% (problem.M) and enough information to compute the gradient of the cost.
% options is an optional structure; one field is recognized:
%    options.stepsize (positive double; default: 2^-14)
%        Length of the finite-difference step.
%
% A warning is issued if the gradient can be neither computed nor
% approximated on 'problem'.
%
% Output:
%
% hessfun is a function handle which wraps a generic finite-difference
% approximation of the Hessian of the problem cost. The finite difference
% is based on computations of the gradient.
%
% hessfun can be called in any of the following ways:
%
%   hessfd = hessfun(x, xdot)
%   hessfd = hessfun(x, xdot, storedb)
%   hessfd = hessfun(x, xdot, storedb, key)
%
% where x is a point on the manifold problem.M, xdot is a tangent vector to
% that manifold at x, storedb is a StoreDB object, and key is the StoreDB
% key to point x.
%
% Usage:
%
% Typically, the user will set problem.M and other fields to define the
% cost and the gradient (typically, problem.cost and problem.grad or
% problem.egrad). Then, to use this generic purpose Hessian approximation:
%
%   problem.approxhess = approxhessianFD(problem, options);
%
% See also: trustregions

% The Riemannian Trust-Region method, used in combination with the present
% Hessian approximation, is called RTR-FD. Some convergence theory for it
% is available in this paper:
%
% @incollection{boumal2015rtrfd,
%   author={Boumal, N.},
%   title={Riemannian trust regions with finite-difference Hessian approximations are globally convergent},
%   year={2015},
%   booktitle={Geometric Science of Information}
% }

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, April 8, 2015.
% Contributors: 
% Change log: 
%
%   Feb. 19, 2015 (NB):
%       It is sufficient to ensure positive radial linearity to guarantee
%       (together with other assumptions) that this approximation of the
%       Hessian will confer global convergence to the trust-regions method.
%       Formerly, in-code comments referred to the necessity of having
%       complete radial linearity, and that this was harder to achieve.
%       This appears not to be necessary after all, which simplifies the
%       code.
%
%   April 3, 2015 (NB):
%       Works with the new StoreDB class system.
%
%   April 8, 2015 (NB):
%       Changed to approxhessianFD, which now returns a function handle
%       that encapsulates the getHessianFD functionality. Will be better
%       aligned with the other Hessian approximations to come (which may
%       want to use storedb.internal), and now allows specifying the step
%       size.

    % Everything below relies on gradient evaluations only: warn right
    % away if the problem structure cannot provide them in any way.
    if ~(canGetGradient(problem) || canGetApproxGradient(problem))
        warning('manopt:approxhessianFD:nogradient', ...
                'approxhessianFD requires the gradient to be computable.');
    end

    % Values supplied by the user take precedence over this default.
    if ~exist('options', 'var') || isempty(options)
        options = struct();
    end
    defaults.stepsize = 2^-14;
    options = mergeOptions(defaults, options);

    % The returned handle points to a nested function, which is what makes
    % it possible for callers to leave out storedb and key.
    hessfun = @fdhessian;

    function hessfd = fdhessian(x, xdot, storedb, key)
        % No key supplied: create one, and a fresh StoreDB too if needed.
        if nargin < 4
            if nargin < 3
                storedb = StoreDB();
            end
            key = storedb.getNewKey();
        end
        % options.stepsize: how far to look along xdot.
        hessfd = hessianFD(options.stepsize, problem, x, xdot, storedb, key);
    end

end


function hessfd = hessianFD(stepsize, problem, x, xdot, storedb, key)
% Worker for approxhessianFD: finite-difference Hessian estimate at x
% applied to the tangent vector xdot.
%
% stepsize is the length of the finite-difference step; storedb and key
% are the StoreDB object and the key associated to x.
%
% Original code: Dec. 30, 2012 (NB).

    M = problem.M;

    xdot_norm = M.norm(x, xdot);

    if xdot_norm < eps

        % The direction is (numerically) zero: there is nothing to
        % differentiate along, so the zero tangent vector is returned.
        hessfd = M.zerovec(x);

    else

        % Scale the retraction so that the point reached depends on the
        % direction of xdot but not on its norm. This is what gives the
        % approximation its radial linearity.
        t = stepsize / xdot_norm;

        % Gradient at the base point.
        g_here = getGradient(problem, x, storedb, key);

        % Gradient at a nearby point along xdot. That point is new, hence
        % it gets its own key in storedb.
        x_ahead = M.retr(x, xdot, t);
        key_ahead = storedb.getNewKey();
        g_ahead = getGradient(problem, x_ahead, storedb, key_ahead);

        % Bring the second gradient back to the tangent space at x so that
        % the two can be subtracted.
        g_ahead = M.transp(x_ahead, x, g_ahead);

        % Finite difference: (g_ahead - g_here)/t.
        hessfd = M.lincomb(x, 1/t, g_ahead, -1/t, g_here);

    end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/solvers/linesearch/linesearch.m
================================================
function [stepsize, newx, newkey, lsstats] = ...
                  linesearch(problem, x, d, f0, df0, options, storedb, key)
% Standard line-search algorithm (step size selection) for descent methods.
%
% function [stepsize, newx, newkey, lsstats] = 
%                 linesearch(problem, x, d, f0, df0, options, storedb, key)
%
% Base line-search algorithm for descent methods, built on a simple
% backtracking scheme. The search direction d must be a descent direction,
% which is signalled by df0 (the directional derivative of f at x along d)
% being negative.
%
% The algorithm is invariant under positive scaling of the cost function
% and under offsetting: replacing the cost f by 8*f+3, for example, does
% not change the returned step size. The returned step size also does not
% depend on the norm of the search direction d: only its direction counts.
%
% Below, the step is constructed as alpha*d, and the step size is the norm
% of that vector, thus: stepsize = alpha*norm_d. The step is executed by
% retracting the vector alpha*d from the current point x, giving newx.
%
% This line-search may create and maintain a structure called lsmem inside
% storedb.internal, in which it records what happened during the previous
% call. That record is used to make a first guess at the step size.
%
% Inputs
%
%  problem : structure holding the description of the optimization problem
%  x : current point on the manifold problem.M
%  d : tangent vector at x (descent direction) -- its norm is irrelevant
%  f0 : cost value at x
%  df0 : directional derivative at x along d
%  options : options structure (see in code for usage)
%  storedb : StoreDB object (handle class: passed by reference) for caching
%  key : key associated to point x in storedb
%
%  options, storedb and key are optional.
%
% Outputs
%
%  stepsize : norm of the vector retracted to reach newx from x.
%  newx : next iterate suggested by the line-search algorithm, such that
%         the retraction at x of the vector alpha*d reaches newx.
%  newkey : key associated to newx in storedb
%  lsstats : statistics about the line-search procedure
%            (stepsize, number of trials etc).
%
% See also: steepestdescent conjugategradient linesearch_adaptive

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Dec. 30, 2012.
% Contributors: 
% Change log: 
%
%   Sept. 13, 2013 (NB):
%       User control over the parameters of the linesearch via the options
%       ls_contraction_factor, ls_optimism, ls_suff_decr and ls_max_steps.
%       See in code for the effect of those.
% 
%   Sept. 13, 2013 (NB):
%       The automatic direction reversal feature was removed (it triggered
%       when df0 > 0). Direction reversal is a decision that needs to be
%       made by the solver, so it can know about it.
% 
%   Sept. 13, 2013 (NB):
%       The linesearch is now invariant under rescaling of the cost
%       function f. That is, if f is replaced by 8*f (and hence the
%       directional derivatives of f are scaled accordingly), the
%       stepsizes computed will not change.
% 
%   Nov. 7, 2013 (NB):
%       The linesearch is now invariant under rescaling of the search
%       direction d. The meaning of stepsize is also more clear in the
%       comments. Added a parameter ls_initial_stepsize to give users
%       control over the first step size trial.
%
%   April 3, 2015 (NB):
%       Works with the new StoreDB class system.
%
%   April 8, 2015 (NB):
%       Got rid of lsmem input/output: now maintained in storedb.internal.
%
%   Oct. 7, 2016 (NB):
%       Thanks to Wen Huang, a bug was corrected in the logic around
%       lsmem handling. Specifically, lsmem = storedb.internal.lsmem;
%       was erroneously coded as lsmem = storedb.internal;


    % storedb and key are optional: create whichever is missing.
    if nargin < 8
        if nargin < 7
            storedb = StoreDB();
        end
        key = storedb.getNewKey();
    end

    % Backtracking parameters. Each of them can be overwritten through the
    % options structure which is passed to the solver.
    if nargin < 6 || isempty(options)
        options = struct();
    end
    ls_defaults.ls_contraction_factor = .5;
    ls_defaults.ls_optimism = 1/.5;
    ls_defaults.ls_suff_decr = 1e-4;
    ls_defaults.ls_max_steps = 25;
    ls_defaults.ls_initial_stepsize = 1;
    options = mergeOptions(ls_defaults, options);

    % Norm of the search direction. Dividing by it is what makes the
    % algorithm invariant under rescaling of d: the direction of d is the
    % relevant information, while deciding how far to travel along it is
    % the very purpose of the line search.
    norm_d = problem.M.norm(x, d);

    % First trial step. If the previous call left a record behind, use it
    % to guess the step (as inspired from Nocedal&Wright, p59), then look
    % a little further (or less far) according to ls_optimism.
    alpha = NaN;
    if isfield(storedb.internal, 'lsmem') && ...
       isfield(storedb.internal.lsmem, 'f0')
        previous_f0 = storedb.internal.lsmem.f0;
        alpha = options.ls_optimism * (2*(f0 - previous_f0) / df0);
    end

    % Without a record (first call, perhaps), or when the guess is too
    % small (possibly even negative), fall back on the user supplied a
    % priori step size. That choice is invariant under rescaling of f and
    % of d, and is bounded away from zero for convergence guarantees; the
    % guess above must nevertheless be allowed to get close to zero, for
    % fine convergence.
    if isnan(alpha) || alpha*norm_d <= eps
        alpha = options.ls_initial_stepsize/norm_d;
    end


    % Try that first step.
    newx = problem.M.retr(x, d, alpha);
    newkey = storedb.getNewKey();
    newf = getCost(problem, newx, storedb, newkey);
    n_evals = 1;

    % Backtrack for as long as the Armijo sufficient decrease condition is
    % violated and the budget of cost evaluations is not exhausted.
    while newf > f0 + options.ls_suff_decr*alpha*df0

        % Shorten the step and evaluate the cost closer to x.
        alpha = options.ls_contraction_factor * alpha;
        newx = problem.M.retr(x, d, alpha);
        newkey = storedb.getNewKey();
        newf = getCost(problem, newx, storedb, newkey);
        n_evals = n_evals + 1;

        if n_evals >= options.ls_max_steps
            break;
        end

    end

    % No decrease was obtained: reject the step and stay at x.
    if newf > f0
        alpha = 0;
        newx = x;
        newkey = key;
    end

    % Seen from the outside, the step size is the norm of the vector which
    % was retracted to go from x to newx, and that vector is alpha*d.
    stepsize = alpha * norm_d;

    % Record the situation faced now, so that the next call knows
    % something about this decision.
    storedb.internal.lsmem.f0 = f0;
    storedb.internal.lsmem.df0 = df0;
    storedb.internal.lsmem.stepsize = stepsize;

    % Statistics, for possible analysis by the caller.
    lsstats = struct('costevals', n_evals, ...
                     'stepsize', stepsize, ...
                     'alpha', alpha);

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/solvers/linesearch/linesearch_adaptive.m
================================================
function [stepsize, newx, newkey, lsstats] = ...
  linesearch_adaptive(problem, x, d, f0, df0, options, storedb, key)
% Adaptive line search algorithm (step size selection) for descent methods.
%
% function [stepsize, newx, newkey, lsstats] = 
%        linesearch_adaptive(problem, x, d, f0, df0, options, storedb, key)
%
% Adaptive linesearch algorithm for descent methods, based on a simple
% backtracking method. Contrary to linesearch.m, this function is not
% invariant under rescaling of the search direction d. These two line
% search methods vary mainly in their strategy to pick the initial step
% size.
% 
% Below, the step is constructed as alpha*d, and the step size is the norm
% of that vector, thus: stepsize = alpha*norm_d. The step is executed by
% retracting the vector alpha*d from the current point x, giving newx.
%
% This line-search may create and maintain a structure called lsmem inside
% storedb.internal. This gives the linesearch the opportunity to remember
% what happened in the previous calls. This is typically used to make a
% first guess at the step size, based on previous events.
%
% The remembered suggestion (storedb.internal.lsmem.init_alpha) is only
% used if it exists and is a finite, positive scalar. In all other cases
% (first call, lsmem written by another line search, or a previous call
% that rejected its step and hence remembered alpha = 0), the first trial
% is options.ls_initial_stepsize / norm_d.
%
% Options (defaults in parentheses):
%   options.ls_contraction_factor (0.5): stepsize reduction per backtrack.
%   options.ls_suff_decr (0.5): Armijo sufficient decrease parameter.
%   options.ls_max_steps (10): budget of cost evaluations.
%   options.ls_initial_stepsize (1): first stepsize when nothing usable is
%                                    remembered.
%
% Inputs/Outputs : see help for linesearch
%
% See also: steepestdescent conjugategradients linesearch

% This file is part of Manopt: www.manopt.org.
% Original author: Bamdev Mishra, Dec. 30, 2012.
% Contributors: Nicolas Boumal
% Change log:
%
%   Sept. 13, 2013 (NB) :
%       The automatic direction reversal feature was removed (it triggered
%       when df0 > 0). Direction reversal is a decision that needs to be
%       made by the solver, so it can know about it.
%
%	Nov. 7, 2013 (NB) :
%       The whole function has been recoded to mimick more closely the new
%       version of linesearch.m. The parameters are available through the
%       options structure passed to the solver and have the same names and
%       same meaning as for the base linesearch. The information is logged
%       more reliably.
%
%   April 3, 2015 (NB):
%       Works with the new StoreDB class system.
%
%   April 8, 2015 (NB):
%       Got rid of lsmem input/output: now maintained in storedb.internal.


    % Allow omission of the key, and even of storedb.
    if ~exist('key', 'var')
        if ~exist('storedb', 'var')
            storedb = StoreDB();
        end
        key = storedb.getNewKey();
    end

    % Backtracking default parameters. These can be overwritten in the
    % options structure which is passed to the solver.
    default_options.ls_contraction_factor = .5;
    default_options.ls_suff_decr = .5;
    default_options.ls_max_steps = 10;
    default_options.ls_initial_stepsize = 1;
    
    if ~exist('options', 'var') || isempty(options)
        options = struct();
    end
    options = mergeOptions(default_options, options);
    
    contraction_factor = options.ls_contraction_factor;
    suff_decr = options.ls_suff_decr;
    max_ls_steps = options.ls_max_steps;
    initial_stepsize = options.ls_initial_stepsize;
    
    % Compute the norm of the search direction.
    norm_d = problem.M.norm(x, d);
    
    % Always start from the user supplied suggestion, so that alpha is
    % defined no matter what the memory contains.
    alpha = initial_stepsize / norm_d;
    
    % If a previous call to this line search left a usable suggestion in
    % lsmem, prefer it. The lsmem structure may exist without init_alpha
    % (other line searches store different fields in it), and init_alpha
    % may be 0 if the previous step was rejected: in both cases we keep
    % the fallback computed above.
    if isfield(storedb.internal, 'lsmem') && ...
                           isfield(storedb.internal.lsmem, 'init_alpha')
        remembered = storedb.internal.lsmem.init_alpha;
        if isnumeric(remembered) && isscalar(remembered) && ...
                                 isfinite(remembered) && remembered > 0
            % Pick initial step size based on where we were last time.
            alpha = remembered;
        end
    end

    % Make the chosen step and compute the cost there.
    newx = problem.M.retr(x, d, alpha);
    newkey = storedb.getNewKey();
    newf = getCost(problem, newx, storedb, newkey);
    cost_evaluations = 1;
    
    % Backtrack while the Armijo criterion is not satisfied
    while newf > f0 + suff_decr*alpha*df0
        
        % Reduce the step size,
        alpha = contraction_factor * alpha;
        
        % and look closer down the line
        newx = problem.M.retr(x, d, alpha);
        newkey = storedb.getNewKey();
        newf = getCost(problem, newx, storedb, newkey);
        cost_evaluations = cost_evaluations + 1;
        
        % Make sure we don't run out of budget
        if cost_evaluations >= max_ls_steps
            break;
        end
        
    end
    
    % If we got here without obtaining a decrease, we reject the step.
    if newf > f0
        alpha = 0;
        newx = x;
        newkey = key;
        newf = f0; %#ok<NASGU>
    end
    
    % As seen outside this function, stepsize is the size of the vector we
    % retract to make the step from x to newx. Since the step is alpha*d:
    stepsize = alpha * norm_d;

    % Fill lsmem with a suggestion for what the next initial step size
    % trial should be. On average we intend to do only one extra cost
    % evaluation. Notice how the suggestion is not about stepsize but about
    % alpha. This is the reason why this line search is not invariant under
    % rescaling of the search direction d.
    switch cost_evaluations
        case 1
            % If things go very well, push your luck.
            init_alpha = 2 * alpha;
        case 2
            % If things go reasonably well, try to keep pace.
            init_alpha = alpha;
        otherwise
            % If we backtracked a lot, the new stepsize is probably quite
            % small: try to recover.
            init_alpha = 2 * alpha;
    end
    % Note: if the step was rejected, init_alpha is 0 here. The next call
    % detects this and falls back to ls_initial_stepsize.
    storedb.internal.lsmem.init_alpha = init_alpha;
    
    % Return some statistics also, for possible analysis.
    lsstats.costevals = cost_evaluations;
    lsstats.stepsize = stepsize;
    lsstats.alpha = alpha;
    
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/solvers/linesearch/linesearch_decrease.m
================================================
function [stepsize, newx, newkey, lsstats] = ...
           linesearch_decrease(problem, x, d, f0, ~, options, storedb, key)
% Backtracking line-search which only asks for the cost not to go up.
%
% function [stepsize, newx, newkey, lsstats] = 
%        linesearch_decrease(problem, x, d, f0, df0, options, storedb, key)
%
% Simple backtracking: candidate points are obtained by retracting alpha*d
% at x, and alpha is shrunk geometrically for as long as the candidate's
% cost is not strictly below f0 (or until the evaluation budget is spent).
% No slope information is used, so d only needs to be a direction along
% which the cost strictly decreases; x itself may be a critical point.
% This is typically useful to escape saddle points, with escape directions
% computed by hessianextreme, for example.
%
% The step is alpha*d and the reported step size is its norm, that is,
% stepsize = alpha*norm_d. With the default initial stepsize (norm_d), the
% first candidate is the retraction of d itself at x.
%
% Options:
%   options.ls_max_steps (25): maximum number of cost evaluations.
%   options.ls_initial_stepsize (norm_d): first stepsize trial.
%   options.ls_contraction_factor (0.5): stepsize reduction per iteration.
%
% Inputs/Outputs : see help for linesearch.
%   f0 is the cost at x.
%   df0 is unused.
%   options, storedb and key are optional.
%   Thus, a simplified calling pattern is (with all outputs still
%   available): linesearch_decrease(problem, x, d, f0)
%
% See also: steepestdescent linesearch hessianextreme

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, April 8, 2015.
% Contributors: 
% Change log: 


    % The last two inputs are optional: create what the caller left out.
    if nargin < 8
        if nargin < 7
            storedb = StoreDB();
        end
        key = storedb.getNewKey();
    end
    
    dir_norm = problem.M.norm(x, d);

    % Default backtracking parameters; user options take precedence.
    fallback = struct('ls_contraction_factor', .5, ...
                      'ls_initial_stepsize', dir_norm, ...
                      'ls_max_steps', 25);
    if nargin < 6 || isempty(options)
        options = struct();
    end
    options = mergeOptions(fallback, options);
    
    shrink = options.ls_contraction_factor;
    budget = options.ls_max_steps;
    
    % Express the initial step size as a multiplier of d.
    alpha = options.ls_initial_stepsize / dir_norm;
    
    % First candidate.
    [newx, newkey, trial_cost] = ...
                     decrease_trial(problem, x, d, alpha, storedb);
    n_evals = 1;
    
    % Shorten the step for as long as the cost has not gone down.
    while trial_cost >= f0
        
        alpha = shrink * alpha;
        [newx, newkey, trial_cost] = ...
                     decrease_trial(problem, x, d, alpha, storedb);
        n_evals = n_evals + 1;
        
        % Stop once the budget of cost evaluations is spent.
        if n_evals >= budget
            break;
        end
        
    end
    
    % A candidate with strictly larger cost is rejected: stay at x.
    % Equal cost is tolerated, since moving away from a critical point
    % matters more here than decreasing the cost.
    if trial_cost > f0
        alpha = 0;
        newx = x;
        newkey = key;
    end
    
    % Norm of the vector alpha*d that was retracted to go from x to newx.
    stepsize = alpha * dir_norm;
    
    % Statistics for possible analysis by the caller.
    lsstats = struct('costevals', n_evals, ...
                     'stepsize', stepsize, ...
                     'alpha', alpha);
    
end


function [pt, ptkey, ptcost] = decrease_trial(problem, x, d, t, storedb)
% Retracts t*d at x, registers a fresh key for that point and evaluates
% the cost there.
    pt = problem.M.retr(x, d, t);
    ptkey = storedb.getNewKey();
    ptcost = getCost(problem, pt, storedb, ptkey);
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/solvers/linesearch/linesearch_hint.m
================================================
function [stepsize, newx, newkey, lsstats] = ...
             linesearch_hint(problem, x, d, f0, df0, options, storedb, key)
% Armijo backtracking started from the step size hinted by the problem.
%
% function [stepsize, newx, newkey, lsstats] = 
%            linesearch_hint(problem, x, d, f0, df0, options, storedb, key)
%
% Backtracking line-search for descent methods. The direction d must be a
% descent direction, meaning df0 (the directional derivative of the cost
% at x along d) is negative.
%
% The first candidate multiplier alpha is requested from the problem
% structure (typically via problem.linesearch). When the problem structure
% offers no such hint, alpha = 1 is tried first. As long as the Armijo
% sufficient decrease test fails, alpha is reduced geometrically, until
% the test passes or the evaluation budget is exhausted.
% 
% The step is alpha*d and the reported step size is its norm, that is,
% stepsize = alpha*norm_d. The step is executed by retracting alpha*d at
% the current point x, which yields newx.
%
% Options (defaults in parentheses):
%   options.ls_contraction_factor (0.5): reduction of alpha per backtrack.
%   options.ls_suff_decr (1e-4): Armijo sufficient decrease parameter.
%   options.ls_max_steps (25): budget of cost evaluations.
%   options.ls_backtrack (true): set to false to skip the backtracking.
%   options.ls_force_decrease (true): set to false to accept a candidate
%                                     even if it increases the cost.
%
% Inputs/Outputs : see help for linesearch
%
% See also: steepestdescent conjugategradients linesearch

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, July 17, 2014.
% Contributors: 
% Change log: 
%
%   April 3, 2015 (NB):
%       Works with the new StoreDB class system.
%
%   April 8, 2015 (NB):
%       Got rid of lsmem input/output.
%
%   July 20, 2017 (NB):
%       Now using alpha = 1 by default.
%
%   Aug. 28, 2017 (NB):
%       Adding two options: ls_backtrack and ls_force_decrease, both true
%       by default. Setting them to false can disable parts of the line
%       search that, respectively, execute an Armijo backtracking and
%       reject a cost increasing step.


    % storedb and key may be left out by the caller: create them then.
    if nargin < 8
        if nargin < 7
            storedb = StoreDB();
        end
        key = storedb.getNewKey();
    end

    % Default parameters; anything set in the options structure passed to
    % the solver takes precedence.
    fallback = struct('ls_contraction_factor', .5, ...
                      'ls_suff_decr', 1e-4, ...
                      'ls_max_steps', 25, ...
                      'ls_backtrack', true, ...
                      'ls_force_decrease', true);
    if nargin < 6 || isempty(options)
        options = struct();
    end
    options = mergeOptions(fallback, options);
    
    shrink = options.ls_contraction_factor;
    suff_decr = options.ls_suff_decr;
    budget = options.ls_max_steps;
    
    % Default first guess, replaced by the problem's own hint whenever the
    % problem structure is able to provide one.
    alpha = 1;
    if canGetLinesearch(problem)
        alpha = getLinesearch(problem, x, d, storedb, key);
    end
    
    % First candidate and its cost.
    newx = problem.M.retr(x, d, alpha);
    newkey = storedb.getNewKey();
    trial_cost = getCost(problem, newx, storedb, newkey);
    n_evals = 1;
    
    % Armijo backtracking (skipped entirely if ls_backtrack is false).
    while options.ls_backtrack && trial_cost > f0 + suff_decr*alpha*df0
        
        % Shorten the step and evaluate the cost at the closer candidate.
        alpha = shrink * alpha;
        newx = problem.M.retr(x, d, alpha);
        newkey = storedb.getNewKey();
        trial_cost = getCost(problem, newx, storedb, newkey);
        n_evals = n_evals + 1;
        
        % Stop once the budget of cost evaluations is spent.
        if n_evals >= budget
            break;
        end
        
    end
    
    % Unless told otherwise, refuse a candidate that increases the cost
    % and stay at x instead.
    if options.ls_force_decrease && trial_cost > f0
        alpha = 0;
        newx = x;
        newkey = key;
    end
    
    % The step from x to newx is the retraction of alpha*d, so the step
    % size seen by the caller is the norm of that vector.
    stepsize = alpha * problem.M.norm(x, d);
    
    % Statistics for possible analysis by the caller.
    lsstats = struct('costevals', n_evals, ...
                     'stepsize', stepsize, ...
                     'alpha', alpha);
    
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/solvers/neldermead/centroid.m
================================================
function y = centroid(M, x)
% Attempts the computation of a centroid of a set of points on a manifold.
% 
% function y = centroid(M, x)
%
% M is a structure representing a manifold.
% x is a cell of points on that manifold.
%
% The output y is obtained by running at most 15 iterations of
% trustregions on the cost y -> (1/2) * sum_i M.dist(y, x{i})^2 (a Karcher
% mean cost), starting from one of the points in x picked at random. The
% result is therefore only an approximation, and it is not deterministic.
%
% No Hessian is supplied for that cost, so the warning
% 'manopt:getHessian:approx' is silenced during the solve. Its previous
% state is restored before returning, including when the solver errors.

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Dec. 30, 2012.
% Contributors: 
% Change log: 


    % For now, just apply a few steps of gradient descent for Karcher means
    
    n = numel(x);
    
    problem.M = M;
    
    % Half the sum of squared distances from y to each of the points.
    problem.cost = @cost;
    function val = cost(y)
        val = 0;
        for i = 1 : n
            val = val + M.dist(y, x{i})^2;
        end
        val = val/2;
    end

    % Gradient of the cost above: minus the sum of the logarithms of the
    % points at y.
    problem.grad = @grad;
    function g = grad(y)
        g = M.zerovec(y);
        for i = 1 : n
            g = M.lincomb(y, 1, g, -1, M.log(y, x{i}));
        end
    end

    % This line can be uncommented to check that the gradient is indeed
    % correct. This should always be the case if the dist and the log
    % functions in the manifold are correct.
    % checkgradient(problem); pause;
    
    % Silence the approximate Hessian warning for the duration of the
    % solve, remembering its current state so it can be put back.
    warning_id = 'manopt:getHessian:approx';
    query = warning('query', warning_id);
    warning('off', warning_id);
    
    options.verbosity = 0;
    options.maxiter = 15;
    
    % The warning state is global: make sure it is restored even if the
    % solver throws, then let the error propagate unchanged.
    try
        y = trustregions(problem, x{randi(n)}, options);
    catch err
        warning(query.state, warning_id);
        rethrow(err);
    end
    warning(query.state, warning_id);

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/solvers/neldermead/neldermead.m
================================================
function [x, cost, info, options] = neldermead(problem, x, options)
% Nelder Mead optimization algorithm for derivative-free minimization.
%
% function [x, cost, info, options] = neldermead(problem)
% function [x, cost, info, options] = neldermead(problem, x0)
% function [x, cost, info, options] = neldermead(problem, x0, options)
% function [x, cost, info, options] = neldermead(problem, [], options)
%
% Apply a Nelder-Mead minimization algorithm to the problem defined in
% the problem structure, starting with the population x0 if it is provided
% (otherwise, a random population on the manifold is generated). A
% population is a cell containing points on the manifold. The number of
% elements in the cell must be dim+1, where dim is the dimension of the
% manifold: problem.M.dim().
%
% To specify options whilst not specifying an initial guess, give x0 as []
% (the empty matrix).
%
% This algorithm is a plain adaptation of the Euclidean Nelder-Mead method
% to the Riemannian setting. It comes with no convergence guarantees and
% there is room for improvement. In particular, we compute centroids as
% Karcher means, which seems overly expensive: cheaper forms of
% average-like quantities might work better.
% This solver is useful nonetheless for problems for which no derivatives
% are available, and it may constitute a starting point for the development
% of other Riemannian derivative-free methods.
%
% None of the options are mandatory. The defaults set in this file are
% (they are merged on top of getGlobalDefaults(), and user options are
% merged on top of the result):
%   options.storedepth (0)
%   options.maxcostevals (max(1000, 2*dim))
%   options.maxiter (max(2000, 4*dim))
%   options.reflection (1): multiplier for the reflection step.
%   options.expansion (2): multiplier for the expansion step.
%   options.contraction (0.5): multiplier for the contraction steps.
% The code also reads options.verbosity: iteration details are printed if
% it is >= 2, and the reason for stopping is printed if it is >= 1.
%
% Outputs:
%   x is the point of the final simplex with lowest cost.
%   cost is the cost at that point.
%   info is a struct array with one entry per iteration, with fields iter,
%        cost (best cost in the simplex), costevals and time (cumulative),
%        possibly augmented by options.statsfun.
%   options is the options structure after merging with the defaults.
%
% Requires problem.M.pairmean(x, y) to be defined (computes the average
% between two points, x and y). The code also calls problem.M.log,
% problem.M.exp, problem.M.rand and problem.M.dim, as well as the manifold
% functions used by centroid.
%
% If options.statsfun is defined, it will receive a cell of points x (the
% current simplex being considered at that iteration), and, if required,
% one store structure corresponding to the best point, x{1}. The points are
% ordered by increasing cost: f(x{1}) <= f(x{2}) <= ... <= f(x{dim+1}),
% where dim = problem.M.dim().
%
% Based on http://www.optimization-online.org/DB_FILE/2007/08/1742.pdf.
%
% See also: manopt/solvers/pso/pso

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Dec. 30, 2012.
% Contributors: 
% Change log: 
%
%   April 4, 2015 (NB):
%       Working with the new StoreDB class system.
%       Clarified interactions with statsfun and store.
%
%   Nov. 11, 2016 (NB):
%       If options.verbosity is < 2, prints minimal output.

    
    % Verify that the problem description is sufficient for the solver.
    if ~canGetCost(problem)
        warning('manopt:getCost', ...
                'No cost provided. The algorithm will likely abort.');  
    end
    
    % Dimension of the manifold: the simplex has dim+1 vertices.
    dim = problem.M.dim();

    % Set local defaults here
    localdefaults.storedepth = 0;                     % no need for caching
    localdefaults.maxcostevals = max(1000, 2*dim);
    localdefaults.maxiter = max(2000, 4*dim);
    
    % Multipliers applied to the direction xbar -> worst point in the
    % reflection, expansion and contraction steps below.
    localdefaults.reflection = 1;
    localdefaults.expansion = 2;
    localdefaults.contraction = .5;
    % The shrinkage coefficient is not an option: it is forced to .5 to
    % enable using pairmean functions in manifolds.
    % localdefaults.shrinkage = .5;
    
    % Merge global and local defaults, then merge w/ user options, if any.
    localdefaults = mergeOptions(getGlobalDefaults(), localdefaults);
    if ~exist('options', 'var') || isempty(options)
        options = struct();
    end
    options = mergeOptions(localdefaults, options);
    
    % Start timing for initialization.
    timetic = tic();
    
    % If no initial simplex x is given by the user, generate one at random.
    if ~exist('x', 'var') || isempty(x)
        x = cell(dim+1, 1);
        for i = 1 : dim+1
            x{i} = problem.M.rand();
        end
    end
    
    % Create a store database and a key for each point.
    % Note: the loops below run over 1 : dim+1, so a user supplied x is
    % expected to contain exactly dim+1 points.
    storedb = StoreDB(options.storedepth);
    key = cell(size(x));
    for i = 1 : dim+1;
        key{i} = storedb.getNewKey();
    end
    
    % Compute objective-related quantities for x, and setup a
    % function evaluations counter.
    costs = zeros(dim+1, 1);
    for i = 1 : dim+1
        costs(i) = getCost(problem, x{i}, storedb, key{i});
    end
    costevals = dim+1;
    
    % Sort simplex points by cost (increasing), keeping the points and
    % their keys aligned with the sorted costs.
    [costs, order] = sort(costs);
    x = x(order);
    key = key(order);
    
    % Iteration counter.
    % At any point, iter is the number of fully executed iterations so far.
    iter = 0;
    
    % Save stats in a struct array info, and preallocate.
    % savestats will be called twice for the initial iterate (number 0),
    % which is unfortunate, but not problematic.
    stats = savestats();
    info(1) = stats;
    % Assigning to a field of the last element grows the struct array to
    % its preallocated length (capped at 10000 entries).
    info(min(10000, options.maxiter+1)).iter = [];
    
    % Start iterating until stopping criterion triggers.
    while true
        
        % Make sure we don't use too much memory for the store database.
        storedb.purge();
        
        % Record the state reached after the iter iterations executed so
        % far: it is stored at index iter+1 since info(1) is iterate 0.
        stats = savestats();
        info(iter+1) = stats; %#ok<AGROW>
        iter = iter + 1;
        
        % Start timing this iteration.
        timetic = tic();
        
        % Sort simplex points by cost: the previous iteration replaced one
        % or more vertices, so the ordering must be re-established.
        [costs, order] = sort(costs);
        x = x(order);
        key = key(order);

        % Log / display iteration information here.
        if options.verbosity >= 2
            fprintf('Cost evals: %7d\tBest cost: %+.4e\t', ...
                    costevals, costs(1));
        end
        
        % Run standard stopping criterion checks.
        [stop, reason] = stoppingcriterion(problem, x, options, info, iter);
    
        if stop
            if options.verbosity >= 1
                % Note: reason is used as the format string here.
                fprintf([reason '\n']);
            end
            break;
        end
        
        % Compute a centroid for the dim best points.
        xbar = centroid(problem.M, x(1:end-1));
        
        % Compute the direction for moving along the axis xbar - worst x:
        % vec is the tangent vector at xbar pointing towards the worst
        % point, so that negative multiples of vec point away from it.
        vec = problem.M.log(xbar, x{end});
        
        % Reflection step: move from xbar away from the worst point.
        xr = problem.M.exp(xbar, vec, -options.reflection);
        keyr = storedb.getNewKey();
        costr = getCost(problem, xr, storedb, keyr);
        costevals = costevals + 1;
        
        % If the reflected point is honorable, drop the worst point,
        % replace it by the reflected point and start new iteration.
        % Honorable means: not better than the best point, but strictly
        % better than the second to worst point.
        if costr >= costs(1) && costr < costs(end-1)
            if options.verbosity >= 2
                fprintf('Reflection\n');
            end
            costs(end) = costr;
            x{end} = xr;
            key{end} = keyr;
            continue;
        end
        
        % If the reflected point is better than the best point, expand:
        % try going further in the same direction, and keep whichever of
        % the expanded and the reflected points has the lower cost.
        if costr < costs(1)
            xe = problem.M.exp(xbar, vec, -options.expansion);
            keye = storedb.getNewKey();
            coste = getCost(problem, xe, storedb, keye);
            costevals = costevals + 1;
            if coste < costr
                if options.verbosity >= 2
                    fprintf('Expansion\n');
                end
                costs(end) = coste;
                x{end} = xe;
                key{end} = keye;
                continue;
            else
                if options.verbosity >= 2
                    fprintf('Reflection (failed expansion)\n');
                end
                costs(end) = costr;
                x{end} = xr;
                key{end} = keyr;
                continue;
            end
        end
        
        % If the reflected point is worse than the second to worst point,
        % contract. If the contracted point is not accepted, execution
        % falls through to the shrinkage step below.
        if costr >= costs(end-1)
            if costr < costs(end)
                % do an outside contraction: the reflected point is still
                % better than the worst point, so look on the far side of
                % xbar, but closer to xbar than the reflected point.
                xoc = problem.M.exp(xbar, vec, -options.contraction);
                keyoc = storedb.getNewKey();
                costoc = getCost(problem, xoc, storedb, keyoc);
                costevals = costevals + 1;
                if costoc <= costr
                    if options.verbosity >= 2
                        fprintf('Outside contraction\n');
                    end
                    costs(end) = costoc;
                    x{end} = xoc;
                    key{end} = keyoc;
                    continue;
                end
            else
                % do an inside contraction: the reflected point is no
                % better than the worst point, so look between xbar and
                % the worst point instead.
                xic = problem.M.exp(xbar, vec, options.contraction);
                keyic = storedb.getNewKey();
                costic = getCost(problem, xic, storedb, keyic);
                costevals = costevals + 1;
                if costic <= costs(end)
                    if options.verbosity >= 2
                        fprintf('Inside contraction\n');
                    end
                    costs(end) = costic;
                    x{end} = xic;
                    key{end} = keyic;
                    continue;
                end
            end
        end
        
        % If we get here, shrink the simplex around x{1}: every point but
        % the best one is replaced by its pairmean with the best point,
        % which costs dim additional cost evaluations.
        if options.verbosity >= 2
            fprintf('Shrinkage\n');
        end
        for i = 2 : dim+1
            x{i} = problem.M.pairmean(x{1}, x{i});
            key{i} = storedb.getNewKey();
            costs(i) = getCost(problem, x{i}, storedb, key{i});
        end
        costevals = costevals + dim;
        
    end
    
    
    % Discard the unused preallocated entries of info.
    info = info(1:iter);
    
    % Iteration done: return only the best point found. The simplex was
    % sorted right before the stopping criterion triggered, so the best
    % point is the first one.
    cost = costs(1);
    x = x{1};
    % NOTE(review): key is not used after this assignment in this file.
    key = key{1};
    
    
    % Routine in charge of collecting the current iteration stats.
    % This is a nested function: it reads iter, costs, costevals, timetic,
    % info, x, key, storedb, problem and options from the enclosing scope.
    function stats = savestats()
        stats.iter = iter;
        stats.cost = costs(1);
        stats.costevals = costevals;
        % The logged time is cumulative: time logged for the previous
        % entry plus time elapsed since timetic was last reset.
        if iter == 0
            stats.time = toc(timetic);
        else
            stats.time = info(iter).time + toc(timetic);
        end
        % The statsfun can only possibly receive one store structure. We
        % pass the key to the best point, so that the best point's store
        % will be passed. But the whole cell x of points is passed through.
        stats = applyStatsfun(problem, x, storedb, key{1}, options, stats);
    end
    
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/solvers/preconditioners/preconhessiansolve.m
================================================
function preconfun = preconhessiansolve(problem, options)
% Preconditioner based on the inverse Hessian, by solving linear systems.
%
% function preconfun = preconhessiansolve(problem)
% function preconfun = preconhessiansolve(problem, options)
%
% Input:
%
% problem is a Manopt problem structure which already contains the
% manifold and enough information to compute the Hessian of the cost.
% options is an optional options structure (currently ignored). Beware: if
% the Hessian is not positive definite, then neither is its inverse, and
% this preconditioner is not suitable.
%
% If 'problem' does not allow computing the Hessian, a warning is issued
% and an approximation of the Hessian is used in its place; the present
% preconditioner then attempts to invert that approximation (although it
% may not be a linear operator). If no approximate Hessian is provided
% either, a generic approximation is used. Behavior is unspecified.
%
% Output:
% 
% preconfun is a function handle which encapsulates a generic
% preconditioner of the Hessian, based on solving linear systems of the
% form:
%   Hessian(x)[preconfun(x, xdot)] = xdot,
% where x is a point on the manifold, xdot (a tangent vector at x) is the
% input to the preconditioner and preconfun(x, xdot) (also a tangent
% vector at x) is its output. The solve may be approximate.
% 
% The returned preconfun has this calling pattern:
% 
%   function precxdot = preconfun(x, xdot)
%   function precxdot = preconfun(x, xdot, storedb)
%   function precxdot = preconfun(x, xdot, storedb, key)
% 
% x is a point on the manifold problem.M, xdot is a tangent vector to that
% manifold at x, storedb is a StoreDB object, and key is the StoreDB key to
% point x.
%
% Usage:
%
% Typically, the user will set problem.M and other fields to define the
% cost, the gradient and the Hessian (typically, problem.cost, problem.grad
% and problem.hess, or problem.egrad and problem.ehess). Then, to use this
% generic purpose Hessian preconditioner:
%
%   problem.precon = preconhessiansolve(problem, options);
%
% Passing that problem structure to the conjugategradients solver
% (which uses preconditioning) configured in steepest descent mode results
% in a type of Riemannian Newton method.
%
% See also: conjugategradients

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, April 9, 2015.
% Contributors: 
% Change log: 

    % There are no local defaults yet: simply make sure options is a
    % structure, merged with the (empty) set of defaults.
    if nargin < 2 || isempty(options)
        options = struct();
    end
    options = mergeOptions(struct(), options);

    % We need the Hessian, or at least an approximation of it. If the
    % problem description explicitly provides an approximate Hessian, no
    % warning is given: the user seems to be aware of the issue.
    hessian_available = canGetHessian(problem) || ...
                        canGetApproxHessian(problem);
    if ~hessian_available
        warning('manopt:getHessian:approx', ...
               ['No Hessian provided. Using an FD approximation instead.\n' ...
                'To disable this warning: warning(''off'', ''manopt:getHessian:approx'')']);
        problem.approxhess = approxhessianFD(problem);
    end

    % Going through a nested function (rather than returning a handle to
    % the helper directly) is what makes storedb and key optional.
    preconfun = @funhandle;
    function precxdot = funhandle(x, xdot, storedb, key)
        % Create whichever of storedb and key the caller did not supply.
        if nargin < 4
            if nargin < 3
                storedb = StoreDB();
            end
            key = storedb.getNewKey();
        end
        precxdot = hessiansolvehelper(options, problem, x, xdot, ...
                                      storedb, key);
    end
    
end


function precxdot = hessiansolvehelper(options, problem, x, xdot, storedb, key)
% This function does the actual work.
%
% Approximately solves Hessian(x)[precxdot] = xdot for the tangent vector
% precxdot, by minimizing the least-squares cost
%   u -> norm(Hessian(x)[u] - xdot)^2 / norm(xdot)
% over the tangent space to problem.M at x, starting from the zero vector.
%
% Inputs:
%   options  : options structure; options.solver (default: @trustregions)
%              and options.verbosity (default: 0) are set here if absent,
%              and the structure is passed on to manoptsolve.
%   problem  : Manopt problem structure for which the Hessian is used.
%   x        : point on problem.M.
%   xdot     : tangent vector at x (right-hand side of the system).
%   storedb, key : StoreDB object and key of x, passed to getHessian.
%
% Output:
%   precxdot : tangent vector at x returned by the solver. If the norm of
%              xdot is smaller than eps, the zero vector at x is returned.
%
% All operations on tangent vectors go through the functions of problem.M
% (norm, lincomb, zerovec) rather than through arithmetic operators, so
% that this also works when tangent vectors are not plain arrays.
    
    % Exclude the case where xdot is zero
    norm_xdot = problem.M.norm(x, xdot);
    if norm_xdot < eps
        precxdot = problem.M.zerovec(x);
        return;
    end
    
    % Get a shorthand for the Hessian of the cost on M at x.
    hessian = @(u) getHessian(problem, x, u, storedb, key);
    
    % Setup an optimization problem on the tangent space to problem.M at x.
    M = problem.M;
    tgtspace = tangentspacefactory(M, x);
    prblm.M = tgtspace;
    prblm.cost = @cost;
    prblm.grad = @grad;
    % Common scaling of the gradient and of the Hessian of the cost below.
    scaling = 2 / norm_xdot;
    prblm.hess = @(u, udot) M.lincomb(x, scaling, hessian(hessian(udot)));
    
    % The residue Hessian(x)[u] - xdot is cached in the store, so that it
    % is computed only once per point u for the cost and the gradient.
    function [f, store] = cost(u, store)
        if ~isfield(store, 'residue')
            Hu = hessian(u);
            store.residue = M.lincomb(x, 1, Hu, -1, xdot);
        end
        f = M.norm(x, store.residue).^2 / norm_xdot;
    end
    function [g, store] = grad(u, store)
        if ~isfield(store, 'residue')
            Hu = hessian(u);
            store.residue = M.lincomb(x, 1, Hu, -1, xdot);
        end
        g = M.lincomb(x, scaling, hessian(store.residue));
    end
    
    % checkgradient(prblm); pause;
    % checkhessian(prblm); pause;
    
    localdefaults.solver = @trustregions;
    localdefaults.verbosity = 0;
    % Merge local defaults with user options, if any.
    if ~exist('options', 'var') || isempty(options)
        options = struct();
    end
    options = mergeOptions(localdefaults, options);
    
    % Solve the linear system by solving the optimization problem. The
    % initial iterate is the zero tangent vector at x: the base point must
    % be passed to zerovec, as some manifolds need it to build that vector.
    precxdot = manoptsolve(prblm, M.zerovec(x), options);
    
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/solvers/pso/pso.m
================================================
function [xbest, fbest, info, options] = pso(problem, x, options)
% Particle swarm optimization (PSO) for derivative-free minimization.
%
% function [x, cost, info, options] = pso(problem)
% function [x, cost, info, options] = pso(problem, x0)
% function [x, cost, info, options] = pso(problem, x0, options)
% function [x, cost, info, options] = pso(problem, [], options)
%
% Apply the Particle Swarm Optimization minimization algorithm to
% the problem defined in the problem structure, starting with the
% population x0 if it is provided (otherwise, a random population on the
% manifold is generated). A population is a cell containing points on the
% manifold. If the number of elements in the cell does not match
% options.populationsize, that option is overwritten (with a warning).
%
% To specify options whilst not specifying an initial guess, give x0 as []
% (the empty matrix).
%
% The manifold problem.M must provide a logarithmic map M.log(x, y) (an
% approximate one will do), otherwise an error is thrown.
%
% Outputs:
%   x (xbest)    : best point found by the swarm.
%   cost (fbest) : cost at that point.
%   info         : struct-array with one entry per iteration, with fields
%                  iter, cost (best cost so far), costevals, x (the swarm),
%                  v (the velocities), xbest, time, and one cell per
%                  additional field created by options.statsfun (holding
%                  one value per particle).
%   options      : the options structure actually used.
%
% None of the options are mandatory. Defaults set in this file, where dim
% is the dimension of the manifold:
%   maxcostevals (max(5000, 2*dim))
%   maxiter (max(500, 4*dim))
%   populationsize (min(40, 10*dim))
%   nostalgia (1.4)  : weight of the pull toward a particle's own best.
%   social (1.4)     : weight of the pull toward the swarm's best.
%   storedepth (0)   : no need for caching.
%
% Based on the original PSO description in
%   http://particleswarm.info/nn951942.ps.
%
% See also: manopt/solvers/neldermead/neldermead

% This file is part of Manopt: www.manopt.org.
% Original author: Pierre Borckmans, Dec. 30, 2012.
% Contributors: Bamdev Mishra, June 18, 2014.
% Change log:
%
%   June 18, 2014 (BM) :
%       Modified for handling product manifolds. Still need overall cleanup
%       to avoid potential issues, in particular wrt logarithms.
%
%   June 23, 2014 (NB) :
%       Added some logic for handling of the populationsize option.
%
%   April 5, 2015 (NB):
%       Working with the new StoreDB class system. The code keeps track of
%       storedb keys for all points, even though it is not strictly
%       necessary. This extra bookkeeping should help maintaining the code.
    
    
    % Verify that the problem description is sufficient for the solver.
    if ~canGetCost(problem)
        warning('manopt:getCost', ...
            'No cost provided. The algorithm will likely abort.');
    end
    
    % Dimension of the manifold
    dim = problem.M.dim();
    
    % Set local defaults here
    localdefaults.storedepth = 0;                   % no need for caching
    localdefaults.maxcostevals = max(5000, 2*dim);
    localdefaults.maxiter = max(500, 4*dim);
    
    localdefaults.populationsize = min(40, 10*dim);
    localdefaults.nostalgia = 1.4;
    localdefaults.social = 1.4;
    
    % Merge global and local defaults, then merge w/ user options, if any.
    localdefaults = mergeOptions(getGlobalDefaults(), localdefaults);
    if ~exist('options', 'var') || isempty(options)
        options = struct();
    end
    options = mergeOptions(localdefaults, options);
    
    
    if ~isfield(problem.M, 'log') % BM
        error(['The manifold problem.M must provide a logarithmic map, ' ...
               'M.log(x, y). An approximate logarithm will do too.']);
    end
    
    % Start timing for initialization
    timetic = tic();
    
    % If no initial population x is given by the user,
    % generate one at random.
    if ~exist('x', 'var') || isempty(x)
        x = cell(options.populationsize, 1);
        for i = 1 : options.populationsize
            x{i} = problem.M.rand();
        end
    else
        if ~iscell(x)
            error('The initial guess x0 must be a cell (a population).');
        end
        if length(x) ~= options.populationsize
            options.populationsize = length(x);
            warning('manopt:pso:size', ...
                    ['The option populationsize was forced to the size' ...
                     ' of the given initial population x0.']);
        end
    end
    
    
    % Create a store database and a key for each point x{i}
    storedb = StoreDB(options.storedepth);
    xkey = cell(size(x));
    for i = 1 : numel(x)
        xkey{i} = storedb.getNewKey();
    end
    
    % Initialize personal best positions to the initial population
    y = x;
    ykey = xkey;
    
    % Save a copy of the swarm at the previous iteration
    xprev = x;
    xprevkey = xkey; %#ok<NASGU>
    
    % Initialize velocities for each particle
    v = cell(size(x));
    for i = 1 : numel(x)
        % random velocity to improve initial exploration
        v{i} = problem.M.randvec(x{i});
        % or null velocity
        % v{i} = problem.M.zerovec();
    end
    
    % Compute cost for each particle xi,
    % initialize personal best costs,
    % and setup a function evaluations counter.
    costs = zeros(size(x));
    for i = 1 : numel(x)
        costs(i) = getCost(problem, x{i}, storedb, xkey{i});
    end
    fy = costs;
    costevals = options.populationsize;
    
    % Identify the best particle and store its cost/position
    [fbest, imin] = min(costs);
    xbest = x{imin};
    xbestkey = xkey{imin}; %#ok<NASGU>
    
    % Iteration counter (at any point, iter is the number of fully executed
    % iterations so far)
    iter = 0;
    
    % Save stats in a struct array info, and preallocate.
    % savestats will be called twice for the initial iterate (number 0),
    % which is unfortunate, but not problematic.
    stats = savestats();
    info(1) = stats;
    info(min(10000, options.maxiter+1)).iter = [];
    
    % Start iterating until stopping criterion triggers
    while true
        
        stats = savestats();
        info(iter+1) = stats; %#ok<AGROW>
        iter = iter + 1;
        
        % Make sure we don't use too much memory for the store database
        storedb.purge();
        
        % Log / display iteration information here.
        if options.verbosity >= 2
            fprintf('Cost evals: %7d\tBest cost: %+.8e\n', costevals, fbest);
        end
        
        % Start timing this iteration
        timetic = tic();
        
        % BM: Run standard stopping criterion checks.
        % BM: Stop if any particle triggers a stopping criterion.
        % The loop must run over the whole swarm (1 : numel(x)): iterating
        % over numel(x) alone would only ever test the last particle.
        for i = 1 : numel(x)
            [stop, reason] = stoppingcriterion(problem, x{i}, options, info, iter);
            if stop
                break;
            end
        end
        
        if stop
            if options.verbosity >= 1
                fprintf([reason '\n']);
            end
            break;
        end
        
        
        % Compute the inertia factor
        % (linearly decreasing from .9 to .4, from iter=0 to maxiter)
        w = 0.4 + 0.5*(1-iter/options.maxiter);
        
        % Compute velocities
        for i = 1 : numel(x)
            
            % Get the position and past best position of particle i
            xi = x{i};
            yi = y{i};
            
            % Get the previous position and velocity of particle i
            xiprev = xprev{i};
            vi = v{i};
            
            % Compute new velocity of particle i,
            % composed of 3 contributions:
            %  - inertia: previous velocity, transported to xi and damped;
            %  - nostalgia: random pull toward the particle's own best yi;
            %  - social: random pull toward the swarm's best xbest.
            inertia = problem.M.lincomb(xi, w , problem.M.transp(xiprev, xi, vi));
            nostalgia = problem.M.lincomb(xi, rand(1)*options.nostalgia, problem.M.log(xi, yi) );
            social = problem.M.lincomb(xi, rand(1) * options.social, problem.M.log(xi, xbest));
            
            v{i} = problem.M.lincomb(xi, 1, inertia, 1, problem.M.lincomb(xi, 1, nostalgia, 1, social));
            
        end
        
        % Backup the current swarm positions
        xprev = x;
        xprevkey = xkey; %#ok<NASGU>
        
        % Update positions, personal bests and global best
        for i = 1 : numel(x)
            % compute new position of particle i
            x{i} = problem.M.retr(x{i}, v{i});
            xkey{i} = storedb.getNewKey();
            % compute new cost of particle i
            fxi = getCost(problem, x{i}, storedb, xkey{i});
            costevals = costevals + 1;
            
            % update costs of the swarm
            costs(i) = fxi;
            % update self-best if necessary
            if fxi < fy(i)
                % update self-best cost and position
                fy(i) = fxi;
                y{i} = x{i};
                ykey{i} = xkey{i};
                % update global-best if necessary
                if fy(i) < fbest
                    fbest = fy(i);
                    xbest = y{i};
                    xbestkey = ykey{i}; %#ok<NASGU>
                end
            end
        end
    end
    
    
    % Drop the unused, preallocated tail of the info struct-array.
    info = info(1:iter);
     
    % Routine in charge of collecting the current iteration stats
    function stats = savestats()
        stats.iter = iter;
        stats.cost = fbest;
        stats.costevals = costevals;
        stats.x = x;
        stats.v = v;
        stats.xbest = xbest;
        if iter == 0
            stats.time = toc(timetic);
        else
            stats.time = info(iter).time + toc(timetic);
        end
        
        % BM: Begin storing user defined stats for the entire population
        % A trial call on the first particle reveals which fields statsfun
        % appends; each such field becomes a cell with one entry per particle.
        num_old_fields = size(fieldnames(stats), 1);
        trialstats = applyStatsfun(problem, x{1}, storedb, xkey{1}, options, stats);% BM
        new_fields = fieldnames(trialstats);
        num_new_fields = size(fieldnames(trialstats), 1);
        num_additional_fields =  num_new_fields - num_old_fields; % User has defined new fields
        for jj = 1 : num_additional_fields % New fields added
            tempfield = new_fields(num_old_fields + jj);
            stats.(char(tempfield)) = cell(options.populationsize, 1);
        end
        for ii = 1 : options.populationsize % Adding information for each element of the population
            tempstats = applyStatsfun(problem, x{ii}, storedb, xkey{ii}, options, stats);
            for jj = 1 : num_additional_fields
                tempfield = new_fields(num_old_fields + jj);
                tempfield_value = tempstats.(char(tempfield));
                stats.(char(tempfield)){ii} = tempfield_value;
            end
        end
        % BM: End storing
       
    end
    
    
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/solvers/steepestdescent/steepestdescent.m
================================================
function [x, cost, info, options] = steepestdescent(problem, x, options)
% Riemannian steepest descent (gradient descent) solver for Manopt.
%
% function [x, cost, info, options] = steepestdescent(problem)
% function [x, cost, info, options] = steepestdescent(problem, x0)
% function [x, cost, info, options] = steepestdescent(problem, x0, options)
% function [x, cost, info, options] = steepestdescent(problem, [], options)
%
% Minimizes the cost described by the problem structure with gradient
% descent plus line search. Iterations start at x0 when it is given, and at
% a random point of the manifold otherwise. Pass x0 = [] (the empty matrix)
% to supply options without supplying an initial guess.
%
% In most of the examples bundled with the toolbox (see link below), the
% solver can be replaced by the present one if need be.
%
% Outputs: x is the last iterate and cost is its cost value. info is a
% struct-array describing the iterations, with fields:
%   iter : the iteration number (0 for the initial guess)
%   cost : cost value
%   time : elapsed time in seconds
%   gradnorm : Riemannian norm of the gradient
%   stepsize : norm of the last tangent vector retracted
%   linesearch : information logged by options.linesearch
%   And possibly additional information logged by options.statsfun.
% For example, [info.gradnorm] is the vector of successive gradient norms.
%
% Every option has a default value, so the options structure is optional.
% To override a default, set options.optionname, where optionname is one of
% the following (default value between parentheses):
%
%   tolgradnorm (1e-6)
%       Stop when the norm of the gradient drops below this value.
%   maxiter (1000)
%       Stop after this many iterations.
%   maxtime (Inf)
%       Stop after this many seconds.
%   minstepsize (1e-10)
%       Stop if the line search returns a displacement vector (to be
%       retracted) whose norm is smaller than this value.
%   linesearch (@linesearch or @linesearch_hint)
%       Handle to the line search function. It receives the options
%       structure too, so line search parameters can be set there; see the
%       documentation of each line search. @linesearch_adaptive, in
%       /manopt/linesearch/linesearch_adaptive.m, is another one shipped
%       with Manopt. The default is @linesearch_hint when the problem
%       structure provides a line search hint, and @linesearch otherwise.
%   statsfun (none)
%       Handle to a function called after each iteration, so that extra
%       statistics can be logged; they are returned in the info struct.
%       See the generic Manopt documentation about solvers.
%   stopfun (none)
%       Handle to a function called at each iteration, so that extra
%       stopping criteria can be specified. See the generic Manopt
%       documentation about solvers.
%   verbosity (3)
%       Integer tuning how much text is printed in the command window.
%       The higher, the more output. 0 means silent.
%   storedepth (2)
%       Maximum number of points x of the manifold for which a store
%       structure is kept in the storedb. Irrelevant if Manopt's caching
%       features are not used. For the SD algorithm, a store depth of 2
%       should always be sufficient.
%
%
% See also: conjugategradient trustregions manopt/solvers/linesearch manopt/examples

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Dec. 30, 2012.
% Contributors: 
% Change log: 
%
%   April 3, 2015 (NB):
%       Works with the new StoreDB class system.


    % Sanity checks on the problem description.
    if ~canGetCost(problem)
        warning('manopt:getCost', ...
                'No cost provided. The algorithm will likely abort.');
    end
    if ~(canGetGradient(problem) || canGetApproxGradient(problem))
        % No warning is issued when the user explicitly supplied an
        % approximate gradient: they are then aware of the situation.
        warning('manopt:getGradient:approx', ...
               ['No gradient provided. Using an FD approximation instead (slow).\n' ...
                'It may be necessary to increase options.tolgradnorm.\n' ...
                'To disable this warning: warning(''off'', ''manopt:getGradient:approx'')']);
        problem.approxgrad = approxgradientFD(problem);
    end

    % Defaults specific to this solver.
    localdefaults = struct('minstepsize', 1e-10, ...
                           'maxiter', 1000, ...
                           'tolgradnorm', 1e-6);

    % The default line search depends on whether the problem structure
    % comes with a hint for line-search algorithms.
    if canGetLinesearch(problem)
        localdefaults.linesearch = @linesearch_hint;
    else
        localdefaults.linesearch = @linesearch;
    end

    % Precedence, lowest to highest: global defaults, local defaults,
    % user-supplied options.
    localdefaults = mergeOptions(getGlobalDefaults(), localdefaults);
    if ~exist('options', 'var') || isempty(options)
        options = struct();
    end
    options = mergeOptions(localdefaults, options);

    % Time spent on initialization is charged to iteration 0.
    tstart = tic();

    % Without a user-supplied initial point, draw one at random.
    if ~exist('x', 'var') || isempty(x)
        x = problem.M.rand();
    end

    % Cache database, and the key identifying the current x in it.
    storedb = StoreDB(options.storedepth);
    key = storedb.getNewKey();

    % Cost, gradient and gradient norm at the initial point.
    [cost, grad] = getCostGrad(problem, x, storedb, key);
    gradnorm = problem.M.norm(x, grad);

    % iter always equals the number of fully executed iterations.
    iter = 0;

    % Record iteration 0, then preallocate the info struct-array.
    stats = savestats();
    info(1) = stats;
    info(min(10000, options.maxiter+1)).iter = [];

    if options.verbosity >= 2
        fprintf(' iter\t               cost val\t    grad. norm\n');
    end

    % Iterate until a stopping criterion triggers.
    while true

        % Report on the current iterate.
        if options.verbosity >= 2
            fprintf('%5d\t%+.16e\t%.8e\n', iter, cost, gradnorm);
        end

        % Time this iteration.
        tstart = tic();

        % Standard stopping criteria first ...
        [stop, reason] = stoppingcriterion(problem, x, options, info, iter+1);

        % ... then the solver-specific one, if none of those triggered.
        if ~stop
            if stats.stepsize < options.minstepsize
                stop = true;
                reason = sprintf( ...
                    'Last stepsize smaller than minimum allowed; options.minstepsize = %g.', ...
                    options.minstepsize);
            end
        end

        if stop
            if options.verbosity >= 1
                fprintf([reason '\n']);
            end
            break;
        end

        % Descent direction: minus the gradient.
        direction = problem.M.lincomb(x, -1, grad);

        % Line search along that direction; the directional derivative of
        % the cost along it is -gradnorm^2.
        [stepsize, xnext, keynext, lsinfo] = options.linesearch( ...
                             problem, x, direction, cost, -gradnorm^2, ...
                             options, storedb, key);

        % Move to the new point and refresh the cost-related quantities.
        x = xnext;
        key = keynext;
        [cost, grad] = getCostGrad(problem, x, storedb, key);
        gradnorm = problem.M.norm(x, grad);

        % Keep the memory footprint of the store database in check.
        storedb.purge();

        % One more iteration accomplished.
        iter = iter + 1;

        % Log statistics for the freshly executed iteration.
        stats = savestats();
        info(iter+1) = stats;

    end


    % Drop the unused, preallocated tail of info.
    info = info(1:iter+1);

    if options.verbosity >= 1
        fprintf('Total time is %f [s] (excludes statsfun)\n', ...
                info(end).time);
    end


    % Collects the statistics of the current iterate. Reads the workspace of
    % the enclosing function (iter, cost, gradnorm, stepsize, ...).
    function st = savestats()
        st.iter = iter;
        st.cost = cost;
        st.gradnorm = gradnorm;
        if iter > 0
            st.stepsize = stepsize;
            % Cumulative time: previous total plus this iteration.
            st.time = info(iter).time + toc(tstart);
            st.linesearch = lsinfo;
        else
            % No step was taken yet at the initial guess.
            st.stepsize = NaN;
            st.time = toc(tstart);
            st.linesearch = [];
        end
        st = applyStatsfun(problem, x, storedb, key, options, st);
    end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/solvers/stochasticgradient/stepsize_sg.m
================================================
function [stepsize, newx, newkey, ssstats] = ...
                    stepsize_sg(problem, x, d, iter, options, storedb, key) %#ok<INUSD>
% Standard step size selection rule for the stochastic gradient method.
%
% Inputs:
%   problem : problem structure; only problem.M.retr is used here.
%   x       : point on the manifold problem.M.
%   d       : tangent vector at x.
%   iter    : iteration number of x (0 marks the initial guess).
%   options : options structure (optional, may be empty), see below.
%   storedb : StoreDB database (optional; created if omitted).
%   key     : key of x in storedb (optional).
%
% Outputs:
%   stepsize : the selected step size.
%   newx     : point obtained by retracting -stepsize*d at x.
%   newkey   : fresh storedb key for newx.
%   ssstats  : structure of statistics about this call; it contains the
%              field stepsize.
%
% Options (default between parentheses):
%   options.stepsize_type ('decay')
%       'decay'  : stepsize = init / (1 + init*lambda*iter);
%       'fix' or 'fixed' : stepsize = init;
%       'hybrid' : as 'decay' while iter < decaysteps, then frozen at the
%                  value 'decay' would give for iter = decaysteps.
%       The comparison is case insensitive.
%   options.stepsize_init (0.1)         : init above.
%   options.stepsize_lambda (0.1)       : lambda above.
%   options.stepsize_decaysteps (100)   : decaysteps above.
%
% In this implementation the storedb is only used to issue keys.
%
% See also: stochasticgradient

% This file is part of Manopt: www.manopt.org.
% Original authors: Bamdev Mishra and Nicolas Boumal, March 30, 2017.
% Contributors: Hiroyuki Kasai and Hiroyuki Sato.
% Change log: 


    % The key may be omitted, and so may storedb.
    if nargin < 7
        if nargin < 6
            storedb = StoreDB();
        end
        key = storedb.getNewKey(); %#ok<NASGU>
    end


    % Defaults for the step size rule; user options take precedence.
    default_options = struct('stepsize_init', 0.1, ...
                             'stepsize_type', 'decay', ...
                             'stepsize_lambda', 0.1, ...
                             'stepsize_decaysteps', 100);

    if nargin < 5 || isempty(options)
        options = struct();
    end
    options = mergeOptions(default_options, options);


    init = options.stepsize_init;
    lambda = options.stepsize_lambda;

    % Step size decaying as O(1/k).
    decayed = @(k) init / (1 + init*lambda*k);

    switch lower(options.stepsize_type)

        % Constant step size.
        case {'fix', 'fixed'}
            stepsize = init;

        % Decay during the first few iterations only, then constant.
        case 'hybrid'
            k = options.stepsize_decaysteps;
            if iter < options.stepsize_decaysteps
                k = iter;
            end
            stepsize = decayed(k);

        % Decay at every iteration.
        case 'decay'
            stepsize = decayed(iter);

        otherwise
            error(['Unknown options.stepsize_type. ' ...
                   'Should be ''fix'', ''decay'' or ''hybrid''.']);

    end

    % Statistics about this call.
    ssstats = struct();
    ssstats.stepsize = stepsize;

    % Step against d, and issue a key for the point reached.
    newx = problem.M.retr(x, d, -stepsize);
    newkey = storedb.getNewKey();

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/solvers/stochasticgradient/stochasticgradient.m
================================================
function [x, info, options] = stochasticgradient(problem, x, options)
% Riemannian stochastic gradient (SG) solver for Manopt.
%
% function [x, info, options] = stochasticgradient(problem)
% function [x, info, options] = stochasticgradient(problem, x0)
% function [x, info, options] = stochasticgradient(problem, x0, options)
% function [x, info, options] = stochasticgradient(problem, [], options)
%
% Runs the Riemannian stochastic gradient algorithm on the problem
% described by the problem structure. Iterations start at x0 when it is
% given, and at a random point of the manifold otherwise. Pass x0 = [] (the
% empty matrix) to supply options without supplying an initial guess.
%
% The problem structure must contain the following fields:
%
%  problem.M:
%       The manifold to optimize over, as produced by a factory.
%
%  problem.partialgrad or problem.partialegrad (or equivalent)
%       The partial gradients of the cost function. For a cost of the form
%       f(x) = sum_{k=1}^N f_k(x),
%       partialegrad(x, K) = sum_{k \in K} grad f_k(x).
%       As usual, partialgrad gives the Riemannian gradient, whereas
%       partialegrad gives a Euclidean (classical) gradient, converted
%       automatically to a Riemannian gradient. Use the tool
%       checkgradient(problem) to check it.
%
%  problem.ncostterms
%       Integer: the number of terms in the cost function (N in the
%       example above.)
%
% Importantly, the cost function itself needs not be specified.
%
% Options with defaults set in this file:
%   maxiter (1000)      : maximum number of iterations.
%   batchsize (1)       : number of cost terms sampled per iteration.
%   verbosity (2)       : output verbosity (0, 1 or 2).
%   storedepth (20)     : limits the amount of caching.
%   checkperiod (100)   : stopping criteria are checked and statistics are
%                         saved once every checkperiod iterations.
%   stepsizefun (@stepsize_sg) : step size selection function; see that
%                         function for its own options, which are read from
%                         the same options structure.
%
% The algorithm needs neither the cost nor the gradient norm, hence does
% not compute them. To record them anyway, one can set:
%
%   metrics.cost = @(problem, x) getCost(problem, x);
%   metrics.gradnorm = @(problem, x) problem.M.norm(x, getGradient(problem, x));
%   options.statsfun = statsfunhelper(metrics);
%
% Important caveat: stochastic algorithms usually return an average of the
% last few iterates. Computing averages on manifolds can be expensive.
% Currently, this solver does not compute averages and simply returns the
% last iterate. Using options.statsfun, it is possible for the user to
% compute averages manually. If you have ideas on how to do this
% generically, we welcome feedback. In particular, approximate means could
% be computed with M.pairmean which is available in many geometries.
%
% See also: steepestdescent

% This file is part of Manopt: www.manopt.org.
% Original authors: Bamdev Mishra <bamdevm@gmail.com>,
%                   Hiroyuki Kasai <kasai@is.uec.ac.jp>, and
%                   Hiroyuki Sato <hsato@ms.kagu.tus.ac.jp>, 22 April 2016.
% Contributors: Nicolas Boumal
% Change log: 


    % Sanity check on the problem description.
    if ~canGetPartialGradient(problem)
        warning('manopt:getPartialGradient', ...
         'No partial gradient provided. The algorithm will likely abort.');
    end


    % Defaults specific to this solver (see the help text above).
    localdefaults = struct('maxiter', 1000, ...
                           'batchsize', 1, ...
                           'verbosity', 2, ...
                           'storedepth', 20, ...
                           'checkperiod', 100, ...
                           'stepsizefun', @stepsize_sg);

    % Precedence, lowest to highest: global defaults, local defaults,
    % user-supplied options.
    localdefaults = mergeOptions(getGlobalDefaults(), localdefaults);
    if ~exist('options', 'var') || isempty(options)
        options = struct();
    end
    options = mergeOptions(localdefaults, options);


    assert(options.checkperiod >= 1, ...
                 'options.checkperiod must be a positive integer (>= 1).');


    % Without a user-supplied initial point, draw one at random.
    if ~exist('x', 'var') || isempty(x)
        x = problem.M.rand();
    end

    % Cache database, and the key identifying the current x in it.
    storedb = StoreDB(options.storedepth);
    key = storedb.getNewKey();


    % Optimization time accumulated since statistics were last saved.
    % Time spent logging statistics is deliberately left out, as it is not
    % relevant to the optimization process itself.
    period_time = 0;

    % Number of completed steps.
    iter = 0;

    % Number of entries of info filled in so far.
    nsaved = 0;

    % Record the initial state, then preallocate the info struct-array.
    stats = savestats();
    info(1) = stats;
    nsaved = nsaved + 1;
    if ~isinf(options.maxiter)
        capacity = ceil(options.maxiter / options.checkperiod) + 1;
    else
        % With maxiter = inf, the user is trusted to have defined another
        % stopping criterion.
        capacity = 1e5;
    end
    info(capacity).iter = [];


    % Column headers for the progress report.
    if options.verbosity >= 2
        fprintf('    iter       time [s]       step size\n');
    end


    % Main loop.
    stop = false;
    while iter < options.maxiter

        % Time the optimization work of this step.
        tstart = tic();

        % Sample the batch, with replacement.
        batch = randi(problem.ncostterms, options.batchsize, 1);

        % Partial gradient over that batch.
        partial_grad = getPartialGradient(problem, x, batch, storedb, key);

        % Step size selection; this also yields the next point and its key.
        [stepsize, xnext, keynext, ssstats] = ...
                   options.stepsizefun(problem, x, partial_grad, iter, ...
                                       options, storedb, key);

        % Take the step.
        x = xnext;
        key = keynext;
        iter = iter + 1;

        % Keep the memory footprint of the store database in check.
        storedb.purge();

        % Account for the time of this step.
        period_time = period_time + toc(tstart);


        % The rest only happens once every checkperiod iterations.
        if mod(iter, options.checkperiod) ~= 0
            continue;
        end

        % Log statistics for the freshly executed iteration.
        stats = savestats();
        nsaved = nsaved + 1;
        info(nsaved) = stats;

        % A new period starts.
        period_time = 0;

        % Progress report.
        if options.verbosity >= 2
            fprintf('%8d     %10.2f       %.3e\n', ...
                                           iter, stats.time, stepsize);
        end

        % Standard stopping criteria.
        [stop, reason] = stoppingcriterion(problem, x, ...
                                           options, info, nsaved);
        if stop
            if options.verbosity >= 1
                fprintf([reason '\n']);
            end
            break;
        end

    end


    % Drop the unused, preallocated tail of info.
    info = info(1:nsaved);


    % Final message.
    if options.verbosity >= 1
        if ~stop
            % The loop ran to completion without stoppingcriterion
            % triggering, which means maxiter was reached.
            msg = 'Max iteration count reached; options.maxiter = %g.\n';
            fprintf(msg, options.maxiter);
        end
        % period_time covers the steps taken since the last saved stats.
        fprintf('Total time is %f [s] (excludes statsfun)\n', ...
                info(end).time + period_time);
    end


    % Collects the statistics to be saved at index nsaved+1 in info. Reads
    % the workspace of the enclosing function (iter, stepsize, ...).
    function st = savestats()
        st.iter = iter;
        if nsaved > 0
            % Cumulative time: last saved total plus the current period.
            st.time = info(nsaved).time + period_time;
            st.stepsize = stepsize;
            st.stepsize_stats = ssstats;
        else
            % Initial state: no step was taken yet.
            st.time = 0;
            st.stepsize = NaN;
            st.stepsize_stats = [];
        end
        st = applyStatsfun(problem, x, storedb, key, options, st);
    end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/solvers/trustregions/license for original GenRTR code.txt
================================================
Copyright (c) 2007,2012 Christopher G. Baker, Pierre-Antoine Absil, Kyle A. Gallivan
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of the contributors nor of their affiliated 
      institutions may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

For questions, please contact Chris Baker (chris@cgbaker.net)


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/solvers/trustregions/tCG.m
================================================
function [eta, Heta, inner_it, stop_tCG] ...
                 = tCG(problem, x, grad, eta, Delta, options, storedb, key)
% tCG - Truncated (Steihaug-Toint) Conjugate-Gradient method
% minimize <eta,grad> + .5*<eta,Hess(eta)>
% subject to <eta,eta>_[inverse precon] <= Delta^2
%
% Inputs:
%   problem  Manopt problem structure. This function uses problem.M.inner,
%            problem.M.lincomb and problem.M.zerovec directly, and passes
%            the structure on to getHessian and getPrecon.
%   x        Point at which the model is built.
%   grad     Gradient at x (used as the linear term of the model).
%   eta      Initial tangent vector at x. If options.useRand is false, it
%            is assumed (not checked) to be the zero vector: Heta and the
%            norm bookkeeping are initialized accordingly.
%   Delta    Trust-region radius.
%   options  Structure; the fields read here are theta, kappa, useRand,
%            mininner, maxinner and debug.
%   storedb, key
%            Passed through to getHessian and getPrecon.
%
% Outputs:
%   eta       Approximate minimizer of the model in the trust region.
%   Heta      Hessian (or its approximation) along the returned eta,
%             maintained by linear recurrences (hence only approximately
%             correct if the Hessian approximation is nonlinear).
%   inner_it  Index of the last inner iteration that was started; 0 if
%             the loop body never ran (options.maxinner < 1).
%   stop_tCG  Reason for termination:
%               1 : negative curvature
%               2 : exceeded trust region
%               3 : residual target reached, kappa active (linear)
%               4 : residual target reached, theta active (superlinear)
%               5 : maximum number of inner iterations
%               6 : model value did not decrease
%
% See also: trustregions

% This file is part of Manopt: www.manopt.org.
% This code is an adaptation to Manopt of the original GenRTR code:
% RTR - Riemannian Trust-Region
% (c) 2004-2007, P.-A. Absil, C. G. Baker, K. A. Gallivan
% Florida State University
% School of Computational Science
% (http://www.math.fsu.edu/~cbaker/GenRTR/?page=download)
% See accompanying license file.
% The adaptation was executed by Nicolas Boumal.
%
% Change log:
%
%   NB Feb. 12, 2013:
%       We do not project r back to the tangent space anymore: it was not
%       necessary, and as of Manopt 1.0.1, the proj operator does not
%       coincide with this notion anymore.
%
%   NB April 3, 2013:
%       tCG now also returns Heta, the Hessian at x along eta. Additional
%       esthetic modifications.
%
%   NB Dec. 2, 2013:
%       If options.useRand is activated, we now make sure the preconditio-
%       ner is not used, as was originally intended in GenRTR. In time, we
%       may want to investigate whether useRand can be modified to work well
%       with preconditioning too.
%
%   NB Jan. 9, 2014:
%       Now checking explicitly for model decrease at each iteration. The
%       first iteration is a Cauchy point, which necessarily realizes a
%       decrease of the model cost. If a model increase is witnessed
%       (which is theoretically impossible if a linear operator is used for
%       the Hessian approximation), then we return the previous eta. This
%       ensures we always achieve at least the Cauchy decrease, which
%       should be sufficient for convergence.
%
%   NB Feb. 17, 2015:
%       The previous update was in effect verifying that the current eta
%       performed at least as well as the first eta (the Cauchy step) with
%       respect to the model cost. While this is an acceptable strategy,
%       the documentation (and the original intent) was to ensure a
%       monotonic decrease of the model cost at each new eta. This is now
%       the case, with the added line: "model_value = new_model_value;".
%
%   NB April 3, 2015:
%       Works with the new StoreDB class system.


% All terms involving the trust-region radius will use an inner product
% w.r.t. the preconditioner; this is because the iterates grow in
% length w.r.t. the preconditioner, guaranteeing that we will not
% re-enter the trust-region.
%
% The following recurrences for Prec-based norms and inner
% products come from [CGT2000], pg. 205, first edition.
% Below, P is the preconditioner.
%
% <eta_k,P*delta_k> = 
%          beta_k-1 * ( <eta_k-1,P*delta_k-1> + alpha_k-1 |delta_k-1|^2_P )
% |delta_k|^2_P = <r_k,z_k> + beta_k-1^2 |delta_k-1|^2_P
%
% therefore, we need to keep track of
% 1)   |delta_k|^2_P
% 2)   <eta_k,P*delta_k> = <eta_k,delta_k>_P
% 3)   |eta_k  |^2_P
%
% initial values are given by:
%    |delta_0|_P = <r,z>
%    |eta_0|_P   = 0
%    <eta_0,delta_0>_P = 0
% because we take eta_0 = 0 (if useRand = false).
%
% [CGT2000] Conn, Gould and Toint: Trust-region methods, 2000.

inner = problem.M.inner;
lincomb = problem.M.lincomb;

theta = options.theta;
kappa = options.kappa;

if ~options.useRand % and therefore, eta == 0
    Heta = problem.M.zerovec(x);
    r = grad;
    e_Pe = 0;
else % and therefore, no preconditioner
    % eta (presumably) ~= 0 was provided by the caller.
    Heta = getHessian(problem, x, eta, storedb, key);
    r = lincomb(x, 1, grad, 1, Heta);
    e_Pe = inner(x, eta, eta);
end
r_r = inner(x, r, r);
norm_r = sqrt(r_r);
norm_r0 = norm_r;

% Precondition the residual.
if ~options.useRand
    z = getPrecon(problem, x, r, storedb, key);
else
    z = r;
end

% Compute z'*r.
z_r = inner(x, z, r);
d_Pd = z_r;

% Initial search direction.
delta  = lincomb(x, -1, z);
if ~options.useRand % and therefore, eta == 0
    e_Pd = 0;
else % and therefore, no preconditioner
    e_Pd = inner(x, eta, delta);
end

% If the Hessian or a linear Hessian approximation is in use, it is
% theoretically guaranteed that the model value decreases strictly
% with each iteration of tCG. Hence, there is no need to monitor the model
% value. But, when a nonlinear Hessian approximation is used (such as the
% built-in finite-difference approximation for example), the model may
% increase. It is then important to terminate the tCG iterations and return
% the previous (the best-so-far) iterate. The variable below will hold the
% model value.
model_fun = @(eta, Heta) inner(x, eta, grad) + .5*inner(x, eta, Heta);
if ~options.useRand
    model_value = 0;
else
    model_value = model_fun(eta, Heta);
end

% Pre-assume termination because j == end.
stop_tCG = 5;

% The iteration count is tracked in its own variable rather than read from
% the loop variable after the loop: a for-loop over an empty range (that
% is, options.maxinner < 1) leaves the loop variable empty, in which case
% the count reported to the caller would be [] instead of 0.
inner_it = 0;

% Begin inner/tCG loop.
for j = 1 : options.maxinner
    
    % Record that iteration j was started (this is the value returned if
    % we break out of the loop below, or if the loop runs to completion).
    inner_it = j;
    
    % This call is the computationally expensive step.
    Hdelta = getHessian(problem, x, delta, storedb, key);
    
    % Compute curvature (often called kappa).
    d_Hd = inner(x, delta, Hdelta);
    
    
    % Note that if d_Hd == 0, we will exit at the next "if" anyway.
    alpha = z_r/d_Hd;
    % <neweta,neweta>_P =
    % <eta,eta>_P + 2*alpha*<eta,delta>_P + alpha*alpha*<delta,delta>_P
    e_Pe_new = e_Pe + 2.0*alpha*e_Pd + alpha*alpha*d_Pd;
    
    if options.debug > 2,
        fprintf('DBG:   (r,r)  : %e\n', r_r);
        fprintf('DBG:   (d,Hd) : %e\n', d_Hd);
        fprintf('DBG:   alpha  : %e\n', alpha);
    end
    
    % Check against negative curvature and trust-region radius violation.
    % If either condition triggers, we bail out.
    if d_Hd <= 0 || e_Pe_new >= Delta^2,
        % want
        %  ee = <eta,eta>_prec,x
        %  ed = <eta,delta>_prec,x
        %  dd = <delta,delta>_prec,x
        % tau is the positive root of |eta + tau*delta|_P^2 = Delta^2,
        % i.e., the step along delta that lands on the boundary.
        tau = (-e_Pd + sqrt(e_Pd*e_Pd + d_Pd*(Delta^2-e_Pe))) / d_Pd;
        if options.debug > 2,
            fprintf('DBG:     tau  : %e\n', tau);
        end
        eta  = lincomb(x, 1,  eta, tau,  delta);
        
        % If only a nonlinear Hessian approximation is available, this is
        % only approximately correct, but saves an additional Hessian call.
        Heta = lincomb(x, 1, Heta, tau, Hdelta);
        
        % Technically, we may want to verify that this new eta is indeed
        % better than the previous eta before returning it (this is always
        % the case if the Hessian approximation is linear, but I am unsure
        % whether it is the case or not for nonlinear approximations.)
        % At any rate, the impact should be limited, so in the interest of
        % code conciseness (if we can still hope for that), we omit this.
        
        if d_Hd <= 0,
            stop_tCG = 1;     % negative curvature
        else
            stop_tCG = 2;     % exceeded trust region
        end
        break;
    end
    
    % No negative curvature and eta_prop inside TR: accept it.
    e_Pe = e_Pe_new;
    new_eta  = lincomb(x, 1,  eta, alpha,  delta);
    
    % If only a nonlinear Hessian approximation is available, this is
    % only approximately correct, but saves an additional Hessian call.
    new_Heta = lincomb(x, 1, Heta, alpha, Hdelta);
    
    % Verify that the model cost decreased in going from eta to new_eta. If
    % it did not (which can only occur if the Hessian approximation is
    % nonlinear or because of numerical errors), then we return the
    % previous eta (which necessarily is the best reached so far, according
    % to the model cost). Otherwise, we accept the new eta and go on.
    new_model_value = model_fun(new_eta, new_Heta);
    if new_model_value >= model_value
        stop_tCG = 6;
        break;
    end
    
    eta = new_eta;
    Heta = new_Heta;
    model_value = new_model_value; %% added Feb. 17, 2015
    
    % Update the residual.
    r = lincomb(x, 1, r, alpha, Hdelta);
    
    % Compute new norm of r.
    r_r = inner(x, r, r);
    norm_r = sqrt(r_r);
    
    % Check kappa/theta stopping criterion.
    % Note that it is somewhat arbitrary whether to check this stopping
    % criterion on the r's (the gradients) or on the z's (the
    % preconditioned gradients). [CGT2000], page 206, mentions both as
    % acceptable criteria.
    if j >= options.mininner && norm_r <= norm_r0*min(norm_r0^theta, kappa)
        % Residual is small enough to quit
        if kappa < norm_r0^theta,
            stop_tCG = 3;  % linear convergence
        else
            stop_tCG = 4;  % superlinear convergence
        end
        break;
    end
    
    % Precondition the residual.
    if ~options.useRand
        z = getPrecon(problem, x, r, storedb, key);
    else
        z = r;
    end
    
    % Save the old z'*r.
    zold_rold = z_r;
    % Compute new z'*r.
    z_r = inner(x, z, r);
    
    % Compute new search direction.
    beta = z_r/zold_rold;
    delta = lincomb(x, -1, z, beta, delta);
    
    % Update new P-norms and P-dots [CGT2000, eq. 7.5.6 & 7.5.7].
    e_Pd = beta*(e_Pd + alpha*d_Pd);
    d_Pd = z_r + beta*beta*d_Pd;
    
end  % of tCG loop

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/solvers/trustregions/trustregions.m
================================================
function [x, cost, info, options] = trustregions(problem, x, options)
% Riemannian trust-regions solver for optimization on manifolds.
%
% function [x, cost, info, options] = trustregions(problem)
% function [x, cost, info, options] = trustregions(problem, x0)
% function [x, cost, info, options] = trustregions(problem, x0, options)
% function [x, cost, info, options] = trustregions(problem, [], options)
%
% This is the Riemannian Trust-Region solver (with tCG inner solve), named
% RTR. This solver will attempt to minimize the cost function described in
% the problem structure. It requires the availability of the cost function
% and of its gradient. It will issue calls for the Hessian. If no Hessian
% nor approximate Hessian is provided, a standard approximation of the
% Hessian based on the gradient will be computed. If a preconditioner for
% the Hessian is provided, it will be used.
%
% If no gradient is provided, an approximation of the gradient is computed,
% but this can be slow for manifolds of high dimension.
%
% For a description of the algorithm and theorems offering convergence
% guarantees, see the references below. Documentation for this solver is
% available online at:
%
% http://www.manopt.org/solver_documentation_trustregions.html
%
%
% The initial iterate is x0 if it is provided. Otherwise, a random point on
% the manifold is picked. To specify options whilst not specifying an
% initial iterate, give x0 as [] (the empty matrix).
%
% The two outputs 'x' and 'cost' are the last reached point on the manifold
% and its cost. Notice that x is not necessarily the best reached point,
% because this solver is not forced to be a descent method. In particular,
% very close to convergence, it is sometimes preferable to accept very
% slight increases in the cost value (on the order of the machine epsilon)
% in the process of reaching fine convergence.
% 
% The output 'info' is a struct-array which contains information about the
% iterations:
%   iter (integer)
%       The (outer) iteration number, or number of steps considered
%       (whether accepted or rejected). The initial guess is 0.
%	cost (double)
%       The corresponding cost value.
%	gradnorm (double)
%       The (Riemannian) norm of the gradient.
%	numinner (integer)
%       The number of inner iterations executed to compute this iterate.
%       Inner iterations are truncated-CG steps. Each one requires a
%       Hessian (or approximate Hessian) evaluation.
%	time (double)
%       The total elapsed time in seconds to reach the corresponding cost.
%	rho (double)
%       The performance ratio for the iterate.
%	rhonum, rhoden (double)
%       Regularized numerator and denominator of the performance ratio:
%       rho = rhonum/rhoden. See options.rho_regularization.
%	accepted (boolean)
%       Whether the proposed iterate was accepted or not.
%	stepsize (double)
%       The (Riemannian) norm of the vector returned by the inner solver
%       tCG and which is retracted to obtain the proposed next iterate. If
%       accepted = true for the corresponding iterate, this is the size of
%       the step from the previous to the new iterate. If accepted is
%       false, the step was not executed and this is the size of the
%       rejected step.
%	Delta (double)
%       The trust-region radius at the outer iteration.
%	cauchy (boolean)
%       Whether the Cauchy point was used or not (if useRand is true).
%   And possibly additional information logged by options.statsfun.
% For example, type [info.gradnorm] to obtain a vector of the successive
% gradient norms reached at each (outer) iteration.
%
% The options structure is used to overwrite the default values. All
% options have a default value and are hence optional. To force an option
% value, pass an options structure with a field options.optionname, where
% optionname is one of the following and the default value is indicated
% between parentheses:
%
%   tolgradnorm (1e-6)
%       The algorithm terminates if the norm of the gradient drops below
%       this. For well-scaled problems, a rule of thumb is that you can
%       expect to reduce the gradient norm by 8 orders of magnitude
%       (sqrt(eps)) compared to the gradient norm at a "typical" point (a
%       rough initial iterate for example). Further decrease is sometimes
%       possible, but inexact floating point arithmetic will eventually
%       limit the final accuracy. If tolgradnorm is set too low, the
%       algorithm may end up iterating forever (or at least until another
%       stopping criterion triggers).
%   maxiter (1000)
%       The algorithm terminates if maxiter (outer) iterations were executed.
%   maxtime (Inf)
%       The algorithm terminates if maxtime seconds elapsed.
%	miniter (3)
%       Minimum number of outer iterations (used only if useRand is true).
%	mininner (1)
%       Minimum number of inner iterations (for tCG).
%	maxinner (problem.M.dim() : the manifold's dimension)
%       Maximum number of inner iterations (for tCG).
%	Delta_bar (problem.M.typicaldist() or sqrt(problem.M.dim()))
%       Maximum trust-region radius. If you specify this parameter but not
%       Delta0, then Delta0 will be set to 1/8 times this parameter.
%   Delta0 (Delta_bar/8)
%       Initial trust-region radius. If you observe a long plateau at the
%       beginning of the convergence plot (gradient norm VS iteration), it
%       may pay off to try to tune this parameter to shorten the plateau.
%       You should not set this parameter without setting Delta_bar too (at
%       a larger value).
%	useRand (false)
%       Set to true if the trust-region solve is to be initiated with a
%       random tangent vector. If set to true, no preconditioner will be
%       used. This option is set to true in some scenarios to escape saddle
%       points, but is otherwise seldom activated.
%	kappa (0.1)
%       tCG inner kappa convergence tolerance.
%       kappa > 0 is the linear convergence target rate: tCG will terminate
%       early if the residual was reduced by a factor of kappa.
%	theta (1.0)
%       tCG inner theta convergence tolerance.
%       1+theta (theta between 0 and 1) is the superlinear convergence
%       target rate. tCG will terminate early if the residual was reduced
%       by a power of 1+theta.
%	rho_prime (0.1)
%       Accept/reject threshold : if rho is at least rho_prime, the outer
%       iteration is accepted. Otherwise, it is rejected. In case it is
%       rejected, the trust-region radius will have been decreased.
%       To ensure this, rho_prime >= 0 must be strictly smaller than 1/4.
%       If rho_prime is negative, the algorithm is not guaranteed to
%       produce monotonically decreasing cost values. It is strongly
%       recommended to set rho_prime > 0, to aid convergence.
%   rho_regularization (1e3)
%       Close to convergence, evaluating the performance ratio rho is
%       numerically challenging. Meanwhile, close to convergence, the
%       quadratic model should be a good fit and the steps should be
%       accepted. Regularization lets rho go to 1 as the model decrease and
%       the actual decrease go to zero. Set this option to zero to disable
%       regularization (not recommended). See in-code for the specifics.
%       When this is not zero, it may happen that the iterates produced are
%       not monotonically improving the cost when very close to
%       convergence. This is because the corrected cost improvement could
%       change sign if it is negative but very small.
%   statsfun (none)
%       Function handle to a function that will be called after each
%       iteration to provide the opportunity to log additional statistics.
%       They will be returned in the info struct. See the generic Manopt
%       documentation about solvers for further information. statsfun is
%       called with the point x that was reached last, after the
%       accept/reject decision. See comment below.
%   stopfun (none)
%       Function handle to a function that will be called at each iteration
%       to provide the opportunity to specify additional stopping criteria.
%       See the generic Manopt documentation about solvers for further
%       information.
%   verbosity (2)
%       Integer number used to tune the amount of output the algorithm
%       generates during execution (mostly as text in the command window).
%       The higher, the more output. 0 means silent. 3 and above includes a
%       display of the options structure at the beginning of the execution.
%   debug (false)
%       Set to true to allow the algorithm to perform additional
%       computations for debugging purposes. If a debugging test fails, you
%       will be informed of it, usually via the command window. Be aware
%       that these additional computations appear in the algorithm timings
%       too, and may interfere with operations such as counting the number
%       of cost evaluations, etc. (the debug calls get storedb too).
%   storedepth (20)
%       Maximum number of different points x of the manifold for which a
%       store structure will be kept in memory in the storedb. If the
%       caching features of Manopt are not used, this is irrelevant. If
%       memory usage is an issue, you may try to lower this number.
%       Profiling may then help to investigate if a performance hit was
%       incurred as a result.
%
% Notice that statsfun is called with the point x that was reached last,
% after the accept/reject decision. Hence: if the step was accepted, we get
% that new x, with a store which only saw the call for the cost and for the
% gradient. If the step was rejected, we get the same x as previously, with
% the store structure containing everything that was computed at that point
% (possibly including previous rejects at that same point). Hence, statsfun
% should not be used in conjunction with the store to count operations for
% example. Instead, you should use storedb's shared memory for such
% purposes (either via storedb.shared, or via store.shared, see
% online documentation). It is however possible to use statsfun with the
% store to compute, for example, other merit functions on the point x
% (other than the actual cost function, that is).
%
%
% Please cite the Manopt paper as well as the research paper:
%     @Article{genrtr,
%       Title    = {Trust-region methods on {Riemannian} manifolds},
%       Author   = {Absil, P.-A. and Baker, C. G. and Gallivan, K. A.},
%       Journal  = {Foundations of Computational Mathematics},
%       Year     = {2007},
%       Number   = {3},
%       Pages    = {303--330},
%       Volume   = {7},
%       Doi      = {10.1007/s10208-005-0179-9}
%     }
%
% See also: steepestdescent conjugategradient manopt/examples

% An explicit, general listing of this algorithm, with preconditioning,
% can be found in the following paper:
%     @Article{boumal2015lowrank,
%       Title   = {Low-rank matrix completion via preconditioned optimization on the {G}rassmann manifold},
%       Author  = {Boumal, N. and Absil, P.-A.},
%       Journal = {Linear Algebra and its Applications},
%       Year    = {2015},
%       Pages   = {200--239},
%       Volume  = {475},
%       Doi     = {10.1016/j.laa.2015.02.027},
%     }

% When the Hessian is not specified, it is approximated with
% finite-differences of the gradient. The resulting method is called
% RTR-FD. Some convergence theory for it is available in this paper:
% @incollection{boumal2015rtrfd,
% 	author={Boumal, N.},
% 	title={Riemannian trust regions with finite-difference Hessian approximations are globally convergent},
% 	year={2015},
% 	booktitle={Geometric Science of Information}
% }


% This file is part of Manopt: www.manopt.org.
% This code is an adaptation to Manopt of the original GenRTR code:
% RTR - Riemannian Trust-Region
% (c) 2004-2007, P.-A. Absil, C. G. Baker, K. A. Gallivan
% Florida State University
% School of Computational Science
% (http://www.math.fsu.edu/~cbaker/GenRTR/?page=download)
% See accompanying license file.
% The adaptation was executed by Nicolas Boumal.
%
%
% Change log: 
%
%   NB April 3, 2013:
%       tCG now returns the Hessian along the returned direction eta, so
%       that we do not compute that Hessian redundantly: some savings at
%       each iteration. Similarly, if the useRand flag is on, we spare an
%       extra Hessian computation at each outer iteration too, owing to
%       some modifications in the Cauchy point section of the code specific
%       to useRand = true.
%
%   NB Aug. 22, 2013:
%       This function is now Octave compatible. The transition called for
%       two changes which would otherwise not be advisable. (1) tic/toc is
%       now used as is, as opposed to the safer way:
%       t = tic(); elapsed = toc(t);
%       And (2), the (formerly inner) function savestats was moved outside
%       the main function to not be nested anymore. This is arguably less
%       elegant, but Octave does not (and likely will not) support nested
%       functions.
%
%   NB Dec. 2, 2013:
%       The in-code documentation was largely revised and expanded.
%
%   NB Dec. 2, 2013:
%       The former heuristic which triggered when rhonum was very small and
%       forced rho = 1 has been replaced by a smoother heuristic which
%       consists in regularizing rhonum and rhoden before computing their
%       ratio. It is tunable via options.rho_regularization. Furthermore,
%       the solver now detects if tCG did not obtain a model decrease
%       (which is theoretically impossible but may happen because of
%       numerical errors and/or because of a nonlinear/nonsymmetric Hessian
%       operator, which is the case for finite difference approximations).
%       When such an anomaly is detected, the step is rejected and the
%       trust region radius is decreased.
%       Feb. 18, 2015 note: this is less useful now, as tCG now guarantees
%       model decrease even for the finite difference approximation of the
%       Hessian. It is still useful in case of numerical errors, but this
%       is less stringent.
%
%   NB Dec. 3, 2013:
%       The stepsize is now registered at each iteration, at a small
%       additional cost. The defaults for Delta_bar and Delta0 are better
%       defined. Setting Delta_bar in the options will automatically set
%       Delta0 accordingly. In Manopt 1.0.4, the defaults for these options
%       were not treated appropriately because of an incorrect use of the
%       isfield() built-in function.
%
%   NB Feb. 18, 2015:
%       Added some comments. Also, Octave now supports safe tic/toc usage,
%       so we reverted the changes to use that again (see Aug. 22, 2013 log
%       entry).
%
%   NB April 3, 2015:
%       Works with the new StoreDB class system.
%
%   NB April 8, 2015:
%       No Hessian warning if approximate Hessian explicitly available.
%
%   NB Nov. 1, 2016:
%       Now uses approximate gradient via finite differences if need be.


% Verify that the problem description is sufficient for the solver.
if ~canGetCost(problem)
    warning('manopt:getCost', ...
            'No cost provided. The algorithm will likely abort.');  
end
if ~canGetGradient(problem) && ~canGetApproxGradient(problem)
    % Note: we do not give a warning if an approximate gradient is
    % explicitly given in the problem description, as in that case the user
    % seems to be aware of the issue.
    warning('manopt:getGradient:approx', ...
           ['No gradient provided. Using an FD approximation instead (slow).\n' ...
            'It may be necessary to increase options.tolgradnorm.\n' ...
            'To disable this warning: warning(''off'', ''manopt:getGradient:approx'')']);
    problem.approxgrad = approxgradientFD(problem);
end
if ~canGetHessian(problem) && ~canGetApproxHessian(problem)
    % Note: we do not give a warning if an approximate Hessian is
    % explicitly given in the problem description, as in that case the user
    % seems to be aware of the issue.
    warning('manopt:getHessian:approx', ...
           ['No Hessian provided. Using an FD approximation instead.\n' ...
            'To disable this warning: warning(''off'', ''manopt:getHessian:approx'')']);
    problem.approxhess = approxhessianFD(problem);
end

% Define some strings for display
tcg_stop_reason = {'negative curvature',...
                   'exceeded trust region',...
                   'reached target residual-kappa (linear)',...
                   'reached target residual-theta (superlinear)',...
                   'maximum inner iterations',...
                   'model increased'};

% Set local defaults here
localdefaults.verbosity = 2;
localdefaults.maxtime = inf;
localdefaults.miniter = 3;
localdefaults.maxiter = 1000;
localdefaults.mininner = 1;
localdefaults.maxinner = problem.M.dim();
localdefaults.tolgradnorm = 1e-6;
localdefaults.kappa = 0.1;
localdefaults.theta = 1.0;
localdefaults.rho_prime = 0.1;
localdefaults.useRand = false;
localdefaults.rho_regularization = 1e3;

% Merge global and local defaults, then merge w/ user options, if any.
localdefaults = mergeOptions(getGlobalDefaults(), localdefaults);
if ~exist('options', 'var') || isempty(options)
    options = struct();
end
options = mergeOptions(localdefaults, options);

% Set default Delta_bar and Delta0 separately to deal with additional
% logic: if Delta_bar is provided but not Delta0, let Delta0 automatically
% be some fraction of the provided Delta_bar.
if ~isfield(options, 'Delta_bar')
    if isfield(problem.M, 'typicaldist')
        options.Delta_bar = problem.M.typicaldist();
    else
        options.Delta_bar = sqrt(problem.M.dim());
    end 
end
if ~isfield(options,'Delta0')
    options.Delta0 = options.Delta_bar / 8;
end

% Check some option values
assert(options.rho_prime < 1/4, ...
        'options.rho_prime must be strictly smaller than 1/4.');
assert(options.Delta_bar > 0, ...
        'options.Delta_bar must be positive.');
assert(options.Delta0 > 0 && options.Delta0 < options.Delta_bar, ...
        'options.Delta0 must be positive and smaller than Delta_bar.');

% It is sometimes useful to check what the actual option values are.
if options.verbosity >= 3
    disp(options);
end

ticstart = tic();

% If no initial point x is given by the user, generate one at random.
if ~exist('x', 'var') || isempty(x)
    x = problem.M.rand();
end

% Create a store database and get a key for the current x
storedb = StoreDB(options.storedepth);
key = storedb.getNewKey();

%% Initializations

% k counts the outer (TR) iterations. The semantic is that k counts the
% number of iterations fully executed so far.
k = 0;

% Initialize solution and companion measures: f(x), fgrad(x)
[fx, fgradx] = getCostGrad(problem, x, storedb, key);
norm_grad = problem.M.norm(x, fgradx);

% Initialize trust-region radius
Delta = options.Delta0;

% Save stats in a struct array info, and preallocate.
if ~exist('used_cauchy', 'var')
    used_cauchy = [];
end
stats = savestats(problem, x, storedb, key, options, k, fx, norm_grad, Delta, ticstart);
info(1) = stats;
info(min(10000, options.maxiter+1)).iter = [];

% ** Display:
if options.verbosity == 2
   fprintf(['%3s %3s      %5s                %5s     ',...
            'f: %+e   |grad|: %e\n'],...
           '   ','   ','     ','     ', fx, norm_grad);
elseif options.verbosity > 2
   fprintf('************************************************************************\n');
   fprintf('%3s %3s    k: %5s     num_inner: %5s     %s\n',...
           '','','______','______','');
   fprintf('       f(x) : %+e       |grad| : %e\n', fx, norm_grad);
   fprintf('      Delta : %f\n', Delta);
end

% To keep track of consecutive radius changes, so that we can warn the
% user if it appears necessary.
consecutive_TRplus = 0;
consecutive_TRminus = 0;


% **********************
% ** Start of TR loop **
% **********************
while true
    
	% Start clock for this outer iteration
    ticstart = tic();

    % Run standard stopping criterion checks
    [stop, reason] = stoppingcriterion(problem, x, options, info, k+1);
    
    % If the stopping criterion that triggered is the tolerance on the
    % gradient norm but we are using randomization, make sure we make at
    % least miniter iterations to give randomization a chance at escaping
    % saddle points.
    if stop == 2 && options.useRand && k < options.miniter
        stop = 0;
    end
    
    if stop
        if options.verbosity >= 1
            fprintf([reason '\n']);
        end
        break;
    end

    if options.verbosity > 2 || options.debug > 0
        fprintf('************************************************************************\n');
    end

    % *************************
    % ** Begin TR Subproblem **
    % *************************
  
    % Determine eta0
    if ~options.useRand
        % Pick the zero vector
        eta = problem.M.zerovec(x);
    else
        % Random vector in T_x M (this has to be very small)
        eta = problem.M.lincomb(x, 1e-6, problem.M.randvec(x));
        % Must be inside trust-region
        while problem.M.norm(x, eta) > Delta
            eta = problem.M.lincomb(x, sqrt(sqrt(eps)), eta);
        end
    end

    % Solve TR subproblem approximately
    [eta, Heta, numit, stop_inner] = ...
                tCG(problem, x, fgradx, eta, Delta, options, storedb, key);
    srstr = tcg_stop_reason{stop_inner};

    % If using randomized approach, compare result with the Cauchy point.
    % Convergence proofs assume that we achieve at least (a fraction of)
    % the reduction of the Cauchy point. After this if-block, either all
    % eta-related quantities have been changed consistently, or none of
    % them have changed.
    if options.useRand
        used_cauchy = false;
        % Check the curvature,
        Hg = getHessian(problem, x, fgradx, storedb, key);
        g_Hg = problem.M.inner(x, fgradx, Hg);
        if g_Hg <= 0
            tau_c = 1;
        else
            tau_c = min( norm_grad^3/(Delta*g_Hg) , 1);
        end
        % and generate the Cauchy point.
        eta_c  = problem.M.lincomb(x, -tau_c * Delta / norm_grad, fgradx);
        Heta_c = problem.M.lincomb(x, -tau_c * Delta / norm_grad, Hg);

        % Now that we have computed the Cauchy point in addition to the
        % returned eta, we might as well keep the best of them.
        mdle  = fx + problem.M.inner(x, fgradx, eta) ...
                   + .5*problem.M.inner(x, Heta,   eta);
        mdlec = fx + problem.M.inner(x, fgradx, eta_c) ...
                   + .5*problem.M.inner(x, Heta_c, eta_c);
        if mdlec < mdle
            eta = eta_c;
            Heta = Heta_c; % added April 11, 2012
            used_cauchy = true;
        end
    end
    
    
    % This is only computed for logging purposes, because it may be useful
    % for some user-defined stopping criteria. If this is not cheap for
    % specific applications (compared to evaluating the cost), we should
    % reconsider this.
    norm_eta = problem.M.norm(x, eta);
    
    if options.debug > 0
        testangle = problem.M.inner(x, eta, fgradx) / (norm_eta*norm_grad);
    end
    

	% Compute the tentative next iterate (the proposal)
	x_prop  = problem.M.retr(x, eta);
    key_prop = storedb.getNewKey();

	% Compute the function value of the proposal
	fx_prop = getCost(problem, x_prop, storedb, key_prop);

	% Will we accept the proposal or not?
    % Check the performance of the quadratic model against the actual cost.
    rhonum = fx - fx_prop;
    rhoden = -problem.M.inner(x, fgradx, eta) ...
             -.5*problem.M.inner(x, eta, Heta);
    % rhonum could be anything.
    % rhoden should be nonnegative, as guaranteed by tCG, barring numerical
    % errors.
    
    % Heuristic -- added Dec. 2, 2013 (NB) to replace the former heuristic.
    % This heuristic is documented in the book by Conn Gould and Toint on
    % trust-region methods, section 17.4.2.
    % rhonum measures the difference between two numbers. Close to
    % convergence, these two numbers are very close to each other, so
    % that computing their difference is numerically challenging: there may
    % be a significant loss in accuracy. Since the acceptance or rejection
    % of the step is conditioned on the ratio between rhonum and rhoden,
    % large errors in rhonum result in a very large error in rho, hence in
    % erratic acceptance / rejection. Meanwhile, close to convergence,
    % steps are usually trustworthy and we should transition to a Newton-
    % like method, with rho=1 consistently. The heuristic thus shifts both
    % rhonum and rhoden by a small amount such that far from convergence,
    % the shift is irrelevant and close to convergence, the ratio rho goes
    % to 1, effectively promoting acceptance of the step.
    % The rationale is that close to convergence, both rhonum and rhoden
    % are quadratic in the distance between x and x_prop. Thus, when this
    % distance is on the order of sqrt(eps), the value of rhonum and rhoden
    % is on the order of eps, which is indistinguishable from the numerical
    % error, resulting in badly estimated rho's.
    % For abs(fx) < 1, this heuristic is invariant under offsets of f but
    % not under scaling of f. For abs(fx) > 1, the opposite holds. This
    % should not alarm us, as this heuristic only triggers at the very last
    % iterations if very fine convergence is demanded.
    rho_reg = max(1, abs(fx)) * eps * options.rho_regularization;
    rhonum = rhonum + rho_reg;
    rhoden = rhoden + rho_reg;
   
    if options.debug > 0
        fprintf('DBG:     rhonum : %e\n', rhonum);
        fprintf('DBG:     rhoden : %e\n', rhoden);
    end
    
    % This is always true if a linear, symmetric operator is used for the
    % Hessian (approximation) and if we had infinite numerical precision.
    % In practice, nonlinear approximations of the Hessian such as the
    % built-in finite difference approximation and finite numerical
    % accuracy can cause the model to increase. In such scenarios, we
    % decide to force a rejection of the step and a reduction of the
    % trust-region radius. We test the sign of the regularized rhoden since
    % the regularization is supposed to capture the accuracy to which
    % rhoden is computed: if rhoden were negative before regularization but
    % not after, that should not be (and is not) detected as a failure.
    % 
    % Note (Feb. 17, 2015, NB): the most recent version of tCG already
    % includes a mechanism to ensure model decrease if the Cauchy step
    % attained a decrease (which is theoretically the case under very lax
    % assumptions). This being said, it is always possible that numerical
    % errors will prevent this, so that it is good to keep a safeguard.
    %
    % The current strategy is that, if this should happen, then we reject
    % the step and reduce the trust region radius. This also ensures that
    % the actual cost values are monotonically decreasing.
    model_decreased = (rhoden >= 0);
    
    if ~model_decreased 
        srstr = [srstr ', model did not decrease']; %#ok<AGROW>
    end
    
    rho = rhonum / rhoden;
    
    % Added June 30, 2015 following observation by BM.
    % With this modification, it is guaranteed that a step rejection is
    % always accompanied by a TR reduction. This prevents stagnation in
    % this "corner case" (NaN's really aren't supposed to occur, but it's
    % nice if we can handle them nonetheless).
    if isnan(rho)
        fprintf('rho is NaN! Forcing a radius decrease. This should not happen.\n');
        if isnan(fx_prop)
            fprintf('The cost function returned NaN (perhaps the retraction returned a bad point?)\n');
        else
            fprintf('The cost function did not return a NaN value.');
        end
    end
   
    if options.debug > 0
        m = @(x, eta) ...
          getCost(problem, x, storedb, key) + ...
          getDirectionalDerivative(problem, x, eta, storedb, key) + ...
          .5*problem.M.inner(x, getHessian(problem, x, eta, storedb, key), eta);
        zerovec = problem.M.zerovec(x);
        actrho = (fx - fx_prop) / (m(x, zerovec) - m(x, eta));
        fprintf('DBG:   new f(x) : %+e\n', fx_prop);
        fprintf('DBG: actual rho : %e\n', actrho);
        fprintf('DBG:   used rho : %e\n', rho);
    end

    % Choose the new TR radius based on the model performance
    trstr = '   ';
    % If the actual decrease is smaller than 1/4 of the predicted decrease,
    % then reduce the TR radius.
    if rho < 1/4 || ~model_decreased || isnan(rho)
        trstr = 'TR-';
        Delta = Delta/4;
        consecutive_TRplus = 0;
        consecutive_TRminus = consecutive_TRminus + 1;
        if consecutive_TRminus >= 5 && options.verbosity >= 2
            consecutive_TRminus = -inf;
            fprintf(' +++ Detected many consecutive TR- (radius decreases).\n');
            fprintf(' +++ Consider decreasing options.Delta_bar by an order of magnitude.\n');
            fprintf(' +++ Current values: options.Delta_bar = %g and options.Delta0 = %g.\n', options.Delta_bar, options.Delta0);
        end
    % If the actual decrease is at least 3/4 of the predicted decrease and
    % the tCG (inner solve) hit the TR boundary, increase the TR radius.
    % We also keep track of the number of consecutive trust-region radius
    % increases. If there are many, this may indicate the need to adapt the
    % initial and maximum radii.
    elseif rho > 3/4 && (stop_inner == 1 || stop_inner == 2)
        trstr = 'TR+';
        Delta = min(2*Delta, options.Delta_bar);
        consecutive_TRminus = 0;
        consecutive_TRplus = consecutive_TRplus + 1;
        if consecutive_TRplus >= 5 && options.verbosity >= 1
            consecutive_TRplus = -inf;
            fprintf(' +++ Detected many consecutive TR+ (radius increases).\n');
            fprintf(' +++ Consider increasing options.Delta_bar by an order of magnitude.\n');
            fprintf(' +++ Current values: options.Delta_bar = %g and options.Delta0 = %g.\n', options.Delta_bar, options.Delta0);
        end
    else
        % Otherwise, keep the TR radius constant.
        consecutive_TRplus = 0;
        consecutive_TRminus = 0;
    end

    % Choose to accept or reject the proposed step based on the model
    % performance. Note the strict inequality.
    if model_decreased && rho > options.rho_prime
        accept = true;
        accstr = 'acc';
        x = x_prop;
        key = key_prop;
        fx = fx_prop;
        fgradx = getGradient(problem, x, storedb, key);
        norm_grad = problem.M.norm(x, fgradx);
    else
        accept = false;
        accstr = 'REJ';
    end
    
    
    % Make sure we don't use too much memory for the store database
    storedb.purge();
    
    % k is the number of iterations we have accomplished.
    k = k + 1;

    % Log statistics for freshly executed iteration.
    % Everything after this in the loop is not accounted for in the timing.
    stats = savestats(problem, x, storedb, key, options, k, fx, ...
                      norm_grad, Delta, ticstart, info, rho, rhonum, ...
                      rhoden, accept, numit, norm_eta, used_cauchy);
    info(k+1) = stats; %#ok<AGROW>

    
    % ** Display:
    if options.verbosity == 2,
        fprintf(['%3s %3s   k: %5d     num_inner: %5d     ', ...
        'f: %+e   |grad|: %e   %s\n'], ...
        accstr,trstr,k,numit,fx,norm_grad,srstr);
    elseif options.verbosity > 2,
        if options.useRand && used_cauchy,
            fprintf('USED CAUCHY POINT\n');
        end
		fprintf('%3s %3s    k: %5d     num_inner: %5d     %s\n', ...
				accstr, trstr, k, numit, srstr);
		fprintf('       f(x) : %+e     |grad| : %e\n',fx,norm_grad);
		if options.debug > 0
			fprintf('      Delta : %f          |eta| : %e\n',Delta,norm_eta);
		end
		fprintf('        rho : %e\n',rho);
    end
    if options.debug > 0,
        fprintf('DBG: cos ang(eta,gradf): %d\n',testangle);
        if rho == 0
            fprintf('DBG: rho = 0, this will likely hinder further convergence.\n');
        end
    end

end  % of TR loop (counter: k)

% Restrict info struct-array to useful part
info = info(1:k+1);


if (options.verbosity > 2) || (options.debug > 0),
   fprintf('************************************************************************\n');
end
if (options.verbosity > 0) || (options.debug > 0)
    fprintf('Total time is %f [s] (excludes statsfun)\n', info(end).time);
end

% Return the best cost reached
cost = fx;

end


% Routine in charge of collecting the current iteration stats
function stats = savestats(problem, x, storedb, key, options, k, fx, ...
                           norm_grad, Delta, ticstart, info, rho, rhonum, ...
                           rhoden, accept, numit, norm_eta, used_cauchy)
% Assemble the statistics structure describing outer iteration k.
%
% For k == 0 (the initial iterate), the per-step inputs (info, rho, rhonum,
% rhoden, accept, numit, norm_eta, used_cauchy) are not read: placeholder
% values are stored instead. For k > 0, the elapsed time is accumulated on
% top of info(k).time and the per-step quantities are copied from the
% inputs. The field 'cauchy' is only created when options.useRand is true.
% The structure is finally passed through applyStatsfun together with
% x, storedb, key and options.

    % Quantities attached to the current iterate. Fields are created in
    % the same order for every k.
    stats.iter     = k;
    stats.cost     = fx;
    stats.gradnorm = norm_grad;
    stats.Delta    = Delta;

    elapsed = toc(ticstart);
    if k == 0
        % No step was attempted yet: substitute placeholder values for
        % the per-step quantities.
        rho         = inf;
        rhonum      = NaN;
        rhoden      = NaN;
        accept      = true;
        numit       = NaN;
        norm_eta    = NaN;
        used_cauchy = false;
    else
        % Timing is cumulative: add the time logged at the previous entry.
        elapsed = info(k).time + elapsed;
    end

    stats.time     = elapsed;
    stats.rho      = rho;
    stats.rhonum   = rhonum;
    stats.rhoden   = rhoden;
    stats.accepted = accept;
    stats.numinner = numit;
    stats.stepsize = norm_eta;
    if options.useRand
        stats.cauchy = used_cauchy;
    end

    % See comment about statsfun above: the x and store passed to statsfun
    % are that of the most recently accepted point after the iteration
    % fully executed.
    stats = applyStatsfun(problem, x, storedb, key, options, stats);

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/tools/checkdiff.m
================================================
function checkdiff(problem, x, d, force_gradient)
% Checks the consistency of the cost function and directional derivatives.
%
% function checkdiff(problem)
% function checkdiff(problem, x)
% function checkdiff(problem, x, d)
%
% checkdiff runs a numerical test to verify that the directional
% derivatives defined in the problem structure agree, up to first order,
% with the cost function at a point x along a direction d. The test relies
% on a truncated Taylor series (see online Manopt documentation): the
% error of the first-order model is plotted against the step size in
% log-log scale, and the slope of a linear piece of that curve is printed.
%
% Both x and d are optional; each is sampled at random if omitted or empty.
% Supplying d without x is an error, since d must be tangent at x.
%
% See also: checkgradient checkhessian

% If force_gradient = true (hidden parameter), then the function will call
% getGradient and infer the directional derivative, rather than call
% getDirectionalDerivative directly. This is used by checkgradient.

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Dec. 30, 2012.
% Contributors: 
% Change log: 
%
%   March 26, 2017 (JB):
%       Detects if the approximated linear model is exact
%       and provides the user with the corresponding feedback.
% 
%   April 3, 2015 (NB):
%       Works with the new StoreDB class system.

    % The hidden fourth input defaults to false.
    if nargin < 4
        force_gradient = false;
    end

    % Make sure the problem structure provides what this test needs.
    if ~canGetCost(problem)
        error('It seems no cost was provided.');
    end
    if ~(force_gradient || canGetDirectionalDerivative(problem))
        error('It seems no directional derivatives were provided.');
    end
    if force_gradient && ~canGetGradient(problem)
        % Intentionally silent: force_gradient is only meant to be set by
        % checkgradient, which will already have issued its own warning.
    end

    % Figure out which of the optional inputs were actually supplied.
    has_x = (nargin >= 2) && ~isempty(x);
    has_d = (nargin >= 3) && ~isempty(d);

    if has_d && ~has_x
        error('If d is provided, x must be too, since d is tangent at x.');
    end

    % Draw at random whatever was not supplied (x first, since d is
    % generated at x).
    if ~has_x
        x = problem.M.rand();
    end
    if ~has_d
        d = problem.M.randvec(x);
    end

    % Cost at x and directional derivative of the cost at x along d.
    storedb = StoreDB();
    xkey = storedb.getNewKey();
    f0 = getCost(problem, x, storedb, xkey);

    if force_gradient
        % Deduce the directional derivative from the gradient.
        grad = getGradient(problem, x, storedb, xkey);
        df0 = problem.M.inner(x, grad, d);
    else
        df0 = getDirectionalDerivative(problem, x, d, storedb, xkey);
    end

    % Evaluate the cost at points reached from x along d through M.exp,
    % for step sizes spread over a wide logarithmic range.
    steps = logspace(-8, 0, 51);
    costs = zeros(size(steps));
    for idx = 1 : numel(steps)
        y = problem.M.exp(x, d, steps(idx));
        ykey = storedb.getNewKey();
        costs(idx) = getCost(problem, y, storedb, ykey);
    end

    % First-order model of the cost at the same step sizes, and its
    % absolute deviation from the actual cost values.
    model = polyval([df0 f0], steps);
    err = abs(model - costs);

    % Plot the model error together with a dashed reference line of
    % slope 2 in log-log scale.
    loglog(steps, err);
    title(sprintf(['Directional derivative check.\nThe slope of the '...
                   'continuous line should match that of the dashed\n'...
                   '(reference) line over at least a few orders of '...
                   'magnitude for h.']));
    xlabel('h');
    ylabel('Approximation error');

    line('xdata', [1e-8 1e0], 'ydata', [1e-8 1e8], ...
         'color', 'k', 'LineStyle', '--', ...
         'YLimInclude', 'off', 'XLimInclude', 'off');

    % The first-order model is deemed exact when every error is
    % (numerically) zero.
    isModelExact = all( err < 1e-12 );

    if isModelExact
        % Fit a line through all points, using log scale only in h.
        fit_idx = 1:numel(steps);
        fit_coeffs = polyfit(log10(steps), err, 1);
        % Set mean error in log scale for plot.
        fit_coeffs(end) = log10(fit_coeffs(end));
        % Use a title that describes this special case.
        title(sprintf(...
              ['Directional derivative check.\n'...
               'It seems the linear model is exact:\n'...
               'Model error is numerically zero for all h.']));
    else
        % In a numerically reasonable neighborhood, the error should
        % decrease as the square of the stepsize, i.e., in loglog scale,
        % the error should have a slope of 2. Look for a piece of the
        % curve which is mostly linear and fit a line to it.
        window_len = 10;
        [fit_idx, fit_coeffs] = identify_linear_piece( ...
                             log10(steps), log10(err), window_len);
    end

    % Overlay the fitted line on the selected range.
    hold all;
    loglog(steps(fit_idx), ...
           10.^polyval(fit_coeffs, log10(steps(fit_idx))), 'LineWidth', 3);
    hold off;

    % Report the outcome in the command window.
    if isModelExact
        fprintf(['The linear model appears to be exact ' ...
                 '(within numerical precision),\n'...
                 'hence the slope computation is irrelevant.\n']);
    else
        fprintf('The slope should be 2. It appears to be: %g.\n', fit_coeffs(1));
        fprintf(['If it is far from 2, then directional derivatives ' ...
                 'might be erroneous.\n']);
    end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/tools/checkgradient.m
================================================
function checkgradient(problem, x, d)
% Checks the consistency of the cost function and the gradient.
%
% function checkgradient(problem)
% function checkgradient(problem, x)
% function checkgradient(problem, x, d)
%
% checkgradient performs a numerical test to check that the gradient
% defined in the problem structure agrees up to first order with the cost
% function at some point x, along some direction d. The test is based on a
% truncated Taylor series (see online Manopt documentation).
%
% It is also tested that the gradient is indeed a tangent vector, provided
% the manifold structure problem.M has a 'tangent' field.
% 
% Both x and d are optional and will be sampled at random if omitted (or
% empty). Supplying d without x is an error, since d must be tangent at x.
%
% See also: checkdiff checkhessian

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Dec. 30, 2012.
% Contributors: 
% Change log: 
%
%   April 3, 2015 (NB):
%       Works with the new StoreDB class system.
%
%   Nov. 1, 2016 (NB):
%       Now calls checkdiff with force_gradient = true, instead of doing an
%       rmfield of problem.diff. This became necessary after getGradient
%       was updated to know how to compute the gradient from directional
%       derivatives.

    
    % Verify that the problem description is sufficient.
    if ~canGetCost(problem)
        % The call to canGetPartialGradient will readily issue a warning if
        % problem.ncostterms is not defined even though it is expected.
        if ~canGetPartialGradient(problem)
            error('getCost:checkgradient', 'It seems no cost was provided.');
        else
            error('getCost:stochastic', ['It seems no cost was provided.\n' ...
                  'If you intend to use a stochastic solver, you still\n' ...
                  'need to define problem.cost to use checkgradient.']);
        end
    end
    % A missing gradient only triggers a warning: execution continues.
    if ~canGetGradient(problem)
        warning('manopt:checkgradient:nograd', ...
                'It seems no gradient was provided.');
    end
        
    % An input counts as provided only if it was passed and is non-empty.
    x_isprovided = exist('x', 'var') && ~isempty(x);
    d_isprovided = exist('d', 'var') && ~isempty(d);
    
    if ~x_isprovided && d_isprovided
        error('If d is provided, x must be too, since d is tangent at x.');
    end
    
    % If x and / or d are not specified, pick them at random.
    if ~x_isprovided
        x = problem.M.rand();
    end
    if ~d_isprovided
        d = problem.M.randvec(x);
    end

    %% Check that the gradient yields a first order model of the cost.
    
    % Call checkdiff with force_gradient set to true, to force that
    % function to make a gradient call.
    checkdiff(problem, x, d, true);
    % Replace the title, axis labels set by checkdiff with ones specific
    % to the gradient check.
    title(sprintf(['Gradient check.\nThe slope of the continuous line ' ...
                   'should match that of the dashed\n(reference) line ' ...
                   'over at least a few orders of magnitude for h.']));
    xlabel('h');
    ylabel('Approximation error');
    
    %% Try to check that the gradient is a tangent vector.
    if isfield(problem.M, 'tangent')
        % Compare the gradient with the output of M.tangent applied to it:
        % the norm of their difference is reported as the residual.
        storedb = StoreDB();
        key = storedb.getNewKey();
        grad = getGradient(problem, x, storedb, key);
        pgrad = problem.M.tangent(x, grad);
        residual = problem.M.lincomb(x, 1, grad, -1, pgrad);
        err = problem.M.norm(x, residual);
        fprintf('The residual should be 0, or very close. Residual: %g.\n', err);
        fprintf('If it is far from 0, then the gradient is not in the tangent space.\n');
    else
        % The message ends with a newline so that whatever is printed
        % next (including the command prompt) starts on a fresh line.
        fprintf(['Unfortunately, Manopt was unable to verify that the '...
                 'gradient is indeed a tangent vector.\nPlease verify ' ...
                 'this manually or implement the ''tangent'' function ' ...
                 'in your manifold structure.\n']);
    end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/tools/checkhessian.m
================================================
function checkhessian(problem, x, d)
% Checks the consistency of the cost function and the Hessian.
%
% function checkhessian(problem)
% function checkhessian(problem, x)
% function checkhessian(problem, x, d)
%
% checkhessian performs a numerical test to check that the directional
% derivatives and Hessian defined in the problem structure agree up to
% second order with the cost function at some point x, along some direction
% d. The test is based on a truncated Taylor series (see online Manopt
% documentation).
% 
% It is also tested that the result of applying the Hessian along that
% direction is indeed a tangent vector (provided problem.M has a 'tangent'
% field), and that the Hessian operator is symmetric w.r.t. the Riemannian
% metric.
% 
% Both x and d are optional and will be sampled at random if omitted (or
% empty). Supplying d without x is an error, since d must be tangent at x.
%
% See also: checkdiff checkgradient checkretraction

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Dec. 30, 2012.
% Contributors: 
% Change log: 
%
%   March 26, 2017 (JB):
%       Detects if the approximated quadratic model is exact
%       and provides the user with the corresponding feedback.
% 
%   April 3, 2015 (NB):
%       Works with the new StoreDB class system.
%
%   Nov. 1, 2016 (NB):
%       Issues a call to getGradient rather than getDirectionalDerivative.

        
    % Verify that the problem description is sufficient. Only a missing
    % cost is fatal: a missing gradient or Hessian triggers a warning.
    if ~canGetCost(problem)
        error('It seems no cost was provided.');
    end
    if ~canGetGradient(problem)
        warning('manopt:checkhessian:nograd', ...
                'It seems no gradient was provided.');
    end
    if ~canGetHessian(problem)
        warning('manopt:checkhessian:nohess', ...
                'It seems no Hessian was provided.');
    end
    
    % An input counts as provided only if it was passed and is non-empty.
    x_isprovided = exist('x', 'var') && ~isempty(x);
    d_isprovided = exist('d', 'var') && ~isempty(d);
    
    if ~x_isprovided && d_isprovided
        error('If d is provided, x must be too, since d is tangent at x.');
    end
    
    % If x and / or d are not specified, pick them at random.
    if ~x_isprovided
        x = problem.M.rand();
    end
    if ~d_isprovided
        d = problem.M.randvec(x);
    end
    
    %% Check that the directional derivative and the Hessian at x along d
    %% yield a second order model of the cost function.
    
    % Compute the value f0 of the cost at x, the directional derivative
    % df0 at x along d, and the Hessian along [d, d].
    storedb = StoreDB();
    xkey = storedb.getNewKey();
    f0 = getCost(problem, x, storedb, xkey);
    df0 = problem.M.inner(x, d, getGradient(problem, x, storedb, xkey));
    d2f0 = problem.M.inner(x, d, getHessian(problem, x, d, storedb, xkey));
    
    % Compute the value of f at points on the geodesic (or approximation
    % of it) originating from x, along direction d, for stepsizes in a
    % large range given by h.
    h = logspace(-8, 0, 51);
    value = zeros(size(h));
    for i = 1 : length(h)
        y = problem.M.exp(x, d, h(i));
        ykey = storedb.getNewKey();
        value(i) = getCost(problem, y, storedb, ykey);
    end
    
    % Compute the quadratic approximation of the cost function using f0,
    % df0 and d2f0 at the same points.
    model = polyval([.5*d2f0 df0 f0], h);
    
    % Compute the approximation error
    err = abs(model - value);
    
    % And plot it.
    loglog(h, err);
    title(sprintf(['Hessian check.\nThe slope of the continuous line ' ...
                   'should match that of the dashed\n(reference) line ' ...
                   'over at least a few orders of magnitude for h.']));
    xlabel('h');
    ylabel('Approximation error');
    
    % Dashed reference line of slope 3 in log-log scale.
    line('xdata', [1e-8 1e0], 'ydata', [1e-16 1e8], ...
         'color', 'k', 'LineStyle', '--', ...
         'YLimInclude', 'off', 'XLimInclude', 'off');
    
    
    if ~all( err < 1e-12 )
        % In a numerically reasonable neighborhood, the error should
        % decrease as the cube of the stepsize, i.e., in loglog scale, the
        % error should have a slope of 3.
        isModelExact = false;
        window_len = 10;
        [range, poly] = identify_linear_piece(log10(h), log10(err), window_len);
    else
        % The 2nd order model is exact: all errors are (numerically) zero
        % Fit line from all points, use log scale only in h.
        isModelExact = true;
        range = 1:numel(h);
        poly = polyfit(log10(h), err, 1);
        % Set mean error in log scale for plot
        poly(end) = log10(poly(end));
        % Change title to something more descriptive for this special case.
        title(sprintf(...
              ['Hessian check.\n'...
               'It seems the quadratic model is exact:\n'...
               'Model error is numerically zero for all h.']));
    end
    % Overlay the fitted line on the selected range.
    hold all;
    loglog(h(range), 10.^polyval(poly, log10(h(range))), 'LineWidth', 3);
    hold off;
    
    if ~isModelExact
        fprintf('The slope should be 3. It appears to be: %g.\n', poly(1));
        fprintf(['If it is far from 3, then directional derivatives or ' ...
                 'the Hessian might be erroneous.\n']);
        fprintf(['Note: if the exponential map is only approximate, and it '...
                 'is not a second-order approximation,\nthen it is normal ' ...
                 'for the slope test to reach 2 instead of 3. Check the ' ...
                 'factory for this.\n' ...
                 'If tested at a critical point, then even for a first-order '...
                 'retraction the slope test should yield 3.\n']);
    else
        fprintf(['The quadratic model appears to be exact ' ...
                 '(within numerical precision),\n'...
                 'hence the slope computation is irrelevant.\n']);
    end

    
    %% Check that the Hessian at x along direction d is a tangent vector.
    if isfield(problem.M, 'tangent')
        % Compare H[d] with the output of M.tangent applied to it: the
        % norm of their difference is reported as the residual.
        hess = getHessian(problem, x, d, storedb, xkey);
        phess = problem.M.tangent(x, hess);
        residual = problem.M.lincomb(x, 1, hess, -1, phess);
        err = problem.M.norm(x, residual);
        fprintf('The residual should be zero, or very close. ');
        fprintf('Residual: %g.\n', err);
        fprintf(['If it is far from 0, then the Hessian is not in the ' ...
                 'tangent plane.\n']);
    else
        % The message ends with a newline: the symmetry report below is
        % always printed next and must start on its own line.
        fprintf(['Unfortunately, Manopt was unable to verify that the '...
                 'Hessian is indeed a tangent vector.\nPlease verify ' ...
                 'this manually.\n']);
    end    
    
    %% Check that the Hessian at x is symmetric.
    % Draw two random tangent vectors d1, d2 at x and compare
    % <d1, H[d2]> with <H[d1], d2>.
    d1 = problem.M.randvec(x);
    d2 = problem.M.randvec(x);
    h1 = getHessian(problem, x, d1, storedb, xkey);
    h2 = getHessian(problem, x, d2, storedb, xkey);
    v1 = problem.M.inner(x, d1, h2);
    v2 = problem.M.inner(x, h1, d2);
    value = v1-v2;
    fprintf(['<d1, H[d2]> - <H[d1], d2> should be zero, or very close.' ...
             '\n\tValue: %g - %g = %g.\n'], v1, v2, value);
    fprintf('If it is far from 0, then the Hessian is not symmetric.\n');
    
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/tools/checkretraction.m
================================================
function checkretraction(M, x, v)
% Check the order of agreement of a retraction with an exponential.
% 
% function checkretraction(M)
% function checkretraction(M, x)
% function checkretraction(M, x, v)
%
% checkretraction runs a numerical test to estimate the order of agreement
% between the retraction M.retr and the exponential map M.exp of a Manopt
% manifold structure M. The test takes place at the point x and along the
% tangent vector v at x. If x is omitted or empty, both x and v are picked
% at random; if only v is omitted or empty, v is picked at random at x.
%
% The distance between Exp(x, v, t) and Retr(x, v, t) is plotted against t
% in log-log scale, and the slope of a linear piece of that curve is
% printed.
%
% See also: checkdiff checkgradient checkhessian

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Oct. 21, 2016.
% Contributors: 
% Change log: 

    % Without a point, any supplied tangent vector is discarded and a
    % fresh (x, v) pair is generated.
    if nargin < 2 || isempty(x)
        x = M.rand();
        v = M.randvec(x);
    end

    if ~exist('v', 'var') || isempty(v)
        v = M.randvec(x);
    end

    % Measure the gap between the exponential and the retraction for step
    % sizes spread over a wide logarithmic range.
    steps = logspace(-12, 0, 251);
    gaps = zeros(size(steps));
    for idx = 1 : numel(steps)
        gaps(idx) = M.dist(M.exp(x, v, steps(idx)), ...
                           M.retr(x, v, steps(idx)));
    end

    % Display the gap as a function of the step size, in log-log scale.
    loglog(steps, gaps);

    % Reference lines. A slope of 3 confirms a second-order retraction;
    % a slope of only 2 indicates a first-order retraction; a slope below
    % 2 means this is not a retraction.
    no_limits = {'YLimInclude', 'off', 'XLimInclude', 'off'};
    % Slope 3
    line('xdata', [1e-12 1e0], 'ydata', [1e-30 1e6], ...
         'color', 'k', 'LineStyle', '--', no_limits{:});
    % Slope 2
    line('xdata', [1e-14 1e0], 'ydata', [1e-20 1e8], ...
         'color', 'k', 'LineStyle', ':', no_limits{:});

    % Estimate the slope of the curve in log-log scale, by identifying a
    % piece of it which is mostly linear, and overlay the fitted line.
    window_len = 10;
    [fit_idx, fit_coeffs] = identify_linear_piece( ...
                             log10(steps), log10(gaps), window_len);
    hold all;
    loglog(steps(fit_idx), ...
           10.^polyval(fit_coeffs, log10(steps(fit_idx))), 'LineWidth', 3);
    hold off;

    % Annotate the figure.
    title(sprintf('Retraction check.\nA slope of 2 is required, 3 is desired.'));
    xlabel('Step size multiplier t');
    ylabel('Distance between Exp(x, v, t) and Retr(x, v, t)');

    % Print the report in the command window.
    fprintf(['Check agreement between M.exp and M.retr. Please check the\n' ...
             'factory file of M to ensure M.exp is a proper exponential.\n' ...
             'The slope must be at least 2 to have a proper retraction.\n' ...
             'For the retraction to be second order, the slope should be 3.\n' ...
             'It appears the slope is: %g.\n'], fit_coeffs(1));

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/tools/criticalpointfinder.m
================================================
function problem_critpt = criticalpointfinder(problem)
% Builds a Manopt problem whose global optima are critical points of another.
%
% function problem_critpt = criticalpointfinder(problem)
%
% Input:  a Manopt problem structure 'problem', with cost function f defined
%         on the manifold problem.M.
% Output: a problem structure 'problem_critpt' on the same manifold, whose
%         cost function g is
%
%   g(x) = (1/2)*norm(grad f(x))^2,
%
% where grad f is the (Riemannian) gradient of f and
% norm(.) = problem.M.norm(x, .) is the Riemannian norm on the tangent space
% at x. The global optima of g are exactly the critical points of f, which
% makes this tool handy for empirical studies of saddle points.
%
% With Hess f denoting the (Riemannian) Hessian of f, the gradient of g is
%
%   grad g(x) = Hess f(x)[grad f(x)].
%
% When the Hessian of f is not available in 'problem', Manopt approximates
% it automatically, so that the gradient of g is then only approximate.
% When the Hessian of f is available, the returned problem additionally
% carries an approximate Hessian for g:
%
%   approxhess g(x)[u] = Hess f(x)[ Hess f(x)[u] ].
%
% That approximation is exact at critical points of f, which is enough to
% obtain superlinear local convergence to such points with, for example,
% the trustregions algorithm.
%
% The returned structure can be handed to any Manopt solver. Passing an
% initial point to the solver lets you target a critical point in a given
% neighborhood of the search space.
%
%
% Usage example:
%
% The problem below has dominant eigenvectors of a matrix A as optima and
% all eigenvectors of A as critical points; the present tool is then used
% to compute critical points:
%
% n = 100; A = randn(n); A = .5*(A+A');
% problem.M = spherefactory(n);
% problem.cost  = @(x) -x'*(A*x);
% problem.egrad = @(x) -2*A*x;
% problem.ehess = @(x, xdot) -2*A*xdot;
% problem_critpt = criticalpointfinder(problem);
% opts.tolcost = .5*(1e-5)^2; % aim for a gradient smaller than 1e-5
% [x, fx] = trustregions(problem_critpt, [], opts); % random initial guess
% fprintf('Norm of the gradient at x: %g\n', sqrt(2*fx));
% fprintf('This is small if x is close to being an eigenvector: %g\n',...
%         norm((x'*A*x)*x - A*x));
% % The two displayed numbers are equal up to a factor 2.
%
%
% See also: trustregions

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Jan. 25, 2017.
% Contributors: 
% Change log: 

% TODO: Find a safe way to use Manopt's caching with this tool. Forwarding
%       storedb and key to the cost/gradient and approximate Hessian below
%       is problematic because that storedb belongs to problem_critpt, not
%       to problem, which could cause bugs that are very hard to catch.
%       Caching is therefore not used at all here, at the price of possibly
%       longer running times. A local storedb tied to problem (using only
%       the key) does not seem viable either, since there is no clear way
%       to reset it each time a solver is called on problem_critpt.
%       -- Jan. 26, 2017 (NB)

    % Same search space as the original problem; cost and gradient of g
    % are produced together by one nested function.
    problem_critpt.M = problem.M;
    problem_critpt.costgrad = @gradnorm_costgrad;
    
    % Only supply our own approximate Hessian when the true Hessian of f
    % is available: without it, there is no reason to expect this one to
    % beat the default approximation Manopt would set up.
    if canGetHessian(problem)
        problem_critpt.approxhess = @squared_hessian;
    end
    
    % Cost g(pt) = (1/2)*norm(grad f(pt))^2 and its gradient
    % Hess f(pt)[grad f(pt)].
    function [val, grad_val] = gradnorm_costgrad(pt)
        
        grad_f = getGradient(problem, pt);
        grad_val = getHessian(problem, pt, grad_f);
        val = .5*problem.M.norm(pt, grad_f)^2;
        
    end
    
    % Applies the Hessian of f twice. The true Hessian of g has an extra
    % third-order derivative term (inaccessible here); that term is not
    % needed at critical points of f, where this operator is exact.
    function out = squared_hessian(pt, dir)
        
        once = getHessian(problem, pt, dir);
        out  = getHessian(problem, pt, once);
        
    end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/tools/dexpm.m
================================================
function D = dexpm(X, H)
% Directional (Frechet) derivative of the matrix exponential.
%
% function D = dexpm(X, H)
%
% For square matrices X and H, returns the derivative of expm at X in the
% direction H, that is,
%
%   D = lim_(t -> 0) (expm(X + tH) - expm(X)) / t.
%
% Note: with respect to the inner product inner = @(A, B) real(trace(A'*B)),
% the adjoint of dexpm(X, .) is dexpm(X', .). This is often useful when
% deriving gradients of matrix functions that involve expm(X).
% 
% See also: dfunm dlogm dsqrtm

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, July 3, 2015.
% Contributors:
% Change log:
    
    % All the work is delegated to the generic routine dfunm.
    matfun = @expm;
    D = dfunm(matfun, X, H);
    
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/tools/dfunm.m
================================================
function D = dfunm(funm, X, H)
% Directional (Frechet) derivative of a matrix function.
%
% function D = dfunm(funm, X, H)
%
% funm is a handle to a matrix function (such as @logm, @expm, ...), and
% X, H are square matrices of the same size. The output is the derivative
% of funm at X in the direction H:
%
%   D = lim_(t -> 0) (funm(X + tH) - funm(X)) / t.
%
% The computation relies on a block-matrix trick which appears in:
% 
% "Computing the Frechet derivative of the matrix exponential, with an
% application to condition number estimation",
% Awad H. Al-Mohy and Nicholas J. Higham, 2009.
% http://eprints.ma.man.ac.uk/1218/01/covered/MIMS_ep2008_26.pdf
%
% The code is short but not necessarily the most efficient: funm is
% evaluated on a matrix with four times as many entries as X, and that
% matrix may have lost useful structure of X (such as symmetry).
% 
% See also: dlogm dexpm dsqrtm

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, July 3, 2015.
% Contributors:
% Change log:
    
    n = size(X, 1);
    
    % Input validation: two-dimensional, square, matching sizes.
    assert(ndims(X) == 2,               'X and H must be square matrices.');
    assert(ndims(H) == 2,               'X and H must be square matrices.');
    assert(n == size(X, 2),             'X and H must be square matrices.');
    assert(isequal(size(X), size(H)),   'X and H must have the same size.');
    
    % Evaluate funm on the block upper triangular matrix [X, H ; 0, X]:
    % the derivative is read off from the top-right n-by-n block.
    augmented = [X, H ; zeros(n), X];
    F = funm(augmented);
    D = F(1:n, (n+1):end);
    
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/tools/diagsum.m
================================================
function [tracedtensor] = diagsum(tensor1, d1, d2)
% C = DIAGSUM(A, d1, d2) Performs the trace
% C(i[1],...,i[d1-1],i[d1+1],...,i[d2-1],i[d2+1],...i[n]) =
%              A(i[1],...,i[d1-1],k,i[d1+1],...,i[d2-1],k,i[d2+1],...,i[n])
% (Sum on k).
%
% C = DIAGSUM(A, d1, d2) traces A along the diagonal formed by dimensions d1
% and d2. If the lengths of these dimensions are not equal, DIAGSUM traces
% until the end of the shortest of dimensions d1 and d2 is reached. This is
% an analogue of the built in TRACE function.
%
% Inputs:
%   tensor1  numeric array to be traced.
%   d1, d2   indices of the two dimensions forming the traced diagonal.
%            If d1 == d2, the array is simply summed along that dimension.
%
% Output:
%   tracedtensor  the traced array, with the traced dimensions removed.
%
% Note: for arrays with more than three dimensions, the function OUTER is
% called; it is not defined in this file and must be on the path.
%
% Wynton Moore, January 2006


dim1=size(tensor1);
numdims=length(dim1);


%check inputs
if d1==d2
    tracedtensor=squeeze(sum(tensor1,d1));
elseif numdims==2
    % TRACE only accepts square matrices. To honor the documented behavior
    % for dimensions of unequal length (trace until the end of the shortest
    % one), trace the leading square block. For square input this is the
    % whole matrix, so the result is unchanged in that case.
    shortest=min(dim1);
    tracedtensor=trace(tensor1(1:shortest, 1:shortest));
elseif dim1(d1)==1 && dim1(d2)==1
    tracedtensor=squeeze(tensor1);
else


    %determine correct permutation: the traced dimensions d1 and d2 are
    %swapped with the last two dimensions (unless they are already there).
    swapd1=d1;swapd2=d2;
    
    if d1~=numdims-1 && d1~=numdims && d2~=numdims-1
        swapd1=numdims-1;
    elseif d1~=numdims-1 && d1~=numdims && d2~=numdims
        swapd1=numdims;
    end
    if d2~=numdims-1 && d2~=numdims && swapd1~=numdims-1
        swapd2=numdims-1;
    elseif d2~=numdims-1 && d2~=numdims && swapd1~=numdims
        swapd2=numdims;
    end
    
    
    %prepare for construction of selector tensor: permmatrix is the
    %identity with the columns (d1, swapd1) and (d2, swapd2) exchanged.
    temp1=eye(numdims);
    permmatrix=temp1;
    permmatrix(:,d1)=temp1(:,swapd1);
    permmatrix(:,swapd1)=temp1(:,d1);
    permmatrix(:,d2)=temp1(:,swapd2);
    permmatrix(:,swapd2)=temp1(:,d2);

    selectordim=dim1*permmatrix;
    permvector=(1:numdims)*permmatrix;


    %construct selector tensor: an array of the same size as tensor1,
    %built from a (possibly rectangular) identity over the two traced
    %dimensions and replicated over the other dimensions.
    if numdims>3
        selector = ipermute(outer(ones(selectordim(1:numdims-2)), ...
                                  eye(selectordim(numdims-1), ...
                                      selectordim(numdims)), ...
                                  0), ...
                            permvector);
    else
        %when numdims=3, the above line gives ndims(selector)=4. This
        %routine avoids that error. When used with GMDMP, numdims will be
        %at least 4, so this routine will be unnecessary.
        selector2=eye(selectordim(numdims-1), selectordim(numdims));
        selector=zeros(selectordim);
        for j=1:selectordim(1)
            selector(j, :, :)=selector2;
        end
        selector=ipermute(selector, permvector);
    end
    
    
    %perform trace, discard resulting singleton dimensions
    tracedtensor=sum(sum(tensor1.*selector, d1), d2);
    tracedtensor=squeeze(tracedtensor);
    
    
end


%correction for aberration in squeeze function:
%size(squeeze(rand(1,1,2)))=[2 1]
%nontracedimensions lists the sizes of the dimensions that were not traced.
nontracedimensions=dim1;
nontracedimensions(d1)=[];
if d2>d1
    nontracedimensions(d2-1)=[];
else
    nontracedimensions(d2)=[];
end
tracedsize=size(tracedtensor);
% Next line modified, Nicolas Boumal, April 30, 2012, such that
% diagsum(A, 1, 2) would compute the trace of A, a 2D matrix.
if length(tracedsize)==2 && tracedsize(2)==1 && ...
   (isempty(nontracedimensions) || tracedsize(1)~=nontracedimensions(1))

    tracedtensor=tracedtensor.';
    
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/tools/dlogm.m
================================================
function D = dlogm(X, H)
% Directional (Frechet) derivative of the matrix logarithm.
%
% function D = dlogm(X, H)
%
% For square matrices X and H, returns the derivative of logm at X in the
% direction H, that is,
%
%   D = lim_(t -> 0) (logm(X + tH) - logm(X)) / t.
% 
% See also: dfunm dexpm dsqrtm

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, July 3, 2015.
% Contributors:
% Change log:
    
    % All the work is delegated to the generic routine dfunm.
    matfun = @logm;
    D = dfunm(matfun, X, H);
    
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/tools/dsqrtm.m
================================================
function D = dsqrtm(X, H)
% Directional (Frechet) derivative of the matrix square root.
%
% function D = dsqrtm(X, H)
%
% For square matrices X and H, returns the derivative of sqrtm at X in the
% direction H, that is,
%
%   D = lim_(t -> 0) (sqrtm(X + tH) - sqrtm(X)) / t.
% 
% See also: dfunm dlogm dexpm

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, July 3, 2015.
% Contributors:
% Change log:
    
    % All the work is delegated to the generic routine dfunm.
    matfun = @sqrtm;
    D = dfunm(matfun, X, H);
    
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/tools/grammatrix.m
================================================
function G = grammatrix(M, x, vectors)
% Gram matrix of a collection of tangent vectors in the Manopt framework.
%
% function G = grammatrix(M, x, vectors)
%
% Inputs:
%   M        Manopt manifold structure obtained from a factory.
%   x        point on the manifold M.
%   vectors  cell containing n tangent vectors at x.
%
% Output:
%   G        n-by-n matrix whose entry (i, j) is the inner product
%            M.inner(x, vectors{i}, vectors{j}), that is, the inner product
%            for the metric on the tangent space to M at x. For a proper
%            inner product, G is symmetric positive semidefinite.
%
% See also: orthogonalize tangentorthobasis

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, April 28, 2016.
% Contributors: 
% Change log: 


    n = numel(vectors);
    G = zeros(n);
    
    % Only the upper triangle (diagonal included) is computed explicitly;
    % the strict lower triangle is filled by conjugate symmetry.
    for row = 1 : n
        
        u = vectors{row};
        
        for col = row : n
            
            entry = M.inner(x, u, vectors{col});
            G(row, col) = entry;
            
            if col > row
                % Manopt is designed to work with real inner products,
                % but taking the conjugate here costs nothing and also
                % accommodates complex inner products.
                G(col, row) = entry';
            end
            
        end
        
    end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/tools/hashmd5.m
================================================
function h = hashmd5(inp)
% Computes the MD5 hash of input data.
%
% function h = hashmd5(inp)
% 
% Returns a string containing the MD5 hash of the input variable, written
% with hexadecimal digits. The input variable may be of any class that can
% be typecast to uint8 format, which is fairly non-restrictive. Strings and
% logicals are converted with uint8() instead.
%
% In Matlab, the hash is computed with Java's MessageDigest. In Octave, the
% built-in function hash is used if it exists; older versions of Octave,
% which do not have it, fall back on md5sum.

% This file is part of Manopt: www.manopt.org.
% This code is a stripped version of more general hashing code by
% Michael Kleder, Nov 2005.
% Change log: 
% 
%   Aug. 8, 2013 (NB):
%       Made x a static (persistent) variable, in the hope it will speed
%       it up. Furthermore, the function is now Octave compatible.

    % Nonzero in Octave, zero in Matlab.
    is_octave = exist('OCTAVE_VERSION', 'builtin');
        
    % The Java digest object is created once and reused across calls.
    persistent x;
    if isempty(x) && ~is_octave
        x = java.security.MessageDigest.getInstance('MD5');
    end

    inp=inp(:);
    % Convert strings and logicals into uint8 format
    if ischar(inp) || islogical(inp)
        inp=uint8(inp);
    else % Convert everything else into uint8 format without loss of data
        inp=typecast(inp,'uint8');
    end
    
    % Create hash
    if ~is_octave
        x.update(inp);
        h = typecast(x.digest, 'uint8');
        % dec2hex returns one row per byte; after transposition, each
        % column holds the hexadecimal digits of one byte.
        h = dec2hex(h)';
        % Remote possibility: all hash bytes < 16, in which case dec2hex
        % produces a single digit per byte, so pad with a leading zero:
        if(size(h,1))==1
            h = [repmat('0',[1 size(h,2)]);h];
        end
        h = lower(h(:)');
    else
        % md5sum was deprecated and later removed from Octave in favor of
        % hash. Prefer hash when it exists, and keep md5sum for older
        % versions of Octave.
        if exist('hash') %#ok<EXIST>
            h = hash('md5', char(inp'));
        else
            h = md5sum(char(inp'), true);
        end
    end
    
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/tools/hessianextreme.m
================================================
function [y, lambda, info] = hessianextreme(problem, x, side, y0, options, storedb, key)
% Compute an extreme eigenvector / eigenvalue of the Hessian of a problem.
%
% [u, lambda, info] = hessianextreme(problem, x)
% [u, lambda, info] = hessianextreme(problem, x, side)
% [u, lambda, info] = hessianextreme(problem, x, side, u0)
% [u, lambda, info] = hessianextreme(problem, x, side, u0, options)
% [u, lambda, info] = hessianextreme(problem, x, side, u0, options, storedb)
% [u, lambda, info] = hessianextreme(problem, x, side, u0, options, storedb, key)
% 
% (For side, u0 and options, pass [] to omit any.)
%
% problem is a Manopt problem structure and x is a point on the manifold
% problem.M. The output u is a unit-norm tangent vector at x which
% minimizes or maximizes the quadratic form of the Hessian:
%
%    minimize or maximize <u, Hess f(x)[u]> such that <u, u> = 1,
%
% with <.,.> the Riemannian metric on the tangent space at x.
%
% side is either 'min' (default) or 'max' and selects minimization or
% maximization. lambda is the value of the quadratic form attained when the
% solver stopped: it estimates the smallest or largest eigenvalue of the
% Hessian. It is a real number since the Hessian is a symmetric operator.
%
% u0, if specified, should be a unit-norm tangent vector at x: it is the
% initial guess for the solver. Pass [] to omit.
%
% options, if provided, is passed along to manoptsolve. In particular,
% options.solver selects the solver used for the above problem (see the
% help of manoptsolve); the other options reach the chosen solver as well.
% By default, trustregions is used with verbosity 0. Pass [] to omit.
%
% Often, one only needs a vector u for which the quadratic form is
% negative, if such a vector exists. To that end, use side = 'min' together
% with the stopping criterion options.tolcost = -1e-10; (for example).
% The solver then returns as soon as the quadratic cost drops below that
% value (or sooner, if another stopping criterion triggers first.)
%
% storedb is a StoreDB object, key is the StoreDB key to point x.
%
% info is the info struct-array returned by the solver.
%
% See also: hessianspectrum manoptsolve tangentspherefactory

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Aug. 13, 2014.
% Contributors: 
% Change log: 
%
%   April 3, 2015 (NB):
%       Works with the new StoreDB class system.
%
%   May 7, 2015 (NB):
%       Default solver options: verbosity = 0 and defaults to trustregions.
%
%   Nov 27, 2015 (NB):
%       The function now also returns the info struct-array.

    
    % Defaults for omitted (or empty) inputs: minimize, no initial guess,
    % no user options.
    if nargin < 3 || isempty(side)
        side = 'min';
    end
    if nargin < 4
        y0 = [];
    end
    if nargin < 5 || isempty(options)
        options = struct();
    end

    % User-specified options take precedence over these local defaults.
    solver_defaults.verbosity = 0;
    solver_defaults.solver = @trustregions;
    options = mergeOptions(solver_defaults, options);

    % The key may be omitted, and so may storedb.
    if nargin < 7
        if nargin < 6
            storedb = StoreDB();
        end
        key = storedb.getNewKey();
    end
    
    % Manopt minimizes: maximization is obtained by flipping the sign of
    % the cost, whereas 'min' requires no sign change.
    switch lower(side)
        case 'min'
            sgn = +1;
        case 'max'
            sgn = -1;
        otherwise
            error('The side should be either ''min'' or ''max''.');
    end

    % M is the original manifold and x is a point on M. N is the unit
    % sphere in the tangent space to M at x: a point y on N is a unit-norm
    % tangent vector to M at x, and a tangent vector to N at y is a tangent
    % vector to M at x which is orthogonal to y for the Riemannian metric.
    % (One could generalize to Stiefel or Grassmann manifolds on the
    % tangent space, but this requires handling collections of tangent
    % vectors, which is more involved from a programming point of view.)
    M = problem.M;
    N = tangentspherefactory(M, x);
    
    % It is usually a good idea to force a gradient computation to make
    % sure precomputable things are precomputed.
    if canGetGradient(problem)
        [unused1, unused2] = getCostGrad(problem, x, storedb, key); %#ok
    end
    
    % The Hessian of the original problem at x, as an operator.
    applyhess = @(u) getHessian(problem, x, u, storedb, key);
    
    % Quadratic optimization problem on the abstract sphere N, with cost,
    % gradient and Hessian implemented by the nested functions below.
    sphere_problem.M = N;
    sphere_problem.cost = @quadform_cost;
    sphere_problem.grad = @quadform_grad;
    sphere_problem.hess = @quadform_hess;
    
    % Solve, and undo the sign change to recover the eigenvalue estimate.
    [y, lambda, info] = manoptsolve(sphere_problem, y0, options);
    lambda = sgn*lambda;


    % Cost: the (signed) quadratic form of the Hessian at u.
    function [val, store] = quadform_cost(u, store)
        store = cache_hessian_product(u, store);
        val = sgn*store.f;
    end

    % Gradient: a linear combination of Hess[u] and u.
    function [g, store] = quadform_grad(u, store)
        store = cache_hessian_product(u, store);
        g = N.lincomb(u, sgn*2, store.Hy, sgn*(-2)*store.f, u);
    end

    % Hessian along udot: a linear combination of Hess[udot] and udot,
    % projected to the tangent space of N at u.
    function [h, store] = quadform_hess(u, udot, store)
        store = cache_hessian_product(u, store);
        Hudot = applyhess(udot);
        h = N.lincomb(u, sgn*2, Hudot, sgn*(-2)*store.f, udot);
        h = N.proj(u, h);
    end

    % Computes Hess[u] and <u, Hess[u]> only once per store, so that cost,
    % gradient and Hessian do not duplicate Hessian computations.
    function store = cache_hessian_product(u, store)
        if ~isfield(store, 'ready')
            Hu = applyhess(u);
            store.f = M.inner(x, u, Hu);
            store.Hy = Hu;
            store.ready = true;
        end
    end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/tools/hessianmatrix.m
================================================
function [H, basis] = hessianmatrix(problem, x, basis)
% Matrix representation of the Hessian at x in a basis of tangent vectors.
%
% [H, basis] = hessianmatrix(problem, x)
% [H, basis] = hessianmatrix(problem, x, basis)
%
% Inputs:
%   problem  Manopt problem structure with a manifold and a cost function.
%   x        point on the manifold problem.M.
%   basis    (optional) orthonormal basis for the tangent space to the
%            manifold at x, as a cell of tangent vectors. If it is omitted
%            or empty, a random one is generated. If the basis only spans
%            a subspace of the tangent space at x, then the returned
%            matrix represents the Hessian restricted to that subspace.
%
% Outputs:
%   H        n-by-n symmetric matrix (n is the number of vectors in the
%            basis) such that H(i, j) is the inner product between basis{i}
%            and Hess(basis{j}), with respect to the metric on the tangent
%            space to problem.M at x. Here, Hess(basis{j}) is the vector
%            obtained by applying the Hessian at x to basis{j}.
%   basis    the basis that was used.
%
% Computing the Hessian matrix quickly becomes expensive, hence it is
% usually not useful for optimization. This tool is meant for exploration
% and debugging more than for algorithmic use in solvers. To access the
% spectrum of the Hessian, it may be more practical to call hessianextreme
% or hessianspectrum. This should coincide with eig(H).
%
%
% Example of equivalence:
%
%     Hu = getHessian(problem, x, u)
%
% is equivalent to (but much faster than):
%
%     B = tangentorthobasis(M, x);
%     H = hessianmatrix(problem, x, B);
%     u_vec = tangent2vec(M, x, B, u);
%     Hu_vec = H*u_vec;
%     Hu = lincomb(M, x, B, Hu_vec);
%
% Note that there will be some error due to numerical round-off.
% 
%
% See also: hessianspectrum hessianextreme tangentorthobasis orthogonalize tangent2vec

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, July 14, 2016.
% Contributors: 
% Change log: 


    % Warn only if neither a Hessian nor an approximate Hessian is
    % available: a user who supplied an approximate Hessian is presumably
    % aware of what they are doing.
    if ~canGetHessian(problem) && ~canGetApproxHessian(problem)
        warning('manopt:hessianmatrix:nohessian', ...
                ['The Hessian appears to be unavailable.\n' ...
                 'Will try to use an approximate Hessian instead.\n'...
                 'Since this approximation may not be linear or '...
                 'symmetric,\nthe computation might fail and the '...
                 'results (if any)\nmight make no sense.']);
    end
    
    M = problem.M;

    % Use the supplied basis if there is one; otherwise, pick a random
    % orthonormal basis for the whole tangent space at x.
    if nargin < 3 || isempty(basis)
        n = M.dim();
        basis = tangentorthobasis(M, x, n);
    else
        n = numel(basis);
    end
    
    % Create a store database and get a key for x, so that all Hessian
    % calls below refer to the same point.
    storedb = StoreDB(1);
    key = storedb.getNewKey();
    
    % Hbasis{k} is the Hessian at x applied to the kth basis vector.
    Hbasis = cell(n, 1);
    for k = 1 : n
        Hbasis{k} = getHessian(problem, x, basis{k}, storedb, key);
    end
    
    % Only entries on or above the diagonal are computed from inner
    % products of basis vectors with Hessian-applied basis vectors; the
    % entries below the diagonal are copies, so H is symmetric by
    % construction.
    H = zeros(n);
    for row = 1 : n
        for col = row : n
            entry = M.inner(x, basis{row}, Hbasis{col});
            H(row, col) = entry;
            H(col, row) = entry;
        end
    end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/tools/hessianspectrum.m
================================================
function lambdas = hessianspectrum(problem, x, usepreconstr, storedb, key)
% Returns the eigenvalues of the (preconditioned) Hessian at x.
% 
% function lambdas = hessianspectrum(problem, x)
% function lambdas = hessianspectrum(problem, x, useprecon)
% function lambdas = hessianspectrum(problem, x, useprecon, storedb)
% function lambdas = hessianspectrum(problem, x, useprecon, storedb, key)
%
% If useprecon is not set, or if it is set to 'noprecon' (default), this
% computes and returns the eigenvalues of the Hessian operator (which needs
% to be symmetric but not necessarily definite) on the tangent space at x.
% There are problem.M.dim() eigenvalues. Matlab's eigs is used internally.
% The eigenvalues are returned sorted in increasing order.
%
% If useprecon is set to 'precon', the eigenvalues of the composition of
% the Hessian with the preconditioner at x are computed: Precon o Hessian.
% The preconditioner must have been defined in the problem structure and
% has to be symmetric, positive definite. It is supposed to approximate the
% inverse of the (Riemannian) Hessian. Ideally, the preconditioned Hessian
% is better conditioned (smaller ratio of largest to smallest eigenvalue in
% magnitude) than the non-preconditioned spectrum. The present tool can
% help assess that.
%
% Any other value for useprecon triggers an error. If a function handle is
% passed (legacy syntax, where sqrtprecon was an input), a warning
% explaining the new syntax is issued before the error.
%
% The typical ways to define a preconditioner are via problem.precon or
% problem.sqrtprecon (see comment below). These should be function handles
% with the same input/output system as problem.hess for the Hessian.
%
% If the Hessian is not available from the problem structure, an
% approximate Hessian will be used. There are no guarantees of
% interpretability, but this may nevertheless be useful at times.
%
% Even though the Hessian and the preconditioner are both symmetric, their
% composition is not symmetric. This can slow down the call to 'eigs'
% substantially. If possible, you may specify the square root of the
% preconditioner in the problem structure, as sqrtprecon. This operator on
% the tangent space at x must also be symmetric, positive definite, and
% such that SqrtPrecon o SqrtPrecon = Precon. Then, the spectrum of the
% symmetric operator SqrtPrecon o Hessian o SqrtPrecon is computed: it is
% the same as the spectrum of Precon o Hessian, but is usually faster to
% compute. If both Precon and SqrtPrecon are provided, only SqrtPrecon will
% be used.
%
% The input and the output of the Hessian and of the preconditioner are
% projected on the tangent space to avoid undesired contributions of the
% ambient space.
%
% storedb is a StoreDB object, key is the StoreDB key to point x.
%
% Requires the manifold description in problem.M to have these functions:
% 
%   u_vec = vec(x, u_mat) :
%       Returns a column vector representation of the normal (usually
%       matrix) representation of the tangent vector u_mat. vec must be an
%       isometry between the tangent space (with its Riemannian metric) and
%       a subspace of R^n where n = length(u_vec), with the 2-norm on R^n.
%       In other words: it is an orthogonal projector.
%
%   u_mat = mat(x, u_vec) :
%       The inverse of vec (its adjoint).
%
%   u_mat_clean = tangent(x, u_mat) :
%       Subtracts from the tangent vector u_mat any component that would
%       make it "not really tangent", by projection.
%
%   answer = vecmatareisometries() :
%       Returns true if the linear maps encoded by vec and mat are
%       isometries, false otherwise. It is better if the answer is yes.
%
% See also: hessianextreme canGetPrecon canGetSqrtPrecon

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, July 3, 2013.
% Contributors: 
% Change log:
%
%   Dec. 18, 2014 (NB):
%       The lambdas are now sorted when they are returned.
%
%   April 3, 2015 (NB):
%       Works with the new StoreDB class system.
%       Does no longer accept sqrtprecon as an input: the square root of
%       the preconditioner may now be specified directly in the problem
%       structure, following the same syntax as the preconditioner precon.
%
%   April 4, 2015 (NB):
%       By default, the spectrum is computed without the preconditioner's
%       effect, even if it is available. A new input option allows to
%       switch this behavior without the need to change the problem
%       structure.

    % Allow omission of the key, and even of storedb.
    if ~exist('key', 'var')
        if ~exist('storedb', 'var')
            storedb = StoreDB();
        end
        key = storedb.getNewKey();
    end

    % Manage the option to use or not use a preconditioner.
    % The input is a string. It is here transformed into a Boolean.
    if ~exist('usepreconstr', 'var') || isempty(usepreconstr)
        usepreconstr = 'noprecon';
    end
    
    % A bit of legacy code heads up. This check must come before the
    % switch below: that switch applies lower() to its input and compares
    % it to strings, which is not meaningful for a function handle, so the
    % warning would not be displayed if it were issued from there.
    if isa(usepreconstr, 'function_handle')
        warning('manopt:hessianspectrum:oldsyntax', ...
                ['This function no longer expects sqrtprecon ' ...
                 'as input. Place it in the problem structure.']);
        error('Input useprecon must be either ''precon'' or ''noprecon''.');
    end
    
    switch lower(usepreconstr)
        case 'noprecon'
            useprecon = false;
        case 'precon'
            useprecon = true;
        otherwise
            error('Input useprecon must be either ''precon'' or ''noprecon''.');
    end

    % No warning if an approximate Hessian is available, as then the user
    % is presumably aware of what they are doing.
    if ~canGetHessian(problem) && ~canGetApproxHessian(problem)
        warning('manopt:hessianspectrum:nohessian', ...
                ['The Hessian appears to be unavailable.\n' ...
                 'Will try to use an approximate Hessian instead.\n'...
                 'Since this approximation may not be linear or '...
                 'symmetric,\nthe computation might fail and the '...
                 'results (if any)\nmight make no sense.']);
    end

    % Shorthands for the manifold's vectorization tools at x.
    vec = @(u_mat) problem.M.vec(x, u_mat);
    mat = @(u_vec) problem.M.mat(x, u_vec);
    tgt = @(u_mat) problem.M.tangent(x, u_mat);
    
    % n: size of a vectorized tangent vector
    % dim: dimension of the tangent space
    % necessarily, n >= dim.
    % The vectorized operators we build below will have at least n - dim
    % zero eigenvalues.
    n = length(vec(problem.M.zerovec(x)));
    dim = problem.M.dim();
    
    % It is usually a good idea to force a gradient computation to make
    % sure precomputable things are precomputed.
    if canGetGradient(problem)
        [unused1, unused2] = getCostGrad(problem, x, storedb, key); %#ok
    end
    
    % Hessian with input and output projected to the tangent space, and
    % its version acting on vectorized tangent vectors.
    hess = @(u_mat) tgt(getHessian(problem, x, tgt(u_mat), storedb, key));
    hess_vec = @(u_vec) vec(hess(mat(u_vec)));
    
    % Regardless of preconditioning, we can only have a symmetric
    % eigenvalue problem if the vec/mat pair of the manifold is an
    % isometry:
    vec_mat_are_isometries = problem.M.vecmatareisometries();
    
    
    if ~useprecon

        % No preconditioner to use: simply use the Hessian as is.

        eigs_opts.issym = vec_mat_are_isometries;
        eigs_opts.isreal = true;
        lambdas = eigs(hess_vec, n, dim, 'LM', eigs_opts);
            
    elseif canGetSqrtPrecon(problem)

        % There is a preconditioner, and we have its square root: deal with
        % the symmetric composition SqrtPrecon o Hessian o SqrtPrecon.

        sqrtprec = @(u_mat) tgt(getSqrtPrecon(problem, x, tgt(u_mat), storedb, key));
        sqrtprec_vec = @(u_vec) vec(sqrtprec(mat(u_vec)));

        eigs_opts.issym = vec_mat_are_isometries;
        eigs_opts.isreal = true;
        lambdas = eigs(@(u_vec) ...
                      sqrtprec_vec(hess_vec(sqrtprec_vec(u_vec))), ...
                      n, dim, 'LM', eigs_opts);
            
    elseif canGetPrecon(problem)
            
        % There is a preconditioner, but we don't have its square root:
        % deal with the non-symmetric composition Precon o Hessian.

        prec = @(u_mat) tgt(getPrecon(problem, x, tgt(u_mat), storedb, key));
        prec_vec = @(u_vec) vec(prec(mat(u_vec)));
        % prec_inv_vec = @(u_vec) pcg(prec_vec, u_vec);

        eigs_opts.issym = false;
        eigs_opts.isreal = true;
        lambdas = eigs(@(u_vec) prec_vec(hess_vec(u_vec)), ...
                       n, dim, 'LM', eigs_opts);
        
    else
        
        error('No preconditioner is available in the problem structure.');
        
    end
    
    lambdas = sort(lambdas);

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/tools/identify_linear_piece.m
================================================
function [range, poly] = identify_linear_piece(x, y, window_length)
% Locate the stretch of the curve (x, y) that looks the most like a line.
%
% function [range, poly] = identify_linear_piece(x, y, window_length)
%
% The vectors x and y describe a curve. Each window of consecutive indices
% i : i+window_length is fitted with a first-order polynomial (polyfit),
% and the window whose fit has the smallest residual norm is retained.
%
% Outputs:
%   range  indices of the retained window, so that x(range), y(range) is
%          the portion of the curve that is best explained by a line.
%   poly   1-by-2 row of coefficients of the fitted line over that window,
%          in the usual Matlab convention (highest degree first, that is,
%          [slope, intercept]).
%
% See also: checkdiff checkgradient checkhessian

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, July 8, 2013.
% Contributors: 
% Change log: 

    % One fit per admissible starting index of the sliding window.
    fit_errors = zeros(length(x) - window_length, 1);
    fits = zeros(2, length(fit_errors));

    for k = 1 : length(fit_errors)
        window = k + (0:window_length);
        [coeffs, fitinfo] = polyfit(x(window), y(window), 1);
        % normr is the norm of the residuals of the least-squares fit.
        fit_errors(k) = fitinfo.normr;
        fits(:, k) = coeffs';
    end

    % Keep the window with the smallest residual (first one if tied).
    [ignored, best] = min(fit_errors); %#ok<ASGLU>
    range = best:(best+window_length);
    poly = fits(:, best)';

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/tools/lincomb.m
================================================
function vec = lincomb(M, x, vecs, coeffs)
% Linear combination of an arbitrary number of tangent vectors.
%
% vec = lincomb(M, x, vecs, coeffs)
%
% Inputs:
%   M       Manopt manifold structure obtained from a factory; its fields
%           M.lincomb and M.zerovec are used.
%   x       point on the manifold M.
%   vecs    cell containing n tangent vectors at x.
%   coeffs  vector with n entries (one coefficient per tangent vector).
%
% Output:
%   vec     tangent vector at x obtained as
%
%              vec = coeffs(1)*vecs{1} + ... + coeffs(n)*vecs{n}
%
%           For n = 0, the zero vector M.zerovec(x) is returned.
%
% If vecs is an orthonormal basis, then tangent2vec is the inverse of
% lincomb.
%
% See also: grammatrix orthogonalize tangentorthobasis tangent2vec

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, April 28, 2016.
% Contributors: 
% Change log: 

    n = numel(vecs);
    assert(numel(coeffs) == n);

    if n == 0

        % Empty combination: by convention, the zero tangent vector.
        vec = M.zerovec(x);

    elseif n == 1

        vec = M.lincomb(x, coeffs(1), vecs{1});

    else

        % M.lincomb combines at most two vectors per call: start with the
        % first pair, then fold in the remaining terms one at a time.
        vec = M.lincomb(x, coeffs(1), vecs{1}, coeffs(2), vecs{2});
        for k = 3 : n
            vec = M.lincomb(x, 1, vec, coeffs(k), vecs{k});
        end

    end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/tools/manoptsolve.m
================================================
function [x, cost, info, options] = manoptsolve(problem, x0, options)
% Gateway helper: dispatch to a Manopt solver selected through the options.
%
% function [x, cost, info, options] = manoptsolve(problem)
% function [x, cost, info, options] = manoptsolve(problem, x0)
% function [x, cost, info, options] = manoptsolve(problem, x0, options)
% function [x, cost, info, options] = manoptsolve(problem, [], options)
%
% The solver is taken from options.solver if the user specified it, as a
% function handle to a Manopt solver. For example:
%
%    options.solver = @trustregions;
%
% Otherwise, a default is chosen from what the problem structure provides:
%    no gradient available ............ @neldermead
%    gradient but no Hessian .......... @conjugategradient
%    gradient and Hessian ............. @trustregions
%
% The selected solver is called as solver(problem, x0, options), with x0
% set to [] if it was omitted, and its four outputs are passed along.
% An error is raised if the problem structure specifies no cost function.
%
% See also: trustregions conjugategradient steepestdescent

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Aug. 13, 2014.
% Contributors: 
% Change log: 

    % Nothing can be optimized without a cost function.
    if ~canGetCost(problem)
        error('The problem structure must specify a cost function.');
    end

    % The default solver depends on how many differentials are available.
    if canGetGradient(problem)
        if canGetHessian(problem)
            localdefaults.solver = @trustregions;
        else
            localdefaults.solver = @conjugategradient;
        end
    else
        localdefaults.solver = @neldermead;
    end

    % User options (if any) take precedence over the local defaults.
    if nargin < 3 || isempty(options)
        options = struct();
    end
    options = mergeOptions(localdefaults, options);

    % Without an initial guess, hand the empty one to the solver.
    if nargin < 2
        x0 = [];
    end

    % Delegate to the selected solver.
    solver = options.solver;
    [x, cost, info, options] = solver(problem, x0, options);

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/tools/matrixlincomb.m
================================================
function v = matrixlincomb(x, a1, d1, a2, d2) %#ok<INUSL>
% Linear combination of tangent vectors that are stored as Matlab arrays.
%
% function v = matrixlincomb(x, a1, d1)
% function v = matrixlincomb(x, a1, d1, a2, d2)
%
% With five inputs, returns a1*d1 + a2*d2, where d1 and d2 are tangent
% vectors at the point x represented as matrices (or, more generally, as
% Matlab arrays) and a1, a2 are real coefficients.
%
% With three inputs (a2 and d2 omitted), returns a1*d1.
%
% Any other number of inputs triggers an error.
%
% The input x is not used: it is only present so that this function has
% the calling convention of a manifold's lincomb.
%
% This function is a helper to define manifolds in Manopt.

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, July 2, 2015.
% Contributors: 
% Change log: 

    switch nargin
        case 3
            v = a1*d1;
        case 5
            v = a1*d1 + a2*d2;
        otherwise
            error('matrixlincomb takes either 3 or 5 inputs.');
    end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/tools/multihconj.m
================================================
function b = multihconj(a, dim)
%MULTIHCONJ  Hermitian conjugating arrays of matrices.
%    B = MULTIHCONJ(A) is equivalent to B = MULTIHCONJ(A, DIM), where
%    DIM = 1.
%
%    B = MULTIHCONJ(A, DIM) is equivalent to
%    B = CONJ(MULTITRANSP(A, DIM)): A is an array containing N P-by-Q
%    matrices along its dimensions DIM and DIM+1, and B is an array
%    containing the Q-by-P Hermitian conjugate (') of those N matrices
%    along the same dimensions. N = NUMEL(A) / (P*Q), i.e. N is equal to
%    the number of elements in A divided by the number of elements in each
%    matrix.
%
%
%    Example:
%       A 5-by-9-by-3-by-2 array may be considered to be a block array
%       containing ten 9-by-3 matrices along dimensions 2 and 3. In this
%       case, its size is so indicated:  5-by-(9-by-3)-by-2 or 5x(9x3)x2.
%       If A is ................ a 5x(9x3)x2 array of 9x3 matrices,
%       B = MULTIHCONJ(A, 2) is a 5x(3x9)x2 array of 3x9 matrices.
%
%    See also MULTITRANSP MULTIHERM.

% This file is part of Manopt: www.manopt.org.
% Original author: Hiroyuki Sato, April 27, 2015.
% Contributors: 
% Change log: 

    % The matrices start at the first dimension unless told otherwise.
    if nargin == 1
        dim = 1;
    end

    % Hermitian conjugate = transpose of each block, then entrywise
    % complex conjugate.
    b = conj(multitransp(a, dim));

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/tools/multiherm.m
================================================
function Y = multiherm(X)
% Hermitian part of every matrix stored in the 3D array X.
%
% function Y = multiherm(X)
%
% Y has the same size as X, and each slice Y(:, :, i) is the Hermitian
% part of the corresponding slice X(:, :, i), that is, half the sum of
% that slice and its Hermitian conjugate.
%
% See also: multiprod multitransp multihconj multiscale multiskew

% This file is part of Manopt: www.manopt.org.
% Original author: Hiroyuki Sato, April 27, 2015.
% Contributors: 
% Change log: 

    % Slice-wise Hermitian conjugate of X.
    Xh = multihconj(X);

    Y = .5*(X + Xh);

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/tools/multiprod.m
================================================
function c = multiprod(a, b, idA, idB)
% Multiplying 1-D or 2-D subarrays contained in two N-D arrays.
% 
%   C = MULTIPROD(A,B) is equivalent  to C = MULTIPROD(A,B,[1 2],[1 2])
%   C = MULTIPROD(A,B,[D1 D2]) is eq. to C = MULTIPROD(A,B,[D1 D2],[D1 D2])
%   C = MULTIPROD(A,B,D1) is equival. to C = MULTIPROD(A,B,D1,D1)
%
%   MULTIPROD performs multiple matrix products, with array expansion (AX)
%   enabled. Its first two arguments A and B are "block arrays" of any
%   size, containing one or more 1-D or 2-D subarrays, called "blocks" (*).
%   For instance, a 5x6x3 array may be viewed as an array containing five
%   6x3 blocks. In this case, its size is denoted by 5x(6x3). The 1 or 2
%   adjacent dimensions along which the blocks are contained are called the
%   "internal dimensions" (IDs) of the array (**).
%
%   1) 2-D by 2-D BLOCK(S) (*)
%         C = MULTIPROD(A, B, [DA1 DA2], [DB1 DB2]) contains the products
%         of the PxQ matrices in A by the RxS matrices in B. [DA1 DA2] are
%         the IDs of A; [DB1 DB2] are the IDs of B.
%
%   2) 2-D by 1-D BLOCK(S) (*)
%         C = MULTIPROD(A, B, [DA1 DA2], DB1) contains the products of the
%         PxQ matrices in A by the R-element vectors in B. The latter are
%         considered to be Rx1 matrices. [DA1 DA2] are the IDs of A; DB1 is
%         the ID of B.
%
%   3) 1-D by 2-D BLOCK(S) (*)
%         C = MULTIPROD(A, B, DA1, [DB1 DB2]) contains the products of the 
%         Q-element vectors in A by the RxS matrices in B. The vectors in A
%         are considered to be 1xQ matrices. DA1 is the ID of A; [DB1 DB2]
%         are the IDs of B.
%
%   4) 1-D BY 1-D BLOCK(S) (*)
%      (a) If either SIZE(A, DA1) == 1 or SIZE(B, DB1) == 1, or both,
%             C = MULTIPROD(A, B, DA1, DB1) returns products of scalars by 
%             vectors, or vectors by scalars or scalars by scalars.
%      (b) If SIZE(A, DA1) == SIZE(B, DB1), 
%             C = MULTIPROD(A, B, [0 DA1], [DB1 0]) or 
%             C = MULTIPROD(A, B, DA1, DB1) virtually turns the vectors
%             contained in A and B into 1xP and Px1 matrices, respectively,
%             then returns their products, similar to scalar products.
%             Namely, C = DOT2(A, B, DA1, DB1) is equivalent to 
%             C = MULTIPROD(CONJ(A), B, [0 DA1], [DB1 0]).
%      (c) Without limitations on the length of the vectors in A and B,
%             C = MULTIPROD(A, B, [DA1 0], [0 DB1]) turns the vectors
%             contained in A and B into Px1 and 1xQ matrices, respectively,
%             then returns their products, similar to outer products.
%             Namely, C = OUTER(A, B, DA1, DB1) is equivalent to
%             C = MULTIPROD(CONJ(A), B, [DA1 0], [0 DB1]).
%
%   Common constraints for all syntaxes:
%      The external dimensions of A and B must either be identical or 
%      compatible with AX rules. The internal dimensions of each block
%      array must be adjacent (DA2 == DA1 + 1 and DB2 == DB1 + 1 are
%      required). DA1 and DB1 are allowed to be larger than NDIMS(A) and
%      NDIMS(B). In syntaxes 1, 2, and 3, Q == R is required, unless the
%      blocks in A or B are scalars. 
%
%   Array expansion (AX):
%      AX is a powerful generalization to N-D of the concept of scalar
%      expansion. Indeed, A and B may be scalars, vectors, matrices or
%      multi-dimensional arrays. Scalar expansion is the virtual
%      replication or annihilation of a scalar which allows you to combine
%      it, element by element, with an array X of any size (e.g. X+10,
%      X*10, or []-10). Similarly, in MULTIPROD, the purpose of AX is to
%      automatically match the size of the external dimensions (EDs) of A
%      and B, so that block-by-block products can be performed. ED matching
%      is achieved by means of a dimension shift followed by a singleton
%      expansion:
%      1) Dimension shift (see SHIFTDIM).
%            Whenever DA1 ~= DB1, a shift is applied to impose DA1 == DB1.
%            If DA1 > DB1, B is shifted to the right by DA1 - DB1 steps.
%            If DB1 > DA1, A is shifted to the right by DB1 - DA1 steps.
%      2) Singleton expansion (SX).
%            Whenever an ED of either A or B is singleton and the
%            corresponding ED of the other array is not, the mismatch is
%            fixed by virtually replicating the array (or diminishing it to
%            length 0) along that dimension.
% 
%   MULTIPROD is a generalization for N-D arrays of the matrix
%   multiplication function MTIMES, with AX enabled. Vector inner, outer,
%   and cross products generalized for N-D arrays and with AX enabled are
%   performed by DOT2, OUTER, and CROSS2 (MATLAB Central, file #8782).
%   Elementwise multiplications (see TIMES) and other elementwise binary
%   operations with AX enabled are performed by BAXFUN (MATLAB Central,
%   file #23084). Together, these functions make up the "ARRAYLAB toolbox".
%
%   Input and output format:
%      The size of the EDs of C is determined by AX. Block size is
%      determined as follows, for each of the above-listed syntaxes:
%      1) C contains PxS matrices along IDs MAX([DA1 DA2], [DB1 DB2]).
%      2) Array     Block size     ID(s)
%         ----------------------------------------------------
%         A         PxQ  (2-D)     [DA1 DA2]
%         B         R    (1-D)     DB1
%         C (a)     P    (1-D)     MAX(DA1, DB1)
%         C (b)     PxQ  (2-D)     MAX([DA1 DA2], [DB1 DB1+1])
%         ----------------------------------------------------
%         (a) The 1-D blocks in B are not scalars (R > 1).
%         (b) The 1-D blocks in B are scalars (R = 1).
%      3) Array     Block size     ID(s)
%         ----------------------------------------------------
%         A           Q  (1-D)     DA1
%         B         RxS  (2-D)     [DB1 DB2]
%         C (a)       S  (1-D)     MAX(DA1, DB1)
%         C (b)     RxS  (2-D)     MAX([DA1 DA1+1], [DB1 DB2])
%         ----------------------------------------------------
%         (a) The 1-D blocks in A are not scalars (Q > 1).
%         (b) The 1-D blocks in A are scalars (Q = 1).
%      4)     Array     Block size         ID(s)
%         --------------------------------------------------------------
%         (a) A         P        (1-D)     DA1
%             B         Q        (1-D)     DB1
%             C         MAX(P,Q) (1-D)     MAX(DA1, DB1)
%         --------------------------------------------------------------
%         (b) A         P        (1-D)     DA1
%             B         P        (1-D)     DB1
%             C         1        (1-D)     MAX(DA1, DB1)
%         --------------------------------------------------------------
%         (c) A         P        (1-D)     DA1
%             B         Q        (1-D)     DB1
%             C         PxQ      (2-D)     MAX([DA1 DA1+1], [DB1 DB1+1])
%         --------------------------------------------------------------
%
%   Terminological notes:
%   (*)  1-D and 2-D blocks are generically referred to as "vectors" and 
%        "matrices", respectively. However, both may be also called
%        "scalars" if they have a single element. Moreover, matrices with a
%        single row or column (e.g. 1x3 or 3x1) may be also called "row
%        vectors" or "column vectors".
%   (**) Not to be confused with the "inner dimensions" of the two matrices
%        involved in a product X * Y, defined as the 2nd dimension of X and
%        the 1st of Y (DA2 and DB1 in syntaxes 1, 2, 3).
%
%   Examples:
%    1) If  A is .................... a 5x(6x3)x2 array,
%       and B is .................... a 5x(3x4)x2 array,
%       C = MULTIPROD(A, B, [2 3]) is a 5x(6x4)x2 array.
%
%       A single matrix A pre-multiplies each matrix in B
%       If  A is ........................... a (1x3)    single matrix,
%       and B is ........................... a 10x(3x4) 3-D array,
%       C = MULTIPROD(A, B, [1 2], [3 4]) is a 10x(1x4) 3-D array.
%
%       Each matrix in A pre-multiplies each matrix in B (all possible
%       combinations)
%       If  A is .................... a (6x3)x5   array,
%       and B is .................... a (3x4)x1x2 array,
%       C = MULTIPROD(A, B, [1 2]) is a (6x4)x5x2 array.
%
%   2a) If  A is ........................... a 5x(6x3)x2 4-D array,
%       and B is ........................... a 5x(3)x2   3-D array,
%       C = MULTIPROD(A, B, [2 3], [2]) is   a 5x(6)x2   3-D array.
%
%   2b) If  A is ........................... a 5x(6x3)x2 4-D array,
%       and B is ........................... a 5x(1)x2   3-D array,
%       C = MULTIPROD(A, B, [2 3], [2]) is   a 5x(6x3)x2 4-D array.
%
%   4a) If both A and B are .................. 5x(6)x2   3-D arrays,
%       C = MULTIPROD(A, B, 2) is .......... a 5x(1)x2   3-D array, while
%   4b) C = MULTIPROD(A, B, [2 0], [0 2]) is a 5x(6x6)x2 4-D array
%
%   See also DOT2, OUTER, CROSS2, BAXFUN, MULTITRANSP, MULTITRACE, MULTISCALE.

% $ Version: 2.1 $
% CODE      by:            Paolo de Leva
%                          (Univ. of Rome, Foro Italico, IT)    2009 Jan 24
%           optimized by:  Paolo de Leva
%                          Jinhui Bai (Georgetown Univ., D.C.)  2009 Jan 24
% COMMENTS  by:            Paolo de Leva                        2009 Feb 24
% OUTPUT    tested by:     Paolo de Leva                        2009 Feb 24
% -------------------------------------------------------------------------

assert(nargin >= 2 && nargin <= 4, 'Takes from 2 to 4 inputs.');

% Filling in the internal dimensions that were not supplied
if nargin == 2
    idA = [1 2];
    idB = [1 2];
elseif nargin == 3
    idB = idA;
end

% ESC 1 - Shortcut: two plain 2-D arrays with IDs [1 2] are simply
%         multiplied with MTIMES

    bothplain2D = ndims(a)==2 && ndims(b)==2;
    if bothplain2D && isequal(idA,[1 2]) && isequal(idB,[1 2])
        c = a * b;
        return
    end

% MAIN 0 - Validating the IDs, working out the sizes A and B must be
%          reshaped to, and selecting the multiplication engine

    [sizeA, sizeB, shiftC, delC, sizeisnew, idA, idB, ...
     squashOK, sxtimesOK, timesOK, mtimesOK, sumOK] = ...
                                     sizeval(idA, idB, size(a), size(b));

% MAIN 1 - Dimension shift (first step of AX); after this, both A and B
%          are arrays of either 1-D or 2-D blocks

    if sizeisnew(1)
        a = reshape(a, sizeA);
    end
    if sizeisnew(2)
        b = reshape(b, sizeB);
    end

% MAIN 2 - Products, with or without SX (second step of AX)

    if squashOK
        % SQUASH + MTIMES (fastest engine)
        c = squash2D_mtimes(a,b, idA,idB, sizeA,sizeB, squashOK);
    elseif timesOK
        % TIMES (preferred w.r. to SX + TIMES)
        c = a .* b;
        if sumOK
            c = sum(c, sumOK);
        end
    elseif sxtimesOK
        % SX + TIMES
        c = bsxfun(@times, a, b);
        if sumOK
            c = sum(c, sumOK);
        end
    elseif mtimesOK
        % MTIMES (rarely used)
        c = a * b;
    end

% MAIN 3 - Giving C its final shape (singleton dimensions are inserted or
%          removed)

    [sizeC, sizeCisnew] = adjustsize(size(c), shiftC, false, delC, false);
    if sizeCisnew
        c = reshape(c, sizeC);
    end


function c = squash2D_mtimes(a, b, idA, idB, sizeA, sizeB, squashOK)
% SQUASH2D_MTIMES  Multiproduct with single-block expansion (SBX).
%    No expansion is actually carried out: the multi-block array is
%    permuted and reshaped from N-D to 2-D, a single MTIMES is applied,
%    and the result is reshaped and permuted back to N-D.
%    Exactly one of the two arrays must be single-block, with IDs [1 2]
%    (MAIN 1 removes leading singletons), and both arrays must contain
%    2-D blocks (MAIN 1 expands 1-D blocks to 2-D).
%
%    SQUASHOK == 1: A is multi-block with PxQ blocks along IDA, B is a
%                   single QxS matrix; each block of A is multiplied by B.
%    otherwise:     B is multi-block with QxS blocks along IDB, A is a
%                   single PxQ matrix; A multiplies each block of B.
%    SIZEA and SIZEB are the sizes of A and B.

    if squashOK == 1 % Squashing A (B is the single block)

        % Sending the column dimension of the blocks, IDA(2), to the end
        blockcol = idA(2);
        perm = [1:(blockcol-1), (blockcol+1):length(sizeA), blockcol];
        a = permute(a, perm); % ...xQ

        % Collapsing every dimension but the last one: A becomes NxQ
        outer = sizeA(perm(1:length(perm)-1));
        a = reshape(a, [prod(outer), sizeB(1)]);
        outsize = [outer, sizeB(2)]; % Size used to restore C to N-D

    else % Squashing B (A is the single block)

        % Bringing the row dimension of the blocks, IDB(1), to the front
        blockrow = idB(1);
        perm = [blockrow, 1:(blockrow-1), (blockrow+1):length(sizeB)];
        b = permute(b, perm); % Qx...

        % Collapsing every dimension but the first one: B becomes QxN
        outer = sizeB(perm(2:length(perm)));
        b = reshape(b, [sizeA(2), prod(outer)]);
        outsize = [sizeA(1), outer]; % Size used to restore C to N-D

    end

    % One 2-D product, then back to N-D and undoing the permutation
    unperm(perm) = 1 : length(perm);
    c = permute(reshape(a*b, outsize), unperm);


function [sizeA, sizeB, shiftC, delC, sizeisnew, idA, idB, ...
          squashOK, sxtimesOK, timesOK, mtimesOK, sumOK] = ...
                                          sizeval(idA0,idB0, sizeA0,sizeB0)
%SIZEVAL   Evaluation of array size, block size, and IDs
%    Validates the internal-dimension arguments, then decides how A and B
%    must be reshaped and which multiplication engine MULTIPROD will use.
%
%    Inputs:
%        idA0, idB0      Internal dimensions (IDs) of A and B, as passed to
%                        MULTIPROD.
%        sizeA0, sizeB0  SIZE(A) and SIZE(B).
%
%    Possible values for IDA and IDB:
%        [DA1 DA2], [DB1 DB2]
%        [DA1 DA2], [DB1]
%        [DA1],     [DB1 DB2]
%        [DA1],     [DB1]
%        [DA1 0],   [0 DB1]
%        [0 DA1],   [DB1 0]
%
%    Outputs:
%    sizeA/B     Equal to sizeA0/B0 if RESHAPE is not needed in MAIN 1
%    shiftC, delC    Variables controlling MAIN 3.
%    sizeisnew   1x2 logical array; activates reshaping of A and B.
%    idA/B       May change only if squashOK ~= 0
%    squashOK    If only A or B is a multi-block array (M-B) and the other
%                is single-block (1-B), it will be rearranged from N-D to
%                2-D. If both A and B are 1-B or M-B arrays, squashOK = 0.
%                If only A (or B) is a M-B array, squashOK = 1 (or 2).
%    sxtimesOK, timesOK, mtimesOK    Flags controlling MAIN 2 (TRUE/FALSE).
%    sumOK       Dimension along which SUM is performed. If SUM is not
%                needed, sumOK = 0.
%
%    Errors:
%        MULTIPROD:InvalidDimensionArgument   malformed IDs, misused zeros,
%                                             or non-adjacent IDs.
%        MULTIPROD:InnerDimensionsMismatch    block sizes that cannot be
%                                             multiplied.

% Initializing output arguments

    idA = idA0;
    idB = idB0;
     squashOK = 0;
    sxtimesOK = false;
      timesOK = false;
     mtimesOK = false;
        sumOK = 0;
    shiftC = 0;
    delC = 0;

% Checking for gross input errors
% (only the first element of each ID argument is inspected here)

    NidA = numel(idA);
    NidB = numel(idB);
    idA1 = idA(1);
    idB1 = idB(1);
    if  NidA>2 || NidB>2 || NidA==0 || NidB==0 || ...
           ~isreal(idA1) ||    ~isreal(idB1)   || ...
        ~isnumeric(idA1) || ~isnumeric(idB1)   || ...
                 0>idA1  ||          0>idB1    || ... % negative 
         idA1~=fix(idA1) ||  idB1~=fix(idB1)   || ... % non-integer
         ~isfinite(idA1) ||  ~isfinite(idB1) % Inf or NaN               
        error('MULTIPROD:InvalidDimensionArgument', ...
        ['Internal-dimension arguments (e.g., [IDA1 IDA2]) must\n', ...
         'contain only one or two non-negative finite integers']);
    end

% Checking Syntaxes containing zeros (4b/c)

    declared_outer = false;
    idA2 = idA(NidA); % It may be IDA1 = IDA2 (1-D block)
    idB2 = idB(NidB);

    if any(idA==0) || any(idB==0)
        
        % "Inner products": C = MULTIPROD(A, B, [0 DA1], [DB1 0])
        if idA1==0 && idA2>0 && idB1>0 && idB2==0
            idA1 = idA2;
            idB2 = idB1;
        % "Outer products": C = MULTIPROD(A, B, [DA1 0], [0 DB1]) 
        elseif idA1>0 && idA2==0 && idB1==0 && idB2>0
            declared_outer = true;
            idA2 = idA1;
            idB1 = idB2;
        else
            error('MULTIPROD:InvalidDimensionArgument', ...
            ['Misused zeros in the internal-dimension arguments\n', ...
            '(see help heads 4b and 4c)']);
        end
        % From here on, both arrays are treated as containing 1-D blocks.
        NidA = 1; 
        NidB = 1;
        idA = idA1;
        idB = idB1;

    elseif (NidA==2 && idA2~=idA1+1) || ...  % Non-adjacent IDs
           (NidB==2 && idB2~=idB1+1)
        % The trailing space in the first fragment keeps the two words
        % "dimensions" and "must" apart once the fragments are joined.
        error('MULTIPROD:InvalidDimensionArgument', ...
        ['If an array contains 2-D blocks, its two internal dimensions ', ...
        'must be adjacent (e.g. IDA2 == IDA1+1)']);
    end

% ESC - Case for which no reshaping is needed (both A and B are scalars)

    scalarA = isequal(sizeA0, [1 1]);
    scalarB = isequal(sizeB0, [1 1]);
    if scalarA && scalarB
        sizeA = sizeA0;
        sizeB = sizeB0;
        sizeisnew = [false false];
        timesOK = true; return
    end

% Computing and checking adjusted sizes
% The lengths of ADJSIZEA and ADJSIZEB must be >= IDA(END) and IDB(END)

    NsA = idA2 - length(sizeA0); % Number of added trailing singletons
    NsB = idB2 - length(sizeB0);
    adjsizeA = [sizeA0 ones(1,NsA)];
    adjsizeB = [sizeB0 ones(1,NsB)];
    extsizeA = adjsizeA([1:idA1-1, idA2+1:end]); % Size of EDs
    extsizeB = adjsizeB([1:idB1-1, idB2+1:end]);
    p = adjsizeA(idA1); % Blocks in A are PxQ, blocks in B are RxS
    q = adjsizeA(idA2);
    r = adjsizeB(idB1);
    s = adjsizeB(idB2);    
    scalarsinA = (p==1 && q==1);
    scalarsinB = (r==1 && s==1);
    singleA = all(extsizeA==1); % A contains a single block
    singleB = all(extsizeB==1); % B contains a single block
    if q~=r && ~scalarsinA && ~scalarsinB && ~declared_outer
       error('MULTIPROD:InnerDimensionsMismatch', ...
             'Inner matrix dimensions must agree.');
    end

% STEP 1/3 - DIMENSION SHIFTING (FIRST STEP OF AX)
%   Pipeline 1 (using TIMES) never needs left, and may need right shifting.
%   Pipeline 2 (using MTIMES) may need left shifting of A and right of B.

    shiftA = 0;
    shiftB = 0;
    diffBA = idB1 - idA1;    
    if scalarA % Do nothing
    elseif singleA && ~scalarsinB, shiftA = -idA1 + 1; %  Left shifting A
    elseif idB1 > idA1,            shiftA = diffBA;    % Right shifting A        
    end    
    if scalarB % Do nothing
    elseif singleB && ~scalarsinA, shiftB = -idB1 + 1; %  Left shifting B
    elseif idA1 > idB1,            shiftB = -diffBA;   % Right shifting B
    end

% STEP 2/3 - SELECTION OF PROPER ENGINE AND BLOCK SIZE ADJUSTMENTS
%   addX / delX / swapX hold the dimension (0 = none) at which ADJUSTSIZE
%   will insert, remove, or swap a singleton dimension of array X.

    addA  = 0; addB  = 0;
    delA  = 0; delB  = 0;
    swapA = 0; swapB = 0;
    idC1 = max(idA1, idB1);
    idC2 = idC1 + 1;
    checktimes = false;

    if (singleA||singleB) &&~scalarsinA &&~scalarsinB % Engine using MTIMES

        if singleA && singleB 
            mtimesOK = true;
            shiftC=idC1-1; % Right shifting C
            idC1=1; idC2=2;
        elseif singleA
            squashOK = 2;
            idB = [idB1, idB1+1] + shiftB;
        else % singleB
            squashOK = 1;
            idA = [idA1, idA1+1] + shiftA;
        end

        if NidA==2 && NidB==2 % 1) 2-D BLOCKS BY 2-D BLOCKS
            % OK 
        elseif NidA==2        % 2) 2-D BLOCKS BY 1-D BLOCKS
            addB=idB1+1; delC=idC2;
        elseif NidB==2        % 3) 1-D BLOCKS BY 2-D BLOCKS
            addA=idA1; delC=idC1;
        else                  % 4) 1-D BLOCKS BY 1-D BLOCKS
            if declared_outer
                addA=idA1+1; addB=idB1;
            else
                addA=idA1; addB=idB1+1; delC=idC2;
            end
        end    

    else % Engine using TIMES (also used if SCALARA || SCALARB)
        
        sxtimesOK = true;

        if NidA==2 && NidB==2 % 1) 2-D BLOCKS BY 2-D BLOCKS

            if scalarA || scalarB
                timesOK=true;                
            elseif scalarsinA && scalarsinB % scal-by-scal
                checktimes=true;
            elseif scalarsinA || scalarsinB || ... % scal-by-mat
                (q==1 && r==1)  % vec-by-vec ("outer")
            elseif p==1 && s==1 % vec-by-vec ("inner")
                swapA=idA1; sumOK=idC1; checktimes=true;
            elseif s==1 % mat-by-vec
                swapB=idB1; sumOK=idC2;
            elseif p==1 % vec-by-mat
                swapA=idA1; sumOK=idC1;
            else % mat-by-mat
                addA=idA2+1; addB=idB1; sumOK=idC2; delC=idC2;
            end

        elseif NidA==2 % 2) 2-D BLOCKS BY 1-D BLOCKS

            if scalarA || scalarB
                timesOK=true;                
            elseif scalarsinA && scalarsinB % scal-by-scal
                addB=idB1; checktimes=true;
            elseif scalarsinA % scal-by-vec
                delA=idA1;
            elseif scalarsinB % mat-by-scal
                addB=idB1;
            elseif p==1 % vec-by-vec ("inner")
                delA=idA1; sumOK=idC1; checktimes=true;
            else % mat-by-vec
                addB=idB1; sumOK=idC2; delC=idC2;
            end

        elseif NidB==2 % 3) 1-D BLOCKS BY 2-D BLOCKS

            if scalarA || scalarB
                timesOK=true;                
            elseif scalarsinA && scalarsinB % scal-by-scal
                addA=idA1+1; checktimes=true;
            elseif scalarsinB % vec-by-scal
                delB=idB2;
            elseif scalarsinA % scal-by-mat
                addA=idA1+1;
            elseif s==1 % vec-by-vec ("inner")
                delB=idB2; sumOK=idC1; checktimes=true;
            else % vec-by-mat
                addA=idA1+1; sumOK=idC1; delC=idC1;
            end

        else % 4) 1-D BLOCKS BY 1-D BLOCKS

            if scalarA || scalarB
                timesOK=true;                
            elseif declared_outer % vec-by-vec ("outer")
                addA=idA1+1; addB=idB1;
            elseif scalarsinA && scalarsinB % scal-by-scal
                checktimes=true;
            elseif scalarsinA || scalarsinB % vec-by-scal
            else % vec-by-vec
                sumOK=idC1; checktimes=true;
            end
        end
    end

% STEP 3/3 - Adjusting the size of A and B. The size of C is adjusted
%            later, because it is not known yet.

    [sizeA, sizeisnew(1)] = adjustsize(sizeA0, shiftA, addA, delA, swapA);
    [sizeB, sizeisnew(2)] = adjustsize(sizeB0, shiftB, addB, delB, swapB);

    % If the adjusted sizes match (up to trailing singletons), plain TIMES
    % can be used and no singleton expansion is needed.
    if checktimes % Faster than calling BSXFUN
        diff = length(sizeB) - length(sizeA);
        if isequal([sizeA ones(1,diff)], [sizeB ones(1,-diff)])
            timesOK = true;
        end
    end


function [sizeA, sizeisnew] = adjustsize(sizeA0, shiftA, addA, delA, swapA)
% ADJUSTSIZE  Adjusting size of a block array.
%    SIZEA0 is the current size vector. SHIFTA > 0 inserts SHIFTA leading
%    singleton dimensions; SHIFTA < 0 removes -SHIFTA of them. Then at most
%    one of ADDA, DELA, SWAPA (first nonzero one, in that order) is
%    applied: each is a dimension index, expressed before the shift, at
%    which a singleton is added, a singleton is deleted, or two adjacent
%    dimensions are swapped. SIZEISNEW is true if either step reported a
%    change, i.e. if the array must be reshaped to SIZEA.

    % Step 1 - Dimension shifting (by adding or deleting singleton dim.)
    sizeA = sizeA0;
    shifted = false;
    if shiftA > 0
        [sizeA, shifted] = addsing(sizeA0, 1, shiftA);
    elseif shiftA < 0
        [sizeA, shifted] = delsing(sizeA0, 1, -shiftA);
    end

    % Step 2 - Modifying block size (adding, deleting, or moving a
    %          singleton dimension); indices are offset by the shift
    resized = false;
    if addA       % 1D-->2D
        [sizeA, resized] = addsing(sizeA, addA+shiftA, 1);
    elseif delA   % 2D-->1D
        [sizeA, resized] = delsing(sizeA, delA+shiftA, 1);
    elseif swapA  % ID Swapping
        [sizeA, resized] = swapdim(sizeA, swapA+shiftA);
    end

    sizeisnew = shifted || resized;


function [newsize, flag] = addsing(size0, dim, ns)
%ADDSING   Adding NS singleton dimensions to the size of an array.
%   The singletons are inserted in front of dimension DIM. If DIM lies
%   beyond the last dimension of SIZE0, nothing is inserted (NEWSIZE is
%   SIZE0) and FLAG is false; otherwise FLAG is true.
%   Warning: NS is assumed to be a positive integer.
%   Example: If the size of A is ..... SIZE0 = [5 9 3]
%            NEWSIZE = ADDSING(SIZE0, 3, 2) is [5 9 1 1 3]

    % Default outcome: size left untouched
    newsize = size0;
    flag = false;
    if dim > length(size0)
        return
    end

    newsize = [size0(1:dim-1), ones(1,ns), size0(dim:end)];
    flag = true;


function [newsize, flag] = delsing(size0, dim, ns)
%DELSING   Removing NS singleton dimensions from the size of an array.
%   The NS dimensions starting at DIM are removed, and one singleton is
%   appended at the end so that NEWSIZE always has at least two elements.
%   If the NS dimensions would include the last dimension of SIZE0, nothing
%   is removed (NEWSIZE is SIZE0) and FLAG is false; otherwise FLAG is
%   true.
%   Warning: Trailing singletons are not removed
%   Example: If the size of A is SIZE0 = [1 1 1 5 9 3]
%            NEWSIZE = DELSING(SIZE0, 1, 3) is [5 9 3 1]

    % Default outcome: size left untouched
    newsize = size0;
    flag = false;
    if dim > length(size0)-ns % Trailing singletons are not removed
        return
    end

    % SIZE0(DIM) is moved to the end: it plays the role of the appended
    % trailing singleton
    kept = [1:dim-1, dim+ns:length(size0)];
    newsize = size0([kept, dim]);
    flag = true;


function [newsize, flag] = swapdim(size0, dim)
%SWAPDIM   Swap two adjacent entries (DIM and DIM+1) of a size vector.
%   [NEWSIZE, FLAG] = SWAPDIM(SIZE0, DIM) first appends a singleton to
%   SIZE0, which guarantees that entry DIM+1 exists, and then exchanges
%   entries DIM and DIM+1. FLAG is always true.
%   Used only when both A and B are multi-block arrays with 2-D blocks.
%   Example: If the size of A is .......... 5x(6x3)
%            NEWSIZE = SWAPDIM(SIZE0, 2) is 5x(3x6)x1

    padded = [size0, 1];
    order = [1:dim-1, dim+1, dim, dim+2:numel(padded)];
    newsize = padded(order);
    flag = true;


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/tools/multiprodmultitransp_license.txt
================================================
Copyright (c) 2009, Paolo de Leva
All rights reserved.

Redistribution and use in source and binary forms, with or without 
modification, are permitted provided that the following conditions are 
met:

    * Redistributions of source code must retain the above copyright 
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright 
      notice, this list of conditions and the following disclaimer in 
      the documentation and/or other materials provided with the distribution
      
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
POSSIBILITY OF SUCH DAMAGE.


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/tools/multiscale.m
================================================
function A = multiscale(scale, A)
% Multiplies the 2D slices in a 3D matrix by individual scalars.
%
% function A = multiscale(scale, A)
%
% Given a vector scale of length N and a 3-dimensional matrix A of size
% n-by-m-by-N, returns a matrix A of same size such that
% A(:, :, k) := scale(k) * A(:, :, k);
%
% The scalars may be complex: they are applied as given, without
% conjugation.
%
% See also: multiprod multitransp multitrace

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Dec. 30, 2012.
% Contributors: 
% Change log: 


    assert(ndims(A) <= 3, ...
           ['multiscale is only well defined for matrix arrays of 3 ' ...
            'or less dimensions.']);
    [n, m, N] = size(A);
    assert(numel(scale) == N, ...
           ['scale must be a vector whose length equals the third ' ...
            'dimension of A, that is, the number of 2D matrix slices ' ...
            'in the 3D matrix A.']);

    % Lay the scalars out as a 1-by-N row. The non-conjugate transpose (.')
    % is required here: the plain ' operator would conjugate complex
    % scalars and yield conj(scale(k)) * A(:, :, k) instead.
    scale = scale(:).';

    % Each column of the n*m-by-N reshaped array is one slice of A; bsxfun
    % multiplies column k by scale(k).
    A = reshape(bsxfun(@times, reshape(A, n*m, N), scale), n, m, N);

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/tools/multiskew.m
================================================
function Y = multiskew(X)
% Returns the skew-symmetric parts of the matrices in the 3D matrix X.
%
% function Y = multiskew(X)
%
% Y has the same size as X, and each slice satisfies
% Y(:, :, i) = (X(:, :, i) - X(:, :, i).') / 2,
% that is, Y(:, :, i) is the skew-symmetric part of X(:, :, i). The
% transposition is carried out slice-wise by multitransp.
%
% See also: multiprod multitransp multiscale multisym

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Jan. 31, 2013.
% Contributors: 
% Change log: 

    Xt = multitransp(X);
    Y = .5*(X - Xt);

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/tools/multisqnorm.m
================================================
function sqnorm = multisqnorm(A)
% Returns the squared Frobenius norms of the slices of a 3D matrix.
%
% function sqnorm = multisqnorm(A)
%
% Given a 3-dimensional matrix A of size n-by-m-by-N, returns a column
% vector of length N such that sqnorm(i) = norm(A(:, :, i), 'fro')^2.
%
% A may be real or complex; the output is always real.
%
% See also: multiprod multitransp multitrace norms

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, June 17, 2015.
% Contributors: 
% Change log: 


    assert(ndims(A) <= 3, ...
           ['multisqnorm is only well defined for matrix arrays of 3 ' ...
            'or less dimensions.']);
    [n, m, N] = size(A);

    % Each column of the n*m-by-N reshaped array holds one slice of A.
    % The squared Frobenius norm is the sum of squared moduli, hence the
    % abs: squaring complex entries directly would give z^2 rather than
    % |z|^2. For real input, abs(.).^2 equals (.).^2.
    % This is equivalent to squeeze(sum(norms(A, 2, 1).^2)), but faster.
    sqnorm = sum(abs(reshape(A, n*m, N)).^2, 1).';

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/tools/multisym.m
================================================
function Y = multisym(X)
% Returns the symmetric parts of the matrices in the 3D matrix X
%
% function Y = multisym(X)
%
% Y has the same size as X, and each slice satisfies
% Y(:, :, i) = (X(:, :, i) + X(:, :, i).') / 2,
% that is, Y(:, :, i) is the symmetric part of X(:, :, i). The
% transposition is carried out slice-wise by multitransp.
%
% See also: multiprod multitransp multiscale multiskew

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Jan. 31, 2013.
% Contributors: 
% Change log: 

    Xt = multitransp(X);
    Y = .5*(X + Xt);

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/tools/multitrace.m
================================================
function tr = multitrace(A)
% Computes the traces of the 2D slices in a 3D matrix.
% 
% function tr = multitrace(A)
%
% For a 3-dimensional matrix A of size n-by-n-by-N, returns a column vector
% tr of length N such that tr(k) = trace(A(:, :, k));
%
% The computation is delegated to diagsum, summing over the diagonal
% spanned by dimensions 1 and 2 of A.
%
% See also: multiprod multitransp multiscale

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Dec. 30, 2012.
% Contributors: 
% Change log: 


    % Arrays with more than three dimensions are rejected up front.
    at_most_3d = (ndims(A) <= 3);
    assert(at_most_3d, ...
           ['multitrace is only well defined for matrix arrays of 3 ' ...
            'or less dimensions.']);

    row_dim = 1;
    col_dim = 2;
    tr = diagsum(A, row_dim, col_dim);

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/tools/multitransp.m
================================================
function b = multitransp(a, dim)
% Transposing arrays of matrices.
% 
%    B = MULTITRANSP(A) is equivalent to B = MULTITRANSP(A, DIM), where
%    DIM = 1.
%
%    B = MULTITRANSP(A, DIM) is equivalent to
%    B = PERMUTE(A, [1:DIM-1, DIM+1, DIM, DIM+2:NDIMS(A)]), where A is an
%    array containing N P-by-Q matrices along its dimensions DIM and DIM+1,
%    and B is an array containing the Q-by-P transpose (.') of those N
%    matrices along the same dimensions. N = NUMEL(A) / (P*Q), i.e. N is
%    equal to the number of elements in A divided by the number of elements
%    in each matrix.
%
%    Since the operation is a pure permutation of dimensions, entries are
%    never conjugated.
%
%    MULTITRANSP, PERMUTE and IPERMUTE are a generalization of TRANSPOSE
%    (.') for N-D arrays.
%
%    Example:
%       A 5-by-9-by-3-by-2 array may be considered to be a block array
%       containing ten 9-by-3 matrices along dimensions 2 and 3. In this
%       case, its size is so indicated:  5-by-(9-by-3)-by-2 or 5x(9x3)x2.
%       If A is ................ a 5x(9x3)x2 array of 9x3 matrices,
%       C = MULTITRANSP(A, 2) is a 5x(3x9)x2 array of 3x9 matrices.
%
%    See also PERMUTE, IPERMUTE, MULTIPROD, MULTITRACE, MULTISCALE.

% $ Version: 1.0 $
% CODE      by:                 Paolo de Leva (IUSM, Rome, IT) 2005 Sep 9
% COMMENTS  by:                 Code author                    2006 Nov 21
% OUTPUT    tested by:          Code author                    2005 Sep 13
% -------------------------------------------------------------------------

% The matrices lie along dimensions 1 and 2 unless told otherwise.
if nargin == 1
    dim = 1;
end

% Dimensions before DIM and after DIM+1 keep their place; only DIM and
% DIM+1 are exchanged.
before = 1 : dim-1;
after  = dim+2 : ndims(a);
b = permute(a, [before, dim+1, dim, after]);

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/tools/orthogonalize.m
================================================
function [orthobasis, L] = orthogonalize(M, x, basis)
% Orthonormalizes a basis of tangent vectors in the Manopt framework.
%
% function [orthobasis, L] = orthogonalize(M, x, basis)
%
% M is a Manopt manifold structure obtained from a factory.
% x is a point on the manifold M.
% basis is a cell containing n linearly independent tangent vectors at x.
%
% orthobasis is a cell of same size as basis which contains an orthonormal
% basis for the same subspace as that spanned by basis. Orthonormality is
% assessed with respect to the metric on the tangent space to M at x.
% L is upper triangular of size n x n if basis has n vectors, such that,
% basis{k} = sum_j=1^k orthobasis{j} * L(j, k) (akin to R in a QR
% factorization.)
%
% See also: grammatrix tangentorthobasis

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, April 28, 2016.
% Contributors: 
% Change log: 


    nvecs = numel(basis);
    orthobasis = cell(size(basis));

    % Gram matrix of the input vectors with respect to the metric at x.
    G = grammatrix(M, x, basis);

    % Rationale: picture the vectors of 'basis' as the columns of a matrix
    % V with the classical dot product, so that G = V'*V. An invertible
    % matrix C makes V*C orthonormal exactly when C'*G*C = eye(n), that is,
    % when G = inv(C)'*inv(C). The Cholesky factorization G = L'*L thus
    % gives C = inv(L), and column k of C lists the coefficients of the
    % linear combination of input vectors which forms the k-th orthonormal
    % vector. A QR factorization of V would do the same job, but V is not
    % available in this setting: only G is.
    %
    % If this simple approach proves numerically unsatisfactory, a modified
    % Gram-Schmidt algorithm (possibly applied twice through a helper)
    % would be a reasonable replacement.
    L = chol(G);
    coeffs = inv(L);

    % coeffs is upper triangular, so the k-th output only involves the
    % first k input vectors.
    for k = 1 : nvecs
        orthobasis{k} = lincomb(M, x, basis(1:k), coeffs(1:k, k));
    end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/tools/plotprofile.m
================================================
function cost = plotprofile(problem, x, d, t)
% Plot the cost function along a geodesic or a retraction path.
%
% function plotprofile(problem)
% function plotprofile(problem, x)
% function plotprofile(problem, x, d)
% function plotprofile(problem, x, d, t)
% function plotprofile(problem, x, [], t)
% function plotprofile(problem, [], [], t)
%
% function costs = plotprofile(problem, x, d, t)
%
% The cost function f is evaluated along the curve gamma(t) which starts at
% gamma(0) = x with initial velocity d. The curve is generated by the
% exponential map of problem.M if it has one, and by its retraction
% otherwise. The vector t lists the values of t at which f(gamma(t)) is
% evaluated (negative values are allowed).
%
% When an output is requested, nothing is drawn: the cost values at the
% instants t are returned instead.
%
% Omitted or empty inputs are replaced as follows: x by a random point,
% d by a random tangent vector at x, and t by linspace(-1, 1, 101).
% Specifying d without x is an error.

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Jan. 9, 2013.
% Contributors: 
% Change log: 
%
%   April 3, 2015 (NB):
%       Works with the new StoreDB class system.
%
%   Nov. 12, 2016 (NB):
%       Making more inputs optional.

    % Without a cost function there is nothing to plot.
    if ~canGetCost(problem)
        error('It seems no cost was provided.');  
    end

    % Fill in the optional inputs.
    d_given = (nargin >= 3) && ~isempty(d);
    if nargin < 2 || isempty(x)
        x = problem.M.rand();
        if d_given
            error('If x is omitted, d should not be specified.');
        end
    end
    if ~d_given
        d = problem.M.randvec(x);
    end
    if nargin < 4 || isempty(t)
        t = linspace(-1, 1, 101);
    end

    % Prefer the exponential map; fall back to the retraction.
    if isfield(problem.M, 'exp')
        move = problem.M.exp;
        label = 'Exp';
    else
        move = problem.M.retr;
        label = 'Retr';
    end

    % Evaluate the cost at each requested instant, sharing one StoreDB.
    storedb = StoreDB();
    cost = zeros(size(t));
    for k = 1 : numel(t)
        cost(k) = getCost(problem, move(x, d, t(k)), storedb);
    end

    % Only draw when the caller did not ask for the values.
    if nargout == 0
        plot(t, cost);
        xlabel('t');
        ylabel(['f(' label '_x(t*d))']);
    end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/tools/powermanifold.m
================================================
function Mn = powermanifold(M, n)
% Returns a structure describing a power manifold M^n = M x M x ... x M.
%
% function Mn = powermanifold(M, n)
%
% Input: a manifold structure M and an integer n >= 1.
% 
% Output: a manifold structure Mn representing M x ... x M (n copies of M)
% with the metric of M extended element-wise. Points and vectors are stored
% as cells of size nx1.
%
% The optional operations exp, log, transp, pairmean, vec and mat are
% present in Mn only if M provides them, so that isfield(Mn, 'exp') (and
% similar tests) reliably tells whether the operation can be called.
%
% This code is for prototyping uses. The structures returned are often
% inefficient representations of power manifolds owing to their use of
% for-loops, but they should allow to rapidly try out an idea.
%
% Example (an inefficient representation of the oblique manifold (3, 10)):
% Mn = powermanifold(spherefactory(3), 10)
% disp(Mn.name());
% x = Mn.rand()
%
% See also: productmanifold

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Dec. 30, 2012.
% Contributors: 
% Change log:
%   NB, July 4, 2013: Added support for vec, mat, tangent.
%                     Added support for egrad2rgrad and ehess2rhess.

    
    % n is used as a loop bound and as a cell size: it must be a positive
    % integer, as the error message states.
    assert(isscalar(n) && n >= 1 && n == round(n), ...
           'n must be an integer larger than or equal to 1.');
    
    Mn.name = @() sprintf('[%s]^%d', M.name(), n);
    
    Mn.dim = @() n*M.dim();
    
    % Inner product: sum of the inner products of the n components.
    Mn.inner = @inner;
    function val = inner(x, u, v)
        val = 0;
        for i = 1 : n
            val = val + M.inner(x{i}, u{i}, v{i});
        end
    end

    Mn.norm = @(x, d) sqrt(Mn.inner(x, d, d));

    % Distance: root of the sum of squared component-wise distances.
    Mn.dist = @dist;
    function d = dist(x, y)
        sqd = 0;
        for i = 1 : n
            sqd = sqd + M.dist(x{i}, y{i})^2;
        end
        d = sqrt(sqd);
    end

    Mn.typicaldist = @typicaldist;
    function d = typicaldist()
        sqd = 0;
        for i = 1 : n
            sqd = sqd + M.typicaldist()^2;
        end
        d = sqrt(sqd);
    end
    
    % The following operations are applied component by component.
    Mn.proj = @proj;
    function u = proj(x, u)
        for i = 1 : n
            u{i} = M.proj(x{i}, u{i});
        end
    end
    
    Mn.tangent = @tangent;
    function u = tangent(x, u)
        for i = 1 : n
            u{i} = M.tangent(x{i}, u{i});
        end
    end
    
    % If M has no tangent2ambient, tangent vectors are returned as is.
    if isfield(M, 'tangent2ambient')
        Mn.tangent2ambient = @tangent2ambient;
    else
        Mn.tangent2ambient = @(x, u) u;
    end
    function u = tangent2ambient(x, u)
        for i = 1 : n
            u{i} = M.tangent2ambient(x{i}, u{i});
        end
    end
    
    Mn.egrad2rgrad = @egrad2rgrad;
    function g = egrad2rgrad(x, g)
        for i = 1 : n
            g{i} = M.egrad2rgrad(x{i}, g{i});
        end
    end
    
    Mn.ehess2rhess = @ehess2rhess;
    function h = ehess2rhess(x, eg, eh, h)
        for i = 1 : n
            h{i} = M.ehess2rhess(x{i}, eg{i}, eh{i}, h{i});
        end
    end
    
    % The exponential map is optional: only advertise it if M has one, so
    % that callers probing isfield(Mn, 'exp') can fall back to retr.
    if isfield(M, 'exp')
        Mn.exp = @expo;
    end
    function x = expo(x, u, t)
        if nargin < 3
            t = 1.0;
        end
        for i = 1 : n
            x{i} = M.exp(x{i}, u{i}, t);
        end
    end
    
    Mn.retr = @retr;
    function x = retr(x, u, t)
        if nargin < 3
            t = 1.0;
        end
        for i = 1 : n
            x{i} = M.retr(x{i}, u{i}, t);
        end
    end
    
    if isfield(M, 'log')
        Mn.log = @loga;
    end
    function u = loga(x, y)
        u = cell(n, 1);
        for i = 1 : n
            u{i} = M.log(x{i}, y{i});
        end
    end
    
    % Hash of a point: md5 of the concatenated component hashes, prefixed
    % with a letter.
    Mn.hash = @hash;
    function str = hash(x)
        str = '';
        for i = 1 : n
            str = [str M.hash(x{i})]; %#ok<AGROW>
        end
        str = ['z' hashmd5(str)];
    end

    % Linear combination a1*u1 (3 inputs) or a1*u1 + a2*u2 (5 inputs),
    % with the same scalar coefficients for all components.
    Mn.lincomb = @lincomb;
    function x = lincomb(x, a1, u1, a2, u2)
        if nargin == 3
            for i = 1 : n
                x{i} = M.lincomb(x{i}, a1, u1{i});
            end
        elseif nargin == 5
            for i = 1 : n
                x{i} = M.lincomb(x{i}, a1, u1{i}, a2, u2{i});
            end
        else
            error('Bad usage of powermanifold.lincomb');
        end
    end

    Mn.rand = @rand;
    function x = rand()
        x = cell(n, 1);
        for i = 1 : n
            x{i} = M.rand();
        end
    end

    % Random tangent vector: one random vector of M per component, with
    % the whole collection scaled by 1/sqrt(n).
    Mn.randvec = @randvec;
    function u = randvec(x)
        u = cell(n, 1);
        for i = 1 : n
            u{i} = M.randvec(x{i});
        end
        u = Mn.lincomb(x, 1/sqrt(n), u);
    end

    Mn.zerovec = @zerovec;
    function u = zerovec(x)
        u = cell(n, 1);
        for i = 1 : n
            u{i} = M.zerovec(x{i});
        end
    end

    if isfield(M, 'transp')
        Mn.transp = @transp;
    end
    function u = transp(x1, x2, u)
        for i = 1 : n
            u{i} = M.transp(x1{i}, x2{i}, u{i});
        end
    end

    if isfield(M, 'pairmean')
        Mn.pairmean = @pairmean;
    end
    function y = pairmean(x1, x2)
        y = cell(n, 1);
        for i = 1 : n
            y{i} = M.pairmean(x1{i}, x2{i});
        end
    end

    % Compute the length of a vectorized tangent vector of M at x, assuming
    % this length is independent of the point x (that should be fine).
    if isfield(M, 'vec')
        rand_x = M.rand();
        zero_u = M.zerovec(rand_x);
        len_vec = length(M.vec(rand_x, zero_u));

        Mn.vec = @vec;
        
        if isfield(M, 'mat')
            Mn.mat = @mat;
        end
        
    end
    
    % Vectorization: the n component vectors, each of length len_vec, are
    % stacked into one column.
    function u_vec = vec(x, u_mat)
        u_vec = zeros(len_vec, n);
        for i = 1 : n
            u_vec(:, i) = M.vec(x{i}, u_mat{i});
        end
        u_vec = u_vec(:);
    end

    % Inverse of vec: column i of the reshaped input belongs to
    % component i.
    function u_mat = mat(x, u_vec)
        u_mat = cell(n, 1);
        u_vec = reshape(u_vec, len_vec, n);
        for i = 1 : n
            u_mat{i} = M.mat(x{i}, u_vec(:, i));
        end
    end

    % Without information from M, vec and mat are not claimed to be
    % isometries.
    if isfield(M, 'vecmatareisometries')
        Mn.vecmatareisometries = M.vecmatareisometries;
    else
        Mn.vecmatareisometries = @() false;
    end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/tools/productmanifold.m
================================================
function M = productmanifold(elements)
% Returns a structure describing a product manifold M = M1 x M2 x ... x Mn.
%
% function M = productmanifold(elements)
%
% Input: an elements structure such that each field contains a manifold
% structure.
% 
% Output: a manifold structure M representing the manifold obtained by
% taking the Cartesian product of the manifolds described in the elements
% structure, with the metric obtained by element-wise extension. Points
% and vectors are stored as structures with the same fieldnames as in
% elements.
%
% The optional operations exp, log, transp and pairmean are present in M
% only if every manifold in elements provides them, so that
% isfield(M, 'exp') (and similar tests) reliably tells whether the
% operation can be called. Likewise, vec and mat are present only if every
% manifold in elements provides vec.
%
% Example:
% M = productmanifold(struct('X', spherefactory(3), 'Y', spherefactory(4)))
% disp(M.name());
% x = M.rand()
%
% Points of M = S^2 x S^3 are represented as structures with two fields, X
% and Y. The values associated to X are points of S^2, and likewise points
% of S^3 for the field Y. Tangent vectors are also represented as
% structures with two corresponding fields X and Y.
% 
% See also: powermanifold

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Dec. 30, 2012.
% Contributors: 
% Change log: 
%   NB, July 4, 2013: Added support for vec, mat, tangent.
%                     Added support for egrad2rgrad and ehess2rhess.
%                     Modified hash function to make hash strings shorter.


    elems = fieldnames(elements);
    nelems = numel(elems);
    
    assert(nelems >= 1, ...
           'elements must be a structure with at least one field.');
    
    % True if and only if every factor manifold has a field named fname.
    allhave = @(fname) all(cellfun(@(e) isfield(elements.(e), fname), ...
                                   elems));
    
    M.name = @name;
    function str = name()
        str = 'Product manifold: ';
        str = [str sprintf('[%s: %s]', ...
                           elems{1}, elements.(elems{1}).name())];
        for i = 2 : nelems
            str = [str sprintf(' x [%s: %s]', ...
                   elems{i}, elements.(elems{i}).name())]; %#ok<AGROW>
        end
    end
    
    % Dimension: sum of the dimensions of the factors.
    M.dim = @dim;
    function d = dim()
        d = 0;
        for i = 1 : nelems
            d = d + elements.(elems{i}).dim();
        end
    end
    
    % Inner product: sum of the inner products on the factors.
    M.inner = @inner;
    function val = inner(x, u, v)
        val = 0;
        for i = 1 : nelems
            val = val + elements.(elems{i}).inner(x.(elems{i}), ...
                                               u.(elems{i}), v.(elems{i}));
        end
    end

    M.norm = @(x, d) sqrt(M.inner(x, d, d));

    % Distance: root of the sum of squared distances on the factors.
    M.dist = @dist;
    function d = dist(x, y)
        sqd = 0;
        for i = 1 : nelems
            sqd = sqd + elements.(elems{i}).dist(x.(elems{i}), ...
                                                 y.(elems{i}))^2;
        end
        d = sqrt(sqd);
    end
    
    M.typicaldist = @typicaldist;
    function d = typicaldist
        sqd = 0;
        for i = 1 : nelems
            sqd = sqd + elements.(elems{i}).typicaldist()^2;
        end
        d = sqrt(sqd);
    end

    % The following operations are applied factor by factor.
    M.proj = @proj;
    function v = proj(x, u)
        for i = 1 : nelems
            v.(elems{i}) = elements.(elems{i}).proj(x.(elems{i}), ...
                                                    u.(elems{i}));
        end
    end

    M.tangent = @tangent;
    function v = tangent(x, u)
        for i = 1 : nelems
            v.(elems{i}) = elements.(elems{i}).tangent(x.(elems{i}), ...
                                                       u.(elems{i}));
        end
    end

    % Factors without a tangent2ambient function keep their tangent
    % vectors as is.
    M.tangent2ambient = @tangent2ambient;
    function v = tangent2ambient(x, u)
        for i = 1 : nelems
            if isfield(elements.(elems{i}), 'tangent2ambient')
                v.(elems{i}) = ...
                    elements.(elems{i}).tangent2ambient( ...
                                               x.(elems{i}), u.(elems{i}));
            else
                v.(elems{i}) = u.(elems{i});
            end
        end
    end

    M.egrad2rgrad = @egrad2rgrad;
    function g = egrad2rgrad(x, g)
        for i = 1 : nelems
            g.(elems{i}) = elements.(elems{i}).egrad2rgrad(...
                                               x.(elems{i}), g.(elems{i}));
        end
    end

    M.ehess2rhess = @ehess2rhess;
    function h = ehess2rhess(x, eg, eh, h)
        for i = 1 : nelems
            h.(elems{i}) = elements.(elems{i}).ehess2rhess(...
                 x.(elems{i}), eg.(elems{i}), eh.(elems{i}), h.(elems{i}));
        end
    end
    
    % The exponential map is optional: only advertise it if all factors
    % have one, so that callers probing isfield(M, 'exp') can fall back
    % to retr.
    if allhave('exp')
        M.exp = @exp;
    end
    function y = exp(x, u, t)
        if nargin < 3
            t = 1.0;
        end
        for i = 1 : nelems
            y.(elems{i}) = elements.(elems{i}).exp(x.(elems{i}), ...
                                                   u.(elems{i}), t);
        end
    end
    
    M.retr = @retr;
    function y = retr(x, u, t)
        if nargin < 3
            t = 1.0;
        end
        for i = 1 : nelems
            y.(elems{i}) = elements.(elems{i}).retr(x.(elems{i}), ...
                                                    u.(elems{i}), t);
        end
    end
    
    % Optional: requires a log on every factor.
    if allhave('log')
        M.log = @log;
    end
    function u = log(x1, x2)
        for i = 1 : nelems
            u.(elems{i}) = elements.(elems{i}).log(x1.(elems{i}), ...
                                                   x2.(elems{i}));
        end
    end

    % Hash of a point: md5 of the concatenated factor hashes, prefixed
    % with a letter.
    M.hash = @hash;
    function str = hash(x)
        str = '';
        for i = 1 : nelems
            str = [str elements.(elems{i}).hash(x.(elems{i}))]; %#ok<AGROW>
        end
        str = ['z' hashmd5(str)];
    end

    % Linear combination a1*u1 (3 inputs) or a1*u1 + a2*u2 (5 inputs),
    % with the same scalar coefficients for all factors.
    M.lincomb = @lincomb;
    function v = lincomb(x, a1, u1, a2, u2)
        if nargin == 3
            for i = 1 : nelems
                v.(elems{i}) = elements.(elems{i}).lincomb(x.(elems{i}), ...
                                                        a1, u1.(elems{i}));
            end
        elseif nargin == 5
            for i = 1 : nelems
                v.(elems{i}) = elements.(elems{i}).lincomb(x.(elems{i}), ...
                                     a1, u1.(elems{i}), a2, u2.(elems{i}));
            end
        else
            error('Bad usage of productmanifold.lincomb');
        end
    end

    M.rand = @rand;
    function x = rand()
        for i = 1 : nelems
            x.(elems{i}) = elements.(elems{i}).rand();
        end
    end

    % Random tangent vector: one random vector per factor, with the whole
    % collection scaled by 1/sqrt(nelems).
    M.randvec = @randvec;
    function u = randvec(x)
        for i = 1 : nelems
            u.(elems{i}) = elements.(elems{i}).randvec(x.(elems{i}));
        end
        u = M.lincomb(x, 1/sqrt(nelems), u);
    end

    M.zerovec = @zerovec;
    function u = zerovec(x)
        for i = 1 : nelems
            u.(elems{i}) = elements.(elems{i}).zerovec(x.(elems{i}));
        end
    end

    % Optional: requires a transp on every factor.
    if allhave('transp')
        M.transp = @transp;
    end
    function v = transp(x1, x2, u)
        for i = 1 : nelems
            v.(elems{i}) = elements.(elems{i}).transp(x1.(elems{i}), ...
                                              x2.(elems{i}), u.(elems{i}));
        end
    end

    % Optional: requires a pairmean on every factor.
    if allhave('pairmean')
        M.pairmean = @pairmean;
    end
    function y = pairmean(x1, x2)
        for i = 1 : nelems
            y.(elems{i}) = elements.(elems{i}).pairmean(x1.(elems{i}), ...
                                                        x2.(elems{i}));
        end
    end


    % Gather the length of the column vector representations of tangent
    % vectors for each of the manifolds. Raise a flag if any of the base
    % manifolds has no vec function available.
    vec_available = true;
    vec_lens = zeros(nelems, 1);
    for ii = 1 : nelems
        Mi = elements.(elems{ii});
        if isfield(Mi, 'vec')
            rand_x = Mi.rand();
            zero_u = Mi.zerovec(rand_x);
            vec_lens(ii) = length(Mi.vec(rand_x, zero_u));
        else
            vec_available = false;
            break;
        end
    end
    % vec_pos(i) is the index of the first entry of factor i in the
    % stacked vector; vec_pos(end)-1 is the total length.
    vec_pos = cumsum([1 ; vec_lens]);
    
    if vec_available
        M.vec = @vec;
        M.mat = @mat;
    end
    
    % Vectorization: the vectorized factors are stacked in one column, in
    % the order of the fields of elements.
    function u_vec = vec(x, u_mat)
        u_vec = zeros(vec_pos(end)-1, 1);
        for i = 1 : nelems
            range = vec_pos(i) : (vec_pos(i+1)-1);
            u_vec(range) = elements.(elems{i}).vec(x.(elems{i}), ...
                                                   u_mat.(elems{i}));
        end
    end

    % Inverse of vec: each factor reads back its own range of entries.
    function u_mat = mat(x, u_vec)
        u_mat = struct();
        for i = 1 : nelems
            range = vec_pos(i) : (vec_pos(i+1)-1);
            u_mat.(elems{i}) = elements.(elems{i}).mat(x.(elems{i}), ...
                                                       u_vec(range));
        end
    end

    % vec and mat are reported as isometries only if every factor says so
    % explicitly.
    vecmatareisometries = true;
    for ii = 1 : nelems
        if ~isfield(elements.(elems{ii}), 'vecmatareisometries') || ...
           ~elements.(elems{ii}).vecmatareisometries()
            vecmatareisometries = false;
            break;
        end
    end
    M.vecmatareisometries = @() vecmatareisometries;    

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/tools/smallestinconvexhull.m
================================================
function [u_norm, coeffs, u] = smallestinconvexhull(M, x, U, tol)
% Computes a minimal norm convex combination of given tangent vectors in Manopt.
%
% function [u_norm, coeffs, u] = smallestinconvexhull(M, x, U)
% function [u_norm, coeffs, u] = smallestinconvexhull(M, x, U, tol)
%
% M is a manifold as returned by a Manopt factory.
% x is a point on this manifold.
% U is a cell containing N tangent vectors U{1} to U{N} at x.
% tol (default: 1e-8): tolerance for solving the quadratic program.
% 
% Among all convex combinations of the N tangent vectors U{i} at x, this
% function finds one, u, whose norm (in the Riemannian metric of M) is
% minimal. To do so, it solves a convex quadratic program built from the
% Gram matrix of the vectors U{i}, using Matlab's built-in quadprog. The
% latter requires the optimization toolbox: if it is not available,
% consider replacing with CVX for example.
%
%
% u_norm is the norm of the smallest vector u.
% coeffs is a vector of length N with entries in [0, 1] summing to 1.
% u is the sought vector: u = coeffs(1)*U{1} + ... + coeffs(N)*U{N}.
% It is only constructed if the third output is requested.
%
% Nicolas Boumal, Feb. 19, 2013
% Modified April 6, 2016 to work with Manopt.

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, June 28, 2016.
% Contributors: 
% Change log: 
%
%   June 28, 2016 (NB):
%       Adapted for Manopt from original code by same author (Feb. 19, 2013)

% Example code: pick a manifold, a point, and a collection of tangent
% vectors at that point, then get the smallest vector in the convex hull
% of those:
% 
% M = spherefactory(5);
% x = M.rand();
% N = 3;
% U = cell(N,1);
% for k = 1 : N, U{k} = M.randvec(x); end
% [u_norm, coeffs, u] = smallestinconvexhull(M, x, U)

    % The problem to solve is:
    %   minimize ||u||^2 such that u = sum_i s_i U_i, 0 <= s_i <= 1
    %                              and sum_i s_i = 1.
    % With G the Gram matrix, G(i, j) = <U_i, U_j>, this reads:
    %   min s'*G*s s.t. 0 <= s <= 1, s'*ones = 1,
    % and the solution is u = s_1 U_1 + ... + s_N U_N.

    % Default tolerance for the quadratic programming solver.
    if ~exist('tol', 'var') || isempty(tol)
        tol = 1e-8;
    end

    % Gram matrix of the given tangent vectors.
    N = numel(U);
    G = grammatrix(M, x, U);

    % Data of the quadratic program, in the format expected by quadprog.
    % If the optimization toolbox is not available, consider replacing
    % with CVX.
    linear_term = zeros(N, 1);   % the objective is purely quadratic
    Aeq = ones(1, N);            % equality constraint: the s_i's ...
    beq = 1;                     % ... sum to 1
    lower = zeros(N, 1);         % s_i >= 0
    upper = ones(N, 1);          % s_i <= 1
    qp_options = optimset('Display', 'off', 'TolFun', tol);

    % No inequality constraints and no initial guess are specified.
    [s_opt, cost_opt] = quadprog(G, linear_term, [], [], Aeq, beq, ...
                                 lower, upper, [], qp_options);

    % quadprog minimizes (1/2)*s'*G*s, hence the factor 2 to recover the
    % squared norm of the smallest tangent vector in the convex hull.
    u_norm = real(sqrt(2*cost_opt));

    % Optimal coefficients of the convex combination.
    coeffs = s_opt;

    % Build the vector itself only if the caller asks for it.
    if nargout >= 3
        u = lincomb(M, x, U, coeffs);
    end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/tools/statsfunhelper.m
================================================
function statsfun = statsfunhelper(inp1, inp2)
% Builds a statsfun handle for the options structure of Manopt solvers.
%
% function statsfun = statsfunhelper(name, fun)
% function statsfun = statsfunhelper(S)
%
% Calling form (name, fun):
%
% name is a string which is a valid field name (no spaces, starts with a
% letter or an underscore, only alphanumeric characters and underscores).
%
% fun is a function handle with one output and 1 to 4 inputs, in one of the
% following forms (your choice):
%
%  fun(x)  or  fun(problem, x)  or
%  fun(problem, x, stats)  or  fun(problem, x, stats, store)
%
% The inputs are those that would be passed to options.statsfun, as
% described in the help of the solver used. Typically, x is the point on
% the manifold at the current iterate, problem is the Manopt problem
% structure, stats holds the statistics recorded so far for that iterate
% and store is the cache structure at the current iterate.
%
% When calling a Manopt solver with an options structure, for example:
%
%  [x, xcost, info] = steepestdescent(problem, [], options);
%
% you may set a field of the options structure as follows:
%
%  options.statsfun = statsfunhelper('nameofthefield', fun);
%
% Then, at each iteration, the stats structure gets a field
% stats.nameofthefield holding the value returned by fun at that iterate.
% The stats structures are stored in the struct-array info. For example,
% if fun returns a scalar, [info.nameofthefield] is a vector containing
% all returned values.
%
%
% Calling form (S):
%
% S is a structure. For each field of S, say S.field, the stats structure
% is augmented with stats.field = fun(..), where fun is the function handle
% stored in S.field, following the same conventions as above. This form
% makes it possible to record several quantities at each iteration:
%
%  metrics.nameofthefield = fun;
%  metrics.othername = otherfun;
%  options.statsfun = statsfunhelper(metrics);
%
% The different function handles (here, fun and otherfun) can take 1 to 4
% inputs too, and they do not have to take the same number of inputs.

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Dec. 17, 2014.
% Contributors: 
% Change log: 

    % Either a single structure, or a (name, handle) pair, is accepted.
    onestruct = (nargin == 1) && isstruct(inp1);
    if ~onestruct && (nargin ~= 2)
        error('statsfunhelper takes 1 or 2 inputs. If 1 input, it must be a structure.');
    end
    if onestruct
        S = inp1;
    else
        S = struct(inp1, inp2);
    end

    statsfun = @thestatsfun;

    % The actual statsfun: evaluates each registered handle with as many
    % inputs as it declares, and stores the result under the field name.
    function stats = thestatsfun(problem, x, stats, store)
        keys = fieldnames(S);
        for k = 1 : numel(keys)
            key = keys{k};
            handle = S.(key);
            nin = nargin(handle);
            if nin == 1
                value = handle(x);
            elseif nin == 2
                value = handle(problem, x);
            elseif nin == 3
                value = handle(problem, x, stats);
            elseif nin == 4
                value = handle(problem, x, stats, store);
            else
                error('The functions passed to statsfunhelper must take 1 to 4 inputs.');
            end
            stats.(key) = value;
        end
    end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/tools/surfprofile.m
================================================
function costs = surfprofile(problem, x, d1, d2, t1, t2)
% Evaluate (and optionally plot) the cost over a 2-dimensional slice.
%
% function surfprofile(problem, x, d1, d2, t1, t2)
% function costs = surfprofile(problem, x, d1, d2, t1, t2)
%
% The cost function is evaluated at the points
%
%   gamma(t1, t2) = exponential_x(t1*d1 + t2*d2)
% 
% where the exponential map at x is problem.M.exp (problem.M.retr is used
% instead if exp is not available). d1 and d2 are two tangent vectors to
% problem.M at the point x. The values taken by t1 and t2 are those listed
% in the two input vectors t1 and t2.
% 
% When called without an output, a surface plot is drawn. When called with
% an output, nothing is drawn and the cost values are returned in a matrix
% of size length(t1)-by-length(t2). To plot it, call
% surf(t1, t2, costs.') (notice the transpose).
%
% If x is omitted, a random point is generated (d1 and d2 must then be
% omitted too). If d1 is omitted, a random tangent vector at x is
% generated. If d2 is omitted, a random tangent vector at x is generated
% and made orthogonal to d1. If t1 or t2 is omitted, linspace(-1, 1, 51)
% is used.

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Sep. 1, 2014.
% Contributors: 
% Change log: 
%
%   April 3, 2015 (NB):
%       Works with the new StoreDB class system.
%
%   Nov. 12, 2016 (NB):
%       Most inputs are now optional.

    % The problem must provide a way to compute the cost.
    if ~canGetCost(problem)
        error('It seems no cost was provided.');  
    end

    M = problem.M;

    % Fill in the optional inputs, in order.
    if nargin < 2 || isempty(x)
        x = M.rand();
        given_d1 = (nargin >= 3) && ~isempty(d1);
        given_d2 = (nargin >= 4) && ~isempty(d2);
        if given_d1 || given_d2
            error('If x is omitted, d1, d2 should not be specified.');
        end
    end
    if nargin < 3 || isempty(d1)
        d1 = M.randvec(x);
    end
    if nargin < 4 || isempty(d2)
        d2 = M.randvec(x);
        % Remove the component of d2 along d1.
        coeff = M.inner(x, d1, d2) / M.inner(x, d1, d1);
        d2 = M.lincomb(x, 1, d2, -coeff, d1);
    end
    if nargin < 5 || isempty(t1)
        t1 = linspace(-1, 1, 51);
    end
    if nargin < 6 || isempty(t2)
        t2 = linspace(-1, 1, 51);
    end

    % Prefer the exponential map; fall back to the retraction.
    if isfield(M, 'exp')
        expo = M.exp;
        str = 'Exp';
    else
        expo = M.retr;
        str = 'Retr';
    end

    storedb = StoreDB();

    % Sweep the grid, t1 along rows and t2 along columns.
    n1 = length(t1);
    n2 = length(t2);
    costs = zeros(n1, n2);
    for a = 1 : n1
        for b = 1 : n2
            direction = M.lincomb(x, t1(a), d1, t2(b), d2);
            costs(a, b) = getCost(problem, expo(x, direction), storedb);
        end
    end

    % Only draw when the caller did not ask for the values.
    if nargout == 0
        surf(t1, t2, costs.');
        xlabel('t1');
        ylabel('t2');
        zlabel(['f(' str '_x(t1*d1+t2*d2))']);
    end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/tools/tangent2vec.m
================================================
function vec = tangent2vec(M, x, basis, u)
% Coordinates of a tangent vector in an orthonormal basis (Manopt framework)
%
% vec = tangent2vec(M, x, basis, u)
%
% The inverse operation is lincomb (see below).
%
% M is a Manopt manifold structure obtained from a factory.
% x is a point on the manifold M.
% basis is a cell containing n orthonormal tangent vectors at x, forming an
%       orthonormal basis of the tangent space at x.
% u is a tangent vector at x
%
% vec is a column vector of length n holding the coefficients of the
%     expansion of u in the basis. That is:
%
%    vec(k) = <basis{k}, u>_x          <- vec = tangent2vec(M, x, basis, u)
%
%    u = sum_{k=1}^n  vec(k)*basis{k}    <- u = lincomb(M, x, basis, vec)
%
% tangent2vec is an isometry: up to numerical round-off errors, for two
% tangent vectors u and v at x,
%
%    M.inner(x, u, v)  ==  uu'*vv,
%
% where uu = tangent2vec(M, x, basis, u), vv = tangent2vec(M, x, basis, v).
%
% See also: lincomb tangentorthobasis orthogonalize grammatrix hessianmatrix

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, Feb. 3, 2017.
% Contributors: 
% Change log: 

    % One coefficient per basis vector, obtained by inner product with u.
    vec = zeros(numel(basis), 1);
    for idx = 1 : numel(basis)
        vec(idx) = M.inner(x, basis{idx}, u);
    end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/tools/tangentorthobasis.m
================================================
function orthobasis = tangentorthobasis(M, x, n)
% Generates a random orthonormal set of tangent vectors (Manopt framework).
%
% function orthobasis = tangentorthobasis(M, x)
% function orthobasis = tangentorthobasis(M, x, n)
%
% M is a Manopt manifold structure obtained from a factory.
% x is a point on the manifold M.
% n (optional) is the dimension of the random subspace to span; by default,
%   n = M.dim() so that the returned basis spans the whole tangent space.
%
% orthobasis is a cell of n tangent vectors at x.
% With high probability, they form an orthonormal basis of the tangent
% space at x. If necessary, this can be checked by calling
%   G = grammatrix(M, x, orthobasis)
% and verifying that norm(G - eye(size(G))) is close to zero.
%
% Note: if extra accuracy is required, it may help to re-orthogonalize the
% basis returned by this function once, as follows:
%  B = tangentorthobasis(M, x, n);
%  B = orthogonalize(M, x, B);
%
% See also: grammatrix orthogonalize lincomb plotprofile

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, April 28, 2016.
% Contributors: 
% Change log: 

    dim = M.dim();

    % Default: span the whole tangent space.
    if nargin < 3 || isempty(n)
        n = dim;
    end

    isvalid = (n >= 0) && (n <= dim) && (round(n) == n);
    assert(isvalid, 'n must be an integer between 0 and M.dim().');

    % Draw n random tangent vectors; with high probability they are
    % linearly independent.
    basis = cell(n, 1);
    for idx = 1 : n
        basis{idx} = M.randvec(x);
    end

    % Gram-Schmidt turns n linearly independent vectors into n orthonormal
    % vectors spanning the same subspace.
    orthobasis = orthogonalize(M, x, basis);

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/tools/tangentspacefactory.m
================================================
function N = tangentspacefactory(M, x)
% Builds a manifold structure for the tangent space to M at x.
%
% N = tangentspacefactory(M, x)
%
% N describes a (linear) manifold: the tangent space to M at x. Both the
% points of N and the tangent vectors of N are represented as tangent
% vectors to M at x.
%
% This is chiefly useful to solve optimization problems involving tangent
% vectors to M at x, which notably comes up when solving linear systems
% involving, for example, the Hessian of the cost on M at x (think of the
% Newton equations.) The Riemannian (actually, Euclidean) structure on N is
% that of the tangent space to M, that is, the inner product is inherited.
%
% See also: preconhessiansolve

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, April 9, 2015.
% Contributors: 
% Change log: 
%
%   Jan. 25, 2017 (NB):
%       Following a comment by Jesus Briales on the Manopt forum, the
%       functions N.egrad2rgrad, N.ehess2rhess and N.tangent now include a
%       projection (they were formerly identities.)
%
%   Feb. 2, 2017 (NB):
%       Following a comment by Jesus Briales on the Manopt forum, the
%       function N.proj now calls M.proj(x, .) instead of M.proj(y, .).
%       Furthermore, N.ehess2rhess was corrected in the same way.

    % Below, p, p1, p2 denote points of N and v, v1, v2 tangent vectors to
    % N. All of them are tangent vectors to M at x, so every operation is
    % delegated to M at the fixed base point x.

    % Projection to the tangent space of M at x; shared by proj, tangent
    % and egrad2rgrad.
    projx = @(p, v) M.proj(x, v);

    N = struct( ...
        'dim',         @() M.dim(), ...
        'inner',       @(p, v1, v2) M.inner(x, v1, v2), ...
        'norm',        @(p, v) M.norm(x, v), ...
        'proj',        projx, ...
        'typicaldist', @() sqrt(M.dim()), ...
        'tangent',     projx, ...
        'egrad2rgrad', projx, ...
        'ehess2rhess', @(p, eg, eh, d) M.proj(x, eh), ...
        'exp',         @exponential, ...
        'retr',        @exponential, ...
        'log',         @(p1, p2) M.lincomb(x, 1, p2, -1, p1), ...
        'pairmean',    @(p1, p2) M.lincomb(x, 0.5, p1, 0.5, p2), ...
        'rand',        @() M.randvec(x), ...
        'randvec',     @(p) M.randvec(x), ...
        'zerovec',     M.zerovec, ...
        'lincomb',     M.lincomb, ...
        'transp',      @(p1, p2, v) v, ...
        'hash',        @(p) ['z' hashmd5(M.vec(x, p))]);

    % In a linear space, exponential and retraction both reduce to the
    % sum y + t*u (t defaults to 1 when only two inputs are given).
    function yy = exponential(y, u, t)
        if nargin == 2
            t = 1;
        end
        yy = M.lincomb(x, 1, y, t, u);
    end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt/tools/tangentspherefactory.m
================================================
function N = tangentspherefactory(M, x)
% Builds a manifold struct. for the unit sphere in the tangent space at x.
%
% N = tangentspherefactory(M, x)
%
% N describes the unit sphere in the tangent space to M at x. Points are
% represented as tangent vectors of unit norm. Tangent vectors are
% represented as tangent vectors orthogonal to the root point, with
% respect to the Riemannian metric on the tangent space.
%
% This is chiefly useful to solve optimization problems involving unit norm
% tangent vectors to M at x, which notably comes up when looking for
% extreme eigenvectors of the Hessian of a cost function on M at x, for
% example. The Riemannian structure on this sphere is that of a Riemannian
% submanifold of the (Euclidean) tangent space, equipped with the
% Riemannian metric of M at that point.
%
% See also: hessianextreme

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, March 16, 2015.
% Contributors: 
% Change log: 
%
%   Nov 27, 2015 (NB):
%       Extra projection added in the retraction, to prevent numerical
%       drift.

    % Below, p, p1, p2 denote points of N (unit-norm tangent vectors to M
    % at x) and v, v1, v2 tangent vectors to N at such a point; they are
    % all tangent vectors to M at x as well.

    % Removes from v its component along the root point p; shared by
    % proj, tangent, egrad2rgrad and transp.
    projp = @(p, v) M.lincomb(x, 1, v, -M.inner(x, v, p), p);

    N = struct( ...
        'dim',         @() M.dim() - 1, ...
        'inner',       @(p, v1, v2) M.inner(x, v1, v2), ...
        'norm',        @(p, v) M.norm(x, v), ...
        'proj',        projp, ...
        'typicaldist', @() 1, ...
        'tangent',     projp, ...
        'egrad2rgrad', projp, ...
        'retr',        @retraction, ...
        'exp',         @retraction, ...
        'rand',        @random, ...
        'randvec',     @randvec, ...
        'zerovec',     M.zerovec, ...
        'lincomb',     M.lincomb, ...
        'transp',      @(p1, p2, v) projp(p2, v), ...
        'hash',        @(p) ['z' hashmd5(M.vec(x, p))]);

    % Retraction: move to y + t*u (t defaults to 1), then scale back to
    % unit norm.
    function yy = retraction(y, u, t)
        if nargin == 2
            t = 1;
        end
        moved = M.lincomb(x, 1, y, t, u);
        % Projecting back to the tangent space of M at x is not needed
        % mathematically, but appears to be necessary numerically,
        % sometimes: over many retractions, the generated points risk
        % drifting out of the tangent space. If this proves to be a huge
        % slow down, one could run this projection only every so often
        % instead of at every call.
        moved = M.proj(x, moved);
        yy = M.lincomb(x, 1/M.norm(x, moved), moved);
    end

    % Random point: a random tangent vector to M at x, scaled to unit norm.
    function y = random()
        y = M.randvec(x);
        y = M.lincomb(x, 1/M.norm(x, y), y);
    end

    % Random unit-norm tangent vector at y: a random point, projected to
    % the tangent space of the sphere at y, then normalized.
    function u = randvec(y)
        u = projp(y, random());
        u = M.lincomb(x, 1/M.norm(x, u), u);
    end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/manopt_version.m
================================================
function [version, released] = manopt_version()
% Reports which version of the Manopt package is running.
%
% function [version, released] = manopt_version()
%
% version is a vector; version(1) is the primary version number.
% released is the release date of this version, as a string in the same
% format as the date() function in Matlab.

    released = '09-Sep-2017';
    version  = [4, 0, 0];

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/manopt/readme
================================================
test


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR/readme
================================================
test


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/RR_Assessment.tex
================================================
\begin{tabular}{|l|c|c|c|}
\hline
&\textbf{Q2n}&\textbf{SAM}&\textbf{ERGAS}\\\hline
\textbf{GT}&1.0000&0.0000&0.0000\\\hline
\textbf{EXP}&0.6513&7.2118&8.1106\\\hline
\textbf{BT-H}&0.9241&6.4530&3.9714\\\hline
\textbf{BDSD-PC}&0.9327&6.8388&3.8905\\\hline
\textbf{C-GSA}&0.9213&6.6967&4.0504\\\hline
\textbf{SR-D}&0.9113&6.6269&4.3472\\\hline
\textbf{MTF-GLP-HPM-R}&0.9228&7.0038&4.0692\\\hline
\textbf{MTF-GLP-FS}&0.9228&6.7650&4.0434\\\hline
\textbf{TV}&0.9277&6.6213&4.0630\\\hline
\textbf{PanNet}&0.9238&6.9050&4.2365\\\hline
\textbf{DRPNN}&0.9205&7.3887&4.2504\\\hline
\textbf{MSDCNN}&0.9087&7.5139&4.4214\\\hline
\textbf{BDPN}&0.9180&7.7148&4.4522\\\hline
\textbf{DiCNN}&0.8567&8.0256&5.5124\\\hline
\textbf{PNN}&0.8849&12.6019&6.7233\\\hline
\textbf{APNN}&0.9132&7.6201&4.4536\\\hline
\textbf{FusionNet}&0.8499&8.3823&6.0458\\\hline
\end{tabular}


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/SR-D/CS.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description:
%                       CS is the Compressive Sensing (CS) approach for Pansharpening proposed in [Vicinanza15].
%
% Interface:
%                       I_Fus_CS = CS(I_MS, I_PAN, I_MS_LR, ratio, sensor, TS, ol, n_atoms)
%
% Inputs:
%   I_MS:               Multispectral (MS) original image upsampled to the PAN scale;
%   I_PAN:              Panchromatic (PAN) image;
%   I_MS_LR:            MS original image;
%   ratio:              Scale ratio between MS and PAN. Pre-condition: Integer value;
%   sensor:             String for type of sensor (e.g. 'WV2', 'IKONOS');
%   TS:                 Tiling (dimensions of the patches are TS x TS, e.g. 7 x 7);
%   ol:                 Overlap in pixels between contiguous tile;
%   n_atoms:            max number of representation atoms (default value = 10).
%
% Output:
%   I_Fus_CS:           Fusion image using the CS approach in [Vicinanza15].
%
% References:
%           [Vicinanza15]   M.R. Vicinanza, R. Restaino, G. Vivone, M. Dalla Mura, and J. Chanussot, "A pansharpening method based on the sparse representation of injected details",
%                           IEEE Geoscience and Remote Sensing Letters, vol. 12, no. 1, pp. 180-184, 2015.
%           [Vivone20]      G. Vivone, M. Dalla Mura, A. Garzelli, R. Restaino, G. Scarpa, M.O. Ulfarsson, L. Alparone, and J. Chanussot, "A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting pansharpening with classical and emerging pansharpening methods", 
%                           IEEE Geoscience and Remote Sensing Magazine, doi: 10.1109/MGRS.2020.3019315.
% % % % % % % % % % % % %
%
% Version: 1
%
% % % % % % % % % % % % %
%
% Copyright (C) 2019
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function I_Fus_CS = CS(I_MS, I_PAN, I_MS_LR, ratio, sensor, TS, ol, n_atoms)
% Pansharpening by sparse representation of the injected details.
%
% Pipeline: equalize the PAN image to each MS band, extract high-pass
% details of the MS and PAN images, build the high/low resolution
% dictionaries from the PAN details, estimate the sparse coefficients and
% reconstruct the detail image, which is finally added to I_MS.
%
% n_atoms is optional (default 10); an empty value also selects the default.

% The function has 8 parameters, so n_atoms is missing exactly when
% nargin < 8. (The former test "nargin < 9" was always true and silently
% overwrote any value supplied by the caller.)
if nargin < 8 || isempty(n_atoms)
    n_atoms = 10;
end

imageLR = double(I_MS);
imageHR = double(I_PAN);
imageLR_LR = double(I_MS_LR);

%%% Equalization
% The PAN image is replicated once per MS band, then each copy is shifted
% and scaled to have the mean and standard deviation of the matching
% (upsampled) MS band.
imageHR = repmat (imageHR, [1 1 size(I_MS,3)]);
for ii = 1 : size(imageLR_LR,3)
    %     imageHR(:,:,ii) = equalize_image (imageHR(:,:,ii), imageLR(:,:,ii));
    imageHR(:,:,ii) =  (imageHR(:,:,ii) - mean2(imageHR(:,:,ii))) / std2(imageHR(:,:,ii))...
        * std2(imageLR(:,:,ii)) + mean2(imageLR(:,:,ii));
end

%%% Extract details using MTF-based filters
% Details = image minus its low-pass version. For the PAN, the low-pass
% version is additionally decimated and re-interpolated by the scale ratio.
imageLR_LP = MTF(imageLR, sensor, ratio);
imageLR_D = imageLR - imageLR_LP;
imageHR_LP = MTF(imageHR, sensor, ratio);
for ii = 1:size(imageHR,3)
    imageHR_LP(:,:,ii) = imresize(imresize(imageHR_LP(:,:,ii), 1/ratio, 'nearest'), ratio);
end
imageHR_D = imageHR - imageHR_LP;

%%% Decimation MS
% imageLR_LR is overwritten: from here on it holds the MS details
% decimated by nearest-neighbour sampling (the original I_MS_LR values are
% only used to fix its size).
for ii = 1 : size(imageLR,3)
    imageLR_LR(:,:,ii) = double(imresize(imageLR_D(:,:,ii),1/ratio, 'nearest'));
end

%%% Degradation PAN
imageHR_LR = resize_images(imageHR_D, 1, ratio, sensor);

%%% Dictionary learning
[Dh, Dl, ytilde_k] = Dict_Learn(imageHR_D, imageHR_LR, imageLR_LR, ratio, TS, ol);

%%% Sparse coefficient estimation and  HR signal reconstruction
I_Fus_CS = OMP_Rec_Detile(Dl, Dh, ytilde_k, size(imageHR,1), size(imageHR,2), size(imageLR_LR, 3), ratio, ol , TS, n_atoms);

% Inject the reconstructed details into the upsampled MS image.
I_Fus_CS = imageLR + I_Fus_CS;

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/SR-D/Dict_Learn.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Dict_Learn is the dictionary learning method for the 
% compressive sensing approach for Pansharpening proposed in [Vicinanza15].
% 
% INPUTS
%   I_PAN_D:            Details of the panchromatic image;
%   I_PAN_LR_D:         Details of the low resolution panchromatic image;
%   I_MS_LR_D:          Details of the MS original image or the MS original image (depending on the flag "do_detail" in CSDetails);
%   resize_fact:        Resize factor (ratio between PAN and MS images);
%   TS:                 Tiling (dimensions of the patches are TS x TS, e.g. 7 x 7);
%   ol:                 Overlap in pixels between contiguous tiles.
% 
% OUTPUTS
%   Dh:                 High spatial resolution dictionary (PAN details) built as in [Vicinanza15]; 
%   Dl:                 Low spatial resolution dictionary (Low resolution PAN details) built as in [Vicinanza15];
%   ytilde_k:           Patches in column form of the details of the MS original image or the MS original image (depending on the flag "do_detail" in CSDetails).
% 
% REFERENCE
%   [Vicinanza15]       M.R. Vicinanza, et al. "A pansharpening method based on the sparse representation of injected details." 
%                       IEEE Geoscience and Remote Sensing Letters 12.1 (2015): 180-184.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [Dh, Dl, ytilde_k] = Dict_Learn(I_PAN_D, I_PAN_LR_D, I_MS_LR_D, resize_fact, TS, ol)
% Tiles the three inputs into overlapping patches and stores every patch
% as one column (bands stacked on top of each other) of Dh, Dl and
% ytilde_k respectively. Patches are TS x TS at low resolution and
% (TS*resize_fact) x (TS*resize_fact) at high resolution, with an overlap
% of ol low-resolution pixels. The last row/column of patches is shifted
% back so that it ends on the image border.

step   = TS - ol;                    % stride between patches (LR pixels)
nBands = size(I_MS_LR_D, 3);
lr_len = TS^2;                       % samples per band in an LR patch
hr_len = TS^2*resize_fact^2;         % samples per band in an HR patch

nr = ceil((size(I_PAN_D,1)/resize_fact - ol) / step);
nc = ceil((size(I_PAN_D,2)/resize_fact - ol) / step);
n_patches = nr*nc;

Dh       = zeros(hr_len*nBands, n_patches);
Dl       = zeros(lr_len*nBands, n_patches);
ytilde_k = zeros(lr_len*nBands, n_patches);

% Backward shift applied to the last row/column of patches when the image
% size is not an exact multiple of the stride.
last_shiftr = 0;
rem_r = mod(size(I_MS_LR_D,1) - ol, step);
if rem_r ~= 0
    last_shiftr = step - rem_r;
end
last_shiftc = 0;
rem_c = mod(size(I_MS_LR_D,2) - ol, step);
if rem_c ~= 0
    last_shiftc = step - rem_c;
end

% Building the dictionaries (Dh and Dl), patches taken in row-major order
patch = 0;
for irow = 1:nr
    shiftr = 0;
    if irow == nr
        shiftr = last_shiftr;
    end
    rows_hr = ((irow-1)*step*resize_fact+1 - shiftr*resize_fact) : ((irow*TS-(irow-1)*ol)*resize_fact - shiftr*resize_fact);
    rows_lr = ((irow-1)*step+1 - shiftr) : (irow*TS-(irow-1)*ol - shiftr);

    for icol = 1:nc
        patch = patch + 1;
        shiftc = 0;
        if icol == nc
            shiftc = last_shiftc;
        end
        cols_hr = ((icol-1)*step*resize_fact+1 - shiftc*resize_fact) : ((icol*TS-(icol-1)*ol)*resize_fact - shiftc*resize_fact);
        cols_lr = ((icol-1)*step+1 - shiftc) : (icol*TS-(icol-1)*ol - shiftc);

        for iband = 1:nBands
            tile_hr = I_PAN_D(rows_hr, cols_hr, iband);
            tile_lr = I_PAN_LR_D(rows_lr, cols_lr, iband);
            tile_ms = I_MS_LR_D(rows_lr, cols_lr, iband);

            off_hr = (iband-1)*hr_len;
            off_lr = (iband-1)*lr_len;
            Dh(off_hr+1 : off_hr+numel(tile_hr), patch)       = tile_hr(:);
            Dl(off_lr+1 : off_lr+numel(tile_lr), patch)       = tile_lr(:);
            ytilde_k(off_lr+1 : off_lr+numel(tile_ms), patch) = tile_ms(:);
        end
    end
end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/SR-D/OMP.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% OMP is the Orthogonal matching Pursuit (OMP) modified to work with multispectral data.
% 
% INPUTS
%   D:                  Dictionary (matrix);
%   y:                  Measurements (column vector);
%   (delta, the maximum error allowed for the constraint y = D a, is not an input: it is fixed to 0 inside the function;)
%   nBands:             Number of MS spectral bands;
%   iatom:              Id of the actual atom under analysis.
%   n_atoms:            max number of representation atoms
%
% OUTPUTS
%   a:                  Estimated alphas;
%   indx:               Vector of the atom positions in the dictionary.
% 
% REFERENCE
%   [Vicinanza15]       M.R. Vicinanza, et al. "A pansharpening method based on the sparse representation of injected details." 
%                       IEEE Geoscience and Remote Sensing Letters 12.1 (2015): 180-184.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [a, indx] = OMP(D, y,  nBands, iatom, n_atoms)
% Orthogonal Matching Pursuit applied band by band.
%
% The rows of D and y are split into nBands consecutive chunks of equal
% length (one per spectral band). The atom iatom is always selected first;
% each further atom is the one most correlated with the current residual.
% After each selection the coefficients are re-estimated by least squares,
% independently for every band, on the atoms selected so far.
%
% a is (number of selected atoms) x nBands, indx is a row vector of the
% selected atom positions. If no atom is selected (zero measurement vector
% or n_atoms <= 0), a is 0 x nBands and indx is 1 x 0.

% Number of rows of D belonging to each band. size(D, 1) is used rather
% than size(D), which returns a vector and only worked as a colon operand
% because its first element was silently picked.
n = round(size(D, 1) / nBands);
delta = 0;
res = y;
curr_delta = sum (res.^2);
j = 0;

% Defined up front so that both outputs exist even if the loop body never
% runs; formerly this case ended in an "output not assigned" error.
a = zeros (0, nBands);
indx = zeros (1, 0);
Da = zeros (1, n*nBands);

while curr_delta > delta && j < n_atoms
    j = j+1;
    if j==1
        % The first atom is imposed by the caller.
        indx = iatom;
    else
        % Pick the atom most correlated with the residual.
        proj = D' * res;
        [~, imax] = max(abs(proj));
        imax = imax(1);
        indx = cat(2,indx,imax);
    end
    a = zeros (j, nBands);
    for iband = 1:nBands
        band_rows = (iband-1)*n+1:iband*n;
        Di = D(band_rows,indx(1:j));
        yi = y(band_rows);
        DitDi = Di'*Di;
        % Least squares only when the normal matrix is far enough from
        % singular; otherwise the coefficients of this band stay at zero.
        if det (DitDi) > 1e-1
            a(:,iband) = ((DitDi)\(Di')) * yi;
        end
        Da(band_rows) = Di * a(:,iband);
    end
    res = y - Da';
    curr_delta = sum(res.^2);
end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/SR-D/OMP_Rec_Detile.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% OMP_Rec_Detile performs:
% 1) The estimation of the coefficients \alpha at reduced resolution using an orthogonal matching pursuit (OMP) procedure for multispectral images;
% 2) The reconstruction of the patches at full resolution using the hypothesis of invariance among scales of the \alpha coefficients;
% 3) The detiling step to get the final image details at full resolution for the approach proposed in [Vicinanza15].
%
% INPUTS
%   Dl:                 Low spatial resolution dictionary (Low resolution PAN details) built as in [Vicinanza15];
%   Dh:                 High spatial resolution dictionary (PAN details) built as in [Vicinanza15];
%   ytilde_k:           Patches in column form of the details of the MS original image or the MS original image (depending on the flag "do_detail" in CSDetails);
%   H_PAN,L_PAN,C_MS:   PAN (row and column) dimensions and number of MS spectral bands;
%   resize_fact:        Resize factor (ratio between PAN and MS images);
%   TS:                 Tiling (dimensions of the patches are TS x TS, e.g. 7 x 7);
%   ol:                 Overlap in pixels between contiguous tiles.
%   n_atoms:            max number of representation atoms
%
% OUTPUT
%   I_Fus_CS:           Reconstructed details (or fused image if do_detail flag is 0) using the CS approach in [Vicinanza15] for the final pansharpening product.
%
% REFERENCE
%   [Vicinanza15]       M.R. Vicinanza, et al. "A pansharpening method based on the sparse representation of injected details."
%                       IEEE Geoscience and Remote Sensing Letters 12.1 (2015): 180-184.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function I_Fus_CS = OMP_Rec_Detile(Dl, Dh, ytilde_k, H_PAN, L_PAN, C_MS, resize_fact, ol, TS, n_atoms)
% For every patch (column of ytilde_k): estimate the coefficients with OMP
% on the low resolution dictionary Dl, rebuild the patch from the same
% atoms of the high resolution dictionary Dh, and accumulate it at its
% position in the output. Overlapping contributions are averaged.

out_size = [H_PAN L_PAN C_MS];
I_Fus_CS = zeros (out_size);     % sum of the reconstructed patches
countpx  = zeros (out_size);     % number of patches covering each pixel

step   = TS - ol;                % stride between patches (LR pixels)
hr_len = TS^2*resize_fact^2;     % samples per band in an HR patch
nr = ceil ((H_PAN/resize_fact - ol) / step);
nc = ceil ((L_PAN/resize_fact - ol) / step);

% Backward shift of the last row/column of patches when the image size is
% not an exact multiple of the stride.
shiftr_glob = 0;
rem_r = mod (H_PAN/resize_fact - ol, step);
if rem_r ~= 0
    shiftr_glob = step - rem_r;
end
shiftc_glob = 0;
rem_c = mod (L_PAN/resize_fact - ol, step);
if rem_c ~= 0
    shiftc_glob = step - rem_c;
end

Latom = size (Dl, 2);
Dict_Size = size (ytilde_k, 2);
alpha_count = 0;                 % total number of atoms with a non-zero coefficient
iatom = 0;

for irow = 1:nr
    shiftr = shiftr_glob * (irow == nr);
    blockr = ((irow-1)*step*resize_fact+1 - shiftr*resize_fact) : ((irow*TS-(irow-1)*ol)*resize_fact - shiftr*resize_fact);
    Lr = length (blockr);

    for icol = 1:nc
        iatom = iatom + 1;
        shiftc = shiftc_glob * (icol == nc);
        blockc = ((icol-1)*step*resize_fact+1 - shiftc*resize_fact) : ((icol*TS-(icol-1)*ol)*resize_fact - shiftc*resize_fact);
        Lc = length (blockc);

        % Sparse coding with OMP for MS data
        [alpha, inds] = OMP(Dl, ytilde_k(:,iatom), C_MS, iatom, n_atoms);

        % Patch reconstruction and detiling
        for iband = 1:C_MS
            band_rows = (iband-1)*hr_len+1 : iband*hr_len;
            patch_hr = reshape (Dh(band_rows,inds) * alpha(:,iband), Lr, Lc);
            I_Fus_CS(blockr,blockc,iband) = I_Fus_CS(blockr,blockc,iband) + patch_hr;
            countpx(blockr,blockc,iband)  = countpx(blockr,blockc,iband) + 1;
        end

        alpha_count = alpha_count + sum( sum(alpha,2)~=0 );

        % Progress report every 100 patches.
        if mod(iatom,100)==1
            fprintf ('OMP band by band and detile: atom %i of %i\n', iatom, Dict_Size);
        end
    end
end

% Average overlapping patches
I_Fus_CS = I_Fus_CS ./ countpx;

fprintf ('Sparsity di alfa = %.2f: %.1f atoms on %i used for each patch on average\n', (Dict_Size*Latom-alpha_count)/Dict_Size/Latom*100, alpha_count/Dict_Size, Dict_Size)

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/TV/TV_pansharpen.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description: 
%           This function minimizes 
%               J(x) = || y - M*x ||^2 + lambda*TV(x)
%           where
%               y = [yms^T, ypan^T]^T
%               x is the pansharpened ms image
%               M models the relationship between
%               y and x; see [Palsson14] for details
% 
% Interface:
%           x = TV_pansharpen(yms,ypan,alpha,lambda,c,maxiter,w)
%
% Inputs:
%           yms:            The observed MS image;
%           ypan:           The PAN image;
%           alpha:          convergence parameter 1, suggested value=0.75;
%          lambda:          weight of the TV term in J(x) above;
%               c:          convergence parameter 2, suggested value=8;
%         maxiter:          number of iterations;
%               w:          We assume the pan image to be a linear 
%                           combination of the pansharpened ms image,
%                           w contains the weights.                    
% Output:
%               x:          Pansharpened image.
% 
% Reference:
%           [Palsson14]     F. Palsson, J.R. Sveinsson, and M.O. Ulfarsson, A New Pansharpening Algorithm Based on Total Variation
%                           IEEE Geoscience and Remote Sensing Letters, vol. 11, no. 1, pp. 318 - 322, 2014.
%           [Vivone20]      G. Vivone, M. Dalla Mura, A. Garzelli, R. Restaino, G. Scarpa, M.O. Ulfarsson, L. Alparone, and J. Chanussot, "A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting pansharpening with classical and emerging pansharpening methods", 
%                           IEEE Geoscience and Remote Sensing Magazine, doi: 10.1109/MGRS.2020.3019315.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function x = TV_pansharpen(yms,ypan,alpha,lambda,c,maxiter,w)
    % Entry point of the TV pansharpening iteration.
    % Starting from an all-zero estimate, each pass recomputes the
    % data term b, updates the auxiliary variable z and then the image x.
    % The loop always runs maxiter passes; there is no convergence test.

    nBands  = size(yms,3);
    panSize = size(ypan);

    % x: one plane per MS band at PAN size; z: two planes per band.
    x = zeros([panSize nBands]);
    z = zeros([panSize nBands*2]);

    for iter = 1:maxiter
        b = computeb(yms,ypan,x,alpha,w);
        z = znext(z,x,b,alpha,lambda,c);
        x = xnext(z,b,alpha);
    end
end

function b=computeb(yms,ypan,xk,alpha,w)
    % b = alpha*xk + adjointH(residuals), where the residuals are the
    % observations minus the forward model applied to the current estimate xk.
    [msModel, panModel] = computeH(xk,w);
    msResidual  = yms  - msModel;
    panResidual = ypan - panModel;
    b = alpha*xk + adjointH(msResidual,panResidual,w);
end

function [yms, ypan]=computeH(x,w)
    % Forward model applied to the stack x:
    %   yms  - each band of x passed through decimate();
    %   ypan - weighted sum of the bands of x, with weights w(i).

    nBands = size(x,3);
    ypan = zeros(size(x,1), size(x,2));

    for band = 1:nBands
        plane = x(:,:,band);
        yms(:,:,band) = decimate(plane);
        ypan = ypan + w(band)*plane;
    end
end

function y=decimate(x)
    % Shrink a single plane by a fixed factor of 4 per dimension using
    % bilinear imresize.
    %
    % Alternatives left disabled by the original author:
    %   imfilter with fspecial('Gaussian',9,sigma) or fspecial('average',4),
    %     followed by y(1:4:end,1:4:end);
    %   a separable 0.25*[1 1 1 1] box filter followed by downsample(.,4,1);
    %   MTF_downsample(x,'QB','none',4,1);
    %   imresize(imresize(x,1/4,'bicubic'),4,'bicubic').
    scaleFactor = 0.25;
    y = imresize(x, scaleFactor, 'bilinear');
end
     
function x=adjointH(yms,ypan,w)
    % Counterpart of computeH: every MS-residual band is upsampled with
    % interpolate() and the PAN residual is added, scaled by w(i).
    nBands = size(yms,3);
    for band = 1:nBands
        upsampled = interpolate(yms(:,:,band));
        x(:,:,band) = upsampled + w(band)*ypan;
    end
end

function y=interpolate(x)
    % Enlarge a single plane by a fixed factor of 4 per dimension using
    % bilinear imresize.
    %
    % Alternatives left disabled by the original author:
    %   upsample(upsample(x,4)',4)' followed by Gaussian/average imfilter;
    %   imresize(x,4,'bicubic');
    %   MTF_upsample(x,'IKONOS','none',4,1);
    %   interp23tap(x,4).
    scaleFactor = 4;
    y = imresize(x, scaleFactor, 'bilinear');
end

function z1=znext(z0,x0,b,alpha,lambda,c)
    % Update of the auxiliary variable z.
    % For every band the per-pixel weight is
    %     2*alpha/lambda * sqrt(Dx(x0)^2 + Dy(x0)^2) + c
    % and the same weight plane is used for the Dx slot (band) and the
    % Dy slot (band + number of bands) of z.

    nBands = size(x0,3);
    for band = 1:nBands
        plane = x0(:,:,band);
        weightPlane = 2* alpha/lambda * sqrt(Dx(plane).^2+Dy(plane).^2)+c;
        W(:,:,band)        = weightPlane;
        W(:,:,band+nBands) = weightPlane;
    end

    numerator = computeDb(b) + cIDDTz(z0,c);
    z1 = numerator./W;
end

function DX = Dx(v)
    % Horizontal forward difference of a 2-D array: differences between
    % neighbouring columns, with a zero column appended so that the
    % result has the same size as v.
    zeroColumn = zeros(size(v,1),1);
    DX = horzcat(diff(v,1,2), zeroColumn);
end

function DY = Dy(v)
    % Vertical forward difference of a 2-D array: differences between
    % neighbouring rows, with a zero row appended so that the result has
    % the same size as v.
    zeroRow = zeros(1,size(v,2));
    DY = vertcat(diff(v), zeroRow);
end

function Db=computeDb(b)
    % Apply the discrete gradient to every band of b.
    % Output layout: planes 1..n hold Dx of bands 1..n, planes n+1..2n
    % hold Dy of bands 1..n (n = size(b,3)).

    nBands = size(b,3);
    for band = 1:nBands
        plane = b(:,:,band);
        Db(:,:,band)        = Dx(plane);
        Db(:,:,band+nBands) = Dy(plane);
    end
end

function ddtz=cIDDTz(z,c)
    % Computes D*D'*z for the stacked gradient variable z.
    %
    % Inputs:
    %   z: stack with 2*n planes; planes 1..n are the Dx components and
    %      planes n+1..2n the Dy components (the layout produced by
    %      computeDb);
    %   c: scalar; currently unused by the returned value (see NOTE).
    % Output:
    %   ddtz: computeDb applied to D'*z, same size as z.
    %
    % Fix: the Dy plane paired with band i is z(:,:,i+n). The previous
    % code used a hard-coded offset of 4, which is only right for 4-band
    % images and mixed up Dx/Dy planes for any other band count.
    %
    % NOTE(review): the function name and the previously computed (but
    % never returned) local "cIddtz = c*z - ddtz" suggest the intended
    % result is (c*I - D*D')*z. The returned value is deliberately left
    % as D*D'*z so that existing results do not change - confirm against
    % the reference algorithm before altering it.

    nBands = size(z,3)/2;
    for band = 1:nBands
        dtz(:,:,band) = DxT(z(:,:,band)) + DyT(z(:,:,band+nBands));
    end

    ddtz = computeDb(dtz);
end

function DXT=DxT(v)
    % Horizontal counterpart of DyT: transpose, apply DyT along the
    % rows of the transposed array, transpose back.
    transposedInput = v';
    transposedResult = DyT(transposedInput);
    DXT = transposedResult';
end

function DYT = DyT(v)
    % Transpose of the vertical difference operator Dy, applied to v.
    % Row 1 is -v(1,:), interior rows are v(k-1,:)-v(k,:), and the last
    % row is v(end-1,:); the output has the same size as v.

    topRow    = -v(1,:);
    negDiff   = -diff(v);
    interior  = negDiff(1:(end-1),:);
    bottomRow = v(end-1,:);

    DYT = vertcat(topRow, interior, bottomRow);
end

function x1=xnext(z1,b,alpha)
    % Image update: subtract D'*z1 from b and divide by alpha.
    adjointGradient = DTz(z1);
    x1 = (b - adjointGradient)./alpha;
end

function dtz=DTz(z)
    % Computes D'*z for the stacked gradient variable z.
    %
    % Input:
    %   z: stack with 2*n planes; planes 1..n are the Dx components and
    %      planes n+1..2n the Dy components (the layout produced by
    %      computeDb).
    % Output:
    %   dtz: n planes, dtz(:,:,i) = DxT(z(:,:,i)) + DyT(z(:,:,i+n)).
    %
    % Fix: the Dy plane paired with band i is z(:,:,i+n). The previous
    % code used a hard-coded offset of 4, which is only right for 4-band
    % images and mixed up Dx/Dy planes for any other band count.

    nBands = size(z,3)/2;
    for band = 1:nBands
        dtz(:,:,band) = DxT(z(:,:,band)) + DyT(z(:,:,band+nBands));
    end
end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/LPfilter.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description: 
%           LPfilter low-pass filters the panchromatic (PAN) image using the "a trous" (undecimated) wavelet transform. 
% 
% Interface:
%           HRPanLP = LPfilter(HRPan,ratio)
%
% Inputs:
%           HRPan:          PAN image;
%           ratio:          Scale ratio between MS and PAN.
%
% Outputs:
%           HRPanLP:       Output low-pass filtered PAN image.
% 
% References:
%           [Vivone15]          G. Vivone, L. Alparone, J. Chanussot, M. Dalla Mura, A. Garzelli, G. Licciardi, R. Restaino, and L. Wald, "A Critical Comparison Among Pansharpening Algorithms", 
%                               IEEE Transactions on Geoscience and Remote Sensing, vol. 53, no. 5, pp. 2565-2586, May 2015.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function HRPanLP = LPfilter(HRPan,ratio)
% Low-pass filter HRPan: decompose with ndwt2_working, zero every cell of
% the decomposition except the first one, and rebuild with indwt2_working.

% Filter bank built from the 5-tap kernel [1 4 6 4 1]/16, each filter
% scaled by sqrt(2).
baseKernel = [1 4 6 4 1 ]/16;
unitPulse  = [0 0 1 0 0 ];

h      = sqrt(2)*baseKernel;
g      = sqrt(2)*(unitPulse-baseKernel);
htilde = sqrt(2)*baseKernel;
gtilde = sqrt(2)*(unitPulse+baseKernel);
WF = {h,g,htilde,gtilde};

% Number of decomposition levels derived from the scale ratio.
Levels = ceil(log2(ratio));

WT = ndwt2_working(HRPan,Levels,WF);

% Discard everything but the first cell of the decomposition.
for cellIdx = 2 : numel(WT.dec)
    WT.dec{cellIdx} = zeros(size(WT.dec{cellIdx}));
end

HRPanLP = indwt2_working(WT,'c');

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/LPfilterGauss.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description: 
%           LPfilterGauss filters the panchromatic (PAN) image using a Gaussian filter with gain at Nyquist frequency 0.3. 
% 
% Interface:
%           I_PAN_LR = LPfilterGauss(I_PAN,ratio)
%
% Inputs:
%           I_PAN:          PAN image;
%           ratio:          Scale ratio between MS and PAN.
%
% Outputs:
%           I_PAN_LR:       Output low-pass filtered PAN image.
% 
% References:
%           [Aiazzi02]          B. Aiazzi, L. Alparone, S. Baronti, and A. Garzelli, "Context-driven fusion of high spatial and spectral resolution images based on
%                               oversampled multiresolution analysis", IEEE Transactions on Geoscience and Remote Sensing, vol. 40, no. 10, pp. 2300-2312, October
%                               2002.
%           [Aiazzi06]          B. Aiazzi, L. Alparone, S. Baronti, A. Garzelli, and M. Selva, "MTF-tailored multiscale fusion of high-resolution MS and Pan imagery",
%                               Photogrammetric Engineering and Remote Sensing, vol. 72, no. 5, pp. 591-596, May 2006.
%           [Vivone14a]         G. Vivone, R. Restaino, M. Dalla Mura, G. Licciardi, and J. Chanussot, "Contrast and error-based fusion schemes for multispectral
%                               image pansharpening", IEEE Geoscience and Remote Sensing Letters, vol. 11, no. 5, pp. 930-934, May 2014.
%           [Vivone15]          G. Vivone, L. Alparone, J. Chanussot, M. Dalla Mura, A. Garzelli, G. Licciardi, R. Restaino, and L. Wald, "A Critical Comparison Among Pansharpening Algorithms", 
%                               IEEE Transactions on Geoscience and Remote Sensing, vol. 53, no. 5, pp. 2565-2586, May 2015.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function I_PAN_LR = LPfilterGauss(I_PAN,ratio)
    % Filter I_PAN with a 41x41 kernel designed from a Gaussian whose
    % gain at the cut frequency 1/ratio is GNyq = 0.3.
    GNyq = 0.3;
    N    = 41;
    fcut = 1/ratio;

    % Standard deviation giving the requested gain. This uses N, whereas
    % MTF_PAN and genMTF in this toolbox use N-1 in the same formula.
    numerator   = (N*(fcut/2))^2;
    denominator = -2*log(GNyq);
    alpha = sqrt(numerator/denominator);

    % Gaussian normalised to unit peak, then windowed with a Kaiser
    % window through fwind1.
    gaussKernel = fspecial('gaussian', N, alpha);
    Hd = gaussKernel./max(gaussKernel(:));
    h  = fwind1(Hd,kaiser(N));

    I_PAN_LR = imfilter(I_PAN,real(h),'replicate');
end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/LPfilterPlusDec.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description: 
%           LPfilterPlusDec filters and decimates the image I_PAN using a Starck and Murtagh (S&M) filter. 
% 
% Interface:
%           I_PAN_LR = LPfilterPlusDec(I_PAN,ratio)
%
% Inputs:
%           I_PAN:          Image to be filtered and decimated;
%           ratio:          Scale ratio between MS and PAN. Pre-condition: Resize factors power of 2.
%
% Outputs:
%           I_PAN_LR:       Filtered and decimated image.
% 
% References:
%           [Starck07]      J.-L. Starck, J. Fadili, and F. Murtagh, "The undecimated wavelet decomposition and its reconstruction", IEEE Transactions on Image
%                           Processing, vol. 16, no. 2, pp. 297-309, February 2007.
%           [Vivone15]      G. Vivone, L. Alparone, J. Chanussot, M. Dalla Mura, A. Garzelli, G. Licciardi, R. Restaino, and L. Wald, "A Critical Comparison Among Pansharpening Algorithms", 
%                           IEEE Transactions on Geoscience and Remote Sensing, vol. 53, no. 5, pp. 2565-2586, May 2015.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

function I_PAN_LR = LPfilterPlusDec(I_PAN,ratio)
% Low-pass filter I_PAN through the ndwt2_working / indwt2_working pair
% (keeping only the first cell of the decomposition) and then decimate
% the result by "ratio" with nearest-neighbour imresize.

% Filter bank built from the 5-tap kernel [1 4 6 4 1]/16, each filter
% scaled by sqrt(2).
baseKernel = [1 4 6 4 1 ]/16;
unitPulse  = [0 0 1 0 0 ];
WF = {sqrt(2)*baseKernel, ...
      sqrt(2)*(unitPulse-baseKernel), ...
      sqrt(2)*baseKernel, ...
      sqrt(2)*(unitPulse+baseKernel)};

Levels = ceil(log2(ratio));

WT = ndwt2_working(I_PAN,Levels,WF);

% Zero every cell of the decomposition except the first.
WT.dec(2:end) = cellfun(@(coef) zeros(size(coef)), WT.dec(2:end), ...
                        'UniformOutput', false);

lowPass = indwt2_working(WT,'c');

I_PAN_LR = imresize(lowPass,1/ratio,'nearest');

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/MTF.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description: 
%           MTF filters the image I_MS using a Gaussian filter matched with the Modulation Transfer Function (MTF) of the MultiSpectral (MS) sensor. 
% 
% Interface:
%           I_Filtered = MTF(I_MS,sensor,ratio)
%
% Inputs:
%           I_MS:           MS image;
%           sensor:         String for type of sensor (e.g. 'WV2', 'IKONOS');
%           ratio:          Scale ratio between MS and PAN.
%
% Outputs:
%           I_Filtered:     Output filtered MS image.
% 
% References:
%           [Aiazzi02]          B. Aiazzi, L. Alparone, S. Baronti, and A. Garzelli, "Context-driven fusion of high spatial and spectral resolution images based on
%                               oversampled multiresolution analysis", IEEE Transactions on Geoscience and Remote Sensing, vol. 40, no. 10, pp. 2300-2312, October
%                               2002.
%           [Aiazzi06]          B. Aiazzi, L. Alparone, S. Baronti, A. Garzelli, and M. Selva, "MTF-tailored multiscale fusion of high-resolution MS and Pan imagery",
%                               Photogrammetric Engineering and Remote Sensing, vol. 72, no. 5, pp. 591-596, May 2006.
%           [Vivone14a]         G. Vivone, R. Restaino, M. Dalla Mura, G. Licciardi, and J. Chanussot, "Contrast and error-based fusion schemes for multispectral
%                               image pansharpening", IEEE Geoscience and Remote Sensing Letters, vol. 11, no. 5, pp. 930-934, May 2014.
%           [Vivone15]          G. Vivone, L. Alparone, J. Chanussot, M. Dalla Mura, A. Garzelli, G. Licciardi, R. Restaino, and L. Wald, "A Critical Comparison Among Pansharpening Algorithms", 
%                               IEEE Transactions on Geoscience and Remote Sensing, vol. 53, no. 5, pp. 2565-2586, May 2015.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function I_Filtered = MTF(I_MS,sensor,ratio)
% Filter every band of I_MS with the matching kernel returned by genMTF.

nBands = size(I_MS,3);
kernels = genMTF(ratio, sensor, nBands);

% Accumulate into a double array so the result is double whatever the
% class of I_MS.
filtered = zeros(size(I_MS));
for band = 1 : nBands
    bandKernel = real(kernels(:,:,band));
    filtered(:,:,band) = imfilter(I_MS(:,:,band),bandKernel,'replicate');
end

I_Filtered = double(filtered);

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/MTF_PAN.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description: 
%           MTF_PAN filters the panchromatic (PAN) image using a Gaussian filter matched with the Modulation Transfer Function (MTF) of the PAN sensor. 
% 
% Interface:
%           I_Filtered = MTF_PAN(I_PAN,sensor,ratio)
%
% Inputs:
%           I_PAN:          PAN image;
%           sensor:         String for type of sensor (e.g. 'WV2', 'IKONOS');
%           ratio:          Scale ratio between MS and PAN.
%
% Outputs:
%           I_Filtered:     Output filtered PAN image.
% 
% References:
%           [Aiazzi02]          B. Aiazzi, L. Alparone, S. Baronti, and A. Garzelli, "Context-driven fusion of high spatial and spectral resolution images based on
%                               oversampled multiresolution analysis", IEEE Transactions on Geoscience and Remote Sensing, vol. 40, no. 10, pp. 2300-2312, October
%                               2002.
%           [Aiazzi06]          B. Aiazzi, L. Alparone, S. Baronti, A. Garzelli, and M. Selva, "MTF-tailored multiscale fusion of high-resolution MS and Pan imagery",
%                               Photogrammetric Engineering and Remote Sensing, vol. 72, no. 5, pp. 591-596, May 2006.
%           [Vivone14a]         G. Vivone, R. Restaino, M. Dalla Mura, G. Licciardi, and J. Chanussot, "Contrast and error-based fusion schemes for multispectral
%                               image pansharpening", IEEE Geoscience and Remote Sensing Letters, vol. 11, no. 5, pp. 930-934, May 2014.
%           [Vivone15]          G. Vivone, L. Alparone, J. Chanussot, M. Dalla Mura, A. Garzelli, G. Licciardi, R. Restaino, and L. Wald, "A Critical Comparison Among Pansharpening Algorithms", 
%                               IEEE Transactions on Geoscience and Remote Sensing, vol. 53, no. 5, pp. 2565-2586, May 2015.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function I_Filtered = MTF_PAN(I_PAN,sensor,ratio)
% Filter the PAN image with a 41x41 Gaussian-shaped kernel whose gain at
% the cut frequency 1/ratio is the sensor-specific value GNyq.
%
% Inputs:
%   I_PAN:  PAN image;
%   sensor: sensor name used to select GNyq ('QB', 'IKONOS', 'GeoEye1',
%           'WV4', 'WV2', 'WV3', 'none');
%   ratio:  scale ratio between MS and PAN.
% Output:
%   I_Filtered: filtered PAN image, returned as double.
%
% Fix: an unrecognised sensor name previously left GNyq undefined and the
% function failed later with an "undefined variable" error. Such names
% now use the same gain as 'none' (0.15), in line with genMTF, which also
% falls back to a default gain for sensors it does not know.

switch sensor
    case 'QB'
        GNyq = 0.15;
    case 'IKONOS'
        GNyq = 0.17;
    case {'GeoEye1','WV4'}
        GNyq = 0.16;
    case 'WV2'
        GNyq = 0.11;
    case 'WV3'
        GNyq = 0.14;
    otherwise
        % 'none' and any sensor name not listed above.
        GNyq = 0.15;
end

N = 41;
fcut = 1/ratio;

% Standard deviation of the Gaussian giving gain GNyq at fcut.
alpha = sqrt(((N-1)*(fcut/2))^2/(-2*log(GNyq)));
H = fspecial('gaussian', N, alpha);
% Normalise to unit peak before the window-method design.
Hd = H./max(H(:));
h = fwind1(Hd,kaiser(N));
I_PAN_LP = imfilter(I_PAN,real(h),'replicate');

I_Filtered= double(I_PAN_LP);

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/estimation_alpha.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description: 
%           Estimation of the coefficients of the linear regression model between the MS bands and the PAN image. 
% 
% Interface:
%           alpha = estimation_alpha(I_MS,I_PAN,type_estimation)
% 
% Inputs:
%           I_MS:               MS image upsampled at PAN scale;
%           I_PAN:              PAN image;
%           type_estimation:    Type of estimation (i.e. local or global).
%
% Outputs:
%           alpha:              Coefficients estimated by the linear regression model.
% 
% References:
%           [Vivone14]          G. Vivone, R. Restaino, M. Dalla Mura, G. Licciardi, and J. Chanussot, "Contrast and error-based fusion schemes for multispectral
%                               image pansharpening", IEEE Geoscience and Remote Sensing Letters, vol. 11, no. 5, pp. 930-934, May 2014.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function alpha = estimation_alpha(I_MS,I_PAN,type_estimation)
% Least-squares weights alpha such that the bands of I_MS, combined with
% alpha, approximate I_PAN. 'global' solves one system over the whole
% image; any other value of type_estimation solves one system per 32x32
% block and returns the average of the block solutions.

nRows  = size(I_MS,1);
nCols  = size(I_MS,2);
nBands = size(I_MS,3);

if strcmp(type_estimation,'global')
    % One column per band, one row per pixel.
    panColumn = reshape(I_PAN,[numel(I_PAN) 1]);
    msMatrix  = reshape(I_MS,[nRows*nCols nBands]);
    alpha = msMatrix\panColumn;
else
    blockSide = 32;
    alphaSum  = zeros(nBands,1);
    nBlocks   = 0;
    for rowStart = 1 : blockSide : nRows
        rowIdx = rowStart : min(nRows,rowStart + blockSide - 1);
        for colStart = 1 : blockSide : nCols
            colIdx = colStart : min(nCols,colStart + blockSide - 1);

            panBlock = I_PAN(rowIdx, colIdx);
            msBlock  = I_MS(rowIdx, colIdx, :);

            panColumn = reshape(panBlock,[numel(panBlock) 1]);
            msMatrix  = reshape(msBlock,[size(msBlock,1).*size(msBlock,2) size(msBlock,3)]);

            blockAlpha = msMatrix\panColumn;
            alphaSum = alphaSum + blockAlpha;
            nBlocks  = nBlocks + 1;
        end
    end
    alpha = alphaSum/nBlocks;
end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/genMTF.m
================================================
% Description: 
%           Generate a bank of filters shaped on the MTF of the sensor. Each filter
%           corresponds to a band acquired by the sensor. 
% 
% Interface:
%           h = genMTF(ratio, sensor, nbands)
%
% Inputs:
%           ratio:              Scale ratio between MS and PAN. Pre-condition: Integer value.
%           sensor:             String for type of sensor (e.g. 'WV2','IKONOS');
%           nbands:             Number of spectral bands.
%
% Outputs:
%           h:                  Gaussian filter mimicking the MTF of the MS sensor
% 
% References:
%           [Aiazzi02]          B. Aiazzi, L. Alparone, S. Baronti, and A. Garzelli, "Context-driven fusion of high spatial and spectral resolution images based on
%                               oversampled multiresolution analysis", IEEE Transactions on Geoscience and Remote Sensing, vol. 40, no. 10, pp. 2300-2312, October
%                               2002.
%           [Aiazzi06]          B. Aiazzi, L. Alparone, S. Baronti, A. Garzelli, and M. Selva, "MTF-tailored multiscale fusion of high-resolution MS and Pan imagery",
%                               Photogrammetric Engineering and Remote Sensing, vol. 72, no. 5, pp. 591-596, May 2006.
%           [Vivone14a]         G. Vivone, R. Restaino, M. Dalla Mura, G. Licciardi, and J. Chanussot, "Contrast and error-based fusion schemes for multispectral
%                               image pansharpening", IEEE Geoscience and Remote Sensing Letters, vol. 11, no. 5, pp. 930-934, May 2014.
%           [Vivone15]          G. Vivone, L. Alparone, J. Chanussot, M. Dalla Mura, A. Garzelli, G. Licciardi, R. Restaino, and L. Wald, "A Critical Comparison Among Pansharpening Algorithms", 
%                               IEEE Transactions on Geoscience and Remote Sensing, vol. 53, no. 5, pp. 2565-2586, May 2015.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function h = genMTF(ratio, sensor, nbands)
% Build one 41x41 kernel per entry of the sensor's Nyquist-gain table.
% nbands is only used for sensors without a table entry; for the listed
% sensors the number of kernels is the length of the table.

% Nyquist gains per band.
switch sensor
    case 'QB'
        GNyq = [0.34 0.32 0.30 0.22];      % Band Order: B,G,R,NIR
    case 'IKONOS'
        GNyq = [0.26,0.28,0.29,0.28];      % Band Order: B,G,R,NIR
    case {'GeoEye1','WV4'}
        GNyq = [0.23,0.23,0.23,0.23];      % Band Order: B,G,R,NIR
    case 'WV2'
        GNyq = [repmat(0.35,1,7), 0.27];
    case 'WV3'
        GNyq = [0.325 0.355 0.360 0.350 0.365 0.360 0.335 0.315];
    otherwise
        GNyq = repmat(0.3,1,nbands);
end

% Filter design: Gaussian with the requested gain at the cut frequency,
% normalised to unit peak and windowed with a Kaiser window via fwind1.
N = 41;
fcut = 1/ratio;
nBands = length(GNyq);
kaiserWin = kaiser(N);
numerator = ((N-1)*(fcut/2))^2;

h = zeros(N, N, nBands);
for band = 1 : nBands
    alpha = sqrt(numerator/(-2*log(GNyq(band))));
    gaussKernel = fspecial('gaussian', N, alpha);
    Hd = gaussKernel./max(gaussKernel(:));
    h(:,:,band) = fwind1(Hd,kaiserWin);
end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/gen_LP_image.m
================================================
% Description:
%           gen_LP_image generates the Low Resolution version of the PAN image required for the calculation of the
%           segmentation-based version of the Gram-Schmidt algorithm, based on the segmentation S.
%
% Interface:
%           I_LR_input = gen_LP_image(PS_algorithm,I_MS,I_PAN,I_MS_LR,ratio,sensor)
%
% Inputs:
%           PS_algorithm: Employed segmentation-based algorithm
%                            ('GSA','GS2GLP')
%           I_MS:            MS image upsampled at PAN scale
%           I_PAN:           PAN image
%           I_MS_LR:         MS image
%           ratio:           Scale ratio between MS and PAN. Pre-condition: Integer value.
%           sensor:          String for type of sensor (e.g. 'WV2','IKONOS');
%
% Outputs:
%           I_LR_input:  Low Resolution  version of the PAN image
%
% References:
%
%           [Restaino17] R. Restaino, M. Dalla Mura, G. Vivone, J. Chanussot, "Context-Adaptive Pansharpening Based on Image Segmentation",
%                        IEEE Transactions on Geoscience and Remote Sensing, vol. 55, no. 2, pp. 753-766, February 2017.
%           [Vivone15]   G. Vivone, L. Alparone, J. Chanussot, M. Dalla Mura, A. Garzelli, G. Licciardi, R. Restaino, and L. Wald, "A Critical Comparison Among Pansharpening Algorithms",
%                        IEEE Transactions on Geoscience and Remote Sensing, vol. 53, no. 5, pp. 2565-2586, May 2015.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function I_LR_input = gen_LP_image(PS_algorithm,I_MS,I_PAN,I_MS_LR,ratio,sensor)
% Build the low-resolution PAN substitute for the selected algorithm.
%   'GSA'    - weighted combination of the upsampled MS bands plus a
%              constant term, with weights regressed at the MS scale;
%   'GS2GLP' - PAN filtered with each kernel from genMTF, decimated, and
%              re-interpolated with interp23tap.
% Any other PS_algorithm leaves the output unassigned.

switch PS_algorithm

    case 'GSA'
        % Low-pass the PAN and sample it down to the MS grid.
        panSmooth   = LPfilterGauss(I_PAN,ratio);
        panSmoothLR = imresize(panSmooth,1/ratio,'nearest');

        % Regress the decimated PAN on the MS bands plus a plane of ones.
        onesPlane = ones(size(I_MS_LR,1),size(I_MS_LR,2));
        alpha = estimation_alpha(cat(3,I_MS_LR,onesPlane),panSmoothLR,'global');

        % Apply the weights pixel by pixel to the upsampled MS image.
        [Height,Width,Bands] = size(I_MS);
        msPixels   = reshape(double(I_MS), Height*Width, Bands);
        nPixels    = size(msPixels,1);
        weightGrid = repmat(alpha', [nPixels,1]);
        augmented  = [msPixels, ones(nPixels,1)];
        combined   = sum(augmented .* weightGrid, 2);
        I_LR_input = reshape(combined, Height, Width);

    case 'GS2GLP'
        kernels = genMTF(ratio, sensor, size(I_MS,3));
        for band=1:size(kernels, 3)
            PAN_LP(:,:,band) = imfilter(I_PAN,real(kernels(:,:,band)),'replicate');
        end
        panDecimated = imresize(PAN_LP,1/ratio,'nearest');
        I_LR_input = interp23tap(panDecimated,ratio);
end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/indexes_evaluation.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description: 
%           Reduced resolution quality indexes. 
% 
% Interface:
%           [Q_index, SAM_index, ERGAS_index, sCC, Q2n_index] = indexes_evaluation(I_F,I_GT,ratio,L,Q_blocks_size,flag_cut_bounds,dim_cut,th_values)
%
% Inputs:
%           I_F:                Fused Image;
%           I_GT:               Ground-Truth image;
%           ratio:              Scale ratio between MS and PAN. Pre-condition: Integer value;
%           L:                  Image radiometric resolution; 
%           Q_blocks_size:      Block size of the Q-index locally applied;
%           flag_cut_bounds:    Cut the boundaries of the viewed Panchromatic image;
%           dim_cut:            Define the dimension of the boundary cut;
%           th_values:          Flag. If th_values == 1, apply an hard threshold to the dynamic range.
%
% Outputs:
%           Q_index:            Q index;
%           SAM_index:          Spectral Angle Mapper (SAM) index;
%           ERGAS_index:        Erreur Relative Globale Adimensionnelle de Synthese (ERGAS) index;
%           sCC:                spatial Correlation Coefficient between fused and ground-truth images;
%           Q2n_index:          Q2n index.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [Q_index, SAM_index, ERGAS_index, sCC, Q2n_index] = indexes_evaluation(I_F,I_GT,ratio,L,Q_blocks_size,flag_cut_bounds,dim_cut,th_values)
% Reduced-resolution quality indexes of the fused image I_F against the
% ground truth I_GT.
%
% Steps:
%   1. if flag_cut_bounds is set, crop both images to
%      dim_cut:end-dim_cut along rows and columns;
%   2. if th_values is set, clamp I_F to the range [0, 2^L];
%   3. evaluate q2n, Q, SAM, ERGAS and SCC from the Quality_Indices
%      folder, which must be a sub-folder of the current directory.
%
% Fix: the working directory is now restored through onCleanup, so it is
% put back even when one of the metric functions throws. Previously an
% error left MATLAB inside Quality_Indices and every later call failed.

if flag_cut_bounds
    I_GT = I_GT(dim_cut:end-dim_cut,dim_cut:end-dim_cut,:);
    I_F = I_F(dim_cut:end-dim_cut,dim_cut:end-dim_cut,:);
end

if th_values
    I_F(I_F > 2^L) = 2^L;
    I_F(I_F < 0) = 0;
end

% cd returns the folder that was current before the change.
previousDir = cd('Quality_Indices');
restoreDir = onCleanup(@() cd(previousDir)); %#ok<NASGU>

Q2n_index = q2n(I_GT,I_F,Q_blocks_size,Q_blocks_size);
Q_index = Q(I_GT,I_F,2^L);
SAM_index = SAM(I_GT,I_F);
ERGAS_index = ERGAS(I_GT,I_F,ratio);
sCC = SCC(I_F,I_GT);

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/indexes_evaluation_FS.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description: 
%           Full resolution quality indexes. 
% 
% Interface:
%           [D_lambda,D_S,QNR_index] = indexes_evaluation_FS(I_F,I_MS_LR,I_PAN,L,th_values,I_MS,sensor,ratio,flagQNR)
%
% Inputs:
%           I_F:                Fused image;
%           I_MS_LR:            MS image;
%           I_PAN:              Panchromatic image;
%           L:                  Image radiometric resolution; 
%           th_values:          Flag. If th_values == 1, apply an hard threshold to the dynamic range;
%           I_MS:               MS image upsampled to the PAN size;
%           sensor:             String for type of sensor (e.g. 'WV2','IKONOS');
%           ratio:              Scale ratio between MS and PAN. Pre-condition: Integer value;
%           flagQNR:            if flagQNR == 1, the software uses the QNR otherwise the HQNR is used.
%
% Outputs:
%           D_lambda:           D_lambda index;
%           D_S:                D_S index;
%           QNR_index:          QNR index;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [D_lambda,D_S,QNR_index] = indexes_evaluation_FS(I_F,I_MS_LR,I_PAN,L,th_values,I_MS,sensor,ratio,flagQNR)
% Full-resolution quality indexes of the fused image I_F.
%
% Steps:
%   1. if th_values is set, clamp I_F to the range [0, 2^L];
%   2. call QNR when flagQNR == 1, otherwise HQNR (with block size 32);
%      both live in the Quality_Indices folder, which must be a
%      sub-folder of the current directory.
%
% Fix: the working directory is now restored through onCleanup, so it is
% put back even when QNR/HQNR throws. Previously an error left MATLAB
% inside Quality_Indices and every later call failed.

if th_values
    I_F(I_F > 2^L) = 2^L;
    I_F(I_F < 0) = 0;
end

% cd returns the folder that was current before the change.
previousDir = cd('Quality_Indices');
restoreDir = onCleanup(@() cd(previousDir)); %#ok<NASGU>

if flagQNR == 1
    [QNR_index,D_lambda,D_S]= QNR(I_F,I_MS,I_MS_LR,I_PAN,ratio);
else
    [QNR_index,D_lambda,D_S] = HQNR(I_F,I_MS_LR,I_MS,I_PAN,32,sensor,ratio);
end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/indwt2_working.m
================================================
function X = indwt2_working(W,varargin)
%INDWT2 Inverse nondecimated 2-D wavelet transform.
%   INDWT2 will be removed in a future release of MATLAB. Use the
%   following function instead:
%       <a href="matlab:help iswt2">iswt2</a>
%
%   X = indwt2_working(W) rebuilds X from the decomposition structure W.
%   The fields read here are W.dec (cell array of coefficient arrays),
%   W.level, W.sizes, W.sizeINI, W.mode and W.filters.LoR / W.filters.HiR.
%
%   X = indwt2_working(W,TYPE) and X = indwt2_working(W,TYPE,LEVREC)
%   select what is rebuilt. TYPE is a char array, case-insensitive:
%       'D'/'H', 'AA'/'LL'/'A'/'L', 'AD'/'LH', 'DA'/'HL', 'DD'/'HH'.
%   A leading 'C' sets the coefficient flag (cfsFLAG); for the detail
%   types and for the approximation at level W.level the stored
%   coefficients are then returned directly. LEVREC defaults to W.level.
%   A TYPE matching none of the cases (for example 'c' alone) leaves
%   W.dec untouched and performs a full reconstruction.

% Error in R2015a
% error(message('Wavelet:warnobsolete:ErrorReplaceINDWT2'));

% Number of optional arguments; idxCFS = -1 means "no selection".
nbIN = nargin-1;
idxCFS  = -1;
cfsFLAG = false;
if nbIN>0
    nbCELL = numel(W.dec);
    type = varargin{1};
    if ~ischar(type)
        error(message('Wavelet:FunctionArgVal:Invalid_ArgTyp'))
    end
    type = upper(type);
    % A leading 'C' is a flag, not part of the type name: strip it.
    cfsFLAG = isequal(upper(type(1)),'C');
    if cfsFLAG , type = type(2:end); end
    % Map the type name to a selector: 0 = details, 1 = approximation,
    % 2..4 = one of the three detail orientations.
    switch type
        case {'D','H'} ,           idxCFS = 0;
        case {'AA','LL','A','L'} , idxCFS = 1;
        case {'AD','LH'} ,         idxCFS = 2;
        case {'DA','HL'} ,         idxCFS = 3;
        case {'DD','HH'} ,         idxCFS = 4;
    end
    if nbIN>1 , levREC = varargin{2}; else levREC = W.level; end
        
    if idxCFS>1
        % Single detail orientation: shift the selector to the cell that
        % holds it at level levREC (three detail cells per level).
        idxCFS = idxCFS + 3*(W.level-levREC);
        if ~cfsFLAG
            % Keep only the selected cell, zero all the others.
            for j=1:nbCELL
                if ~isequal(j,idxCFS);
                    W.dec{j} = zeros(size(W.dec{j}));
                end
            end
        else
            X = W.dec{idxCFS};   % Coefficients
            return
        end
        
    elseif idxCFS==1   % Approximations (AA or LL)
        if cfsFLAG && levREC==W.level 
            X = W.dec{1}; 
            return; % Coefficients of Approximation at level MAX
        end
        % Zero every cell from idxMinToKill to the end of W.dec.
        idxMinToKill = 1 + 3*(W.level-levREC)+1;
        for j=idxMinToKill:nbCELL
            W.dec{j} = zeros(size(W.dec{j}));
        end
                
    elseif idxCFS==0
        % Details: zero cells 1..idxMaxToKill and keep the rest.
        idxMaxToKill = 1 + 3*(W.level-levREC);
        for j=1:idxMaxToKill
            W.dec{j} = zeros(size(W.dec{j}));
        end
        
    else
        % idxCFS == -1: unrecognised type, nothing is zeroed.
        
    end
end

% Initialization.
Lo  = W.filters.LoR;
Hi  = W.filters.HiR;
dwtEXTM = W.mode;
perFLAG = isequal(dwtEXTM,'per');
cfs   = W.dec;
sizes = W.sizes;
level = W.level;

% Number of reconstruction steps; reduced when approximation
% coefficients at an intermediate level were requested.
maxloop = level;
if idxCFS==1 && cfsFLAG , maxloop = (level-levREC); end

% Each step consumes four consecutive cells (arranged 2-by-2), rebuilds
% an array of size sizes(k+1,:) and stores it in the last of the four
% cells, which becomes the first cell of the next step.
idxBeg = 1;
for k=1:maxloop
    idxEnd = idxBeg+3;
    dec = reshape(cfs(idxBeg:idxEnd),2,2);
    sizerec = sizes(k+1,:);
    X   = recFUNC(dec,sizerec,Lo,Hi,perFLAG);
    cfs(1:idxEnd-1) = {[]};
    cfs{idxEnd} = X;
    idxBeg = idxEnd;
end

% Conversion to uint8 is disabled here; this branch is intentionally a no-op.
if abs(idxCFS)==1 && ~cfsFLAG && length(W.sizeINI)==3
    % X = uint8(X);
end
%-----------------------------------------------------------------------%
function X = recFUNC(dec,sINI,Lo,Hi,perFLAG)
% One reconstruction step from a 2-by-2 cell array of coefficients.
% Each row of dec is first combined along the permuted direction with
% Lo{2}/Hi{2}; the two results are then combined with Lo{1}/Hi{1},
% divided by 4 and cropped to the central part of size sINI.

swapRowsCols = [2,1,3];

% First pass: combine the two columns of each row of dec.
rowPart = cell(1,2);
for r = 1:2
    lowTerm  = wrec1D(dec{r,1},Lo{2},swapRowsCols,perFLAG);
    highTerm = wrec1D(dec{r,2},Hi{2},swapRowsCols,perFLAG);
    rowPart{r} = lowTerm + highTerm;
end

% Second pass: combine the two intermediate results.
lowTerm  = wrec1D(rowPart{1},Lo{1},[],perFLAG);
highTerm = wrec1D(rowPart{2},Hi{1},[],perFLAG);
X = (lowTerm + highTerm)/4;

% Central crop: drop floor(excess/2) leading and ceil(excess/2) trailing
% samples along the first two dimensions.
excess    = size(X)-sINI;
dropFront = floor(excess/2);
dropBack  = ceil(excess/2);
X = X(1+dropFront(1):end-dropBack(1),1+dropFront(2):end-dropBack(2),:);
%-----------------------------------------------------------------------%
function X = wrec1D(X,F,perm,perFLAG)
% Convolve X with the filter F using convn. When perm is non-empty, X is
% permuted with it before the convolution and again afterwards. When
% perFLAG is true, the first length(F)-1 columns are appended to X
% before convolving.

mustPermute = ~isempty(perm);

if mustPermute
    X = permute(X,perm);
end

if perFLAG
    nExtra = length(F)-1;
    X = [X X(:,1:nExtra,:)];
end

X = convn(X,F);

if mustPermute
    X = permute(X,perm);
end
%-----------------------------------------------------------------------%


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/interp23tap.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description: 
%           interp23tap interpolates the image I_Interpolated using a 23-coefficient polynomial interpolator. 
% 
% Interface:
%           I_Interpolated = interp23tap(I_Interpolated,ratio)
%
% Inputs:
%           I_Interpolated: Image to interpolate;
%           ratio:          Scale ratio between MS and PAN. Pre-condition: Resize factors power of 2.
%
% Outputs:
%           I_Interpolated: Interpolated image.
% 
% References:
%           [Aiazzi02]      B. Aiazzi, L. Alparone, S. Baronti, and A. Garzelli, "Context-driven fusion of high spatial and spectral resolution images based on
%                           oversampled multiresolution analysis", IEEE Transactions on Geoscience and Remote Sensing, vol. 40, no. 10, pp. 2300-2312, October
%                           2002.
%           [Aiazzi13]      B. Aiazzi, S. Baronti, M. Selva, and L. Alparone, "Bi-cubic interpolation for shift-free pan-sharpening", ISPRS Journal of Photogrammetry
%                           and Remote Sensing, vol. 86, no. 6, pp. 65-76, December 2013.
%           [Vivone15]      G. Vivone, L. Alparone, J. Chanussot, M. Dalla Mura, A. Garzelli, G. Licciardi, R. Restaino, and L. Wald, "A Critical Comparison Among Pansharpening Algorithms", 
%                           IEEE Transaction on Geoscience and Remote Sensing, vol. 53, no. 5, pp. 2565-2586, May 2015.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function I_Interpolated = interp23tap(I_Interpolated,ratio)
% Upsample an image by "ratio" through repeated factor-2 stages: each
% stage inserts zeros between the samples and filters rows and columns
% with a symmetric 23-tap kernel (circular boundary handling).
%
% Inputs:
%   I_Interpolated: image to interpolate (all bands are processed);
%   ratio:          resize factor, must be a power of 2.
% Output:
%   I_Interpolated: image enlarged by "ratio" along rows and columns.
%                   If ratio is not a power of 2 a message is displayed
%                   and the input is returned unchanged.
%
% Fix: the number of stages is log2(ratio). The previous bound, ratio/2,
% equals log2(ratio) only for ratio 2 and 4; for ratio 8 it ran 4 stages
% and returned an image enlarged 16 times.

nStages = round(log2(double(ratio)));

if (2^nStages ~= ratio)
    disp('Error: Only resize factors power of 2');
    return;
end

[r,c,b] = size(I_Interpolated);

% Half of the symmetric kernel (centre tap first), mirrored to 23 taps.
CDF23 = 2.*[0.5 0.305334091185 0 -0.072698593239 0 0.021809577942 0 -0.005192756653 0 0.000807762146 0 -0.000060081482];
CDF23 = [fliplr(CDF23(2:end)) CDF23];
BaseCoeff = CDF23;

for z = 1 : nStages

    % Zero-stuffed grid, twice the size of the previous stage.
    I1LRU = zeros((2^z) * r, (2^z) * c, b);

    % The first stage places the samples on the even positions, later
    % stages on the odd positions.
    if z == 1
        I1LRU(2:2:end,2:2:end,:) = I_Interpolated;
    else
        I1LRU(1:2:end,1:2:end,:) = I_Interpolated;
    end

    % Separable filtering: filter the transposed band, transpose back
    % and filter again.
    for ii = 1 : b
        t = I1LRU(:,:,ii);
        t = imfilter(t',BaseCoeff,'circular');
        I1LRU(:,:,ii) = imfilter(t',BaseCoeff,'circular');
    end

    I_Interpolated = I1LRU;

end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/k_means_clustering.m
================================================
% I_MS:         Image to segment
% n_segm:       Number of segments
% Output:
% S:            Segmentation map.
function S = k_means_clustering(I_MS, n_segm)
% k-means segmentation of a multi-band image.
% Every band is divided by its own maximum and flattened into one feature
% column; kmeans then assigns each pixel to one of n_segm clusters and
% the labels are reshaped back to the image size.

nRows  = size(I_MS,1);
nCols  = size(I_MS,2);
nBands = size(I_MS,3);

% One row per pixel, one column per band.
features = zeros(nRows*nCols,nBands);
for band = 1 : nBands
    plane = I_MS(:,:,band);
    features(:,band) = plane(:)/max(plane(:));
end

labels = kmeans(features,n_segm);
S = reshape(labels,[nRows nCols]);

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/matrix2latex.m
================================================
function matrix2latex(matrix, filename, varargin)

% function: matrix2latex(...)
% Author:   M. Koehler
% Contact:  koehler@in.tum.de
% Version:  1.1
% Date:     May 09, 2004

% This software is published under the GNU GPL, by the free software
% foundation. For further reading see: http://www.gnu.org/licenses/licenses.html#GPL

% Usage:
% matrix2latex(matrix, filename, varargs)
% where
%   - matrix is a 2 dimensional numerical or cell array
%   - filename is a valid filename, to which the resulting latex code will
%   be appended (the file is opened in append mode)
%   - varargs is one or more of the following (name, value) combinations;
%   names are case-insensitive and may be abbreviated to an unambiguous prefix
%      + 'rowLabels', array -> Can be used to label the rows of the
%      resulting latex table
%      + 'columnLabels', array -> Can be used to label the columns of the
%      resulting latex table
%      + 'alignment', 'value' -> Can be used to specify the alignment of
%      the table columns. Valid arguments are: 'l', 'c', and 'r' (or
%      'left', 'center', and 'right'). Anything else falls back to 'l'
%      with a warning
%      + 'format', 'value' -> Can be used to format the input data. 'value'
%      has to be a valid format string, similar to the ones used in
%      fprintf('format', value); NOTE: the string is lower-cased before use
%      + 'size', 'value' -> One of latex' recognized font-sizes, e.g. tiny,
%      HUGE, Large, large, LARGE, etc.
%
% Example input:
%   matrix = [1.5 1.764; 3.523 0.2];
%   rowLabels = {'row 1', 'row 2'};
%   columnLabels = {'col 1', 'col 2'};
%   matrix2latex(matrix, 'out.tex', 'rowLabels', rowLabels, 'columnLabels', columnLabels, 'alignment', 'c', 'format', '%-6.2f', 'size', 'tiny');
%
% The resulting latex file can be included into any latex document by:
% \input{out.tex}
%
% Enjoy life!!!

    rowLabels = [];
    colLabels = [];
    alignment = 'l';
    fmt = [];
    textsize = [];
    if (rem(nargin,2) == 1 || nargin < 2)
        error('matrix2latex:invalidArgumentCount', 'Incorrect number of arguments to %s.', mfilename);
    end

    okargs = {'rowlabels','columnlabels', 'alignment', 'format', 'size'};
    for idx=1:2:(nargin-2)
        pname = varargin{idx};
        pval = varargin{idx+1};
        if ~ischar(pname) || isempty(pname)
            error('matrix2latex:invalidParameterName', 'Parameter names must be non-empty strings.');
        end
        % Case-insensitive prefix match against the known parameter names.
        k = find(strncmpi(pname, okargs, length(pname)));
        if isempty(k)
            error('matrix2latex:unknownParameter', 'Unknown parameter name: %s.', pname);
        elseif length(k)>1
            error('matrix2latex:ambiguousParameter', 'Ambiguous parameter name: %s.', pname);
        else
            switch(k)
                case 1  % rowlabels
                    rowLabels = pval;
                    if isnumeric(rowLabels)
                        rowLabels = cellstr(num2str(rowLabels(:)));
                    end
                case 2  % column labels
                    colLabels = pval;
                    if isnumeric(colLabels)
                        colLabels = cellstr(num2str(colLabels(:)));
                    end
                case 3  % alignment
                    % Strings must be compared with strcmp/switch: '==' on
                    % char arrays of different lengths raises an error.
                    if ischar(pval)
                        alignment = lower(pval);
                    else
                        alignment = '';
                    end
                    switch alignment
                        case {'r', 'right'}
                            alignment = 'r';
                        case {'l', 'left'}
                            alignment = 'l';
                        case {'c', 'center'}
                            alignment = 'c';
                        otherwise
                            alignment = 'l';
                            warning('matrix2latex:unknownAlignment', 'Unknown alignment. (Set it to ''left''.)');
                    end
                case 4  % format
                    fmt = lower(pval);
                case 5  % size
                    textsize = pval;
            end
        end
    end

    fid = fopen(filename, 'a');
    if fid == -1
        error('matrix2latex:fileOpenFailed', 'Could not open file %s for writing.', filename);
    end
    % Close the file on every exit path, including errors raised below.
    closeFile = onCleanup(@() fclose(fid));
    
    width = size(matrix, 2);
    height = size(matrix, 1);

    % Numeric input is converted cell by cell to its string representation.
    if isnumeric(matrix)
        matrix = num2cell(matrix);
        for h=1:height
            for w=1:width
                if(~isempty(fmt))
                    matrix{h, w} = num2str(matrix{h, w}, fmt);
                else
                    matrix{h, w} = num2str(matrix{h, w});
                end
            end
        end
    end
    
    if(~isempty(textsize))
        fprintf(fid, '\\begin{%s}', textsize);
    end

    % Column specification: optional label column plus one per data column.
    fprintf(fid, '\\begin{tabular}{|');

    if(~isempty(rowLabels))
        fprintf(fid, 'l|');
    end
    for i=1:width
        fprintf(fid, '%c|', alignment);
    end
    fprintf(fid, '}\r\n');
    
    fprintf(fid, '\\hline\r\n');
    
    % Header row.
    if(~isempty(colLabels))
        if(~isempty(rowLabels))
            fprintf(fid, '&');
        end
        for w=1:width-1
            fprintf(fid, '\\textbf{%s}&', colLabels{w});
        end
        fprintf(fid, '\\textbf{%s}\\\\\\hline\r\n', colLabels{width});
    end
    
    % Data rows.
    for h=1:height
        if(~isempty(rowLabels))
            fprintf(fid, '\\textbf{%s}&', rowLabels{h});
        end
        for w=1:width-1
            fprintf(fid, '%s&', matrix{h, w});
        end
        fprintf(fid, '%s\\\\\\hline\r\n', matrix{h, width});
    end

    fprintf(fid, '\\end{tabular}\r\n');
    
    if(~isempty(textsize))
        fprintf(fid, '\\end{%s}', textsize);
    end

    % The file is closed by the onCleanup object when the function returns.

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/ndwt2_working.m
================================================
function varargout = ndwt2_working(X,level,varargin)
%NDWT2_WORKING Nondecimated 2-D wavelet transform (local copy of NDWT2).
%   NDWT2 will be removed in a future release of MATLAB. Use the
%   following function instead:
%       <a href="matlab:help swt2">swt2</a>
%
%   WT = NDWT2_WORKING(X,LEVEL,FILTERS) decomposes X over LEVEL levels.
%   FILTERS is one of:
%     - a wavelet name (char), passed to WFILTERS and used for both
%       directions;
%     - a struct with fields 'w1' and 'w2' (one wavelet name per direction),
%       or with fields 'LoD','HiD','LoR','HiR' (cell arrays, one filter
%       per direction);
%     - a cell array of two wavelet names, or a cell array
%       {LoD,HiD,LoR,HiR} of filters used for both directions.
%   WT = NDWT2_WORKING(...,'mode',EXTM) selects the border extension mode
%   (default 'sym').
%
%   WT is a struct with fields sizeINI, level, filters, mode, dec and
%   sizes. If X is empty, [] is returned instead.

% Error in R2015a
% error(message('Wavelet:warnobsolete:ErrorReplaceNDWT2'));
nbIn = length(varargin);
if nbIn < 1
    error(message('MATLAB:narginchk:notEnoughInputs'));
elseif nbIn > 5
    error(message('MATLAB:narginchk:tooManyInputs'));
end

% Resolve the filter specification into one filter per direction.
LoD = cell(1,2); HiD = cell(1,2); LoR = cell(1,2); HiR = cell(1,2);
if ischar(varargin{1})
    [LD,HD,LR,HR] = wfilters(varargin{1}); 
    for k = 1:2
        LoD{k} = LD; HiD{k} = HD; LoR{k} = LR; HiR{k} = HR;
    end

elseif isstruct(varargin{1})
    if isfield(varargin{1},'w1') && isfield(varargin{1},'w2')
        for k = 1:2
            [LoD{k},HiD{k},LoR{k},HiR{k}] = ...
                wfilters(varargin{1}.(['w' int2str(k)]));
        end
    elseif isfield(varargin{1},'LoD') && isfield(varargin{1},'HiD') && ...
           isfield(varargin{1},'LoR') && isfield(varargin{1},'HiR')
        for k = 1:2
            LoD{k} = varargin{1}.LoD{k}; HiD{k} = varargin{1}.HiD{k};
            LoR{k} = varargin{1}.LoR{k}; HiR{k} = varargin{1}.HiR{k};
        end
    else
        error(message('Wavelet:FunctionArgVal:Invalid_ArgVal'));
    end
        
elseif iscell(varargin{1})
    if ischar(varargin{1}{1})
        for k = 1:2
            [LoD{k},HiD{k},LoR{k},HiR{k}] = wfilters(varargin{1}{k});
        end
    else
        LoD(1:end) = varargin{1}(1); HiD(1:end) = varargin{1}(2);
        LoR(1:end) = varargin{1}(3); HiR(1:end) = varargin{1}(4);
    end
else
    % Previously this case was silently accepted and the empty filters
    % caused an obscure failure further down.
    error('ndwt2_working:invalidFilterSpec', ...
        'The filter specification must be a wavelet name, a struct or a cell array.');
end
nextArg = 2;

% Optional name/value pairs (only 'mode' is recognized).
dwtEXTM = 'sym';
while nbIn>=nextArg
    if nextArg+1 > nbIn
        error('ndwt2_working:missingValue', ...
            'Optional arguments must be given as name/value pairs.');
    end
    argName = varargin{nextArg};
    argVal  = varargin{nextArg+1};
    nextArg = nextArg + 2;
    switch argName
        case 'mode' , dwtEXTM = argVal;
    end
end

% Initialization.
if isempty(X) , varargout{1} = []; return; end
sX = size(X);
X = double(X);
sizes = zeros(level+1,length(sX));
sizes(level+1,:) = sX;

for k=1:level
    dec = decFUNC(X,LoD,HiD,dwtEXTM);
    % The {1,1} component is the input of the next level.
    X = dec{1,1,1};
    sizes(level+1-k,:) = size(X);
    dec = reshape(dec,4,1,1);
    if k>1
        % Drop the previous level's first component (it has just been
        % decomposed further) and put the new components in front.
        cfs(1) = [];
        cfs = cat(1,dec,cfs);
    else
        cfs = dec;
    end
end

WT.sizeINI = sX;
WT.level = level;
WT.filters.LoD = LoD;
WT.filters.HiD = HiD;
WT.filters.LoR = LoR;
WT.filters.HiR = HiR;
WT.mode = dwtEXTM;
WT.dec = cfs;
WT.sizes = sizes;
varargout{1} = WT;

%-------------------------------------------------------------------------%
function dec = decFUNC(X,LoD,HiD,dwtEXTM)
% One decomposition level: filter X with the first filter pair (no
% permutation), then filter both results with the second filter pair on the
% data with its first two dimensions swapped. Returns a 2-by-2 cell array:
% row 1 comes from the low-pass branch of the first pass, row 2 from its
% high-pass branch.

noPerm   = [];
swapDims = [2,1,3];

% First pass.
[lowBranch,highBranch] = wdec1D(X,LoD{1},HiD{1},noPerm,dwtEXTM);

% Second pass on each branch.
dec = cell(2,2);
[dec{1,1},dec{1,2}] = wdec1D(lowBranch,LoD{2},HiD{2},swapDims,dwtEXTM);
[dec{2,1},dec{2,2}] = wdec1D(highBranch,LoD{2},HiD{2},swapDims,dwtEXTM);
%-------------------------------------------------------------------------%
function [L,H] = wdec1D(X,Lo,Hi,perm,dwtEXTM)
% Filter X with the low-pass filter Lo and the high-pass filter Hi after
% extending it along its second dimension according to dwtEXTM.
%
% Inputs:
%   X:        Data to filter (2-D or 3-D array);
%   Lo, Hi:   Filters passed to convn. Presumably row vectors, so that the
%             convolution runs along dimension 2 - TODO confirm;
%   perm:     Permutation applied to X before filtering and to the results
%             afterwards ([] = none);
%   dwtEXTM:  Extension mode string (see the switch below).
%
% Outputs:
%   L, H:     Results of filtering with Lo and Hi, respectively.

if ~isempty(perm) , X = permute(X,perm); end
sX = size(X);
if length(sX)<3 , sX(3) = 1; end
lf = length(Lo);
lx = sX(2);
% Number of columns kept after cropping for every mode except 'zpd'.
lc = lx+lf-1;
% Extend X on both sides along dimension 2. An unrecognized mode leaves X
% unextended (there is no otherwise branch).
switch dwtEXTM
    case 'zpd'             % Zero extension.
        
    case {'sym','symh'}    % Symmetric extension (half-point).
        X = [X(:,lf-1:-1:1,:) , X , X(:,end:-1:end-lf+1,:)];
        
    case 'sp0'             % Smooth extension of order 0.
        X = [X(:,ones(1,lf-1),:) , X , X(:,lx*ones(1,lf-1),:)];
        
    case {'sp1','spd'}     % Smooth extension of order 1.
        Z = zeros(sX(1),sX(2)+ 2*lf-2,sX(3));
        Z(:,lf:lf+lx-1,:) = X;
        last = sX(2)+lf-1;
        % Linear extrapolation outwards from both borders, one column at a
        % time.
        for k = 1:lf-1
            Z(:,last+k,:) = 2*Z(:,last+k-1,:)- Z(:,last+k-2,:);
            Z(:,lf-k,:)   = 2*Z(:,lf-k+1,:)- Z(:,lf-k+2,:);
        end
        X = Z; clear Z;
        
    case 'symw'            % Symmetric extension (whole-point).
        X = [X(:,lf:-1:2,:) , X , X(:,end-1:-1:end-lf,:)];
        
    case {'asym','asymh'}  % Antisymmetric extension (half-point).
        X = [-X(:,lf-1:-1:1,:) , X , -X(:,end:-1:end-lf+1,:)];        
        
    case 'asymw'           % Antisymmetric extension (whole-point).
        X = [-X(:,lf:-1:2,:) , X , -X(:,end-1:-1:end-lf,:)];

    % NOTE(review): 'rndu' is labelled "uniformly randomized" but calls
    % randn, exactly like 'rndn' below - confirm whether rand was intended.
    case 'rndu'            % Uniformly randomized extension.
        X = [randn(sX(1),lf-1,sX(3)) , X , randn(sX(1),lf-1,sX(3))];        
                        
    case 'rndn'            % Normally randomized extension.
        X = [randn(sX(1),lf-1,sX(3)) , X , randn(sX(1),lf-1,sX(3))];        
                
    case 'ppd'             % Periodized extension (1).
        X = [X(:,end-lf+2:end,:) , X , X(:,1:lf-1,:)];
        
    case 'per'             % Periodized extension (2).
        % For an odd number of columns the last column is duplicated first;
        % lx and lc keep their values computed from the original size.
        if rem(lx,2) , X = [X , X(:,end,:)]; end
        X = [X(:,end-lf+2:end,:) , X , X(:,1:lf-1,:)];        
end
L = convn(X,Lo);
H = convn(X,Hi);
clear X
% Crop the convolution results. 'zpd' keeps the full convn output.
switch dwtEXTM
    case 'zpd'
    otherwise
        % First drop lf-1 columns from each side, then keep the central lc
        % columns of what remains.
        lenL = size(L,2);
        first = lf; last = lenL-lf+1;
        L = L(:,first:last,:); H = H(:,first:last,:);
        lenL = size(L,2);
        first = 1+floor((lenL-lc)/2);  last = first+lc-1;
        L = L(:,first:last,:); H = H(:,first:last,:);
end
% 'per' additionally truncates the result to the original column count lx.
if isequal(dwtEXTM,'per')
    first = 1; last = lx;
    L = L(:,first:last,:);
    H = H(:,first:last,:);
end

% Undo the input permutation (the permutations used by the caller are
% [] or [2,1,3]; the latter is its own inverse).
if ~isempty(perm)
    L = permute(L,perm);
    H = permute(H,perm);
end
%-------------------------------------------------------------------------%


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/printAllImagesImWriteFR.m
================================================
% Script: stack the full-resolution fusion results found in the workspace,
% render them with showImagesAll and write one PNG per algorithm (plus the
% PAN image) into the 'Outputs' folder.
%
% Expects in the workspace: the I_* result images, I_PAN, the cell array
% 'algorithms' (one name per stacked image, in the same order),
% flag_cut_bounds and dim_cut.

% Start from a clean stack: a MatrixPrint left over from a previous run
% (possibly with another image size or number of slices) would otherwise
% make the slice assignments below fail or leave stale slices behind.
clear MatrixPrint

MatrixPrint(:,:,:,1) = I_MS;
MatrixPrint(:,:,:,2) = I_BT_H;
MatrixPrint(:,:,:,3) = I_BDSD;
MatrixPrint(:,:,:,4) = I_C_BDSD;
MatrixPrint(:,:,:,5) = I_BDSD_PC;
MatrixPrint(:,:,:,6) = I_GS;
MatrixPrint(:,:,:,7) = I_GSA;
MatrixPrint(:,:,:,8) = I_C_GSA;
MatrixPrint(:,:,:,9) = I_PRACS;
MatrixPrint(:,:,:,10) = I_AWLP;
MatrixPrint(:,:,:,11) = I_MTF_GLP;
MatrixPrint(:,:,:,12) = I_MTF_GLP_FS;
MatrixPrint(:,:,:,13) = I_MTF_GLP_HPM;
MatrixPrint(:,:,:,14) = I_MTF_GLP_HPM_H;
MatrixPrint(:,:,:,15) = I_MTF_GLP_HPM_R;
MatrixPrint(:,:,:,16) = I_MTF_GLP_CBD;
MatrixPrint(:,:,:,17) = I_C_MTF_GLP_CBD;
MatrixPrint(:,:,:,18) = I_MF;
MatrixPrint(:,:,:,19) = I_FE_HPM;
MatrixPrint(:,:,:,20) = I_SR_D;
MatrixPrint(:,:,:,21) = I_PWMBF;
MatrixPrint(:,:,:,22) = I_TV;
MatrixPrint(:,:,:,23) = I_RR;
MatrixPrint(:,:,:,24) = I_PNN;
MatrixPrint(:,:,:,25) = I_PNN_IDX;
MatrixPrint(:,:,:,26) = I_A_PNN;
MatrixPrint(:,:,:,27) = I_A_PNN_FT;

% Bands used as R, G, B: [3,2,1] for four-band images, [5,3,2] otherwise.
if size(I_MS,3) == 4
    vect_index_RGB = [3,2,1];
else
    vect_index_RGB = [5,3,2];
end

titleImages = algorithms;

% fullfile picks the platform's separator (a literal '\' only works on
% Windows).
addpath(fullfile(pwd,'Tools'));

figure, MP = showImagesAll(MatrixPrint,titleImages,vect_index_RGB,flag_cut_bounds,dim_cut,0);

% Write into 'Outputs' through explicit paths instead of cd-ing into it, so
% that an error while writing cannot leave the working directory changed.
outputDir = 'Outputs';
if ~exist(outputDir,'dir')
    mkdir(outputDir);
end
for ii = 1 : size(MP,4)
    imwrite(MP(:,:,:,ii),fullfile(outputDir,sprintf('%s.png',algorithms{ii})));
end
imwrite(showPan(I_PAN,0,1,flag_cut_bounds,dim_cut),fullfile(outputDir,'PAN.png'))

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/printAllImagesImWriteRR.m
================================================
% Script: stack the ground truth and the reduced-resolution fusion results
% found in the workspace, render them with showImagesAll and write one PNG
% per entry (plus the PAN image) into the 'Outputs' folder.
%
% Expects in the workspace: I_GT, the I_* result images, I_PAN, the cell
% array 'algorithms' (one name per stacked image, in the same order),
% flag_cut_bounds and dim_cut.

% Start from a clean stack: a MatrixPrint left over from a previous run
% (possibly with another image size or number of slices) would otherwise
% make the slice assignments below fail or leave stale slices behind.
clear MatrixPrint

MatrixPrint(:,:,:,1) = I_GT;
MatrixPrint(:,:,:,2) = I_MS;
MatrixPrint(:,:,:,3) = I_BT_H;
MatrixPrint(:,:,:,4) = I_BDSD;
MatrixPrint(:,:,:,5) = I_C_BDSD;
MatrixPrint(:,:,:,6) = I_BDSD_PC;
MatrixPrint(:,:,:,7) = I_GS;
MatrixPrint(:,:,:,8) = I_GSA;
MatrixPrint(:,:,:,9) = I_C_GSA;
MatrixPrint(:,:,:,10) = I_PRACS;
MatrixPrint(:,:,:,11) = I_AWLP;
MatrixPrint(:,:,:,12) = I_MTF_GLP;
MatrixPrint(:,:,:,13) = I_MTF_GLP_FS;
MatrixPrint(:,:,:,14) = I_MTF_GLP_HPM;
MatrixPrint(:,:,:,15) = I_MTF_GLP_HPM_H;
MatrixPrint(:,:,:,16) = I_MTF_GLP_HPM_R;
MatrixPrint(:,:,:,17) = I_MTF_GLP_CBD;
MatrixPrint(:,:,:,18) = I_C_MTF_GLP_CBD;
MatrixPrint(:,:,:,19) = I_MF;
MatrixPrint(:,:,:,20) = I_FE_HPM;
MatrixPrint(:,:,:,21) = I_SR_D;
MatrixPrint(:,:,:,22) = I_PWMBF;
MatrixPrint(:,:,:,23) = I_TV;
MatrixPrint(:,:,:,24) = I_RR;
MatrixPrint(:,:,:,25) = I_PNN;
MatrixPrint(:,:,:,26) = I_PNN_IDX;
MatrixPrint(:,:,:,27) = I_A_PNN;
MatrixPrint(:,:,:,28) = I_A_PNN_FT;

% Bands used as R, G, B: [3,2,1] for four-band images, [5,3,2] otherwise.
if size(I_MS,3) == 4
    vect_index_RGB = [3,2,1];
else
    vect_index_RGB = [5,3,2];
end

titleImages = algorithms;

% fullfile picks the platform's separator (a literal '\' only works on
% Windows).
addpath(fullfile(pwd,'Tools'));

figure, MP = showImagesAll(MatrixPrint,titleImages,vect_index_RGB,flag_cut_bounds,dim_cut,0);

% Write into 'Outputs' through explicit paths instead of cd-ing into it, so
% that an error while writing cannot leave the working directory changed.
outputDir = 'Outputs';
if ~exist(outputDir,'dir')
    mkdir(outputDir);
end
for ii = 1 : size(MP,4)
    imwrite(MP(:,:,:,ii),fullfile(outputDir,sprintf('%s.png',algorithms{ii})));
end
imwrite(showPan(I_PAN,0,1,flag_cut_bounds,dim_cut),fullfile(outputDir,'PAN.png'))

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/printImage.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description: 
%           Show an image in a new figure and print the figure to a
%           color EPS file at 300 dpi.
% 
% Interface:
%           printImage(I_MS,title)
%
% Inputs:
%           I_MS:               Image to print;
%           title:              Output filename. If it contains a '%'
%                               conversion it is treated as a format string
%                               and '.eps' is substituted into it;
%                               otherwise it is used verbatim.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function printImage(I_MS,title)

figure,imshow(I_MS,'Border','tight','InitialMagnification',100);

% Only run the name through sprintf when it really is a format string:
% passing a plain path through sprintf would interpret backslashes (e.g. in
% a Windows path) as escape sequences and corrupt the filename.
if isempty(strfind(title,'%'))
    fileName = title;
else
    fileName = sprintf(title,'.eps');
end

print(fileName,'-depsc2','-r300');
% print(sprintf(title,'.png'),'-dpng','-r400');

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/rectangleonimage.m
================================================
function ent=rectangleonimage(pic,sw,n, ch, c, scale, type)
% Draw a rectangular frame on an image and, optionally, paste a magnified
% copy of the framed region into one corner of the image.
%
% pic:   input image (gray: rows x cols, color: rows x cols x 3)
% sw:    frame location [x0 x1 y0 y1] = first row, last row, first column,
%        last column
% n:     line width parameter; each edge is painted n+1 pixels thick
% ch:    ch = 1 (gray image); ch = 3 (color image). NOTE: the value passed
%        in is ignored, the channel count is always taken from size(pic)
% c:     the color of the line.
%        color image: c=1 (red); c=2 (green); c=3 (blue); others: inverted
%        gray image:  c=1 (max value); c=2 (zero); others: inverted
% scale: the zoom factor of the close-up (optional)
% type:  where the close-up is placed (optional)
%        type =1 (put to down-left); type =2 (put to down-right); 
%        type =3 (put to up-right); type =4 (put to up-left)
%        If scale or type is omitted or empty, only the frame is drawn.
%
% ent:   output image
%
% Liang-Jian Deng (UESTC)
% improved time: 2017-3-11
%==============================%

if nargin < 5
    error('rectangleonimage:notEnoughInputs', ...
        'At least pic, sw, n, ch and c are required.');
end
% scale is the 6th and type the 7th argument; both are optional.
if nargin < 6
    scale = [];
end
if nargin < 7
    type = [];
end

x0=sw(1);x1=sw(2);y0=sw(3);y1=sw(4);
[p, q, ch]=size(pic);

max_val = 1;

% The four edges of the frame as {rows, columns}: left, right, top, bottom.
edges = { x0:x1,   y0:y0+n ; ...
          x0:x1,   y1-n:y1 ; ...
          x0:x0+n, y0:y1   ; ...
          x1-n:x1, y0:y1   };

% Per-channel paint values; left empty when the edge has to be inverted.
paint = [];
if ch==1
    if c==1
        paint = max_val;
    elseif c==2
        paint = 0;
    end
elseif ch==3
    if c==1
        paint = [max_val 0 0];
    elseif c==2
        paint = [0 max_val 0];
    elseif c==3
        paint = [0 0 max_val];
    end
end

% Only gray (1 channel) and color (3 channels) images get a frame.
if ch==1 || ch==3
    for e = 1:size(edges,1)
        rows = edges{e,1};
        cols = edges{e,2};
        if isempty(paint)
            % Inverse. The edges are processed one after the other, so the
            % corner pixels shared by two edges are inverted twice.
            pic(rows,cols,:) = max_val - pic(rows,cols,:);
        else
            for k = 1:ch
                pic(rows,cols,k) = paint(k);
            end
        end
    end
end

ent=pic; 

% No close-up requested.
if isempty(scale) || isempty(type)
    return;
end

sampIm = pic(x0:x1, y0:y1, :);
SampIm = imresize(sampIm, scale,'nearest'); % nearest to zooming in the local part
a = size(SampIm,1);
b = size(SampIm,2);
switch type
    case 1   %  put zoom in image on the down-left
        ent((p-a+1):p,1:b, :) = SampIm;
    case 2   %  put zoom in image on the down-right
        ent((p-a+1):p,(q-b+1):q, :) = SampIm;
    case 3   %  put zoom in image on the up-right
        ent(1:a,(q-b+1):q, :) = SampIm;
    case 4   %  put zoom in image on the up-left
        ent(1:a,1:b, :) = SampIm;        
end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/resize_images.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description:
%            Resize_images generates the low resolution panchromatic (PAN) and multispectral (MS) images according to Wald's protocol. 
%            The MS image is low-pass filtered with MTF() and then decimated by nearest-neighbour sampling;
%            the PAN image is resized with imresize's default method.
% 
% Interface:
%           [I_MS_LR, I_PAN_LR] = resize_images(I_MS,I_PAN,ratio,sensor)
% 
% Inputs:
%       	I_MS:               MS image upsampled at PAN scale;
%           I_PAN:              PAN image;
%           ratio:              Scale ratio between MS and PAN. Pre-condition: Integer value;
%           sensor:             String for type of sensor (e.g. 'WV2', 'IKONOS').
% 
% Outputs:
%           I_MS_LR:            Low Resolution MS image;
%           I_PAN_LR:           Low Resolution PAN image.
% 
% References:
%           [Wald97]            L. Wald, T. Ranchin, and M. Mangolini, "Fusion of satellite images of different spatial resolutions: assessing the quality of resulting images",
%                               Photogrammetric Engineering and Remote Sensing, vol. 63, no. 6, pp. 691-699, June 1997.
%           [Aiazzi02]          B. Aiazzi, L. Alparone, S. Baronti, and A. Garzelli, "Context-driven fusion of high spatial and spectral resolution images based on
%                               oversampled multiresolution analysis", IEEE Transactions on Geoscience and Remote Sensing, vol. 40, no. 10, pp. 2300-2312, October
%                               2002.
%           [Aiazzi06]          B. Aiazzi, L. Alparone, S. Baronti, A. Garzelli, and M. Selva, "MTF-tailored multiscale fusion of high-resolution MS and Pan imagery",
%                               Photogrammetric Engineering and Remote Sensing, vol. 72, no. 5, pp. 591-596, May 2006.
%           [Vivone14a]         G. Vivone, R. Restaino, M. Dalla Mura, G. Licciardi, and J. Chanussot, "Contrast and error-based fusion schemes for multispectral
%                               image pansharpening", IEEE Geoscience and Remote Sensing Letters, vol. 11, no. 5, pp. 930-934, May 2014.
%           [Vivone15]          G. Vivone, L. Alparone, J. Chanussot, M. Dalla Mura, A. Garzelli, G. Licciardi, R. Restaino, and L. Wald, "A Critical Comparison Among Pansharpening Algorithms", 
%                               IEEE Transactions on Geoscience and Remote Sensing, vol. 53, no. 5, pp. 2565-2586, May 2015.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [I_MS_LR, I_PAN_LR] = resize_images(I_MS,I_PAN,ratio,sensor)

I_MS = double(I_MS);
I_PAN = double(I_PAN);
  
I_MS_LP = MTF(I_MS,sensor,ratio);

%%% Decimation MS
% imresize resizes the first two dimensions of every band and sizes its own
% output. The former per-band loop wrote into a buffer preallocated with
% round(size/ratio), which does not match imresize's ceil(size/ratio) when
% the image size is not a multiple of ratio and then failed with a
% dimension mismatch.
I_MS_LR = double(imresize(I_MS_LP,1/ratio,'nearest'));

I_PAN_LR = imresize(I_PAN, 1/ratio);

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/showImage4.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description: 
%           Render the first three bands of a four-band multispectral image
%           through viewimage and optionally print the result to EPS.
% 
% Interface:
%           showImage4(I_MS,print,id,flag_cut_bounds,dim_cut,th_values,L)
%
% Inputs:
%           I_MS:               Four band multispectral image;
%           print:              Flag. If set, the image is written to 'Outputs/<id>.eps' via printImage;
%           id:                 Numeric identifier used as the EPS file name;
%           flag_cut_bounds:    Flag. If set, the image borders are cut before rendering;
%           dim_cut:            Width of the boundary cut;
%           th_values:          Flag. If set, values are clipped to the range [0, 2^L];
%           L:                  Radiometric resolution of the input image.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function showImage4(I_MS,print,id,flag_cut_bounds,dim_cut,th_values,L)

% Boundary cut.
if flag_cut_bounds
    keptRows = dim_cut:(size(I_MS,1)-dim_cut);
    keptCols = dim_cut:(size(I_MS,2)-dim_cut);
    I_MS = I_MS(keptRows,keptCols,:);
end

% Hard threshold on the dynamic range.
if th_values
    ceiling = 2^L;
    I_MS(I_MS > ceiling) = ceiling;
    I_MS(I_MS < 0) = 0;
end

% Render bands 1..3 and reverse the channel order of the result.
rendered = viewimage(I_MS(:,:,1:3));
rendered = rendered(:,:,[3 2 1]);

if print
    epsName = sprintf('Outputs/%d.eps',id);
    printImage(rendered,epsName);
end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/showImage4LR.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description: 
%           Render the first three bands of the original (low resolution)
%           four-band multispectral image through viewimage and optionally
%           print the result to EPS.
% 
% Interface:
%           showImage4LR(I_MS,print,id,flag_cut_bounds,dim_cut,th_values,L,ratio)
%
% Inputs:
%           I_MS:               Four band multispectral image;
%           print:              Flag. If set, the image is written to 'Outputs/<id>.eps' via printImage;
%           id:                 Numeric identifier used as the EPS file name;
%           flag_cut_bounds:    Flag. If set, the image borders are cut before rendering;
%           dim_cut:            Width of the boundary cut (it is divided by ratio and rounded before use);
%           th_values:          Flag. If set, values are clipped to the range [0, 2^L];
%           L:                  Radiometric resolution of the input image;
%           ratio:              Resize factor.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function showImage4LR(I_MS,print,id,flag_cut_bounds,dim_cut,th_values,L,ratio)

% Boundary cut, with the cut width rescaled by ratio.
if flag_cut_bounds
    cutLR = round(dim_cut/ratio);
    keptRows = cutLR:(size(I_MS,1)-cutLR);
    keptCols = cutLR:(size(I_MS,2)-cutLR);
    I_MS = I_MS(keptRows,keptCols,:);
end

% Hard threshold on the dynamic range.
if th_values
    ceiling = 2^L;
    I_MS(I_MS > ceiling) = ceiling;
    I_MS(I_MS < 0) = 0;
end

% Render bands 1..3 and reverse the channel order of the result.
rendered = viewimage(I_MS(:,:,1:3));
rendered = rendered(:,:,[3 2 1]);

if print
    epsName = sprintf('Outputs/%d.eps',id);
    printImage(rendered,epsName);
end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/showImage4LR_zoomin.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description: 
%           Visualize the original (low resolution) four-band multispectral
%           image with one or two framed regions and their close-ups, and
%           optionally print the image to EPS.
% 
% Interface:
%           showImage4LR_zoomin(I_MS,print,id,flag_cut_bounds,dim_cut,th_values,L,ratio,location1,location2)
%
% Inputs:
%           I_MS:               Four band multispectral image;
%           print:              Flag. If print == 1, print EPS image;
%           id:                 Numeric identifier used as the EPS file name ('Outputs/<id>.eps');
%           flag_cut_bounds:    Flag. If set, the image borders are cut before rendering;
%           dim_cut:            Define the dimension of the boundary cut (divided by ratio and rounded before use);
%           th_values:          Flag. If th_values == 1, apply a hard threshold to the dynamic range;
%           L:                  Radiometric resolution of the input image;
%           ratio:              Resize factor;
%           location1:          First region to frame, [x0 x1 y0 y1] as expected by rectangleonimage;
%           location2:          Second region to frame (optional; omit or pass [] for a single region).
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function showImage4LR_zoomin(I_MS,print,id,flag_cut_bounds,dim_cut,th_values,L,ratio, location1, location2)

% location2 is optional: omitting it used to fail inside isempty().
if nargin < 10
    location2 = [];
end

if flag_cut_bounds
    I_MS = I_MS(round(dim_cut/ratio):end-round(dim_cut/ratio),round(dim_cut/ratio):end-round(dim_cut/ratio),:);
end

if th_values
    I_MS(I_MS > 2^L) = 2^L;
    I_MS(I_MS < 0) = 0;
end

IMN = viewimage(I_MS(:,:,1:3));
IMN = IMN(:,:,3:-1:1);

% rectangleonimage(pic, location, width, ch, color, scale, type):
% type =1 (put to down-left); type =2 (put to down-right); 
% type =3 (put to up-right); type =4 (put to up-left); 
ent=rectangleonimage(IMN,location1,1, 3, 3, 3, 1);      % blue frame, 3x close-up in the down-left corner
if ~isempty(location2)
    ent=rectangleonimage(ent,location2,1, 3, 2, 3, 2);  % green frame, 3x close-up in the down-right corner
end
figure,imshow(ent,[])

% NOTE(review): the printed image is IMN (without frames and close-ups),
% not ent - confirm that this is intended.
if print
    printImage(IMN,sprintf('Outputs/%d.eps',id));
end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/showImage4_zoomin.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description: 
%           Visualize a four-band multispectral image with one or two
%           framed regions and their close-ups, and optionally print the
%           image to EPS.
% 
% Interface:
%           showImage4_zoomin(I_MS,print,id,flag_cut_bounds,dim_cut,th_values,L,location1,location2)
%
% Inputs:
%           I_MS:               Four band multispectral image;
%           print:              Flag. If print == 1, print EPS image;
%           id:                 Numeric identifier used as the EPS file name ('Outputs/<id>.eps');
%           flag_cut_bounds:    Flag. If set, the image borders are cut before rendering;
%           dim_cut:            Define the dimension of the boundary cut;
%           th_values:          Flag. If th_values == 1, apply a hard threshold to the dynamic range;
%           L:                  Radiometric resolution of the input image;
%           location1:          First region to frame, [x0 x1 y0 y1] as expected by rectangleonimage;
%           location2:          Second region to frame (optional; omit or pass [] for a single region).
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function showImage4_zoomin(I_MS,print,id,flag_cut_bounds,dim_cut,th_values,L, location1, location2)

% location2 is optional: omitting it used to fail inside isempty().
if nargin < 9
    location2 = [];
end

if flag_cut_bounds
    I_MS = I_MS(dim_cut:end-dim_cut,dim_cut:end-dim_cut,:);
end

if th_values
    I_MS(I_MS > 2^L) = 2^L;
    I_MS(I_MS < 0) = 0;
end

IMN = viewimage(I_MS(:,:,1:3));
IMN = IMN(:,:,3:-1:1);

% rectangleonimage(pic, location, width, ch, color, scale, type):
% type =1 (put to down-left); type =2 (put to down-right); 
% type =3 (put to up-right); type =4 (put to up-left); 
ent=rectangleonimage(IMN,location1,1, 3, 3, 3, 1);      % blue frame, 3x close-up in the down-left corner
if ~isempty(location2)
    ent=rectangleonimage(ent,location2,1, 3, 2, 3, 2);  % green frame, 3x close-up in the down-right corner
end
figure,imshow(ent,[])

% NOTE(review): the printed image is IMN (without frames and close-ups),
% not ent - confirm that this is intended.
if print
    printImage(IMN,sprintf('Outputs/%d.eps',id));
end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/showImage8.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description: 
%           Render bands 1, 3 and 5 of an eight-band multispectral image
%           through viewimage and optionally print the result to EPS.
% 
% Interface:
%           showImage8(I_MS,print,id,flag_cut_bounds,dim_cut,th_values,L)
%
% Inputs:
%           I_MS:               Eight band multispectral image;
%           print:              Flag. If set, the image is written to 'Outputs/<id>.eps' via printImage;
%           id:                 Numeric identifier used as the EPS file name. For id == 1 viewimage is
%                               called with its default settings, otherwise with [0.01 0.995] as second argument;
%           flag_cut_bounds:    Flag. If set, the image borders are cut before rendering;
%           dim_cut:            Width of the boundary cut;
%           th_values:          Flag. If set, values are clipped to the range [0, 2^L];
%           L:                  Radiometric resolution of the input image.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function showImage8(I_MS,print,id,flag_cut_bounds,dim_cut,th_values,L)

% Boundary cut.
if flag_cut_bounds
    keptRows = dim_cut:(size(I_MS,1)-dim_cut);
    keptCols = dim_cut:(size(I_MS,2)-dim_cut);
    I_MS = I_MS(keptRows,keptCols,:);
end

% Hard threshold on the dynamic range.
if th_values
    ceiling = 2^L;
    I_MS(I_MS > ceiling) = ceiling;
    I_MS(I_MS < 0) = 0;
end

% Render bands 1, 3 and 5; only the viewimage call differs between the
% two cases.
shownBands = I_MS(:,:,[1,3,5]);
if id == 1
    rendered = viewimage(shownBands);
else
    rendered = viewimage(shownBands,[0.01 0.995]);
end
% Reverse the channel order of the result.
rendered = rendered(:,:,[3 2 1]);

if print
    epsName = sprintf('Outputs/%d.eps',id);
    printImage(rendered,epsName);
end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/showImage8LR.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description: 
%           Render bands 1, 3 and 5 of a low resolution eight-band
%           multispectral image through viewimage and optionally print the
%           result to EPS.
% 
% Interface:
%           showImage8LR(I_MS,print,id,flag_cut_bounds,dim_cut,th_values,L,ratio)
%
% Inputs:
%           I_MS:               Eight band multispectral image;
%           print:              Flag. If set, the image is written to 'Outputs/<id>.eps' via printImage;
%           id:                 Numeric identifier used as the EPS file name. For id == 1 viewimage is
%                               called with its default settings, otherwise with [0.01 0.995] as second argument;
%           flag_cut_bounds:    Flag. If set, the image borders are cut before rendering;
%           dim_cut:            Width of the boundary cut (it is divided by ratio and rounded before use);
%           th_values:          Flag. If set, values are clipped to the range [0, 2^L];
%           L:                  Radiometric resolution of the input image;
%           ratio:              Resize factor.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function showImage8LR(I_MS,print,id,flag_cut_bounds,dim_cut,th_values,L,ratio)

% Boundary cut, with the cut width rescaled by ratio.
if flag_cut_bounds
    cutLR = round(dim_cut/ratio);
    keptRows = cutLR:(size(I_MS,1)-cutLR);
    keptCols = cutLR:(size(I_MS,2)-cutLR);
    I_MS = I_MS(keptRows,keptCols,:);
end

% Hard threshold on the dynamic range.
if th_values
    ceiling = 2^L;
    I_MS(I_MS > ceiling) = ceiling;
    I_MS(I_MS < 0) = 0;
end

% Render bands 1, 3 and 5; only the viewimage call differs between the
% two cases.
shownBands = I_MS(:,:,[1,3,5]);
if id == 1
    rendered = viewimage(shownBands);
else
    rendered = viewimage(shownBands,[0.01 0.995]);
end
% Reverse the channel order of the result.
rendered = rendered(:,:,[3 2 1]);

if print
    epsName = sprintf('Outputs/%d.eps',id);
    printImage(rendered,epsName);
end

end


================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/showImage8LR_zoomin.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description: 
%           Visualize and print an eight-band multispectral image.
% 
% Interface:
%           showImage8LR(I_MS,print,id,flag_cut_bounds,dim_cut,th_values,L,ratio)
%
% Inputs:
%           I_MS:               Eight band multispectral image;
%           print:              Flag. If print == 1, print EPS image;
%           id:                 Identifier (name) of the printed EPS image;
%           flag_cut_bounds:    Cut the boundaries of the viewed Panchromatic image;
%           dim_cut:            Define the dimension of the boundary cut;
%           th_values:          Flag. If th_values == 1, apply an hard threshold to the dynamic range;
%           L:                  Radiomatric resolution of the input image;
%           ratio:              Resize factor.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function showImage8LR_zoomin(I_MS,print,id,flag_cut_bounds,dim_cut,th_values,L,ratio, location1, location2)
% Stretch bands [1,3,5] of an eight-band MS image with viewimage, then
% display it with one or two close-ups added by rectangleonimage.
%
% location1 is always used; pass location2 = [] to draw a single close-up.
% Both are forwarded unchanged to rectangleonimage.

% Crop the borders; dim_cut is divided by the resize factor first.
if flag_cut_bounds
    border = round(dim_cut/ratio);
    I_MS = I_MS(border:end-border,border:end-border,:);
end

% Hard threshold of the dynamic range to [0, 2^L].
if th_values
    upperLimit = 2^L;
    I_MS(I_MS > upperLimit) = upperLimit;
    I_MS(I_MS < 0) = 0;
end

% id == 1 uses viewimage's default saturation; any other id uses a
% slightly wider upper bound.
rgbBands = I_MS(:,:,[1,3,5]);
if id == 1
    IMN = viewimage(rgbBands);
else
    IMN = viewimage(rgbBands,[0.01 0.995]);
end
IMN = IMN(:,:,3:-1:1);  % reverse the band order of the stretched image

% type =1 (put to down-left); type =2 (put to down-right); 
% type =3 (put to up-right); type =4 (put to up-left); 
ent = rectangleonimage(IMN,location1,1, 3, 3, 3, 1);      % put close-up to up-right corner
if ~isempty(location2)
    ent = rectangleonimage(ent,location2,1, 3, 2, 3, 2);  % put close-up to down-right corner
end
figure,imshow(ent,[])

% NOTE(review): the export uses IMN (without the close-ups), not ent --
% confirm this is intended.
if print
    epsName = sprintf('Outputs/%d.eps',id);
    printImage(IMN,epsName);
end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/showImage8_zoomin.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description: 
%           Visualize and print an eight-band multispectral image.
% 
% Interface:
%           showImage8_zoomin(I_MS,print,id,flag_cut_bounds,dim_cut,th_values,L,location1,location2)
%
% Inputs:
%           I_MS:               Eight band multispectral image;
%           print:              Flag. If print == 1, print EPS image;
%           id:                 Identifier (name) of the printed EPS image;
%           flag_cut_bounds:    Cut the boundaries of the viewed Panchromatic image;
%           dim_cut:            Define the dimension of the boundary cut;
%           th_values:          Flag. If th_values == 1, apply an hard threshold to the dynamic range;
%           L:                  Radiometric resolution of the input image.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function showImage8_zoomin(I_MS,print,id,flag_cut_bounds,dim_cut,th_values,L, location1, location2)
% Stretch bands [1,3,5] of an eight-band MS image with viewimage, then
% display it with one or two close-ups added by rectangleonimage.
%
% location1 is always used; pass location2 = [] to draw a single close-up.
% Both are forwarded unchanged to rectangleonimage.

% Crop to rows/columns dim_cut .. end-dim_cut.
if flag_cut_bounds
    keepRows = dim_cut:size(I_MS,1)-dim_cut;
    keepCols = dim_cut:size(I_MS,2)-dim_cut;
    I_MS = I_MS(keepRows,keepCols,:);
end

% Hard threshold of the dynamic range to [0, 2^L].
if th_values
    upperLimit = 2^L;
    I_MS(I_MS > upperLimit) = upperLimit;
    I_MS(I_MS < 0) = 0;
end

% id == 1 uses viewimage's default saturation; any other id uses a
% slightly wider upper bound.
rgbBands = I_MS(:,:,[1,3,5]);
if id == 1
    IMN = viewimage(rgbBands);
else
    IMN = viewimage(rgbBands,[0.01 0.995]);
end
IMN = IMN(:,:,3:-1:1);  % reverse the band order of the stretched image

% type =1 (put to down-left); type =2 (put to down-right); 
% type =3 (put to up-right); type =4 (put to up-left); 
ent = rectangleonimage(IMN,location1,1, 3, 3, 3, 1);      % put close-up to up-right corner
if ~isempty(location2)
    ent = rectangleonimage(ent,location2,1, 3, 2, 3, 2);  % put close-up to down-right corner
end
figure,imshow(ent,[])

% NOTE(review): the export uses IMN (without the close-ups), not ent --
% confirm this is intended.
if print
    epsName = sprintf('Outputs/%d.eps',id);
    printImage(IMN,epsName);
end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/showImagesAll.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description: 
%           Visualize all the images applying the same stretching for visual comparison.
% 
% Interface:
%           MatrixPrint = showImagesAll(MatrixImage,titleImages,vect_index_RGB,flag_cut_bounds,dim_cut,flagPAN)
%
% Inputs:
%           MatrixImage:        Matrix that contains all the images to visualize; Size: [M x N x B x Z], where [M x N] is the
%                               dimension of a single image band, B represents the number of bands for each image, and Z is the number of images to plot.
%           titleImages:        Vector of strings that represents the titles for each image to plot; Size: [1 x Z].
%           vect_index_RGB:     Identify the bands to plot to obtain an RGB representation of the multispectral data;
%           flag_cut_bounds:    Cut the boundaries of the images to plot;
%           dim_cut:            Define the dimension of the boundary cut;
%           flagPAN:            Flag. If flagPAN == 1, the first image to plot is the panchromatic image otherwise it is the ground-truth.
%
% Outputs:
%           MatrixPrint:        Matrix, with the same structure of MatrixImage, which contains the plotted images.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function MatrixPrint = showImagesAll(MatrixImage,titleImages,vect_index_RGB,flag_cut_bounds,dim_cut,flagPAN)
% Stretch a stack of images with one common mapping and show them in a
% grid of tight subplots (7 columns, at least 4 rows).
%
% The images selected for the common stretch are concatenated side by side
% and passed to viewimage2 in a single call, so they all share the same
% histogram limits. When flagPAN is set, the first image is stretched on
% its own and excluded from the common stretch.
%
% Returns MatrixPrint, the stretched images restricted to the bands in
% vect_index_RGB, in the same order as MatrixImage.

% Crop every image to rows/columns dim_cut .. end-dim_cut.
if flag_cut_bounds
    MatrixImageCat = double(MatrixImage(dim_cut:end-dim_cut,dim_cut:end-dim_cut,:,:));
else
    MatrixImageCat = MatrixImage;
end

nImages = size(MatrixImageCat,4);
r = size(MatrixImageCat,1);
c = size(MatrixImageCat,2);

% Index of the first image that takes part in the common stretch.
if flagPAN
    firstShared = 2;
else
    firstShared = 1;
end

% Side-by-side mosaic of the images that share the stretch.
T = [];
for ii = firstShared : nImages
    T = cat(2,T,MatrixImageCat(:,:,vect_index_RGB,ii));
end

IMN = viewimage2(T);

MatrixPrint = zeros(size(MatrixImageCat(:,:,vect_index_RGB,:)));
if flagPAN
    MatrixPrint(:,:,:,1) = viewimage2(MatrixImageCat(:,:,vect_index_RGB,1));
end

% Cut the stretched mosaic back into individual images.
for ii = firstShared : nImages
    cols = (ii-firstShared)*c + (1:c);
    MatrixPrint(:,:,:,ii) = IMN(1 : r,cols,:);
end

% The layout used to be fixed at 4x7 (28 axes), which made ha(ii) run out
% of bounds for larger stacks. Rows are now added only when needed, so
% stacks of up to 28 images keep the original layout.
% ha = tight_subplot(5,5,[.06 .03],[.01 .06],[.01 .01]);
nCols = 7;
nRows = max(4,ceil(nImages/nCols));
ha = tight_subplot(nRows,nCols,[.02 0],[.01 .03],[.0 .0]);
for ii = 1 : nImages
    axes(ha(ii)); imshow(MatrixPrint(:,:,:,ii),[]);
    title(ha(ii),titleImages{ii});
end
   
end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/showImagesAllOld.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description: 
%           Visualize all the images applying the same stretching for visual comparison.
% 
% Interface:
%           MatrixPrint = showImagesAll(MatrixImage,titleImages,vect_index_RGB,flag_cut_bounds,dim_cut,flagPAN)
%
% Inputs:
%           MatrixImage:        Matrix that contains all the images to visualize; Size: [M x N x B x Z], where [M x N] is the
%                               dimension of a single image band, B represents the number of bands for each image, and Z is the number of images to plot.
%           titleImages:        Vector of strings that represents the titles for each image to plot; Size: [1 x Z].
%           vect_index_RGB:     Identify the bands to plot to obtain an RGB representation of the multispectral data;
%           flag_cut_bounds:    Cut the boundaries of the images to plot;
%           dim_cut:            Define the dimension of the boundary cut;
%           flagPAN:            Flag. If flagPAN == 1, the first image to plot is the panchromatic image otherwise it is the ground-truth.
%
% Outputs:
%           MatrixPrint:        Matrix, with the same structure of MatrixImage, which contains the plotted images.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function MatrixPrint = showImagesAll(MatrixImage,titleImages,vect_index_RGB,flag_cut_bounds,dim_cut,flagPAN)
% Older layout variant: stretch a stack of images with one common mapping
% and show them in a grid of tight subplots (5 columns, at least 5 rows).
%
% NOTE: this file is showImagesAllOld.m; MATLAB resolves a function file by
% its file name, so the declared name above is kept only as it was.
%
% The images selected for the common stretch are concatenated side by side
% and passed to viewimage2 in a single call, so they all share the same
% histogram limits. When flagPAN is set, the first image is stretched on
% its own and excluded from the common stretch.
%
% Returns MatrixPrint, the stretched images restricted to the bands in
% vect_index_RGB, in the same order as MatrixImage.

% Crop every image to rows/columns dim_cut .. end-dim_cut.
if flag_cut_bounds
    MatrixImageCat = double(MatrixImage(dim_cut:end-dim_cut,dim_cut:end-dim_cut,:,:));
else
    MatrixImageCat = MatrixImage;
end

nImages = size(MatrixImageCat,4);
r = size(MatrixImageCat,1);
c = size(MatrixImageCat,2);

% Index of the first image that takes part in the common stretch.
if flagPAN
    firstShared = 2;
else
    firstShared = 1;
end

% Side-by-side mosaic of the images that share the stretch.
T = [];
for ii = firstShared : nImages
    T = cat(2,T,MatrixImageCat(:,:,vect_index_RGB,ii));
end

IMN = viewimage2(T);

MatrixPrint = zeros(size(MatrixImageCat(:,:,vect_index_RGB,:)));
if flagPAN
    MatrixPrint(:,:,:,1) = viewimage2(MatrixImageCat(:,:,vect_index_RGB,1));
end

% Cut the stretched mosaic back into individual images.
for ii = firstShared : nImages
    cols = (ii-firstShared)*c + (1:c);
    MatrixPrint(:,:,:,ii) = IMN(1 : r,cols,:);
end

% The layout used to be fixed at 5x5 (25 axes), which made ha(ii) run out
% of bounds for larger stacks. Rows are now added only when needed, so
% stacks of up to 25 images keep the original layout.
nCols = 5;
nRows = max(5,ceil(nImages/nCols));
ha = tight_subplot(nRows,nCols,[.06 .03],[.01 .06],[.01 .01]);
% ha = tight_subplot(5,5,[.02 0],[.01 .03],[.0 .0]);

for ii = 1 : nImages
    axes(ha(ii)); imshow(MatrixPrint(:,:,:,ii),[]);
    title(ha(ii),titleImages{ii});
end
   
end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/showPan.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description: 
%           Visualize and print the panchromatic image.
% 
% Interface:
%           showPan(Pan,print,id,flag_cut_bounds,dim_cut)
%
% Inputs:
%           Pan:                Panchromatic image;
%           print:              Flag. If print == 1, print EPS image;
%           id:                 Identifier (name) of the printed EPS image;
%           flag_cut_bounds:    Cut the boundaries of the viewed Panchromatic image;
%           dim_cut:            Define the dimension of the boundary cut;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function IN = showPan(Pan,print,id,flag_cut_bounds,dim_cut)
% Stretch and display the panchromatic image through viewimage and
% optionally export it to Outputs/<id>.eps. Returns the stretched image.

% Crop to rows/columns dim_cut .. end-dim_cut.
if flag_cut_bounds
    keepRows = dim_cut:size(Pan,1)-dim_cut;
    keepCols = dim_cut:size(Pan,2)-dim_cut;
    Pan = Pan(keepRows,keepCols,:);
end

IN = viewimage(Pan);

if print
    epsName = sprintf('Outputs/%d.eps',id);
    printImage(IN,epsName);
end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/showPan_zoomin.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description: 
%           Visualize and print the panchromatic image.
% 
% Interface:
%           IN = showPan_zoomin(Pan,print,id,flag_cut_bounds,dim_cut,location1,location2)
%
% Inputs:
%           Pan:                Panchromatic image;
%           print:              Flag. If print == 1, print EPS image;
%           id:                 Identifier (name) of the printed EPS image;
%           flag_cut_bounds:    Cut the boundaries of the viewed Panchromatic image;
%           dim_cut:            Define the dimension of the boundary cut;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function IN = showPan_zoomin(Pan,print,id,flag_cut_bounds,dim_cut, location1, location2)
% Stretch the panchromatic image with viewimage, then display it with one
% or two close-ups added by rectangleonimage. Returns the stretched image
% (without the close-ups).
%
% location1 is always used; pass location2 = [] to draw a single close-up.
% Both are forwarded unchanged to rectangleonimage.

ratio = 4;  % fixed here; dim_cut is divided by it before cropping
if flag_cut_bounds
    %Pan = Pan(dim_cut:end-dim_cut,dim_cut:end-dim_cut,:);
    border = round(dim_cut/ratio);
    Pan = Pan(border:end-border,border:end-border,:);
end

IN = viewimage(Pan);

% type =1 (put to down-left); type =2 (put to down-right); 
% type =3 (put to up-right); type =4 (put to up-left); 
ent = rectangleonimage(IN,location1,1, 3, 3, 3, 1);       % put close-up to up-right corner
if ~isempty(location2)
    ent = rectangleonimage(ent,location2,1, 3, 2, 3, 2);  % put close-up to down-right corner
end
figure,imshow(ent,[])

% NOTE(review): the export uses IN (without the close-ups), not ent --
% confirm this is intended.
if print
    epsName = sprintf('Outputs/%d.eps',id);
    printImage(IN,epsName);
end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/tight_subplot.m
================================================
function ha = tight_subplot(Nh, Nw, gap, marg_h, marg_w)

% tight_subplot creates "subplot" axes with adjustable gaps and margins
%
% ha = tight_subplot(Nh, Nw, gap, marg_h, marg_w)
%
%   in:  Nh      number of axes in height (vertical direction)
%        Nw      number of axes in width (horizontal direction)
%        gap     gaps between the axes in normalized units (0...1)
%                   or [gap_h gap_w] for different gaps in height and width 
%        marg_h  margins in height in normalized units (0...1)
%                   or [lower upper] for different lower and upper margins 
%        marg_w  margins in width in normalized units (0...1)
%                   or [left right] for different left and right margins 
%
%  out:  ha     array of handles of the axes objects
%                   starting from upper left corner, going row-wise
%
%  Example: ha = tight_subplot(3,2,[.01 .03],[.1 .01],[.01 .01])
%           for ii = 1:6; axes(ha(ii)); plot(randn(10,ii)); end
%           set(ha(1:4),'XTickLabel',''); set(ha,'YTickLabel','')

% Pekka Kumpulainen 20.6.2010   @tut.fi
% Tampere University of Technology / Automation Science and Engineering


% Defaults for omitted arguments (an empty marg_h also selects the default).
if nargin < 3
    gap = .02;
end
if nargin < 4 || isempty(marg_h)
    marg_h = .05;
end
if nargin < 5
    marg_w = .05;
end

% A scalar applies to both directions / both sides.
if isscalar(gap),    gap    = [gap gap];       end
if isscalar(marg_w), marg_w = [marg_w marg_w]; end
if isscalar(marg_h), marg_h = [marg_h marg_h]; end

% Size of a single axes once margins and gaps are taken out.
axh = (1-sum(marg_h)-(Nh-1)*gap(1))/Nh; 
axw = (1-sum(marg_w)-(Nw-1)*gap(2))/Nw;

ha = zeros(Nh*Nw,1);
k = 0;
bottom = 1-marg_h(2)-axh;   % bottom edge of the top row
for row = 1:Nh
    left = marg_w(1);
    for col = 1:Nw
        k = k+1;
        ha(k) = axes('Units','normalized', ...
            'Position',[left bottom axw axh], ...
            'XTickLabel','', ...
            'YTickLabel','');
        left = left+axw+gap(2);
    end
    bottom = bottom-axh-gap(1);
end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/viewimage.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description: 
%           Visualization [3-2-1] of images with 3 bands by exploiting linear stretching and fixing the saturation. 
% 
% Interface:
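%           ImageToView = viewimage(ImageToView,tol1,tol2,tol3)
%
% Inputs:
%           ImageToView:    Image to view;
%           tol1,tol2,tol3: Saturation limits [low high], one pair per band. If only tol1 is given it is used for all three
%                           bands (default: [0.01 0.99]). If all three are given and their upper limits sum to at most 3,
%                           they are treated as histogram fractions; otherwise they are used as absolute clipping values.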
%
% Outputs:
%           ImageToView:    Image to view.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function ImageToView = viewimage(ImageToView,tol1,tol2,tol3)
% Stretch a three-band image to [0,1] and display it in a new figure with
% the band order reversed (3:-1:1). Returns the stretched image in its
% original band order.
%
% Images with fewer than three bands are displayed in grey by replicating
% their first band. With one or two inputs tol1 (default [0.01 0.99]) is
% applied to all bands; with four inputs each band gets its own pair.
% With exactly three inputs neither branch below runs: the image is
% returned unstretched and nothing is displayed.

iptsetpref('ImshowBorder', 'tight')
% Reset the global preference on every exit path. Previously an error in
% the stretch or in imshow left it at 'tight' for the rest of the session.
borderReset = onCleanup(@() iptsetpref('ImshowBorder', 'loose')); %#ok<NASGU>

ImageToView = double(ImageToView);
if size(ImageToView,3) < 3
    ImageToView = ImageToView(:,:,[1 1 1]);
end

if nargin == 1
    tol1 = [0.01 0.99];
end

if nargin <= 2
    ImageToView = linstretch(ImageToView,[tol1;tol1;tol1]);
    figure,imshow(ImageToView(:,:,3:-1:1),[])
elseif nargin == 4
    tol = [tol1;tol2;tol3];
    if sum(tol1(2)+tol2(2)+tol3(2)) <= 3
        % Upper limits look like fractions: stretch on the histogram.
        ImageToView = linstretch(ImageToView,tol);
    else
        % Otherwise the limits are absolute values: clip and rescale.
        for i = 1:3
            lo = tol(i,1);
            hi = tol(i,2);
            band = double(uint16(ImageToView(:,:,i)));
            band(band < lo) = lo;
            band(band > hi) = hi;
            ImageToView(:,:,i) = (band-lo)/(hi-lo);
        end
    end
    figure,imshow(ImageToView(:,:,3:-1:1),[])
end

end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description: 
%           Linear Stretching. 
% 
% Interface:
%           ImageToView = linstretch(ImageToView,tol)
%
% Inputs:
%           ImageToView:    Image to stretch;
%           tol:            3x2 matrix; row i holds the [low high] cumulative-histogram fractions used for band i;
%
% Outputs:
%           ImageToView:    Stretched image.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function ImageToView = linstretch(ImageToView,tol)
% Linearly stretch each of the first three bands to [0,1] between two
% levels read off the band's cumulative histogram at the fractions
% tol(band,1) and tol(band,2).

nRows = size(ImageToView,1);
nCols = size(ImageToView,2);
nPix  = nRows*nCols;

for band = 1:3
    % Values are quantised to uint16 before the histogram is built.
    v = double(uint16(ImageToView(:,:,band)));
    v = v(:);

    [counts,centers] = hist(v,max(v)-min(v));
    cumCounts = cumsum(counts);

    % First level above the lower fraction, last level below the upper one.
    lim(1) = ceil(centers(find(cumCounts > nPix*tol(band,1), 1 )));
    lim(2) = ceil(centers(find(cumCounts < nPix*tol(band,2), 1, 'last' )));

    % Saturate outside the limits, then rescale.
    v(v < lim(1)) = lim(1);
    v(v > lim(2)) = lim(2);
    ImageToView(:,:,band) = reshape((v-lim(1))/(lim(2)-lim(1)),nRows,nCols);
end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/Tools/viewimage2.m
================================================
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description: 
%           Visualization [3-2-1] of images with 3 bands by exploiting linear stretching and fixing the saturation. 
% 
% Interface:
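%           ImageToView = viewimage2(ImageToView,tol1,tol2,tol3)
%
% Inputs:
%           ImageToView:    Image to view;
%           tol1,tol2,tol3: Saturation limits [low high], one pair per band. If only tol1 is given it is used for all three
%                           bands (default: [0.01 0.99]). If all three are given and their upper limits sum to at most 3,
%                           they are treated as histogram fractions; otherwise they are used as absolute clipping values.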
%
% Outputs:
%           ImageToView:    Image to view.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function ImageToView = viewimage2(ImageToView,tol1,tol2,tol3)
% Stretch a three-band image to [0,1] and return it; unlike viewimage,
% nothing is displayed here.
%
% Images with fewer than three bands have their first band replicated.
% With one or two inputs tol1 (default [0.01 0.99]) is applied to all
% bands; with four inputs each band gets its own pair. With exactly three
% inputs neither branch below runs and the image is returned unstretched.

iptsetpref('ImshowBorder', 'tight')
% Reset the global preference on every exit path. Previously an error in
% the stretch left it at 'tight' for the rest of the session.
borderReset = onCleanup(@() iptsetpref('ImshowBorder', 'loose')); %#ok<NASGU>

ImageToView = double(ImageToView);
if size(ImageToView,3) < 3
    ImageToView = ImageToView(:,:,[1 1 1]);
end

if nargin == 1
    tol1 = [0.01 0.99];
end

if nargin <= 2
    ImageToView = linstretch(ImageToView,[tol1;tol1;tol1]);
elseif nargin == 4
    tol = [tol1;tol2;tol3];
    if sum(tol1(2)+tol2(2)+tol3(2)) <= 3
        % Upper limits look like fractions: stretch on the histogram.
        ImageToView = linstretch(ImageToView,tol);
    else
        % Otherwise the limits are absolute values: clip and rescale.
        for i = 1:3
            lo = tol(i,1);
            hi = tol(i,2);
            band = double(uint16(ImageToView(:,:,i)));
            band(band < lo) = lo;
            band(band > hi) = hi;
            ImageToView(:,:,i) = (band-lo)/(hi-lo);
        end
    end
end

end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description: 
%           Linear Stretching. 
% 
% Interface:
%           ImageToView = linstretch(ImageToView,tol)
%
% Inputs:
%           ImageToView:    Image to stretch;
%           tol:            3x2 matrix; row i holds the [low high] cumulative-histogram fractions used for band i;
%
% Outputs:
%           ImageToView:    Stretched image.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function ImageToView = linstretch(ImageToView,tol)
% Linearly stretch each of the first three bands to [0,1] between two
% levels read off the band's cumulative histogram at the fractions
% tol(band,1) and tol(band,2).

nRows = size(ImageToView,1);
nCols = size(ImageToView,2);
nPix  = nRows*nCols;

for band = 1:3
    % Values are quantised to uint16 before the histogram is built.
    v = double(uint16(ImageToView(:,:,band)));
    v = v(:);

    [counts,centers] = hist(v,max(v)-min(v));
    cumCounts = cumsum(counts);

    % First level above the lower fraction, last level below the upper one.
    lim(1) = ceil(centers(find(cumCounts > nPix*tol(band,1), 1 )));
    lim(2) = ceil(centers(find(cumCounts < nPix*tol(band,2), 1, 'last' )));

    % Saturate outside the limits, then rescale.
    v(v < lim(1)) = lim(1);
    v(v > lim(2)) = lim(2);
    ImageToView(:,:,band) = reshape((v-lim(1))/(lim(2)-lim(1)),nRows,nCols);
end

end

================================================
FILE: 02-Test-toolbox-for-traditional-and-DL(Matlab)/readme.md
================================================
# Test toolbox for traditional and DL
"Test toolbox for traditional and DL" evaluates traditional and DL approaches side by side, and outputs metrics and EPS-format figures ready for your LaTeX editing.

[English](https://github.com/.md) | [简体中文](https://github.com.md)


This repository is the official Matlab implementation of our IEEE GRSM paper “Machine Learning in Pansharpening: A Benchmark, from Shallow to Deep Networks”, 2022 ([paper](https://github.com/liangjiandeng/liangjiandeng.github.io/tree/master/papers/2022/review-grsm2022.pdf) | [homepage](https://github.com/liangjiandeng/DLPan-Toolbox)).

## Features


## Requirements
* Matlab software

## Quick Start

### Full-resolution Evaluation

* Directly run ``Demo_Full_Resolution.m``, which includes a WV3 example. After running this demo, readers can understand the whole procedure.

* Note: the full-resolution test datasets are too large to upload to GitHub, so we provide cloud links instead. To run this demo
  successfully, please download:
  - i) Download link for full-resolution WV3-NewYork example (named "NY1_WV3_FR.mat"): [[Link]](https://drive.google.com/file/d/1j1nyHuBxsNzIn-UEwZUgeziGCAFMLes9/view?usp=sharing)   (put into the folder of   "1_TestData/Datasets Testing")
  
  - ii) Download link of DL's results for full-resolution WV3-NewYork example: [[Link]](https://drive.google.com/file/d/16FSxdq6BY7STbmMzxcxJ5atNQ7ZV3mPT/view?usp=sharing)   (put into the folder of "2_DL_Result/WV3")
  
* Once you have the above datasets, you can run this demo successfully and see how it works.


### Reduced-resolution Evaluation

* Directly run ``Demo_Reduced_Resolution.m``, which includes a WV3 example. After running this demo, readers can understand the whole procedure.

* Note: the reduced-resolution test datasets are too large to upload to GitHub, so we provide cloud links instead. To run this demo
  successfully, please download:
  - i) Download link for reduced-resolution WV3-NewYork example (named "NY1_WV3_RR.mat"): same link as above i), then put into the folder of   "1_TestData/Datasets Testing"
  
  - ii) Download link of DL's results for reduced-resolution WV3-NewYork example: same link as above ii), then put into the folder of "2_DL_Result/WV3"
  
* Once you have the above datasets, you can run this demo successfully and see how it works.


### Others

* You may find the quantitative results from Tex files such as ``FR_Assessment.tex``, ``RR_Assessment.tex`` and ``Avg_RR_Assessment.tex``, then copy for your Latex editing.
* You may also find the generated high-resolution eps-format figures in the folder of "3_EPS" for your Latex editing. 


## Acknowledgement
- We appreciate the great contribution of [Xiao Wu](https://xiaoxiao-woo.github.io/) who is a graduate student in [UESTC](https://www.uestc.edu.cn/) to this toolbox.


## Citation
* If you use this toolbox, please kindly cite our paper:

```bibtex
@ARTICLE{deng2022grsm,
author={L.-J. Deng and G. Vivone and M. E. Paoletti and G. Scarpa and J. He and Y. Zhang and J. Chanussot and A. Plaza},
journal={IEEE Geoscience and Remote Sensing Magazine},
title={Machine Learning in Pansharpening: A Benchmark, from Shallow to Deep Networks},
year={2022},
pages={},
}
```

* Also, the codes of traditional methods are from the "pansharpening toolbox for distribution", thus please cite the corresponding paper:
```bibtex
@ARTICLE{vivone2021grsm,
  author={Vivone, Gemine and Dalla Mura, Mauro and Garzelli, Andrea and Restaino, Rocco and Scarpa, Giuseppe and Ulfarsson, Magnus O. and   Alparone, Luciano and Chanussot, Jocelyn},
  journal={IEEE Geoscience and Remote Sensing Magazine}, 
  title={A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting Pansharpening With Classical and Emerging Pansharpening Methods}, 
  year={2021},
  volume={9},
  number={1},
  pages={53-81},
  doi={10.1109/MGRS.2020.3019315}
}
```

  
## License & Copyright
This project is open sourced under GNU General Public License v3.0.


================================================
FILE: 03-Data-Simulation(Matlab)/01-DataSimu/QB/readme.md.txt
================================================


================================================
FILE: 03-Data-Simulation(Matlab)/Demo_DataSimu_qb.m
================================================
%% This is a demo to segment image into small patches (and big test imgs) 
% for pansharpening in remote sensing: small patches for training and
% validation, large tiles for testing (sizes are set inside the loop).
% L.-J. Deng(UESTC)
% 2020-10-04
%
% Outputs (HDF5 files with datasets /gt, /ms, /lms, /pan, each W x H x C x N):
%   01-DataSimu/QB/train_qb_10000.h5   (90% of the patches)
%   01-DataSimu/QB/valid_qb_10000.h5   (remaining 10%)
%   01-DataSimu/QB/TestData_qb.h5      (tiles cut from the first image)
%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
clear; close all;
% please download the QB data from the website: 
% then put into the folder of "Imgs_qb"
% at last run the demo directly to get patching examples
data_dir = 'Imgs_qb';
files = dir(fullfile(data_dir, '*.mat'));  
leng = length(files);
if leng == 0
    % Fail early with a clear message rather than on an undefined variable below.
    error('Demo_DataSimu_qb:noInput', 'No .mat files found in "%s".', data_dir);
end

Pre_NumInd = 1;
Pre_NumInd_test = 1;
scale = 4;
max_samples = 10000;   % upper bound on the number of training+validation patches

% Always report four dimensions, so a tensor whose last dimension is 1
% still yields a rank-4 dataset that matches the 4-element start index.
sz4 = @(x) [size(x,1), size(x,2), size(x,3), size(x,4)];
dset_names = {'gt', 'ms', 'lms', 'pan'};

%% ----------------------------------
for i = 1:leng 
    % Load one scene. The function form of load copes with file names that
    % contain spaces, needs no eval, and does not shadow the builtin dir.
    scene = load(fullfile(data_dir, files(i).name));
    
    PAN2 = scene.I_PAN;
    LMS2 = scene.I_MS;
    GT2  = scene.I_GT;
    MS2  = scene.I_MS_LR;    
    
    maxval =  max(PAN2(:));
    figure,
    subplot(2,2,1), imshow(PAN2/maxval); title('original PAN')
    subplot(2,2,2), imshow(MS2(:,:,[3 2 1])/maxval); title('LR MS')
    subplot(2,2,3), imshow(GT2(:,:,[3 2 1])/maxval); title('GT')
    subplot(2,2,4), imshow(LMS2(:,:,[3 2 1])/maxval); title('UP MS')
    
    %% leave one half of data 1 as the test data! 
    if (i==1)  % take Indianapolis to get test imags;
        cut_num = 512;
        cut_num_lr = fix(cut_num/scale);   % same cut on the low-resolution grid

        GT        = GT2(:, cut_num+1:end,:);  % for training dataset
        GT_test   = GT2(:, 1:cut_num, :);     % for testing dataset
        
        PAN       = PAN2(:, cut_num+1:end);
        PAN_test  = PAN2(:, 1:cut_num);
        
        LMS       = LMS2(:, cut_num+1:end, :);
        LMS_test  = LMS2(:, 1:cut_num, :);
        
        MS        = MS2(:, cut_num_lr+1:end, :);
        MS_test   = MS2(:, 1:cut_num_lr, :);
        
        %% 1) Big Test Imgs: segment pan into big Imgs (testing Exm)
        size_l_test = 128; size_h_test = 512; overlap_test = 1; % (for testing data: 0<=overlaop<=64)
        
        tic
        [gt_Oneimg_test, pan_Oneimg_test, ms_Oneimg_test, lms_Oneimg_test] = segImg_new(PAN_test, LMS_test, GT_test, MS_test, size_l_test, size_h_test, scale, overlap_test);
        toc
    
        % save the Imgs into a tensor
        NumInd_test = size(gt_Oneimg_test, 1);
        Post_NumInd_test = Pre_NumInd_test + NumInd_test - 1;

        fprintf(['%d-th Img. (test):  ', 'Pre_NumInd_test = %d;  ', ' Post_NumInd_test = %d \n'], i, Pre_NumInd_test, Post_NumInd_test)
        % save data (first dimension indexes the tiles)
        gt_tmp_test(Pre_NumInd_test: Post_NumInd_test, :, :, :) = gt_Oneimg_test;
        pan_tmp_test(Pre_NumInd_test: Post_NumInd_test, :, :)   = pan_Oneimg_test;
        ms_tmp_test(Pre_NumInd_test: Post_NumInd_test, :, :, :) = ms_Oneimg_test;
        lms_tmp_test(Pre_NumInd_test: Post_NumInd_test, :, :, :)= lms_Oneimg_test;

        Pre_NumInd_test = Post_NumInd_test + 1; 
        
    else
        GT  = GT2;
        PAN = PAN2;
        LMS = LMS2;
        MS  = MS2;
    end
    
    %% 2) small training patches (training)
    size_l = 16; size_h = 64;  overlap = 4; % (for traning data: 0<=overlaop<=16)
    
    tic
    [gt_Oneimg, pan_Oneimg, ms_Oneimg, lms_Oneimg] = segImg_new(PAN, LMS, GT, MS, size_l, size_h, scale, overlap);
    toc

    % save the patches into a tensor
    NumInd = size(gt_Oneimg, 1);
    Post_NumInd = Pre_NumInd + NumInd - 1;
    
    fprintf(['%d-th Img.(patching for training):  ', 'Pre_NumInd = %d;  ', ' Post_NumInd = %d \n'], i, Pre_NumInd, Post_NumInd)
    % save data (first dimension indexes the patches)
    gt_tmp1(Pre_NumInd: Post_NumInd, :, :, :) = gt_Oneimg;
    pan_tmp1(Pre_NumInd: Post_NumInd, :, :)   = pan_Oneimg;
    ms_tmp1(Pre_NumInd: Post_NumInd, :, :, :) = ms_Oneimg;
    lms_tmp1(Pre_NumInd: Post_NumInd, :, :, :)= lms_Oneimg;
            
    Pre_NumInd = Post_NumInd + 1;
    
end

%% ==========================================================
%% ==== Increase samples to 10,000 (NxCxHxW's inverse = WxHxCxN)
%% ==========================================================

exp_num = size(gt_tmp1, 1);  

if exp_num < max_samples

    % Step2: two flips (along dimensions 2 and 3) to add examples
    gt_tmp  = cat(1, gt_tmp1,  flip(gt_tmp1, 2),  flip(gt_tmp1, 3));
    ms_tmp  = cat(1, ms_tmp1,  flip(ms_tmp1, 2),  flip(ms_tmp1, 3));
    lms_tmp = cat(1, lms_tmp1, flip(lms_tmp1, 2), flip(lms_tmp1, 3));
    pan_tmp = cat(1, pan_tmp1, flip(pan_tmp1, 2), flip(pan_tmp1, 3));

    % Step3: keep at most the first max_samples patches. The flips only
    % triple the set, so cap at what actually exists; assuming 10000 here
    % made the random split below index past the end of the tensors
    % whenever 3*exp_num < 10000.
    num_cut = min(max_samples, size(gt_tmp, 1));
    gt_tmp(num_cut+1:end, :, :, :) = []; 
    ms_tmp(num_cut+1:end, :, :, :) = []; 
    lms_tmp(num_cut+1:end, :, :, :) = []; 
    pan_tmp(num_cut+1:end, :, :) = []; 
    
else
    num_cut = exp_num;
    
    gt_tmp  = gt_tmp1;
    ms_tmp  = ms_tmp1;
    lms_tmp = lms_tmp1;
    pan_tmp = pan_tmp1;
end

%% ==========================================================
%% (A) generate training: 1) training data (90%); 2) validation data (10%); 
%% ==========================================================
Post_NumInd = num_cut;

nz_idx    = randperm(Post_NumInd);
num_train = fix(0.9*Post_NumInd);       % # training samples
num_valid = Post_NumInd - num_train;    % # validation samples

idx_train = nz_idx(1:num_train);
idx_valid = nz_idx(num_train+1: num_train+num_valid);

%% ==== save to H5 file (NxCxHxW's inverse = WxHxCxN) =====
%==========================================================
%% == generate training dataset:
filename_train = '01-DataSimu/QB/train_qb_10000.h5';

% N x H x W x C  ->  W x H x C x N (PAN has no channel dimension, so its
% third output dimension is a singleton).
gt  = permute(gt_tmp(idx_train, :, :, :),  [3 2 4 1]);
ms  = permute(ms_tmp(idx_train, :, :, :),  [3 2 4 1]);
lms = permute(lms_tmp(idx_train, :, :, :), [3 2 4 1]);
pan = permute(pan_tmp(idx_train, :, :),    [3 2 4 1]);

dset_data = {gt, ms, lms, pan};
for k = 1:numel(dset_names)
    dset = ['/', dset_names{k}];
    dims = sz4(dset_data{k});            % width, height, channels, number
    h5create(filename_train, dset, dims, 'Datatype', 'double');
    h5write(filename_train, dset, double(dset_data{k}), [1,1,1,1], dims);
end

clear gt ms lms pan dset_data

%% == generate validation dataset:
filename_valid = '01-DataSimu/QB/valid_qb_10000.h5';

gt  = permute(gt_tmp(idx_valid, :, :, :),  [3 2 4 1]);
ms  = permute(ms_tmp(idx_valid, :, :, :),  [3 2 4 1]);
lms = permute(lms_tmp(idx_valid, :, :, :), [3 2 4 1]);
pan = permute(pan_tmp(idx_valid, :, :),    [3 2 4 1]);

% Every dataset is created with its own size (the /lms dataset used to be
% sized from gt).
dset_data = {gt, ms, lms, pan};
for k = 1:numel(dset_names)
    dset = ['/', dset_names{k}];
    dims = sz4(dset_data{k});            % width, height, channels, number
    h5create(filename_valid, dset, dims, 'Datatype', 'double');
    h5write(filename_valid, dset, double(dset_data{k}), [1,1,1,1], dims);
end

clear gt ms lms pan dset_data


%% ==========================================================
%% (B) generate Testing data:
%% ==========================================================

filename_test = '01-DataSimu/QB/TestData_qb.h5';

gt  = permute(gt_tmp_test,  [3 2 4 1]);
ms  = permute(ms_tmp_test,  [3 2 4 1]);
lms = permute(lms_tmp_test, [3 2 4 1]);
pan = permute(pan_tmp_test, [3 2 4 1]);

dset_data = {gt, ms, lms, pan};
for k = 1:numel(dset_names)
    dset = ['/', dset_names{k}];
    dims = sz4(dset_data{k});            % width, height, channels, number
    h5create(filename_test, dset, dims, 'Datatype', 'double');
    h5write(filename_test, dset, double(dset_data{k}), [1,1,1,1], dims);
end

clear gt ms lms pan dset_data


================================================
FILE: 03-Data-Simulation(Matlab)/imgs/readme
================================================
To understand the usage of this toolbox, you may download an original QB dataset from: https://www.dropbox.com/s/2ujmag14bkiw0mq/QB_Indianapolis_training_DL.mat?dl=0


================================================
FILE: 03-Data-Simulation(Matlab)/segImg_new.m
================================================
function [gt, pan, ms, lms] = segImg_new(PAN, LMS, GT, MS, size_l, size_high, scale, overlap)
% SEGIMG_NEW  Segment big images into stacks of small, spatially paired patches.
% LJ Deng (UESTC); 2020-10-09
%
% Inputs
%   PAN:       big PAN image (indexed as a 2-D array)
%   LMS:       big upsampled MS image (HR grid)
%   GT:        big original HRMS image (HR grid)
%   MS:        big original LR MS image; its size defines h, w and c
%   size_l:    side length of an LR patch (e.g. 16)
%   size_high: side length of an HR patch (e.g. 64)
%   scale:     spatial ratio of PAN and MS (e.g. 4)
%   overlap:   overlap between neighbouring patches, in LR pixels
%
% Outputs (all preallocated with zeros, hence double)
%   gt:        N x size_high x size_high x c  ground-truth patches
%   pan:       N x size_high x size_high      PAN patches
%   ms:        N x size_l    x size_l    x c  LR MS patches
%   lms:       N x size_high x size_high x c  upsampled MS patches
%
% Patches are numbered row-major over the patch grid: the row origin is the
% outer loop, the column origin the inner loop.
%
% Side effect: for the first patch a figure with gt / pan / lms previews is
% opened so that mis-registration can be spotted by eye.

%% --------------------------
[h, w, c] = size(MS);        % size of the LR image
H = scale*h;  W = scale*w;   % nominal size of the HR images

size_low = size_l;           % patch size of LR, e.g. 16x16
size_h   = size_high;        % patch size of HR, e.g. 64x64
step_low = size_low - overlap;   % stride between LR patch origins

%---- LR patch origins ---------
gridx = 1:step_low:h;        % row origins
gridy = 1:step_low:w;        % column origins

%---- HR patch origins ---------
% Derive each HR origin from its LR partner instead of building a second,
% independent grid. When size_high == scale*size_l this gives exactly the
% grid 1:(size_h - scale*overlap):H used before; otherwise it keeps the
% LR/HR pairs aligned at their top-left corner and guarantees that both
% grids have the same length.
Gridx = scale*(gridx - 1) + 1;
Gridy = scale*(gridy - 1) + 1;

% Drop boundary origins whose LR or HR window would leave the image.
keepx = (gridx + size_low - 1 <= h) & (Gridx + size_h - 1 <= H);
keepy = (gridy + size_low - 1 <= w) & (Gridy + size_h - 1 <= W);
gridx = gridx(keepx);  Gridx = Gridx(keepx);
gridy = gridy(keepy);  Gridy = Gridy(keepy);

%% -----Pre-define variables' sizes--------
num_patches = numel(gridx)*numel(gridy);
pan = zeros(num_patches, size_h,   size_h);
lms = zeros(num_patches, size_h,   size_h,   c);
gt  = zeros(num_patches, size_h,   size_h,   c);
ms  = zeros(num_patches, size_low, size_low, c);

%% -----loops to segment--------
Num = 0;
for i = 1:numel(gridx)
    for j = 1:numel(gridy)
        Num = Num + 1;
        xx = gridx(i);   yy = gridy(j);   % LR origin
        XX = Gridx(i);   YY = Gridy(j);   % matching HR origin

        % ---start to segment------
        pan_p = PAN(XX:XX+size_h-1, YY:YY+size_h-1);       % single pan patch
        pan(Num, :, :) = pan_p;

        lms_p = LMS(XX:XX+size_h-1, YY:YY+size_h-1, :);    % single lms patch
        lms(Num, :, :, :) = lms_p;

        gt_p = GT(XX:XX+size_h-1, YY:YY+size_h-1, :);      % single gt patch
        gt(Num, :, :, :) = gt_p;

        ms_p = MS(xx:xx+size_low-1, yy:yy+size_low-1, :);  % single ms patch
        ms(Num, :, :, :) = ms_p;

        if Num == 1  % to see if there needs registration!
            % Convert to double: dividing a double array by an integer-class
            % scalar is not allowed in MATLAB.
            maxval = double(max(PAN(:)));
            % Bands 3,2,1 form the preview; fall back to band 1 replicated
            % when fewer than three bands exist.
            if c >= 3
                bands = [3 2 1];
            else
                bands = [1 1 1];
            end
            ww = gt_p(:, :, bands);    % gt preview
            kk = lms_p(:, :, bands);   % lms preview
            pp = pan_p;                % pan preview
            figure,
            subplot(1,3,1), imshow(double(ww)/maxval + 0.3); title('gt')
            subplot(1,3,2), imshow(double(pp)/maxval + 0.3); title('pan')
            subplot(1,3,3), imshow(double(kk)/maxval + 0.3); title('lms')
        end

    end
end
%% -----End loops--------

end


================================================
FILE: LICENSE
================================================
                    GNU GENERAL PUBLIC LICENSE
                       Version 3, 29 June 2007

 Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
 Everyone is permitted to copy and distribute verbatim copies
 of this license document, but changing it is not allowed.

                            Preamble

  The GNU General Public License is a free, copyleft license for
software and other kinds of works.

  The licenses for most software and other practical works are designed
to take away your freedom to share and change the works.  By contrast,
the GNU General Public License is intended to guarantee your freedom to
share and change all versions of a program--to make sure it remains free
software for all its users.  We, the Free Software Foundation, use the
GNU General Public License for most of our software; it applies also to
any other work released this way by its authors.  You can apply it to
your programs, too.

  When we speak of free software, we are referring to freedom, not
price.  Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
them if you wish), that you receive source code or can get it if you
want it, that you can change the software or use pieces of it in new
free programs, and that you know you can do these things.

  To protect your rights, we need to prevent others from denying you
these rights or asking you to surrender the rights.  Therefore, you have
certain responsibilities if you distribute copies of the software, or if
you modify it: responsibilities to respect the freedom of others.

  For example, if you distribute copies of such a program, whether
gratis or for a fee, you must pass on to the recipients the same
freedoms that you received.  You must make sure that they, too, receive
or can get the source code.  And you must show them these terms so they
know their rights.

  Developers that use the GNU GPL protect your rights with two steps:
(1) assert copyright on the software, and (2) offer you this License
giving you legal permission to copy, distribute and/or modify it.

  For the developers' and authors' protection, the GPL clearly explains
that there is no warranty for this free software.  For both users' and
authors' sake, the GPL requires that modified versions be marked as
changed, so that their problems will not be attributed erroneously to
authors of previous versions.

  Some devices are designed to deny users access to install or run
modified versions of the software inside them, although the manufacturer
can do so.  This is fundamentally incompatible with the aim of
protecting users' freedom to change the software.  The systematic
pattern of such abuse occurs in the area of products for individuals to
use, which is precisely where it is most unacceptable.  Therefore, we
have designed this version of the GPL to prohibit the practice for those
products.  If such problems arise substantially in other domains, we
stand ready to extend this provision to those domains in future versions
of the GPL, as needed to protect the freedom of users.

  Finally, every program is threatened constantly by software patents.
States should not allow patents to restrict development and use of
software on general-purpose computers, but in those that do, we wish to
avoid the special danger that patents applied to a free program could
make it effectively proprietary.  To prevent this, the GPL assures that
patents cannot be used to render the program non-free.

  The precise terms and conditions for copying, distribution and
modification follow.

                       TERMS AND CONDITIONS

  0. Definitions.

  "This License" refers to version 3 of the GNU General Public License.

  "Copyright" also means copyright-like laws that apply to other kinds of
works, such as semiconductor masks.

  "The Program" refers to any copyrightable work licensed under this
License.  Each licensee is addressed as "you".  "Licensees" and
"recipients" may be individuals or organizations.

  To "modify" a work means to copy from or adapt all or part of the work
in a fashion requiring copyright permission, other than the making of an
exact copy.  The resulting work is called a "modified version" of the
earlier work or a work "based on" the earlier work.

  A "covered work" means either the unmodified Program or a work based
on the Program.

  To "propagate" a work means to do anything with it that, without
permission, would make you directly or secondarily liable for
infringement under applicable copyright law, except executing it on a
computer or modifying a private copy.  Propagation includes copying,
distribution (with or without modification), making available to the
public, and in some countries other activities as well.

  To "convey" a work means any kind of propagation that enables other
parties to make or receive copies.  Mere interaction with a user through
a computer network, with no transfer of a copy, is not conveying.

  An interactive user interface displays "Appropriate Legal Notices"
to the extent that it includes a convenient and prominently visible
feature that (1) displays an appropriate copyright notice, and (2)
tells the user that there is no warranty for the work (except to the
extent that warranties are provided), that licensees may convey the
work under this License, and how to view a copy of this License.  If
the interface presents a list of user commands or options, such as a
menu, a prominent item in the list meets this criterion.

  1. Source Code.

  The "source code" for a work means the preferred form of the work
for making modifications to it.  "Object code" means any non-source
form of a work.

  A "Standard Interface" means an interface that either is an official
standard defined by a recognized standards body, or, in the case of
interfaces specified for a particular programming language, one that
is widely used among developers working in that language.

  The "System Libraries" of an executable work include anything, other
than the work as a whole, that (a) is included in the normal form of
packaging a Major Component, but which is not part of that Major
Component, and (b) serves only to enable use of the work with that
Major Component, or to implement a Standard Interface for which an
implementation is available to the public in source code form.  A
"Major Component", in this context, means a major essential component
(kernel, window system, and so on) of the specific operating system
(if any) on which the executable work runs, or a compiler used to
produce the work, or an object code interpreter used to run it.

  The "Corresponding Source" for a work in object code form means all
the source code needed to generate, install, and (for an executable
work) run the object code and to modify the work, including scripts to
control those activities.  However, it does not include the work's
System Libraries, or general-purpose tools or generally available free
programs which are used unmodified in performing those activities but
which are not part of the work.  For example, Corresponding Source
includes interface definition files associated with source files for
the work, and the source code for shared libraries and dynamically
linked subprograms that the work is specifically designed to require,
such as by intimate data communication or control flow between those
subprograms and other parts of the work.

  The Corresponding Source need not include anything that users
can regenerate automatically from other parts of the Corresponding
Source.

  The Corresponding Source for a work in source code form is that
same work.

  2. Basic Permissions.

  All rights granted under this License are granted for the term of
copyright on the Program, and are irrevocable provided the stated
conditions are met.  This License explicitly affirms your unlimited
permission to run the unmodified Program.  The output from running a
covered work is covered by this License only if the output, given its
content, constitutes a covered work.  This License acknowledges your
rights of fair use or other equivalent, as provided by copyright law.

  You may make, run and propagate covered works that you do not
convey, without conditions so long as your license otherwise remains
in force.  You may convey covered works to others for the sole purpose
of having them make modifications exclusively for you, or provide you
with facilities for running those works, provided that you comply with
the terms of this License in conveying all material for which you do
not control copyright.  Those thus making or running the covered works
for you must do so exclusively on your behalf, under your direction
and control, on terms that prohibit them from making any copies of
your copyrighted material outside their relationship with you.

  Conveying under any other circumstances is permitted solely under
the conditions stated below.  Sublicensing is not allowed; section 10
makes it unnecessary.

  3. Protecting Users' Legal Rights From Anti-Circumvention Law.

  No covered work shall be deemed part of an effective technological
measure under any applicable law fulfilling obligations under article
11 of the WIPO copyright treaty adopted on 20 December 1996, or
similar laws prohibiting or restricting circumvention of such
measures.

  When you convey a covered work, you waive any legal power to forbid
circumvention of technological measures to the extent such circumvention
is effected by exercising rights under this License with respect to
the covered work, and you disclaim any intention to limit operation or
modification of the work as a means of enforcing, against the work's
users, your or third parties' legal rights to forbid circumvention of
technological measures.

  4. Conveying Verbatim Copies.

  You may convey verbatim copies of the Program's source code as you
receive it, in any medium, provided that you conspicuously and
appropriately publish on each copy an appropriate copyright notice;
keep intact all notices stating that this License and any
non-permissive terms added in accord with section 7 apply to the code;
keep intact all notices of the absence of any warranty; and give all
recipients a copy of this License along with the Program.

  You may charge any price or no price for each copy that you convey,
and you may offer support or warranty protection for a fee.

  5. Conveying Modified Source Versions.

  You may convey a work based on the Program, or the modifications to
produce it from the Program, in the form of source code under the
terms of section 4, provided that you also meet all of these conditions:

    a) The work must carry prominent notices stating that you modified
    it, and giving a relevant date.

    b) The work must carry prominent notices stating that it is
    released under this License and any conditions added under section
    7.  This requirement modifies the requirement in section 4 to
    "keep intact all notices".

    c) You must license the entire work, as a whole, under this
    License to anyone who comes into possession of a copy.  This
    License will therefore apply, along with any applicable section 7
    additional terms, to the whole of the work, and all its parts,
    regardless of how they are packaged.  This License gives no
    permission to license the work in any other way, but it does not
    invalidate such permission if you have separately received it.

    d) If the work has interactive user interfaces, each must display
    Appropriate Legal Notices; however, if the Program has interactive
    interfaces that do not display Appropriate Legal Notices, your
    work need not make them do so.

  A compilation of a covered work with other separate and independent
works, which are not by their nature extensions of the covered work,
and which are not combined with it such as to form a larger program,
in or on a volume of a storage or distribution medium, is called an
"aggregate" if the compilation and its resulting copyright are not
used to limit the access or legal rights of the compilation's users
beyond what the individual works permit.  Inclusion of a covered work
in an aggregate does not cause this License to apply to the other
parts of the aggregate.

  6. Conveying Non-Source Forms.

  You may convey a covered work in object code form under the terms
of sections 4 and 5, provided that you also convey the
machine-readable Corresponding Source under the terms of this License,
in one of these ways:

    a) Convey the object code in, or embodied in, a physical product
    (including a physical distribution medium), accompanied by the
    Corresponding Source fixed on a durable physical medium
    customarily used for software interchange.

    b) Convey the object code in, or embodied in, a physical product
    (including a physical distribution medium), accompanied by a
    written offer, valid for at least three years and valid for as
    long as you offer spare parts or customer support for that product
    model, to give anyone who possesses the object code either (1) a
    copy of the Corresponding Source for all the software in the
    product that is covered by this License, on a durable physical
    medium customarily used for software interchange, for a price no
    more than your reasonable cost of physically performing this
    conveying of source, or (2) access to copy the
    Corresponding Source from a network server at no charge.

    c) Convey individual copies of the object code with a copy of the
    written offer to provide the Corresponding Source.  This
    alternative is allowed only occasionally and noncommercially, and
    only if you received the object code with such an offer, in accord
    with subsection 6b.

    d) Convey the object code by offering access from a designated
    place (gratis or for a charge), and offer equivalent access to the
    Corresponding Source in the same way through the same place at no
    further charge.  You need not require recipients to copy the
    Corresponding Source along with the object code.  If the place to
    copy the object code is a network server, the Corresponding Source
    may be on a different server (operated by you or a third party)
    that supports equivalent copying facilities, provided you maintain
    clear directions next to the object code saying where to find the
    Corresponding Source.  Regardless of what server hosts the
    Corresponding Source, you remain obligated to ensure that it is
    available for as long as needed to satisfy these requirements.

    e) Convey the object code using peer-to-peer transmission, provided
    you inform other peers where the object code and Corresponding
    Source of the work are being offered to the general public at no
    charge under subsection 6d.

  A separable portion of the object code, whose source code is excluded
from the Corresponding Source as a System Library, need not be
included in conveying the object code work.

  A "User Product" is either (1) a "consumer product", which means any
tangible personal property which is normally used for personal, family,
or household purposes, or (2) anything designed or sold for incorporation
into a dwelling.  In determining whether a product is a consumer product,
doubtful cases shall be resolved in favor of coverage.  For a particular
product received by a particular user, "normally used" refers to a
typical or common use of that class of product, regardless of the status
of the particular user or of the way in which the particular user
actually uses, or expects or is expected to use, the product.  A product
is a consumer product regardless of whether the product has substantial
commercial, industrial or non-consumer uses, unless such uses represent
the only significant mode of use of the product.

  "Installation Information" for a User Product means any methods,
procedures, authorization keys, or other information required to install
and execute modified versions of a covered work in that User Product from
a modified version of its Corresponding Source.  The information must
suffice to ensure that the continued functioning of the modified object
code is in no case prevented or interfered with solely because
modification has been made.

  If you convey an object code work under this section in, or with, or
specifically for use in, a User Product, and the conveying occurs as
part of a transaction in which the right of possession and use of the
User Product is transferred to the recipient in perpetuity or for a
fixed term (regardless of how the transaction is characterized), the
Corresponding Source conveyed under this section must be accompanied
by the Installation Information.  But this requirement does not apply
if neither you nor any third party retains the ability to install
modified object code on the User Product (for example, the work has
been installed in ROM).

  The requirement to provide Installation Information does not include a
requirement to continue to provide support service, warranty, or updates
for a work that has been modified or installed by the recipient, or for
the User Product in which it has been modified or installed.  Access to a
network may be denied when the modification itself materially and
adversely affects the operation of the network or violates the rules and
protocols for communication across the network.

  Corresponding Source conveyed, and Installation Information provided,
in accord with this section must be in a format that is publicly
documented (and with an implementation available to the public in
source code form), and must require no special password or key for
unpacking, reading or copying.

  7. Additional Terms.

  "Additional permissions" are terms that supplement the terms of this
License by making exceptions from one or more of its conditions.
Additional permissions that are applicable to the entire Program shall
be treated as though they were included in this License, to the extent
that they are valid under applicable law.  If additional permissions
apply only to part of the Program, that part may be used separately
under those permissions, but the entire Program remains governed by
this License without regard to the additional permissions.

  When you convey a copy of a covered work, you may at your option
remove any additional permissions from that copy, or from any part of
it.  (Additional permissions may be written to require their own
removal in certain cases when you modify the work.)  You may place
additional permissions on material, added by you to a covered work,
for which you have or can give appropriate copyright permission.

  Notwithstanding any other provision of this License, for material you
add to a covered work, you may (if authorized by the copyright holders of
that material) supplement the terms of this License with terms:

    a) Disclaiming warranty or limiting liability differently from the
    terms of sections 15 and 16 of this License; or

    b) Requiring preservation of specified reasonable legal notices or
    author attributions in that material or in the Appropriate Legal
    Notices displayed by works containing it; or

    c) Prohibiting misrepresentation of the origin of that material, or
    requiring that modified versions of such material be marked in
    reasonable ways as different from the original version; or

    d) Limiting the use for publicity purposes of names of licensors or
    authors of the material; or

    e) Declining to grant rights under trademark law for use of some
    trade names, trademarks, or service marks; or

    f) Requiring indemnification of licensors and authors of that
    material by anyone who conveys the material (or modified versions of
    it) with contractual assumptions of liability to the recipient, for
    any liability that these contractual assumptions directly impose on
    those licensors and authors.

  All other non-permissive additional terms are considered "further
restrictions" within the meaning of section 10.  If the Program as you
received it, or any part of it, contains a notice stating that it is
governed by this License along with a term that is a further
restriction, you may remove that term.  If a license document contains
a further restriction but permits relicensing or conveying under this
License, you may add to a covered work material governed by the terms
of that license document, provided that the further restriction does
not survive such relicensing or conveying.

  If you add terms to a covered work in accord with this section, you
must place, in the relevant source files, a statement of the
additional terms that apply to those files, or a notice indicating
where to find the applicable terms.

  Additional terms, permissive or non-permissive, may be stated in the
form of a separately written license, or stated as exceptions;
the above requirements apply either way.

  8. Termination.

  You may not propagate or modify a covered work except as expressly
provided under this License.  Any attempt otherwise to propagate or
modify it is void, and will automatically terminate your rights under
this License (including any patent licenses granted under the third
paragraph of section 11).

  However, if you cease all violation of this License, then your
license from a particular copyright holder is reinstated (a)
provisionally, unless and until the copyright holder explicitly and
finally terminates your license, and (b) permanently, if the copyright
holder fails to notify you of the violation by some reasonable means
prior to 60 days after the cessation.

  Moreover, your license from a particular copyright holder is
reinstated permanently if the copyright holder notifies you of the
violation by some reasonable means, this is the first time you have
received notice of violation of this License (for any work) from that
copyright holder, and you cure the violation prior to 30 days after
your receipt of the notice.

  Termination of your rights under this section does not terminate the
licenses of parties who have received copies or rights from you under
this License.  If your rights have been terminated and not permanently
reinstated, you do not qualify to receive new licenses for the same
material under section 10.

  9. Acceptance Not Required for Having Copies.

  You are not required to accept this License in order to receive or
run a copy of the Program.  Ancillary propagation of a covered work
occurring solely as a consequence of using peer-to-peer transmission
to receive a copy likewise does not require acceptance.  However,
nothing other than this License grants you permission to propagate or
modify any covered work.  These actions infringe copyright if you do
not accept this License.  Therefore, by modifying or propagating a
covered work, you indicate your acceptance of this License to do so.

  10. Automatic Licensing of Downstream Recipients.

  Each time you convey a covered work, the recipient automatically
receives a license from the original licensors, to run, modify and
propagate that work, subject to this License.  You are not responsible
for enforcing compliance by third parties with this License.

  An "entity transaction" is a transaction transferring control of an
organization, or substantially all assets of one, or subdividing an
organization, or merging organizations.  If propagation of a covered
work results from an entity transaction, each party to that
transaction who receives a copy of the work also receives whatever
licenses to the work the party's predecessor in interest had or could
give under the previous paragraph, plus a right to possession of the
Corresponding Source of the work from the predecessor in interest, if
the predecessor has it or can get it with reasonable efforts.

  You may not impose any further restrictions on the exercise of the
rights granted or affirmed under this License.  For example, you may
not impose a license fee, royalty, or other charge for exercise of
rights granted under this License, and you may not initiate litigation
(including a cross-claim or counterclaim in a lawsuit) alleging that
any patent claim is infringed by making, using, selling, offering for
sale, or importing the Program or any portion of it.

  11. Patents.

  A "contributor" is a copyright holder who authorizes use under this
License of the Program or a work on which the Program is based.  The
work thus licensed is called the contributor's "contributor version".

  A contributor's "essential patent claims" are all patent claims
owned or controlled by the contributor, whether already acquired or
hereafter acquired, that would be infringed by some manner, permitted
by this License, of making, using, or selling its contributor version,
but do not include claims that would be infringed only as a
consequence of further modification of the contributor version.  For
purposes of this definition, "control" includes the right to grant
patent sublicenses in a manner consistent with the requirements of
this License.

  Each contributor grants you a non-exclusive, worldwide, royalty-free
patent license under the contributor's essential patent claims, to
make, use, sell, offer for sale, import and otherwise run, modify and
propagate the contents of its contributor version.

  In the following three paragraphs, a "patent license" is any express
agreement or commitment, however denominated, not to enforce a patent
(such as an express permission to practice a patent or covenant not to
sue for patent infringement).  To "grant" such a patent license to a
party means to make such an agreement or commitment not to enforce a
patent against the party.

  If you convey a covered work, knowingly relying on a patent license,
and the Corresponding Source of the work is not available for anyone
to copy, free of charge and under the terms of this License, through a
publicly available network server or other readily accessible means,
then you must either (1) cause the Corresponding Source to be so
available, or (2) arrange to deprive yourself of the benefit of the
patent license for this particular work, or (3) arrange, in a manner
consistent with the requirements of this License, to extend the patent
license to downstream recipients.  "Knowingly relying" means you have
actual knowledge that, but for the patent license, your conveying the
covered work in a country, or your recipient's use of the covered work
in a country, would infringe one or more identifiable patents in that
country that you have reason to believe are valid.

  If, pursuant to or in connection with a single transaction or
arrangement, you convey, or propagate by procuring conveyance of, a
covered work, and grant a patent license to some of the parties
receiving the covered work authorizing them to use, propagate, modify
or convey a specific copy of the covered work, then the patent license
you grant is automatically extended to all recipients of the covered
work and works based on it.

  A patent license is "discriminatory" if it does not include within
the scope of its coverage, prohibits the exercise of, or is
conditioned on the non-exercise of one or more of the rights that are
specifically granted under this License.  You may not convey a covered
work if you are a party to an arrangement with a third party that is
in the business of distributing software, under which you make payment
to the third party based on the extent of your activity of conveying
the work, and under which the third party grants, to any of the
parties who would receive the covered work from you, a discriminatory
patent license (a) in connection with copies of the covered work
conveyed by you (or copies made from those copies), or (b) primarily
for and in connection with specific products or compilations that
contain the covered work, unless you entered into that arrangement,
or that patent license was granted, prior to 28 March 2007.

  Nothing in this License shall be construed as excluding or limiting
any implied license or other defenses to infringement that may
otherwise be available to you under applicable patent law.

  12. No Surrender of Others' Freedom.

  If conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License.  If you cannot convey a
covered work so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you may
not convey it at all.  For example, if you agree to terms that obligate you
to collect a royalty for further conveying from those to whom you convey
the Program, the only way you could satisfy both those terms and this
License would be to refrain entirely from conveying the Program.

  13. Use with the GNU Affero General Public License.

  Notwithstanding any other provision of this License, you have
permission to link or combine any covered work with a work licensed
under version 3 of the GNU Affero General Public License into a single
combined work, and to convey the resulting work.  The terms of this
License will continue to apply to the part which is the covered work,
but the special requirements of the GNU Affero General Public License,
section 13, concerning interaction through a network will apply to the
combination as such.

  14. Revised Versions of this License.

  The Free Software Foundation may publish revised and/or new versions of
the GNU General Public License from time to time.  Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.

  Each version is given a distinguishing version number.  If the
Program specifies that a certain numbered version of the GNU General
Public License "or any later version" applies to it, you have the
option of following the terms and conditions either of that numbered
version or of any later version published by the Free Software
Foundation.  If the Program does not specify a version number of the
GNU General Public License, you may choose any version ever published
by the Free Software Foundation.

  If the Program specifies that a proxy can decide which future
versions of the GNU General Public License can be used, that proxy's
public statement of acceptance of a version permanently authorizes you
to choose that version for the Program.

  Later license versions may give you additional or different
permissions.  However, no additional obligations are imposed on any
author or copyright holder as a result of your choosing to follow a
later version.

  15. Disclaimer of Warranty.

  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.

  16. Limitation of Liability.

  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
SUCH DAMAGES.

  17. Interpretation of Sections 15 and 16.

  If the disclaimer of warranty and limitation of liability provided
above cannot be given local legal effect according to their terms,
reviewing courts shall apply local law that most closely approximates
an absolute waiver of all civil liability in connection with the
Program, unless a warranty or assumption of liability accompanies a
copy of the Program in return for a fee.

                     END OF TERMS AND CONDITIONS

            How to Apply These Terms to Your New Programs

  If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.

  To do so, attach the following notices to the program.  It is safest
to attach them to the start of each source file to most effectively
state the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.

    <one line to give the program's name and a brief idea of what it does.>
    Copyright (C) <year>  <name of author>

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.

Also add information on how to contact you by electronic and paper mail.

  If the program does terminal interaction, make it output a short
notice like this when it starts in an interactive mode:

    <program>  Copyright (C) <year>  <name of author>
    This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
    This is free software, and you are welcome to redistribute it
    under certain conditions; type `show c' for details.

The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License.  Of course, your program's commands
might be different; for a GUI interface, you would use an "about box".

  You should also get your employer (if you work as a programmer) or school,
if any, to sign a "copyright disclaimer" for the program, if necessary.
For more information on this, and how to apply and follow the GNU GPL, see
<https://www.gnu.org/licenses/>.

  The GNU General Public License does not permit incorporating your program
into proprietary programs.  If your program is a subroutine library, you
may consider it more useful to permit linking proprietary applications with
the library.  If this is what you want to do, use the GNU Lesser General
Public License instead of this License.  But first, please read
<https://www.gnu.org/licenses/why-not-lgpl.html>.


================================================
FILE: README.md
================================================
# DLPan-Toolbox

* This toolbox is related to the paper ``Machine Learning in Pansharpening: A Benchmark, from Shallow to Deep Networks, IEEE Geoscience and Remote Sensing Magazine, 2022`` (see the following reference [1]). Download: [[paper]](https://github.com/liangjiandeng/liangjiandeng.github.io/tree/master/papers/2022/review-grsm2022.pdf).

* This is a deep learning (DL) toolbox for pansharpening, which can be used for training and testing DL models and for comparing traditional and DL methods.
 

## Introduction
This toolbox mainly contains two parts: one is the PyTorch source code for the eight DL-based methods presented in the paper (i.e., the folder "01-DL-toolbox(Pytorch)"); the other is the Matlab source code, which can simultaneously evaluate the performance of traditional and DL approaches in a unified framework (i.e., the folder "02-Test-toolbox-for-traditional-and-DL(Matlab)"). Please see more details:

- 01-DL-toolbox(Pytorch) contains source codes of DL methods, you may check the ``readme`` file for the usage.
- 02-Test-toolbox-for-traditional-and-DL(Matlab) contains Matlab source codes (mainly from 'G. Vivone et al., A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting pansharpening with classical and emerging pansharpening methods, IEEE GRSM, 2021', see the following reference [2]) for simultaneously evaluating traditional and DL approaches and outputting results; you may check the ``readme`` file for the usage.
- 03-Data-Simulation(Matlab) contains Matlab source codes that crop images into patches for training and validation. You can also simulate test examples with this toolbox.

Note that readers can also check the structure and relationship of these folders in the following ``overview figure`` (also available in the repository).


<img src="overview.png" width = "90%" />


## Dataset
Due to copyright issues, the datasets used in this GRSM paper are not available. Therefore, we recommend that readers use the following datasets for pansharpening, for both training and testing. They can be directly used in our DLPan-Toolbox (put the training data into the directory 01-DL-toolbox(Pytorch)/UDL/Data/pansharpening/training_data/).

- [[PanCollection](https://github.com/liangjiandeng/PanCollection)] for multispectral pansharpening
- [[HyperPanCollection](https://github.com/liangjiandeng/HyperPanCollection)] for hyperspectral pansharpening


## Citation
* [1] If you use this toolbox, please kindly cite our paper:

```bibtex
@ARTICLE{deng2022grsm,
author={L.-J. Deng and G. Vivone and M. E. Paoletti and G. Scarpa and J. He and Y. Zhang and J. Chanussot and A. Plaza},
journal={IEEE Geoscience and Remote Sensing Magazine},
title={Machine Learning in Pansharpening: A Benchmark, from Shallow to Deep Networks},
year={2022},
pages={2-38},
doi={10.1109/MGRS.2022.3187652}
}
```


* [2] Also, the codes of traditional methods are from the "pansharpening toolbox for distribution", thus please cite the corresponding paper:
```bibtex
@ARTICLE{vivone2021grsm,
  author={Vivone, Gemine and Dalla Mura, Mauro and Garzelli, Andrea and Restaino, Rocco and Scarpa, Giuseppe and Ulfarsson, Magnus O. and   Alparone, Luciano and Chanussot, Jocelyn},
  journal={IEEE Geoscience and Remote Sensing Magazine}, 
  title={A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting Pansharpening With Classical and Emerging Pansharpening Methods}, 
  year={2021},
  volume={9},
  number={1},
  pages={53-81},
  doi={10.1109/MGRS.2020.3019315}
}
```

## Acknowledgement

- We appreciate the great contribution to this toolbox of [Xiao Wu](https://xiaoxiao-woo.github.io/) and Ran Ran, who are graduate students in [UESTC](https://www.uestc.edu.cn/).


## License & Copyright
This project is open sourced under GNU General Public License v3.0.


================================================
FILE: docs/en/DLPanToolbox/Evaluation.md
================================================
## Evaluation

================================================
FILE: docs/en/DLPanToolbox/Example.md
================================================


================================================
FILE: docs/en/DLPanToolbox/PreProcess.md
================================================
## Data PreProcess

### Image

This module provides some image processing methods. It requires `opencv` to be installed first.

#### Read/Write/Show

To read or write image files, use `imread` or `imwrite`.

```python
import mmcv

img = mmcv.imread('test.jpg')
img = mmcv.imread('test.jpg', flag='grayscale')
img_ = mmcv.imread(img)  # nothing will happen, img_ = img
mmcv.imwrite(img, 'out.jpg')
```

To read images from bytes

```python
with open('test.jpg', 'rb') as f:
    data = f.read()
img = mmcv.imfrombytes(data)
```

To show an image file or a loaded image

```python
mmcv.imshow('tests/data/color.jpg')
# a loaded image (numpy array) can be shown as well, e.g. a series of random images:

for i in range(10):
    img = np.random.randint(256, size=(100, 100, 3), dtype=np.uint8)
    mmcv.imshow(img, win_name='test image', wait_time=200)
```

#### Color space conversion

Supported conversion methods:

- bgr2gray
- gray2bgr
- bgr2rgb
- rgb2bgr
- bgr2hsv
- hsv2bgr

```python
img = mmcv.imread('tests/data/color.jpg')
img1 = mmcv.bgr2rgb(img)
img2 = mmcv.rgb2gray(img1)
img3 = mmcv.bgr2hsv(img)
```

#### Resize

There are three resize methods. All `imresize_*` methods have an argument `return_scale`,
if this argument is `False`, then the return value is merely the resized image, otherwise
is a tuple `(resized_img, scale)`.

```python
# resize to a given size
mmcv.imresize(img, (1000, 600), return_scale=True)

# resize to the same size of another image
mmcv.imresize_like(img, dst_img, return_scale=False)

# resize by a ratio
mmcv.imrescale(img, 0.5)

# resize so that the max edge no longer than 1000, short edge no longer than 800
# without changing the aspect ratio
mmcv.imrescale(img, (1000, 800))
```

#### Rotate

To rotate an image by some angle, use `imrotate`. The center can be specified,
which is the center of original image by default. There are two modes of rotating,
one is to keep the image size unchanged so that some parts of the image will be
cropped after rotating, the other is to extend the image size to fit the rotated
image.

```python
img = mmcv.imread('tests/data/color.jpg')

# rotate the image clockwise by 30 degrees.
img_ = mmcv.imrotate(img, 30)

# rotate the image counterclockwise by 90 degrees.
img_ = mmcv.imrotate(img, -90)

# rotate the image clockwise by 30 degrees, and rescale it by 1.5x at the same time.
img_ = mmcv.imrotate(img, 30, scale=1.5)

# rotate the image clockwise by 30 degrees, with (100, 100) as the center.
img_ = mmcv.imrotate(img, 30, center=(100, 100))

# rotate the image clockwise by 30 degrees, and extend the image size.
img_ = mmcv.imrotate(img, 30, auto_bound=True)
```

#### Flip

To flip an image, use `imflip`.

```python
img = mmcv.imread('tests/data/color.jpg')

# flip the image horizontally
mmcv.imflip(img)

# flip the image vertically
mmcv.imflip(img, direction='vertical')
```

#### Crop

`imcrop` can crop the image with one or more regions. Each region is represented by the upper left and lower right coordinates as (x1, y1, x2, y2).

```python
import mmcv
import numpy as np

img = mmcv.imread('tests/data/color.jpg')

# crop the region (10, 10, 100, 120)
bboxes = np.array([10, 10, 100, 120])
patch = mmcv.imcrop(img, bboxes)

# crop two regions (10, 10, 100, 120) and (0, 0, 50, 50)
bboxes = np.array([[10, 10, 100, 120], [0, 0, 50, 50]])
patches = mmcv.imcrop(img, bboxes)

# crop two regions, and rescale the patches by 1.2x
patches = mmcv.imcrop(img, bboxes, scale=1.2)
```

#### Padding

There are two methods, `impad` and `impad_to_multiple`, to pad an image to the
specific size with given values.

```python
img = mmcv.imread('tests/data/color.jpg')

# pad the image to (1000, 1200) with all zeros
img_ = mmcv.impad(img, shape=(1000, 1200), pad_val=0)

# pad the image to (1000, 1200) with different values for three channels.
img_ = mmcv.impad(img, shape=(1000, 1200), pad_val=(100, 50, 200))

# pad the image on left, right, top, bottom borders with all zeros
img_ = mmcv.impad(img, padding=(10, 20, 30, 40), pad_val=0)

# pad the image on left, right, top, bottom borders with different values
# for three channels.
img_ = mmcv.impad(img, padding=(10, 20, 30, 40), pad_val=(100, 50, 200))

# pad an image so that each edge is a multiple of some value.
img_ = mmcv.impad_to_multiple(img, 32)
```

================================================
FILE: docs/en/DLPanToolbox/Simulation.md
================================================
## Simulation

================================================
FILE: docs/en/Makefile
================================================
# Minimal makefile for Sphinx documentation
#

# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS    ?=
SPHINXBUILD   ?= sphinx-build
# conf.py sits next to this Makefile, so the Sphinx source directory is the
# current directory rather than a "source" subdirectory.
SOURCEDIR     = .
# Build into "_build": conf.py lists "_build" in exclude_patterns, so the
# generated output is never picked up again as documentation sources.
BUILDDIR      = _build

# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

# "Makefile" is declared phony so the catch-all rule below never tries to
# rebuild this file itself.
.PHONY: help Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

================================================
FILE: docs/en/_static/css/readthedocs.css
================================================
/* Elements with class "header-logo": draw the DLPan logo as a background
   image scaled to 160x40 px, and size the box to match. */
.header-logo {
    background-image: url("../image/logo-dlpan.png");
    background-size: 160px 40px;
    height: 40px;
    width: 160px;
}

/* Cells of tables carrying the "colwidths-auto" class are each given half of
   the table width. */
table.colwidths-auto td {
    width: 50%
}

================================================
FILE: docs/en/_templates/classtemplate.rst
================================================
.. role:: hidden
    :class: hidden-section
.. currentmodule:: {{ module }}


{{ name | underline}}

.. autoclass:: {{ name }}
    :members:


..
  autogenerated from source/_templates/classtemplate.rst
  note it does not have :inherited-members:

================================================
FILE: docs/en/citation.md
================================================
## Cite DLPan-Toolbox
If DLPan-Toolbox is helpful for you, you are encouraged to cite the following paper:
```bibtex
@ARTICLE{deng2022grsm,
author={L.-J. Deng and G. Vivone and M. E. Paoletti and G. Scarpa and J. He and Y. Zhang and J. Chanussot and A. Plaza},
journal={IEEE Geoscience and Remote Sensing Magazine},
title={Machine Learning in Pansharpening: A Benchmark, from Shallow to Deep Networks},
year={2022},
pages={2-38},
doi={10.1109/MGRS.2022.3187652}
}
```

Also, the codes of traditional methods are from the "pansharpening toolbox for distribution", thus please cite the corresponding paper:
```bibtex
@ARTICLE{vivone2021grsm,
  author={Vivone, Gemine and Dalla Mura, Mauro and Garzelli, Andrea and Restaino, Rocco and Scarpa, Giuseppe and Ulfarsson, Magnus O. and   Alparone, Luciano and Chanussot, Jocelyn},
  journal={IEEE Geoscience and Remote Sensing Magazine}, 
  title={A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting Pansharpening With Classical and Emerging Pansharpening Methods}, 
  year={2021},
  volume={9},
  number={1},
  pages={53-81},
  doi={10.1109/MGRS.2020.3019315}
}
```

================================================
FILE: docs/en/conf.py
================================================
#  GPL v3.0 License
#  Copyright (C) UESTC
#  All Rights Reserved
#  @Time    : 2023/9/21
#  @Author  : Xiao Wu
#  @reference:
#

#
# Configuration file for the Sphinx documentation builder.
#
# This file does only contain a selection of the most common options. For a
# full list see the documentation:
# http://www.sphinx-doc.org/en/master/config

# -- Path setup --------------------------------------------------------------

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import os
import sys

import pytorch_sphinx_theme
from sphinx.builders.html import StandaloneHTMLBuilder

sys.path.insert(0, os.path.abspath('../..'))

# version_file = '../../mmcv/version.py'
# with open(version_file) as f:
#     exec(compile(f.read(), version_file, 'exec'))
# __version__ = locals()['__version__']
__version__ = "0.3.6"

# -- Project information -----------------------------------------------------

project = 'DLPan-Toolbox'
copyright = '2023, UESTC'
author = 'Xiao Wu'

# The short X.Y version
version = __version__
# The full version, including alpha/beta/rc tags
release = __version__

# -- General configuration ---------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
#
# needs_sphinx = '1.0'

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.

extensions = [
    'sphinx.ext.autodoc',
    'sphinx.ext.autosummary',
    'sphinx.ext.intersphinx',
    'sphinx.ext.napoleon',
    'sphinx.ext.viewcode',
    'sphinx_markdown_tables',
    'myst_parser',
    'sphinx_copybutton',
]  # yapf: disable

myst_heading_anchors = 4

myst_enable_extensions = ['colon_fence']

# Configuration for intersphinx
intersphinx_mapping = {
    'python': ('https://docs.python.org/3', None),
    'numpy': ('https://numpy.org/doc/stable', None),
    'torch': ('https://pytorch.org/docs/stable/', None)
}

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
#
source_suffix = {
    '.rst': 'restructuredtext',
    '.md': 'markdown',
}

# The master toctree document.
master_doc = 'index'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
#
# Sphinx >= 5 no longer accepts ``None`` here (it warns and falls back to
# 'en'), so the English default is stated explicitly.
language = 'en'

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'

# -- Options for HTML output -------------------------------------------------

# The theme to use for HTML and HTML Help pages.  See the documentation for
# a list of builtin themes.
#
# html_theme = 'sphinx_rtd_theme'
html_theme = 'pytorch_sphinx_theme'
html_theme_path = [pytorch_sphinx_theme.get_html_theme_path()]

# Theme options are theme-specific and customize the look and feel of a theme
# further.  For a list of options available for each theme, see the
# documentation.
#
html_theme_options = {
    'menu': [
        {
            'name': 'GitHub',
            'url': 'https://github.com/liangjiandeng/DLPan-Toolbox'
        },
    ],
    # Specify the language of shared menu
    'menu_lang': 'en',
}

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
html_css_files = ['css/readthedocs.css']

# Custom sidebar templates, must be a dictionary that maps document names
# to template names.
#
# The default sidebars (for documents that don't match any pattern) are
# defined by theme itself.  Builtin themes are using these templates by
# default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
# 'searchbox.html']``.
#
# html_sidebars = {}

# -- Options for HTMLHelp output ---------------------------------------------

# Output file base name for HTML help builder.
# Follows the sphinx-quickstart convention "<project>doc"; the previous value
# contained a stray "n" ('dlpantoolboxndoc').
htmlhelp_basename = 'dlpantoolboxdoc'

# -- Options for LaTeX output ------------------------------------------------

latex_elements = {
    # The paper size ('letterpaper' or 'a4paper').
    #
    # 'papersize': 'letterpaper',

    # The font size ('10pt', '11pt' or '12pt').
    #
    # 'pointsize': '10pt',

    # Additional stuff for the LaTeX preamble.
    #
    # 'preamble': '',

    # Latex figure (float) alignment
    #
    # 'figure_align': 'htbp',
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
#  author, documentclass [howto, manual, or own class]).
latex_documents = [
    (master_doc, 'dlpantoolbox.tex', 'dlpantoolbox Documentation', 'DLPanToolbox Contributors',
     'manual'),
]

# -- Options for manual page output ------------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [(master_doc, 'dlpantoolbox', 'dlpantoolbox Documentation', [author], 1)]

# -- Options for Texinfo output ----------------------------------------------

# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
#  dir menu entry, description, category)
# The description replaces the sphinx-quickstart placeholder
# ('One line description of project.') with an actual summary of the project.
texinfo_documents = [
    (master_doc, 'dlpantoolbox', 'dlpantoolbox Documentation', author, 'dlpantoolbox',
     'A deep learning toolbox for pansharpening.', 'Miscellaneous'),
]

# -- Options for Epub output -------------------------------------------------

# Bibliographic Dublin Core info.
epub_title = project

# The unique identifier of the text. This can be a ISBN number
# or the project homepage.
#
# epub_identifier = ''

# A unique identification for the text.
#
# epub_uid = ''

# A list of files that should not be packed into the epub file.
epub_exclude_files = ['search.html']

# set priority when building html
StandaloneHTMLBuilder.supported_image_types = [
    'image/svg+xml', 'image/gif', 'image/png', 'image/jpeg'
]
# -- Extension configuration -------------------------------------------------
# Ignore >>> when copying code
copybutton_prompt_text = r'>>> |\.\.\. '
copybutton_prompt_is_regexp = True

================================================
FILE: docs/en/docutils.conf
================================================
[html writers]
table_style: colwidths-auto

================================================
FILE: docs/en/faq.md
================================================
## Frequently Asked Questions

We list some common troubles faced by many users and their corresponding solutions here.
Feel free to enrich the list if you find any frequent issues and have ways to help others to solve them.

### Installation

- KeyError: "xxx: 'yyy is not in the zzz registry'"

  The registry mechanism will be triggered only when the file of the module is imported.
  So you need to import that file somewhere. More details can be found at [KeyError: "MaskRCNN: 'RefineRoIHead is not in the models registry'"](https://github.com/open-mmlab/mmdetection/issues/5974).

- "No module named 'mmcv.ops'"; "No module named 'mmcv.\_ext'"

  1. Uninstall existing mmcv in the environment using `pip uninstall mmcv`
  2. Install mmcv-full following the [installation instruction](https://mmcv.readthedocs.io/en/latest/get_started/installation.html) or [Build MMCV from source](https://mmcv.readthedocs.io/en/latest/get_started/build.html)

- "invalid device function" or "no kernel image is available for execution"

  1. Check the CUDA compute capability of your GPU
  2. Run `python mmdet/utils/collect_env.py` to check whether PyTorch, torchvision, and MMCV are built for the correct GPU architecture. You may need to set `TORCH_CUDA_ARCH_LIST` to reinstall MMCV. The compatibility issue could happen when using old GPUs, e.g., Tesla K80 (3.7) on colab.
  3. Check whether the running environment is the same as that when mmcv/mmdet is compiled. For example, you may compile mmcv using CUDA 10.0 but run it on CUDA 9.0 environments

- "undefined symbol" or "cannot open xxx.so"

  1. If those symbols are CUDA/C++ symbols (e.g., libcudart.so or GLIBCXX), check
     whether the CUDA/GCC runtimes are the same as those used for compiling mmcv
  2. If those symbols are Pytorch symbols (e.g., symbols containing caffe, aten, and TH), check whether the Pytorch version is the same as that used for compiling mmcv
  3. Run `python mmdet/utils/collect_env.py` to check whether PyTorch, torchvision, and MMCV are built by and running on the same environment

- "RuntimeError: CUDA error: invalid configuration argument"

  This error may be caused by the poor performance of GPU. Try to decrease the value of [THREADS_PER_BLOCK](https://github.com/open-mmlab/mmcv/blob/cac22f8cf5a904477e3b5461b1cc36856c2793da/mmcv/ops/csrc/common_cuda_helper.hpp#L10)
  and recompile mmcv.

- "RuntimeError: nms is not compiled with GPU support"

  This error is because your CUDA environment is not installed correctly.
  You may try to re-install your CUDA environment and then delete the build/ folder before re-compile mmcv.

- "Segmentation fault"

  1. Check your GCC version and use GCC >= 5.4. This is usually caused by the incompatibility between PyTorch and the environment (e.g., GCC \< 4.9 for PyTorch). We also recommend that users avoid GCC 5.5, because many reports indicate that GCC 5.5 causes "segmentation fault" and that simply changing it to GCC 5.4 solves the problem
  2. Check whether PyTorch is correctly installed and could use CUDA op, e.g. type the following command in your terminal and see whether they could correctly output results
     ```shell
     python -c 'import torch; print(torch.cuda.is_available())'
     ```
  3. If PyTorch is correctly installed, check whether MMCV is correctly installed. If MMCV is correctly installed, then there will be no issue of the command
     ```shell
     python -c 'import mmcv; import mmcv.ops'
     ```
  4. If MMCV and PyTorch are correctly installed, you can use `ipdb` to set breakpoints or directly add `print` to debug and see which part leads the `segmentation fault`

- "libtorch_cuda_cu.so: cannot open shared object file"

  `mmcv-full` depends on this shared object, but it cannot be found. Check whether the object exists in `~/miniconda3/envs/{environment-name}/lib/python3.7/site-packages/torch/lib`, or try to re-install PyTorch.

- "fatal error C1189: #error:  -- unsupported Microsoft Visual Studio version!"

  If you are building mmcv-full on Windows and the version of CUDA is 9.2, you will probably encounter the error `"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.2\include\crt/host_config.h(133): fatal error C1189: #error:  -- unsupported Microsoft Visual Studio version! Only the versions 2012, 2013, 2015 and 2017 are supported!"`, in which case you can use a lower version of Microsoft Visual Studio like vs2017.

- "error: member "torch::jit::detail::ModulePolicy::all_slots" may not be initialized"

  If your version of PyTorch is 1.5.0 and you are building mmcv-full on Windows, you will probably encounter the error `- torch/csrc/jit/api/module.h(474): error: member "torch::jit::detail::ModulePolicy::all_slots" may not be initialized`. The way to solve the error is to replace all the `static constexpr bool all_slots = false;` with `static bool all_slots = false;` at this file `https://github.com/pytorch/pytorch/blob/v1.5.0/torch/csrc/jit/api/module.h`. More details can be found at [member "torch::jit::detail::AttributePolicy::all_slots" may not be initialized](https://github.com/pytorch/pytorch/issues/39394).

- "error: a member with an in-class initializer must be const"

  If your version of PyTorch is 1.6.0 and you are building mmcv-full on Windows, you will probably encounter the error `"- torch/include\torch/csrc/jit/api/module.h(483): error: a member with an in-class initializer must be const"`. The way to solve the error is to replace all the `CONSTEXPR_EXCEPT_WIN_CUDA ` with `const` at `torch/include\torch/csrc/jit/api/module.h`. More details can be found at [Ninja: build stopped: subcommand failed](https://github.com/open-mmlab/mmcv/issues/575).

- "error: member "torch::jit::ProfileOptionalOp::Kind" may not be initialized"

  If your version of PyTorch is 1.7.0 and you are building mmcv-full on Windows, you will probably encounter the error `torch/include\torch/csrc/jit/ir/ir.h(1347): error: member "torch::jit::ProfileOptionalOp::Kind" may not be initialized`. The way to solve the error needs to modify several local files of PyTorch:

  - delete `static constexpr Symbol Kind = ::c10::prim::profile;` and `static constexpr Symbol Kind = ::c10::prim::profile_optional;` at `torch/include\torch/csrc/jit/ir/ir.h`
  - replace `explicit operator type&() { return *(this->value); }` with `explicit operator type&() { return *((type*)this->value); }` at `torch\include\pybind11\cast.h`
  - replace all the `CONSTEXPR_EXCEPT_WIN_CUDA` with `const` at `torch/include\torch/csrc/jit/api/module.h`

  More details can be found at [Ensure default extra_compile_args](https://github.com/pytorch/pytorch/pull/45956).

- Compatibility issue between MMCV and MMDetection; "ConvWS is already registered in conv layer"

  Please install the correct version of MMCV for the version of your MMDetection following the [installation instruction](https://mmdetection.readthedocs.io/en/latest/get_started.html#installation).

### Usage

- "RuntimeError: Expected to have finished reduction in the prior iteration before starting a new one"

  1. This error indicates that your module has parameters that were not used in producing loss. This phenomenon may be caused by running different branches in your code in DDP mode. More details at [Expected to have finished reduction in the prior iteration before starting a new one](https://github.com/pytorch/pytorch/issues/55582).
  2. You can set `find_unused_parameters = True` in the config to solve the above problems or find those unused parameters manually

- "RuntimeError: Trying to backward through the graph a second time"

  `GradientCumulativeOptimizerHook` and `OptimizerHook` are both set, which causes `loss.backward()` to be called twice, so a `RuntimeError` is raised. Only one of them can be used at a time. More details at [Trying to backward through the graph a second time](https://github.com/open-mmlab/mmcv/issues/1379).

================================================
FILE: docs/en/get_started/Installation.md
================================================
## Installation

There are two versions of MMCV:

- **mmcv**: comprehensive, with full features and various CUDA ops out of box. It takes longer time to build.
- **mmcv-lite**: lite, without CUDA ops but all other features, similar to mmcv\<1.0.0. It is useful when you do not need those CUDA ops.

```{warning}
Do not install both versions in the same environment, otherwise you may encounter errors like `ModuleNotFound`. You need to uninstall one before installing the other. `Installing the full version is highly recommended if CUDA is available`.
```

### Install mmcv

Before installing mmcv, make sure that PyTorch has been successfully installed following the [PyTorch official installation guide](https://pytorch.org/get-started/locally/#start-locally). This can be verified using the following command

```bash
python -c 'import torch;print(torch.__version__)'
```

If version information is output, then PyTorch is installed.

#### Install with mim (recommended)

[mim](https://github.com/open-mmlab/mim) is the package management tool for the OpenMMLab projects, which makes it easy to install mmcv

```bash
pip install -U openmim
mim install mmcv
```

If you find that the above installation command does not use a pre-built package ending with `.whl` but a source package ending with `.tar.gz`, you may not have a pre-built package corresponding to the PyTorch or CUDA or mmcv version, in which case you can [build mmcv from source](build.md).

<details>
<summary>Installation log using pre-built packages</summary>

Looking in links: https://download.openmmlab.com/mmcv/dist/cu102/torch1.8.0/index.html<br />
Collecting mmcv<br />
<b>Downloading https://download.openmmlab.com/mmcv/dist/cu102/torch1.8.0/mmcv-2.0.0-cp38-cp38-manylinux1_x86_64.whl</b>

</details>

<details>
<summary>Installation log using source packages</summary>

Looking in links: https://download.openmmlab.com/mmcv/dist/cu102/torch1.8.0/index.html<br />
Collecting mmcv==2.0.0<br />
<b>Downloading mmcv-2.0.0.tar.gz</b>

</details>

To install a specific version of mmcv, for example, mmcv version 2.0.0, you can use the following command

```bash
mim install mmcv==2.0.0
```

:::{note}
If you would like to use `opencv-python-headless` instead of `opencv-python`,
e.g., in a minimum container environment or servers without GUI,
you can first install it before installing MMCV to skip the installation of `opencv-python`.

Alternatively, if it takes too long to install a dependency library, you can specify the pypi source

```bash
mim install mmcv -i https://pypi.tuna.tsinghua.edu.cn/simple
```

:::

You can run [check_installation.py](https://github.com/open-mmlab/mmcv/blob/main/.dev_scripts/check_installation.py) to check the installation of mmcv after running the installation commands.

#### Install with pip

Use the following command to check the version of CUDA and PyTorch

```bash
python -c 'import torch;print(torch.__version__);print(torch.version.cuda)'
```

Select the appropriate installation command depending on the type of system, CUDA version, PyTorch version, and MMCV version

<html>
<body>
    <style>
      /* All dropdowns share one width and are absolutely positioned inside #select-container. */
      select {
          z-index: 1000;
          position: absolute;
          top: 10px;
          width: 6.7rem;
      }
      /* Positioning context for the absolutely positioned dropdowns. */
      #select-container {
          position: relative;
          height: 30px;
      }
      /* Box that displays the generated install command. */
      #select-cmd {
          background-color: #f5f6f7;
          font-size: 14px;
          margin-top: 20px;
      }
      /* Keep a 1.3rem gap between dropdowns: each is 6.7rem wide, so `left` advances by 8rem. */
      #select-os {
          /* left: 1.375rem; */
          left: 0;
      }
      #select-cuda {
          /* left: 9.375rem;    9.375 = 1.375 + 6.7 + 1.3 */
          left: 8rem;
      }
      #select-torch {
          /* left: 17.375rem;    17.375 = 9.375 + 6.7 + 1.3 */
          left: 16rem;
      }
      #select-mmcv {
          /* left: 25.375rem;    25.375 = 17.375 + 6.7 + 1.3 */
          left: 24rem;
      }
    </style>
    <div id="select-container">
        <select
            onmousedown="handleSelectMouseDown(this.id)"
            onblur="handleSelectBlur(this.id)"
            onchange="changeOS(this.value)"
            id="select-os">
        </select>
        <select
            onmousedown="handleSelectMouseDown(this.id)"
            onblur="handleSelectBlur(this.id)"
            onchange="changeCUDA(this.value)"
            id="select-cuda">
        </select>
        <select
            onmousedown="handleSelectMouseDown(this.id)"
            onblur="handleSelectBlur(this.id)"
            onchange="changeTorch(this.value)"
            id="select-torch">
        </select>
        <select
            onmousedown="handleSelectMouseDown(this.id)"
            onblur="handleSelectBlur(this.id)"
            onchange="changeMMCV(this.value)"
            id="select-mmcv">
        </select>
    </div>
    <pre id="select-cmd"></pre>
</body>
<script>
    // Currently selected value of each dropdown. Written by the change* handlers
    // below and by change()/init(); read by changeCmd().
    let osVal, cudaVal, torchVal, mmcvVal;
    /**
     * `onchange` handler of the mmcv dropdown: record the chosen mmcv version
     * and refresh the displayed command.
     * @param {string} val - value of the selected option.
     */
    function changeMMCV(val) {
        mmcvVal = val;
        change("select-mmcv");
    }
    /**
     * `onchange` handler of the PyTorch dropdown: record the chosen version and
     * let change() rebuild the dropdowns that depend on it.
     * @param {string} val - value of the selected option.
     */
    function changeTorch(val) {
        torchVal = val;
        change("select-torch");
    }
    /**
     * `onchange` handler of the CUDA dropdown: record the chosen version and
     * let change() rebuild the dropdowns that depend on it.
     * @param {string} val - value of the selected option.
     */
    function changeCUDA(val) {
        cudaVal = val;
        change("select-cuda");
    }
    /**
     * `onchange` handler of the OS dropdown: record the chosen OS and let
     * change() rebuild every other dropdown.
     * @param {string} val - value of the selected option.
     */
    function changeOS(val) {
        osVal = val;
        change("select-os");
    }
    /**
     * `onmousedown` handler of a dropdown. A dropdown holding nine or more
     * options is switched to a ten-row list box and given a z-index of 100;
     * shorter dropdowns and unknown ids are left untouched.
     * @param {string} id - id of the <select> element that was pressed.
     */
    function handleSelectMouseDown(id) {
        const select = document.getElementById(id);
        if (!select) return;
        const optionCount = select.options?.length;
        // A missing option list yields `undefined`, which fails the comparison.
        if (!(optionCount >= 9)) return;
        select.size = 10;
        select.style.zIndex = 100;
    }
    /**
     * Collapse every <select> on the page back to a single visible row.
     */
    function handleSelectClick() {
        const allSelects = Array.from(document.getElementsByTagName("select"));
        for (const element of allSelects) {
            element.size = 1;
        }
    }
    /**
     * `onblur` handler of a dropdown: collapse it to one row and set its
     * z-index to 1. When no element with the given id exists, every dropdown
     * on the page is collapsed instead.
     * @param {string} id - id of the <select> element that lost focus.
     */
    function handleSelectBlur(id) {
        const select = document.getElementById(id);
        if (select) {
            select.size = 1;
            select.style.zIndex = 1;
        } else {
            handleSelectClick();
        }
    }
    /**
     * Render the `pip install` command for the current selection into the
     * `#select-cmd` element.
     *
     * Reads the shared `cudaVal`, `torchVal` and `mmcvVal` selections. While
     * any of them is still unset the command box is cleared instead of
     * throwing, and a missing `#select-cmd` element is ignored.
     */
    function changeCmd() {
        const cmd = document.getElementById("select-cmd");
        if (!cmd) return;
        if (!cudaVal || !torchVal || !mmcvVal) {
            // Incomplete selection: there is no meaningful command to show yet.
            cmd.textContent = "";
            return;
        }
        // e.g: pip install mmcv==2.0.0rc1 -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.9/index.html
        // "cpu" and "mps" both use the CPU index; "11.3" becomes "cu113".
        const cudaVersion = (cudaVal === "cpu" || cudaVal === "mps")
            ? "cpu"
            : `cu${cudaVal.split(".").join("")}`;
        // Keep only "major.minor" ("1.10.x" -> "1.10") without assuming how
        // long the patch component is, or that one is present at all.
        const torchVersion = `torch${torchVal.split(".").slice(0, 2).join(".")}`;
        cmd.textContent = `pip install mmcv==${mmcvVal} -f https://download.openmmlab.com/mmcv/dist/${cudaVersion}/${torchVersion}/index.html`;
    }
    /**
     * Return the distinct values of an array in first-occurrence order.
     * @param {Array} arr - values to de-duplicate.
     * @returns {Array} a new array; empty when `arr` is not an array.
     */
    function unique(arr) {
        return Array.isArray(arr) ? Array.from(new Set(arr)) : [];
    }
    /**
     * Build a DocumentFragment holding one <option> per entry of `data`.
     *
     * The label prefix is the part of `id` after the first "-" ("select-cuda"
     * gives "cuda"). OS options and the special values "cpu"/"mps" (compared
     * case-insensitively) are shown without the prefix. Every option collapses
     * all dropdowns when clicked.
     * @param {string[]} data - option values.
     * @param {string} id - id of the <select> the options are meant for.
     * @returns {DocumentFragment} fragment containing the generated options.
     */
    function genOptionFragment(data, id) {
        const label = id.includes("-") ? id.split("-")[1] : id;
        const fragment = new DocumentFragment();
        data.forEach(value => {
            const item = document.createElement("option");
            const showBare = label === "os" || ["CPU", "MPS"].includes(value.toUpperCase());
            item.textContent = showBare ? `${value}` : `${label} ${value}`;
            item.value = value;
            item.addEventListener('click', handleSelectClick);
            fragment.appendChild(item);
        });
        return fragment;
    }
    /**
     * Replace the options of the <select> with the given id by options
     * generated from `data`. Nothing is replaced when the element is missing.
     * @param {string[]} data - option values.
     * @param {string} id - id of the target <select>.
     */
    function findAndAppend(data, id) {
        const options = genOptionFragment(data, id);
        document.getElementById(id)?.replaceChildren(options);
    }
    /**
     * Cascade a dropdown change to the dropdowns that depend on it, then
     * refresh the displayed command.
     *
     * Dependency order is OS -> CUDA -> PyTorch -> mmcv: changing one dropdown
     * rebuilds every dropdown to its right and selects the first option of
     * each. Changing the mmcv dropdown only refreshes the command.
     *
     * The version table is read from `window.version`, where the page's load
     * handler stores the parsed `version.json`. If the table has no usable
     * entry list for the selected OS the function returns without touching
     * anything.
     * @param {string} id - id of the dropdown that changed; unknown ids are ignored.
     */
    function change(id) {
        const order = ["select-mmcv", "select-torch", "select-cuda", "select-os"];
        const idx = order.indexOf(id);
        if (idx === -1) return;
        if (idx >= 1) {
            const versionDetail = (window.version || {})[osVal];
            if (!Array.isArray(versionDetail)) return;
            if (idx >= 3) {
                const cuda = unique(versionDetail.map(v => v.cuda));
                cudaVal = cuda[0];
                findAndAppend(cuda, "select-cuda");
            }
            if (idx >= 2) {
                // De-duplicated like the CUDA list so repeated entries do not
                // produce repeated options.
                const torch = unique(
                    versionDetail.filter(v => v.cuda === cudaVal).map(v => v.torch)
                );
                torchVal = torch[0];
                findAndAppend(torch, "select-torch");
            }
            // idx >= 1 always holds here: rebuild the mmcv dropdown.
            let mmcv = [];
            versionDetail.forEach(v => {
                // The last matching entry wins; entries without a list are skipped.
                if (v.cuda === cudaVal && v.torch === torchVal && Array.isArray(v.mmcv)) {
                    mmcv = v.mmcv;
                }
            });
            mmcvVal = mmcv[0];
            findAndAppend(mmcv, "select-mmcv");
        }
        changeCmd();
    }
    /**
     * Populate the version picker once `window.version` holds the parsed
     * version table.
     *
     * Fills the OS dropdown with the table's keys, selects the first OS and
     * lets change() build the dependent dropdowns and the install command.
     * Does nothing beyond registering the click listener when the table is
     * missing or empty.
     */
    function init() {
        // Any click on the page collapses expanded dropdowns. The listener is
        // handed a MouseEvent, not an element id, so the id-free collapse
        // handler is registered directly.
        document.addEventListener("click", handleSelectClick);
        const os = Object.keys(window.version || {});
        if (os.length === 0) return;
        osVal = os[0];
        findAndAppend(os, "select-os");
        // change() rebuilds the CUDA/PyTorch/mmcv dropdowns and finishes by
        // calling changeCmd(), so no separate refresh is needed here.
        change("select-os");
    }
    // Once the page has loaded, fetch the table of available pre-built
    // packages and initialise the picker from it. addEventListener is used so
    // that load handlers installed by other scripts on the page are neither
    // overwritten by this one nor able to overwrite it.
    window.addEventListener("load", function () {
        const url = "../_static/version.json";
        const request = new XMLHttpRequest();
        // Attach the callback before the request is sent.
        request.onload = function () {
            // Leave the picker empty on any non-200 response.
            if (request.status !== 200) return;
            let data;
            try {
                data = JSON.parse(request.responseText);
            } catch (err) {
                // A malformed file must not surface as an uncaught exception.
                console.error(`Failed to parse ${url}:`, err);
                return;
            }
            window.version = data;
            init();
        };
        request.open("get", url);
        request.send(null);
    });
</script>
</html>

If you do not find a corresponding version in the dropdown box above, you probably do not have a pre-built package corresponding to the PyTorch or CUDA or mmcv version, at which point you can [build mmcv from source](build.md).

:::{note}
mmcv is only compiled on PyTorch 1.x.0 because the compatibility
usually holds between 1.x.0 and 1.x.1. If your PyTorch version is 1.x.1, you
can install mmcv compiled with PyTorch 1.x.0 and it usually works well.
For example, if your PyTorch version is 1.8.1, you can feel free to choose 1.8.x.
:::

:::{note}
If you would like to use `opencv-python-headless` instead of `opencv-python`,
e.g., in a minimum container environment or servers without GUI,
you can first install it before installing MMCV to skip the installation of `opencv-python`.

Alternatively, if it takes too long to install a dependency library, you can specify the pypi source

```bash
mim install mmcv -i https://pypi.tuna.tsinghua.edu.cn/simple
```

:::

You can run [check_installation.py](https://github.com/open-mmlab/mmcv/blob/main/.dev_scripts/check_installation.py) to check the installation of mmcv after running the installation commands.

#### Using mmcv with Docker

Build with local repository

```bash
git clone https://github.com/open-mmlab/mmcv.git && cd mmcv
docker build -t mmcv -f docker/release/Dockerfile .
```

Or build with remote repository

```bash
docker build -t mmcv https://github.com/open-mmlab/mmcv.git#main:docker/release
```

The [Dockerfile](release/Dockerfile) installs the latest released version of mmcv by default, but you can set the `MMCV` build argument to install a specific version.

```bash
docker image build -t mmcv -f docker/release/Dockerfile --build-arg MMCV=2.0.0 .
```

If you also want to use other versions of PyTorch and CUDA, you can also pass them when building docker images.

An example to build an image with PyTorch 1.11 and CUDA 11.3.

```bash
docker build -t mmcv -f docker/release/Dockerfile \
    --build-arg PYTORCH=1.11.0 \
    --build-arg CUDA=11.3 \
    --build-arg CUDNN=8 \
    --build-arg MMCV=2.0.0 .
```

More available versions of PyTorch and CUDA can be found at [dockerhub/pytorch](https://hub.docker.com/r/pytorch/pytorch/tags).

### Install mmcv-lite

If you need to use PyTorch-related modules, make sure PyTorch has been successfully installed in your environment by referring to the [PyTorch official installation guide](https://github.com/pytorch/pytorch#installation).

```bash
pip install mmcv-lite
```

================================================
FILE: docs/en/get_started/Introduction.md
================================================
## Introduction

MMCV is a foundational library for computer vision research and provides the following functionalities.

- [Image/Video processing](../understand_mmcv/data_process.md)
- [Image and annotation visualization](../understand_mmcv/visualization.md)
- [Image transformation](../understand_mmcv/data_transform.md)
- [Various CNN architectures](../understand_mmcv/cnn.md)
- [High-quality implementation of common CUDA ops](../understand_mmcv/ops.md)

It supports the following systems:

- Linux
- Windows
- macOS

It supports many research projects as below:

- [MMClassification](https://github.com/open-mmlab/mmclassification): OpenMMLab image classification toolbox and benchmark.
- [MMDetection](https://github.com/open-mmlab/mmdetection): OpenMMLab detection toolbox and benchmark.
- [MMDetection3D](https://github.com/open-mmlab/mmdetection3d): OpenMMLab's next-generation platform for general 3D object detection.
- [MMRotate](https://github.com/open-mmlab/mmrotate): OpenMMLab rotated object detection toolbox and benchmark.
- [MMYOLO](https://github.com/open-mmlab/mmyolo): OpenMMLab YOLO series toolbox and benchmark.
- [MMSegmentation](https://github.com/open-mmlab/mmsegmentation): OpenMMLab semantic segmentation toolbox and benchmark.
- [MMOCR](https://github.com/open-mmlab/mmocr): OpenMMLab text detection, recognition, and understanding toolbox.
- [MMPose](https://github.com/open-mmlab/mmpose): OpenMMLab pose estimation toolbox and benchmark.
- [MMHuman3D](https://github.com/open-mmlab/mmhuman3d): OpenMMLab 3D human parametric model toolbox and benchmark.
- [MMSelfSup](https://github.com/open-mmlab/mmselfsup): OpenMMLab self-supervised learning toolbox and benchmark.
- [MMRazor](https://github.com/open-mmlab/mmrazor): OpenMMLab model compression toolbox and benchmark.
- [MMFewShot](https://github.com/open-mmlab/mmfewshot): OpenMMLab fewshot learning toolbox and benchmark.
- [MMAction2](https://github.com/open-mmlab/mmaction2): OpenMMLab's next-generation action understanding toolbox and benchmark.
- [MMTracking](https://github.com/open-mmlab/mmtracking): OpenMMLab video perception toolbox and benchmark.
- [MMFlow](https://github.com/open-mmlab/mmflow): OpenMMLab optical flow toolbox and benchmark.
- [MMEditing](https://github.com/open-mmlab/mmediting): OpenMMLab image and video editing toolbox.
- [MMGeneration](https://github.com/open-mmlab/mmgeneration): OpenMMLab image and video generative models toolbox.
- [MMDeploy](https://github.com/open-mmlab/mmdeploy): OpenMMLab model deployment framework.

================================================
FILE: docs/en/index.rst
================================================
Welcome to DLPanToolbox's documentation!
========================================


You can switch between Chinese and English documents in the lower-left corner of the layout.

.. toctree::
   :caption: Switch Language

   switch_language.md

.. toctree::
   :glob:
   :caption: Get Started

   get_started/Introduction.md
   get_started/Installation.md

.. toctree::
   :glob:
   :caption: PanCollection

   PanCollection/Simulation.md
   PanCollection/PreProcess.md
   PanCollection/Example.md
   PanCollection/Evaluation.md

.. toctree::
   :caption: Utilization

   citation.md
   faq.md

.. toctree::
   :glob:
   :maxdepth: 2
   :caption: Python API


Indices and tables
==================

* :ref:`genindex`
* :ref:`search`

================================================
FILE: docs/en/make.bat
================================================
@ECHO OFF

REM Work from the directory that contains this script; undone by popd at :end.
pushd %~dp0

REM Command file for Sphinx documentation

REM Defaults; SPHINXBUILD can be overridden from the environment.
if "%SPHINXBUILD%" == "" (
	set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=.
set BUILDDIR=_build

REM Without a target argument, show the Sphinx help.
if "%1" == "" goto help

REM Probe run with all output discarded: errorlevel 9009 or higher is taken to
REM mean that the sphinx-build command could not be found.
%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
	echo.
	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
	echo.installed, then set the SPHINXBUILD environment variable to point
	echo.to the full path of the 'sphinx-build' executable. Alternatively you
	echo.may add the Sphinx directory to PATH.
	echo.
	echo.If you don't have Sphinx installed, grab it from
	echo.http://sphinx-doc.org/
	exit /b 1
)

REM Forward the requested target (%1) to Sphinx "make mode".
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
goto end

:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%

:end
popd

================================================
FILE: docs/en/switch_language.md
================================================
## <a href='https://DLPan-Toolbox.readthedocs.io/en/latest/'>Change to English</a>

## <a href='https://DLPan-Toolbox.readthedocs.io/zh_CN/latest/'>切换到简体中文</a>

================================================
FILE: docs/requirements.txt
================================================
-e git+https://github.com/XiaoXiao-Woo/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme
sphinx==4.0.2
sphinx-copybutton
sphinx_markdown_tables>=0.0.16
myst-parser
sphinx-autobuild

================================================
FILE: docs/run.sh
================================================
# Live-preview the English docs: sphinx-autobuild takes the source directory
# (en) and the output directory (build/html) as its two positional arguments.
# NOTE(review): both paths are relative, so this presumably has to be run from docs/ — confirm.
sphinx-autobuild en build/html

================================================
FILE: docs/zh-cn/DLPanToolbox/Evaluation.md
================================================
## Evaluation

================================================
FILE: docs/zh-cn/DLPanToolbox/Example.md
================================================


================================================
FILE: docs/zh-cn/DLPanToolbox/PreProcess.md
================================================
## Data PreProcess

### Image

This module provides some image processing methods, which requires `opencv` to be installed first.

#### Read/Write/Show

To read or write images files, use `imread` or `imwrite`.

```python
import mmcv

img = mmcv.imread('test.jpg')
img = mmcv.imread('test.jpg', flag='grayscale')
img_ = mmcv.imread(img)  # nothing will happen, img_ = img
mmcv.imwrite(img, 'out.jpg')
```

To read images from bytes

```python
with open('test.jpg', 'rb') as f:
    data = f.read()
img = mmcv.imfrombytes(data)
```

To show an image file or a loaded image

```python
mmcv.imshow('tests/data/color.jpg')
# this is equivalent to
img = mmcv.imread('tests/data/color.jpg')
mmcv.imshow(img)

for i in range(10):
    img = np.random.randint(256, size=(100, 100, 3), dtype=np.uint8)
    mmcv.imshow(img, win_name='test image', wait_time=200)
```

#### Color space conversion

Supported conversion methods:

- bgr2gray
- gray2bgr
- bgr2rgb
- rgb2bgr
- bgr2hsv
- hsv2bgr

```python
img = mmcv.imread('tests/data/color.jpg')
img1 = mmcv.bgr2rgb(img)
img2 = mmcv.rgb2gray(img1)
img3 = mmcv.bgr2hsv(img)
```

#### Resize

There are three resize methods. All `imresize_*` methods have an argument `return_scale`,
if this argument is `False`, then the return value is merely the resized image, otherwise
is a tuple `(resized_img, scale)`.

```python
# resize to a given size
mmcv.imresize(img, (1000, 600), return_scale=True)

# resize to the same size of another image
mmcv.imresize_like(img, dst_img, return_scale=False)

# resize by a ratio
mmcv.imrescale(img, 0.5)

# resize so that the long edge is no longer than 1000 and the short edge is no longer than 800,
# without changing the aspect ratio
mmcv.imrescale(img, (1000, 800))
```

#### Rotate

To rotate an image by some angle, use `imrotate`. The center can be specified,
which is the center of original image by default. There are two modes of rotating,
one is to keep the image size unchanged so that some parts of the image will be
cropped after rotating, the other is to extend the image size to fit the rotated
image.

```python
img = mmcv.imread('tests/data/color.jpg')

# rotate the image clockwise by 30 degrees.
img_ = mmcv.imrotate(img, 30)

# rotate the image counterclockwise by 90 degrees.
img_ = mmcv.imrotate(img, -90)

# rotate the image clockwise by 30 degrees, and rescale it by 1.5x at the same time.
img_ = mmcv.imrotate(img, 30, scale=1.5)

# rotate the image clockwise by 30 degrees, with (100, 100) as the center.
img_ = mmcv.imrotate(img, 30, center=(100, 100))

# rotate the image clockwise by 30 degrees, and extend the image size.
img_ = mmcv.imrotate(img, 30, auto_bound=True)
```

#### Flip

To flip an image, use `imflip`.

```python
img = mmcv.imread('tests/data/color.jpg')

# flip the image horizontally
mmcv.imflip(img)

# flip the image vertically
mmcv.imflip(img, direction='vertical')
```

#### Crop

`imcrop` can crop the image with one or more regions. Each region is represented by the upper left and lower right coordinates as (x1, y1, x2, y2).

```python
import mmcv
import numpy as np

img = mmcv.imread('tests/data/color.jpg')

# crop the region (10, 10, 100, 120)
bboxes = np.array([10, 10, 100, 120])
patch = mmcv.imcrop(img, bboxes)

# crop two regions (10, 10, 100, 120) and (0, 0, 50, 50)
bboxes = np.array([[10, 10, 100, 120], [0, 0, 50, 50]])
patches = mmcv.imcrop(img, bboxes)

# crop two regions, and rescale the patches by 1.2x
patches = mmcv.imcrop(img, bboxes, scale=1.2)
```

#### Padding

There are two methods, `impad` and `impad_to_multiple`, to pad an image to the
specific size with given values.

```python
img = mmcv.imread('tests/data/color.jpg')

# pad the image to (1000, 1200) with all zeros
img_ = mmcv.impad(img, shape=(1000, 1200), pad_val=0)

# pad the image to (1000, 1200) with different values for three channels.
img_ = mmcv.impad(img, shape=(1000, 1200), pad_val=(100, 50, 200))

# pad the image on left, right, top, bottom borders with all zeros
img_ = mmcv.impad(img, padding=(10, 20, 30, 40), pad_val=0)

# pad the image on left, right, top, bottom borders with different values
# for three channels.
img_ = mmcv.impad(img, padding=(10, 20, 30, 40), pad_val=(100, 50, 200))

# pad an image so that each edge is a multiple of some value.
img_ = mmcv.impad_to_multiple(img, 32)
```

================================================
FILE: docs/zh-cn/DLPanToolbox/Simulation.md
================================================
## Simulation

================================================
FILE: docs/zh-cn/Makefile
================================================
# Minimal makefile for Sphinx documentation
#

# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS    ?=
SPHINXBUILD   ?= sphinx-build
# NOTE(review): conf.py appears to sit next to this Makefile rather than in a
# "source" sub-directory (the English make.bat uses SOURCEDIR=. and
# BUILDDIR=_build) — confirm that these two paths are correct for this tree.
SOURCEDIR     = source
BUILDDIR      = build

# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

# "help" and "Makefile" are not files to be built.
.PHONY: help Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

================================================
FILE: docs/zh-cn/_static/css/readthedocs.css
================================================
/* Header logo: show logo-dlpan.png scaled to a fixed 85x40 px box. */
.header-logo {
    background-image: url("../image/logo-dlpan.png");
    background-size: 85px 40px;
    height: 40px;
    width: 85px;
}

/* Cells of tables with the colwidths-auto class each take half of the table width. */
table.colwidths-auto td {
    width: 50%
}

================================================
FILE: docs/zh-cn/_templates/classtemplate.rst
================================================
.. role:: hidden
    :class: hidden-section
.. currentmodule:: {{ module }}


{{ name | underline}}

.. autoclass:: {{ name }}
    :members:


..
  autogenerated from source/_templates/classtemplate.rst
  note it does not have :inherited-members:

================================================
FILE: docs/zh-cn/citation.md
================================================
## Cite PanCollection
If PanCollection is helpful for you, you are encouraged to cite the following paper:
```bibtex
@misc{PanCollection,
    author = {Xiao Wu and Liang-Jian Deng and Ran Ran},
    title = {"PanCollection" for Remote Sensing Pansharpening},
    url = {https://github.com/XiaoXiao-Woo/PanCollection/},
    year = {2022},
}
```
```bibtex
@ARTICLE{deng2022grsm,
author={L.-J. Deng and G. Vivone and M. E. Paoletti and G. Scarpa and J. He and Y. Zhang and J. Chanussot and A. Plaza},
journal={IEEE Geoscience and Remote Sensing Magazine},
title={Machine Learning in Pansharpening: A Benchmark, from Shallow to Deep Networks},
year={2022},
pages={2-38},
doi={10.1109/MGRS.2020.3019315}
}
```
For Chinese Paper,
```bibtex
@ARTICLE{dengjig2022,
	author={邓良剑，冉燃，吴潇，张添敬},
	journal={中国图象图形学报},
	title={遥感图像全色锐化的卷积神经网络方法研究进展},
 	year={2022},
  	volume={},
  	number={9},
  	pages={},
  	doi={10.11834/jig.220540}
   }
```
Also, the codes of traditional methods are from the "pansharpening toolbox for distribution", thus please cite the corresponding paper:
```bibtex
@ARTICLE{vivone2021grsm,
  author={Vivone, Gemine and Dalla Mura, Mauro and Garzelli, Andrea and Restaino, Rocco and Scarpa, Giuseppe and Ulfarsson, Magnus O. and   Alparone, Luciano and Chanussot, Jocelyn},
  journal={IEEE Geoscience and Remote Sensing Magazine}, 
  title={A New Benchmark Based on Recent Advances in Multispectral Pansharpening: Revisiting Pansharpening With Classical and Emerging Pansharpening Methods}, 
  year={2021},
  volume={9},
  number={1},
  pages={53-81},
  doi={10.1109/MGRS.2020.3019315}
}
```

================================================
FILE: docs/zh-cn/conf.py
================================================
#  GPL v3.0 License
#  Copyright (C) UESTC
#  All Rights Reserved
#  @Time    : 2023/9/21
#  @Author  : Xiao Wu
#  @reference:
#

#
# Configuration file for the Sphinx documentation builder.
#
# This file does only contain a selection of the most common options. For a
# full list see the documentation:
# http://www.sphinx-doc.org/en/master/config

# -- Path setup --------------------------------------------------------------

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import os
import sys

import pytorch_sphinx_theme
from sphinx.builders.html import StandaloneHTMLBuilder

sys.path.insert(0, os.path.abspath('../..'))

# version_file = '../../mmcv/version.py'
# with open(version_file) as f:
#     exec(compile(f.read(), version_file, 'exec'))
# __version__ = locals()['__version__']
__version__ = "0.3.6"

# -- Project information -----------------------------------------------------

project = 'pancollection'
copyright = '2023, UESTC'
author = 'Xiao Wu'

# The short X.Y version
version = __version__
# The full version, including alpha/beta/rc tags
release = __version__

# -- General configuration ---------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
#
# needs_sphinx = '1.0'

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.

extensions = [
    'sphinx.ext.autodoc',
    'sphinx.ext.autosummary',
    'sphinx.ext.intersphinx',
    'sphinx.ext.napoleon',
    'sphinx.ext.viewcode',
    'sphinx_markdown_tables',
    'myst_parser',
    'sphinx_copybutton',
]  # yapf: disable

myst_heading_anchors = 4

myst_enable_extensions = ['colon_fence']

# Configuration for intersphinx
intersphinx_mapping = {
    'python': ('https://docs.python.org/3', None),
    'numpy': ('https://numpy.org/doc/stable', None),
    'torch': ('https://pytorch.org/docs/stable/', None)
}

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
#
source_suffix = {
    '.rst': 'restructuredtext',
    '.md': 'markdown',
}

# The master toctree document.
master_doc = 'index'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
#
# Sphinx >= 5.0 reports ``language = None`` as an invalid configuration value
# and falls back to 'en', so the English default is spelled out explicitly.
language = 'en'

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'

# -- Options for HTML output -------------------------------------------------

# The theme to use for HTML and HTML Help pages.  See the documentation for
# a list of builtin themes.
#
# html_theme = 'sphinx_rtd_theme'
html_theme = 'pytorch_sphinx_theme'
html_theme_path = [pytorch_sphinx_theme.get_html_theme_path()]

# Theme options are theme-specific and customize the look and feel of a theme
# further.  For a list of options available for each theme, see the
# documentation.
#
html_theme_options = {
    'menu': [
        {
            'name': 'GitHub',
            'url': 'https://github.com/XiaoXiao-Woo/PanCollection'
        },
    ],
    # Specify the language of shared menu
    'menu_lang': 'en',
}

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
html_css_files = ['css/readthedocs.css']

# Custom sidebar templates, must be a dictionary that maps document names
# to template names.
#
# The default sidebars (for documents that don't match any pattern) are
# defined by theme itself.  Builtin themes are using these templates by
# default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
# 'searchbox.html']``.
#
# html_sidebars = {}

# -- Options for HTMLHelp output ---------------------------------------------

# Output file base name for HTML help builder.
htmlhelp_basename = 'pancollectiondoc'

# -- Options for LaTeX output ------------------------------------------------

latex_elements = {
    # The paper size ('letterpaper' or 'a4paper').
    #
    # 'papersize': 'letterpaper',

    # The font size ('10pt', '11pt' or '12pt').
    #
    # 'pointsize': '10pt',

    # Additional stuff for the LaTeX preamble.
    #
    # 'preamble': '',

    # Latex figure (float) alignment
    #
    # 'figure_align': 'htbp',
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
#  author, documentclass [howto, manual, or own class]).
latex_documents = [
    (master_doc, 'pancollection.tex', 'pancollection Documentation', 'PanCollection Contributors',
     'manual'),
]

# -- Options for manual page output ------------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [(master_doc, 'pancollection', 'pancollection Documentation', [author], 1)]

# -- Options for Texinfo output ----------------------------------------------

# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
#  dir menu entry, description, category)
texinfo_documents = [
    (master_doc, 'pancollection', 'pancollection Documentation', author, 'pancollection',
     'One line description of project.', 'Miscellaneous'),
]

# -- Options for Epub output -------------------------------------------------

# Bibliographic Dublin Core info.
epub_title = project

# The unique identifier of the text. This can be a ISBN number
# or the project homepage.
#
# epub_identifier = ''

# A unique identification for the text.
#
# epub_uid = ''

# A list of files that should not be packed into the epub file.
epub_exclude_files = ['search.html']

# set priority when building html
StandaloneHTMLBuilder.supported_image_types = [
    'image/svg+xml', 'image/gif', 'image/png', 'image/jpeg'
]
# -- Extension configuration -------------------------------------------------
# Ignore >>> when copying code
copybutton_prompt_text = r'>>> |\.\.\. '
copybutton_prompt_is_regexp = True

================================================
FILE: docs/zh-cn/docutils.conf
================================================
[html writers]
table_style: colwidths-auto

================================================
FILE: docs/zh-cn/faq.md
================================================
## Frequently Asked Questions

We list some common troubles faced by many users and their corresponding solutions here.
Feel free to enrich the list if you find any frequent issues and have ways to help others to solve them.

### Installation

- KeyError: "xxx: 'yyy is not in the zzz registry'"

  The registry mechanism will be triggered only when the file of the module is imported.
  So you need to import that file somewhere. More details can be found at [KeyError: "MaskRCNN: 'RefineRoIHead is not in the models registry'"](https://github.com/open-mmlab/mmdetection/issues/5974).

- "No module named 'mmcv.ops'"; "No module named 'mmcv.\_ext'"

  1. Uninstall existing mmcv in the environment using `pip uninstall mmcv`
  2. Install mmcv-full following the [installation instruction](https://mmcv.readthedocs.io/en/latest/get_started/installation.html) or [Build MMCV from source](https://mmcv.readthedocs.io/en/latest/get_started/build.html)

- "invalid device function" or "no kernel image is available for execution"

  1. Check the CUDA compute capability of your GPU
  2. Run `python mmdet/utils/collect_env.py` to check whether PyTorch, torchvision, and MMCV are built for the correct GPU architecture. You may need to set `TORCH_CUDA_ARCH_LIST` to reinstall MMCV. The compatibility issue could happen when using old GPUs, e.g., Tesla K80 (3.7) on colab.
  3. Check whether the running environment is the same as that when mmcv/mmdet is compiled. For example, you may compile mmcv using CUDA 10.0 but run it on CUDA 9.0 environments

- "undefined symbol" or "cannot open xxx.so"

  1. If those symbols are CUDA/C++ symbols (e.g., libcudart.so or GLIBCXX), check
     whether the CUDA/GCC runtimes are the same as those used for compiling mmcv
  2. If those symbols are Pytorch symbols (e.g., symbols containing caffe, aten, and TH), check whether the Pytorch version is the same as that used for compiling mmcv
  3. Run `python mmdet/utils/collect_env.py` to check whether PyTorch, torchvision, and MMCV are built by and running on the same environment

- "RuntimeError: CUDA error: invalid configuration argument"

  This error may be caused by the poor performance of GPU. Try to decrease the value of [THREADS_PER_BLOCK](https://github.com/open-mmlab/mmcv/blob/cac22f8cf5a904477e3b5461b1cc36856c2793da/mmcv/ops/csrc/common_cuda_helper.hpp#L10)
  and recompile mmcv.

- "RuntimeError: nms is not compiled with GPU support"

  This error is because your CUDA environment is not installed correctly.
  You may try to re-install your CUDA environment and then delete the build/ folder before re-compile mmcv.

- "Segmentation fault"

  1. Check your GCC version and use GCC >= 5.4. This is usually caused by the incompatibility between PyTorch and the environment (e.g., GCC \< 4.9 for PyTorch). We also recommend that users avoid GCC 5.5, because many users have reported that GCC 5.5 causes "segmentation fault" and simply changing it to GCC 5.4 could solve the problem
  2. Check whether PyTorch is correctly installed and could use CUDA op, e.g. type the following command in your terminal and see whether they could correctly output results
     ```shell
     python -c 'import torch; print(torch.cuda.is_available())'
     ```
  3. If PyTorch is correctly installed, check whether MMCV is correctly installed. If MMCV is correctly installed, then there will be no issue of the command
     ```shell
     python -c 'import mmcv; import mmcv.ops'
     ```
  4. If MMCV and PyTorch are correctly installed, you can use `ipdb` to set breakpoints or directly add `print` to debug and see which part leads to the `segmentation fault`

- "libtorch_cuda_cu.so: cannot open shared object file"

  `mmcv-full` depends on the shared object but it cannot be found. We can check whether the object exists in `~/miniconda3/envs/{environment-name}/lib/python3.7/site-packages/torch/lib` or try to re-install PyTorch.

- "fatal error C1189: #error:  -- unsupported Microsoft Visual Studio version!"

  If you are building mmcv-full on Windows and the version of CUDA is 9.2, you will probably encounter the error `"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.2\include\crt/host_config.h(133): fatal error C1189: #error:  -- unsupported Microsoft Visual Studio version! Only the versions 2012, 2013, 2015 and 2017 are supported!"`, in which case you can use a lower version of Microsoft Visual Studio like vs2017.

- "error: member "torch::jit::detail::ModulePolicy::all_slots" may not be initialized"

  If your version of PyTorch is 1.5.0 and you are building mmcv-full on Windows, you will probably encounter the error `- torch/csrc/jit/api/module.h(474): error: member "torch::jit::detail::ModulePolicy::all_slots" may not be initialized`. The way to solve the error is to replace all the `static constexpr bool all_slots = false;` with `static bool all_slots = false;` at this file `https://github.com/pytorch/pytorch/blob/v1.5.0/torch/csrc/jit/api/module.h`. More details can be found at [member "torch::jit::detail::AttributePolicy::all_slots" may not be initialized](https://github.com/pytorch/pytorch/issues/39394).

- "error: a member with an in-class initializer must be const"

  If your version of PyTorch is 1.6.0 and you are building mmcv-full on Windows, you will probably encounter the error `"- torch/include\torch/csrc/jit/api/module.h(483): error: a member with an in-class initializer must be const"`. The way to solve the error is to replace all the `CONSTEXPR_EXCEPT_WIN_CUDA ` with `const` at `torch/include\torch/csrc/jit/api/module.h`. More details can be found at [Ninja: build stopped: subcommand failed](https://github.com/open-mmlab/mmcv/issues/575).

- "error: member "torch::jit::ProfileOptionalOp::Kind" may not be initialized"

  If your version of PyTorch is 1.7.0 and you are building mmcv-full on Windows, you will probably encounter the error `torch/include\torch/csrc/jit/ir/ir.h(1347): error: member "torch::jit::ProfileOptionalOp::Kind" may not be initialized`. The way to solve the error needs to modify several local files of PyTorch:

  - delete `static constexpr Symbol Kind = ::c10::prim::profile;` and `static constexpr Symbol Kind = ::c10::prim::profile_optional;` at `torch/include\torch/csrc/jit/ir/ir.h`
  - replace `explicit operator type&() { return *(this->value); }` with `explicit operator type&() { return *((type*)this->value); }` at `torch\include\pybind11\cast.h`
  - replace all the `CONSTEXPR_EXCEPT_WIN_CUDA` with `const` at `torch/include\torch/csrc/jit/api/module.h`

  More details can be found at [Ensure default extra_compile_args](https://github.com/pytorch/pytorch/pull/45956).

- Compatibility issue between MMCV and MMDetection; "ConvWS is already registered in conv layer"

  Please install the correct version of MMCV for the version of your MMDetection following the [installation instruction](https://mmdetection.readthedocs.io/en/latest/get_started.html#installation).

### Usage

- "RuntimeError: Expected to have finished reduction in the prior iteration before starting a new one"

  1. This error indicates that your module has parameters that were not used in producing loss. This phenomenon may be caused by running different branches in your code in DDP mode. More details at [Expected to have finished reduction in the prior iteration before starting a new one](https://github.com/pytorch/pytorch/issues/55582).
  2. You can set `find_unused_parameters = True` in the config to solve the above problems or find those unused parameters manually

- "RuntimeError: Trying to backward through the graph a second time"

  `GradientCumulativeOptimizerHook` and `OptimizerHook` are both set, which causes `loss.backward()` to be called twice, so a `RuntimeError` is raised. Only one of them can be used. More details at [Trying to backward through the graph a second time](https://github.com/open-mmlab/mmcv/issues/1379).

================================================
FILE: docs/zh-cn/get_started/Installation.md
================================================
## Installation

There are two versions of MMCV:

- **mmcv**: comprehensive, with full features and various CUDA ops out of box. It takes longer time to build.
- **mmcv-lite**: lite, without CUDA ops but all other features, similar to mmcv\<1.0.0. It is useful when you do not need those CUDA ops.

```{warning}
Do not install both versions in the same environment, otherwise you may encounter errors like `ModuleNotFound`. You need to uninstall one before installing the other. **Installing the full version is highly recommended if CUDA is available.**
```

### Install mmcv

Before installing mmcv, make sure that PyTorch has been successfully installed following the [PyTorch official installation guide](https://pytorch.org/get-started/locally/#start-locally). This can be verified using the following command

```bash
python -c 'import torch;print(torch.__version__)'
```

If version information is output, then PyTorch is installed.

#### Install with mim (recommended)

[mim](https://github.com/open-mmlab/mim) is the package management tool for the OpenMMLab projects, which makes it easy to install mmcv

```bash
pip install -U openmim
mim install mmcv
```

If you find that the above installation command does not use a pre-built package ending with `.whl` but a source package ending with `.tar.gz`, you may not have a pre-built package corresponding to the PyTorch or CUDA or mmcv version, in which case you can [build mmcv from source](build.md).

<details>
<summary>Installation log using pre-built packages</summary>

Looking in links: https://download.openmmlab.com/mmcv/dist/cu102/torch1.8.0/index.html<br />
Collecting mmcv<br />
<b>Downloading https://download.openmmlab.com/mmcv/dist/cu102/torch1.8.0/mmcv-2.0.0-cp38-cp38-manylinux1_x86_64.whl</b>

</details>

<details>
<summary>Installation log using source packages</summary>

Looking in links: https://download.openmmlab.com/mmcv/dist/cu102/torch1.8.0/index.html<br />
Collecting mmcv==2.0.0<br />
<b>Downloading mmcv-2.0.0.tar.gz</b>

</details>

To install a specific version of mmcv, for example, mmcv version 2.0.0, you can use the following command

```bash
mim install mmcv==2.0.0
```

:::{note}
If you would like to use `opencv-python-headless` instead of `opencv-python`,
e.g., in a minimum container environment or servers without GUI,
you can first install it before installing MMCV to skip the installation of `opencv-python`.

Alternatively, if it takes too long to install a dependency library, you can specify the pypi source

```bash
mim install mmcv -i https://pypi.tuna.tsinghua.edu.cn/simple
```

:::

You can run [check_installation.py](https://github.com/open-mmlab/mmcv/blob/main/.dev_scripts/check_installation.py) to check the installation of mmcv after running the installation commands.

#### Install with pip

Use the following command to check the version of CUDA and PyTorch

```bash
python -c 'import torch;print(torch.__version__);print(torch.version.cuda)'
```

Select the appropriate installation command depending on the type of system, CUDA version, PyTorch version, and MMCV version

<html>
<body>
    <style>
      select {
          z-index: 1000;
          position: absolute;
          top: 10px;
          width: 6.7rem;
      }
      #select-container {
          position: relative;
          height: 30px;
      }
      #select-cmd {
          background-color: #f5f6f7;
          font-size: 14px;
          margin-top: 20px;
      }
      /* Space each select 1.3rem apart */
      #select-os {
          /* left: 1.375rem; */
          left: 0;
      }
      #select-cuda {
          /* left: 9.375rem;    9.375 = 1.375 + 6.7 + 1.3 */
          left: 8rem;
      }
      #select-torch {
          /* left: 17.375rem;    17.375 = 9.375 + 6.7 + 1.3 */
          left: 16rem;
      }
      #select-mmcv {
          /* left: 25.375rem;    25.375 = 17.375 + 6.7 + 1.3 */
          left: 24rem;
      }
    </style>
    <div id="select-container">
        <select
            onmousedown="handleSelectMouseDown(this.id)"
            onblur="handleSelectBlur(this.id)"
            onchange="changeOS(this.value)"
            id="select-os">
        </select>
        <select
            onmousedown="handleSelectMouseDown(this.id)"
            onblur="handleSelectBlur(this.id)"
            onchange="changeCUDA(this.value)"
            id="select-cuda">
        </select>
        <select
            onmousedown="handleSelectMouseDown(this.id)"
            onblur="handleSelectBlur(this.id)"
            onchange="changeTorch(this.value)"
            id="select-torch">
        </select>
        <select
            onmousedown="handleSelectMouseDown(this.id)"
            onblur="handleSelectBlur(this.id)"
            onchange="changeMMCV(this.value)"
            id="select-mmcv">
        </select>
    </div>
    <pre id="select-cmd"></pre>
</body>
<script>
    // Currently selected OS / CUDA / PyTorch / mmcv values, shared by the handlers in this script.
    let osVal, cudaVal, torchVal, mmcvVal;
    // The four functions below are the selects' onchange handlers: each records
    // the picked value and then calls change() with the id of the select that changed.
    function changeMMCV(val) {
        mmcvVal = val;
        change("select-mmcv");
    }
    function changeTorch(val) {
        torchVal = val;
        change("select-torch");
    }
    function changeCUDA(val) {
        cudaVal = val;
        change("select-cuda");
    }
    function changeOS(val) {
        osVal = val;
        change("select-os");
    }
    // onmousedown handler: turn a select with 9 or more options into a
    // 10-row list box. Unknown ids and shorter selects are left untouched.
    function handleSelectMouseDown(id) {
        const select = document.getElementById(id);
        if (!select) return;
        const optionCount = select.options?.length;
        if (!(optionCount >= 9)) return;
        select.size = 10;
        select.style.zIndex = 100;
    }
    // Collapse every <select> on the page back to a single visible row.
    function handleSelectClick() {
        const allSelects = Array.from(document.getElementsByTagName("select"));
        for (const item of allSelects) {
            item.size = 1;
        }
    }
    // onblur handler: collapse the select with the given id and reset its
    // z-index; when no such element exists, collapse every select instead.
    function handleSelectBlur(id) {
        const select = document.getElementById(id);
        if (select) {
            select.size = 1;
            select.style.zIndex = 1;
        } else {
            handleSelectClick();
        }
    }
    // Rebuild the pip command shown in #select-cmd from the shared
    // cudaVal / torchVal / mmcvVal selections. Writes only the element's text.
    function changeCmd() {
        const cmd = document.getElementById("select-cmd");
        if (!cmd) return;
        // A selection is undefined when the manifest has no entry for the current
        // combination; show nothing rather than throwing on .split()/.substring().
        if (typeof cudaVal !== "string" || typeof torchVal !== "string" || mmcvVal == null) {
            cmd.textContent = "";
            return;
        }
        let cmdString = "pip install mmcv=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/{cu_version}/{torch_version}/index.html";
        // e.g: pip install mmcv==2.0.0rc1 -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.9/index.html
        let cudaVersion;
        if (cudaVal === "cpu" || cudaVal === "mps") {
            // CPU and MPS selections both use the "cpu" index directory.
            cudaVersion = "cpu";
        } else {
            // "11.3" -> "cu113"
            cudaVersion = `cu${cudaVal.split(".").join("")}`;
        }
        // Drop the last two characters of the torch value (e.g. "1.8.x" -> "1.8").
        const torchVersion = `torch${torchVal.substring(0, torchVal.length - 2)}`;
        cmdString = cmdString.replace("{cu_version}", cudaVersion).replace("{mmcv_version}", mmcvVal).replace("{torch_version}", torchVersion);
        cmd.textContent = cmdString;
    }
    // Return the distinct elements of `arr` in first-seen order.
    // Anything that is not an array yields an empty array.
    function unique(arr) {
        return Array.isArray(arr) ? Array.from(new Set(arr)) : [];
    }
    // Build a DocumentFragment holding one <option> per entry of `data`.
    // The label prefix is the second dash-separated segment of `id`
    // ("select-cuda" -> "cuda"). Options of the "os" select and CPU/MPS
    // entries show the bare value; all others show "<prefix> <value>".
    function genOptionFragment(data, id) {
        const prefix = id.includes("-") ? id.split("-")[1] : id;
        const options = new DocumentFragment();
        for (const value of data) {
            const optionEl = document.createElement("option");
            const bare = prefix === "os" || ["CPU", "MPS"].includes(value.toUpperCase());
            optionEl.textContent = bare ? `${value}` : `${prefix} ${value}`;
            optionEl.value = value;
            optionEl.addEventListener('click', handleSelectClick);
            options.appendChild(optionEl);
        }
        return options;
    }
    // Replace the children of the select `id` with freshly generated options
    // for `data`. The fragment is built first; a missing element is ignored.
    function findAndAppend(data, id) {
        const options = genOptionFragment(data, id);
        document.getElementById(id)?.replaceChildren(options);
    }
    // Refresh the selects that depend on the one that just changed, then
    // rebuild the command. The list below is ordered from most dependent to
    // least: a change at position i repopulates every select at a lower position.
    function change(id) {
        const cascade = ["select-mmcv", "select-torch", "select-cuda", "select-os"];
        const pos = cascade.indexOf(id);
        if (pos === -1) return;
        const entries = version[osVal];
        if (pos >= 3) {
            // OS changed: offer the distinct CUDA values of that OS.
            const cudaOptions = unique(entries.map(entry => entry.cuda));
            cudaVal = cudaOptions[0];
            findAndAppend(cudaOptions, "select-cuda");
        }
        if (pos >= 2) {
            // CUDA changed (or was reset above): offer the torch values listed for it.
            const torchOptions = entries.filter(entry => entry.cuda === cudaVal).map(entry => entry.torch);
            torchVal = torchOptions[0];
            findAndAppend(torchOptions, "select-torch");
        }
        if (pos >= 1) {
            // Torch changed (or was reset above): the last matching entry supplies the mmcv list.
            const matches = entries.filter(entry => entry.cuda === cudaVal && entry.torch === torchVal);
            const mmcvOptions = matches.length > 0 ? matches[matches.length - 1].mmcv : [];
            mmcvVal = mmcvOptions[0];
            findAndAppend(mmcvOptions, "select-mmcv");
        }
        changeCmd();
    }
    // Populate the selects from window.version and render the first command.
    // Expects window.version to be an object keyed by OS name.
    function init() {
        // Any click on the page collapses expanded selects. handleSelectBlur used
        // to be registered here; it received the click Event as its "id", found no
        // element and fell through to handleSelectClick, so register that directly.
        document.addEventListener("click", handleSelectClick);
        const os = Object.keys(window.version);
        // An empty manifest leaves nothing to select; stop before change() dereferences it.
        if (os.length === 0) return;
        osVal = os[0];
        findAndAppend(os, "select-os");
        // change() repopulates the dependent selects and ends by calling changeCmd(),
        // so no separate changeCmd() call is needed.
        change("select-os");
    }
    // After the page has loaded, fetch the version manifest and build the selectors.
    // addEventListener is used instead of assigning window.onload so that a load
    // handler installed by another script is not overwritten.
    window.addEventListener("load", function () {
        const url = "../_static/version.json"
        const request = new XMLHttpRequest();
        request.open("get", url);
        // Attach the handler before send() so it is in place when the response arrives.
        request.onload = function () {
            if (request.status !== 200) return;
            let data;
            try {
                data = JSON.parse(request.responseText);
            } catch (err) {
                // A malformed manifest leaves the selectors empty instead of raising an uncaught error.
                console.error("Failed to parse " + url, err);
                return;
            }
            window.version = data;
            init();
        };
        request.send(null);
    });
</script>
</html>

If you do not find a corresponding version in the dropdown box above, you probably do not have a pre-built package corresponding to the PyTorch or CUDA or mmcv version, at which point you can [build mmcv from source](build.md).

:::{note}
mmcv is only compiled on PyTorch 1.x.0 because the compatibility
usually holds between 1.x.0 and 1.x.1. If your PyTorch version is 1.x.1, you
can install mmcv compiled with PyTorch 1.x.0 and it usually works well.
For example, if your PyTorch version is 1.8.1, you can feel free to choose 1.8.x.
:::

:::{note}
If you would like to use `opencv-python-headless` instead of `opencv-python`,
e.g., in a minimum container environment or servers without GUI,
you can first install it before installing MMCV to skip the installation of `opencv-python`.

Alternatively, if it takes too long to install a dependency library, you can specify the pypi source

```bash
mim install mmcv -i https://pypi.tuna.tsinghua.edu.cn/simple
```

:::

You can run [check_installation.py](https://github.com/open-mmlab/mmcv/blob/main/.dev_scripts/check_installation.py) to check the installation of mmcv after running the installation commands.

#### Using mmcv with Docker

Build with local repository

```bash
git clone https://github.com/open-mmlab/mmcv.git && cd mmcv
docker build -t mmcv -f docker/release/Dockerfile .
```

Or build with remote repository

```bash
docker build -t mmcv https://github.com/open-mmlab/mmcv.git#main:docker/release
```

The [Dockerfile](release/Dockerfile) installs the latest released version of mmcv by default, but you can pass the `MMCV` build argument to install a specific version.

```bash
docker image build -t mmcv -f docker/release/Dockerfile --build-arg MMCV=2.0.0 .
```

If you also want to use other versions of PyTorch and CUDA, you can also pass them when building docker images.

An example to build an image with PyTorch 1.11 and CUDA 11.3.

```bash
docker build -t mmcv -f docker/release/Dockerfile \
    --build-arg PYTORCH=1.11.0 \
    --build-arg CUDA=11.3 \
    --build-arg CUDNN=8 \
    --build-arg MMCV=2.0.0 .
```

More available versions of PyTorch and CUDA can be found at [dockerhub/pytorch](https://hub.docker.com/r/pytorch/pytorch/tags).

### Install mmcv-lite

If you need to use PyTorch-related modules, make sure PyTorch has been successfully installed in your environment by referring to the [PyTorch official installation guide](https://github.com/pytorch/pytorch#installation).

```bash
pip install mmcv-lite
```

================================================
FILE: docs/zh-cn/get_started/Introduction.md
================================================
## Introduction

MMCV is a foundational library for computer vision research and provides the following functionalities.

- [Image/Video processing](../understand_mmcv/data_process.md)
- [Image and annotation visualization](../understand_mmcv/visualization.md)
- [Image transformation](../understand_mmcv/data_transform.md)
- [Various CNN architectures](../understand_mmcv/cnn.md)
- [High-quality implementation of common CUDA ops](../understand_mmcv/ops.md)

It supports the following systems:

- Linux
- Windows
- macOS

It supports many research projects as below:

- [MMClassification](https://github.com/open-mmlab/mmclassification): OpenMMLab image classification toolbox and benchmark.
- [MMDetection](https://github.com/open-mmlab/mmdetection): OpenMMLab detection toolbox and benchmark.
- [MMDetection3D](https://github.com/open-mmlab/mmdetection3d): OpenMMLab's next-generation platform for general 3D object detection.
- [MMRotate](https://github.com/open-mmlab/mmrotate): OpenMMLab rotated object detection toolbox and benchmark.
- [MMYOLO](https://github.com/open-mmlab/mmyolo): OpenMMLab YOLO series toolbox and benchmark.
- [MMSegmentation](https://github.com/open-mmlab/mmsegmentation): OpenMMLab semantic segmentation toolbox and benchmark.
- [MMOCR](https://github.com/open-mmlab/mmocr): OpenMMLab text detection, recognition, and understanding toolbox.
- [MMPose](https://github.com/open-mmlab/mmpose): OpenMMLab pose estimation toolbox and benchmark.
- [MMHuman3D](https://github.com/open-mmlab/mmhuman3d): OpenMMLab 3D human parametric model toolbox and benchmark.
- [MMSelfSup](https://github.com/open-mmlab/mmselfsup): OpenMMLab self-supervised learning toolbox and benchmark.
- [MMRazor](https://github.com/open-mmlab/mmrazor): OpenMMLab model compression toolbox and benchmark.
- [MMFewShot](https://github.com/open-mmlab/mmfewshot): OpenMMLab fewshot learning toolbox and benchmark.
- [MMAction2](https://github.com/open-mmlab/mmaction2): OpenMMLab's next-generation action understanding toolbox and benchmark.
- [MMTracking](https://github.com/open-mmlab/mmtracking): OpenMMLab video perception toolbox and benchmark.
- [MMFlow](https://github.com/open-mmlab/mmflow): OpenMMLab optical flow toolbox and benchmark.
- [MMEditing](https://github.com/open-mmlab/mmediting): OpenMMLab image and video editing toolbox.
- [MMGeneration](https://github.com/open-mmlab/mmgeneration): OpenMMLab image and video generative models toolbox.
- [MMDeploy](https://github.com/open-mmlab/mmdeploy): OpenMMLab model deployment framework.

================================================
FILE: docs/zh-cn/index.rst
================================================
Welcome to PanCollection's documentation!
=========================================

You can switch between Chinese and English documents in the lower-left corner of the layout.

.. toctree::
   :maxdepth: 2
   :caption: Get Started

   get_started/Introduction.md
   get_started/Installation.md

.. toctree::
   :maxdepth: 2
   :caption: PanCollection

   PanCollection/PreProcess.md
   PanCollection/Evaluation.md

.. toctree::
   :caption: Switch Language

   switch_language.md

.. toctree::
   :maxdepth: 2
   :caption: Related Toolbox

   related.md

.. toctree::

   faq.md

Indices and tables
==================

* :ref:`genindex`
* :ref:`search`

================================================
FILE: docs/zh-cn/make.bat
================================================
@ECHO OFF

REM Run from the directory that contains this script; the matching popd is at :end.
pushd %~dp0

REM Command file for Sphinx documentation

REM Use the caller's SPHINXBUILD if set, otherwise the sphinx-build found on PATH.
if "%SPHINXBUILD%" == "" (
	set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=.
set BUILDDIR=_build

REM No target given on the command line: show the Sphinx help listing.
if "%1" == "" goto help

REM Probe run with all output discarded; the errorlevel 9009 test below treats
REM that code as "sphinx-build was not found" and prints install guidance.
%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
	echo.
	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
	echo.installed, then set the SPHINXBUILD environment variable to point
	echo.to the full path of the 'sphinx-build' executable. Alternatively you
	echo.may add the Sphinx directory to PATH.
	echo.
	echo.If you don't have Sphinx installed, grab it from
	echo.http://sphinx-doc.org/
	exit /b 1
)

REM Hand the requested target (%1) to sphinx-build's make mode (-M).
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
goto end

:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%

:end
popd

================================================
FILE: docs/zh-cn/related.md
================================================


================================================
FILE: docs/zh-cn/switch_language.md
================================================
## <a href='https://pancollection.readthedocs.io/en/latest/'>English</a>

## <a href='https://pancollection.readthedocs.io/zh_CN/latest/'>简体中文</a>